The goal of this project is to determine whether face-selective neurons in TopoNets are necessary and sufficient for accurate face recognition.

In [None]:

!pip install git+https://github.com/toponets/toponets.git #download pretrained toponets checkpoint


In [None]:
import toponets
# Instantiate the two TopoNets backbones used in this notebook; each call is
# handed the checkpoint file name that matches its tau value.
topo_resnet18 = toponets.resnet18(
    tau=10.0,
    checkpoint_path="resnet18_tau_10.pt",
)
topo_resnet50 = toponets.resnet50(
    tau=30.0,
    checkpoint_path="resnet50_tau_30.pt",
)


In [None]:
!pip install datasets huggingface_hub



In [None]:
from datasets import load_dataset
# Load the CelebA dataset from the Hugging Face Hub (repo "flwrlabs/celeba").
# NOTE(review): the same load_dataset call is repeated in later cells of this notebook.
dataset = load_dataset("flwrlabs/celeba")


## **Imports**

In [None]:
import torch
import numpy as np
from datasets import load_dataset
from torchvision import transforms
from PIL import Image
import toponets
from sklearn.decomposition import PCA

# DataSet+Preprocessing

In [None]:


# CelebA face images, fetched through the Hugging Face `datasets` loader.
dataset = load_dataset("flwrlabs/celeba")

# Preprocessing pipeline: fixed 224x224 resize for the ResNet input, PIL -> tensor
# conversion, then per-channel normalisation with the ImageNet mean/std values.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)






# Load Pretrained TopoNets ResNet18 Model

In [None]:
# Build the TopoNets ResNet18 (tau=10.0) and load its weights from a local checkpoint file.
checkpoint_path = "resnet18_tau_10.pt"
topo_resnet18 = toponets.resnet18(tau=10.0)
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a trusted
# source (weights_only=True may be usable if the file is a plain state_dict — TODO confirm).
checkpoint = torch.load(checkpoint_path, map_location=torch.device("cpu"))
topo_resnet18.load_state_dict(checkpoint)  # assumes the checkpoint is a bare state_dict — TODO confirm

# Put the model in eval mode before activations are recorded.
topo_resnet18.eval()

# Register a Hook to Capture Activations

In [None]:
class ForwardHook:
    """Records the most recent input and output of a module's forward pass."""

    def __init__(self, module):
        """
        Attach to ``module`` through ``register_forward_hook``.

        The removable handle is kept on ``self.hook``; ``self.input`` and
        ``self.output`` remain ``None`` until the module has been run.
        """
        self.input, self.output = None, None
        self.hook = module.register_forward_hook(self.hook_fn)

    def hook_fn(self, module, input, output):
        """Forward-hook callback: remember the latest input and output as given."""
        self.input, self.output = input, output

    def close(self):
        """Remove the hook from the module it was registered on."""
        self.hook.remove()
# Record the output of `layer4` on every forward pass of the model.
# NOTE(review): this hook does not appear to be closed before `hook` is re-bound in a later cell.
hook = ForwardHook(topo_resnet18.layer4)

# Process Multiple Images & Collect Activations

In [None]:
!pip install einops

In [None]:
from einops import reduce

In [None]:
# Run the first `num_images` CelebA training images through the network one at a
# time and keep, for each image, the spatial mean of every hooked channel.
num_images = 1000  # Ensure enough samples for PCA
activations_list = []  # one per-channel activation vector per image

for sample_idx in range(num_images):
    image = dataset["train"][sample_idx]["image"]
    # The loader may hand back a NumPy array instead of a PIL image; normalise to PIL.
    image = image if isinstance(image, Image.Image) else Image.fromarray(image)
    input_tensor = transform(image).unsqueeze(0)  # preprocessing + batch dimension

    # The forward pass fires the hook, which stores the layer output on `hook`.
    with torch.no_grad():
        _ = topo_resnet18(input_tensor)

    # Average over height and width -> one value per channel.
    mean_along_hw = reduce(
        hook.output.numpy(),
        "batch channels height width -> batch channels",
        reduction="mean",
    )
    activations_list.append(mean_along_hw.squeeze(0))

# Stack the per-image vectors into a single 2-D array (images x channels).
flattened_activations = np.array(activations_list)
print(flattened_activations)

print("Activations Shape for PCA:", flattened_activations.shape)  # num_images, feature_size

# PCA

In [None]:
# PCA over the (num_images, channels) activation matrix.
# Purpose: see how the variance of the channel responses to face images is
# distributed, i.e. how many principal components are needed to account for it.
num_samples, num_features = flattened_activations.shape
if num_samples < 2:
    raise ValueError(f"PCA requires at least 2 samples, but got {num_samples}.")

# n_components cannot exceed either dimension of the data matrix.
n_components = min(512, num_samples, num_features)
pca = PCA(n_components=n_components)

# Project the activations onto the principal axes -> (num_samples, n_components).
principal_components = pca.fit_transform(flattened_activations)

import matplotlib.pyplot as plt

# Explained variance ratio: fraction of the total variance carried by each component.
print(f"Using {n_components} principal components.")
print("Explained Variance Ratios:", pca.explained_variance_ratio_, "\n")
cumulative_sum = np.cumsum(pca.explained_variance_ratio_)
print("Cumulative Explained Variance Ratios:", cumulative_sum)

# cumulative_sum[k] is the variance explained by the first k + 1 components, so the
# x values must start at 1 to match the "Number of Components" axis label.
component_counts = np.arange(1, len(cumulative_sum) + 1)
plt.plot(component_counts, cumulative_sum)
plt.xlabel("Number of Components")
plt.ylabel("Cumulative Explained Variance")
plt.title("Cumulative Explained Variance vs. Number of Components")
plt.show()



# Output (cumulative explained variance for different numbers of components)
# With 20 components: PC1 captured 4.86%, PC2 captured 3.16%, ...
# Adding up all 20 = 32.9%
#
# Only a small portion of the total variance is retained --> meaning many neurons contribute to the face responses.
# This suggests that face recognition is not concentrated in just a few neurons. Instead, many neurons contribute to recognizing faces.
#
# With 50 components:
# PC1 = 4.86%, PC2 = 3.16%, PC3 = 3%, PC4 = 2.9%, ..., PC50 = 0.4%
# Adding up all 50 = 54%
#
# With 100 components:
# PC1 = 4.86%, ..., PC100 = 0.25%
# Adding up all 100 = 71%
#
# Still 29% of the variance remains --> indicating that face recognition is not dominated by just a few neurons but involves a broad set of neural activations.


In [None]:


import matplotlib.pyplot as plt
from torchvision import transforms
from datasets import load_dataset
from einops import reduce


# Rebuild the TopoNets ResNet18 (tau=10.0) and load its weights from the local checkpoint.
checkpoint_path = "resnet18_tau_10.pt"
topo_resnet18 = toponets.resnet18(tau=10.0)
# NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted source.
checkpoint = torch.load(checkpoint_path, map_location=torch.device("cpu"))
topo_resnet18.load_state_dict(checkpoint)
topo_resnet18.eval()


# Preprocessing: 224x224 resize, tensor conversion, ImageNet mean/std normalisation.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
# CelebA from the Hugging Face Hub (same call as in the earlier cells).
dataset = load_dataset("flwrlabs/celeba")

num_images = 1000  # change to use more or fewer images


class ForwardHook:
    """Forward hook that records a module's output and can zero ("lesion") channels.

    Args:
        module: the module to attach to via ``register_forward_hook``.
        lesion_channels: optional channel indices (dimension 1 of the output) to
            set to zero; ``None`` means the output is only recorded.

    Attributes:
        hook: removable handle returned by ``register_forward_hook``.
        lesion_channels: the indices passed to the constructor.
        output: detached copy of the (possibly lesioned) output of the last
            forward pass, or ``None`` before the first pass.
    """

    def __init__(self, module, lesion_channels=None):
        self.hook = module.register_forward_hook(self.hook_fn)
        self.lesion_channels = lesion_channels
        self.output = None

    def hook_fn(self, module, input, output):
        """Record the output; when lesioning, return a zeroed copy as the new output."""
        if self.lesion_channels is None:
            self.output = output.detach()
            return None

        # Lesion a clone rather than the module's own output tensor: writing into
        # the original in place invalidates tensors autograd saved for backward
        # (e.g. a ReLU result). The returned tensor replaces the module output.
        lesioned = output.clone()
        lesioned[:, self.lesion_channels, :, :] = 0
        self.output = lesioned.detach()
        return lesioned

    def close(self):
        """Remove the hook from the module."""
        self.hook.remove()


def collect_activations(model, dataset, transform, hook, num_images):
    """Return the spatially averaged hooked activations for the first images.

    For each of the first ``num_images`` entries of ``dataset["train"]`` the image
    is preprocessed with ``transform``, passed through ``model`` as a batch of one,
    and the tensor stored on ``hook.output`` is averaged over height and width.

    Returns:
        NumPy array with one row per image and one column per channel.
    """
    per_image = []
    for idx in range(num_images):
        img = dataset["train"][idx]["image"]
        if not isinstance(img, Image.Image):
            img = Image.fromarray(img)
        batch = transform(img).unsqueeze(0)

        # The forward pass is only needed for its side effect on the hook.
        with torch.no_grad():
            model(batch)

        pooled = reduce(
            hook.output.numpy(),
            "batch channels height width -> batch channels",
            reduction="mean",
        )
        per_image.append(pooled.squeeze(0))
    return np.array(per_image)


# Baseline pass: record layer4 activations with no channels lesioned.
baseline_hook = ForwardHook(topo_resnet18.layer4)
baseline_activations = collect_activations(
    topo_resnet18, dataset, transform, baseline_hook, num_images
)
baseline_hook.close()


# Channels whose mean response over the face images lies above the 95th percentile.
# NOTE(review): the ranking uses face images only; no non-face contrast is involved here.
face_means = baseline_activations.mean(axis=0)
threshold = np.percentile(face_means, 95)
face_selective_channels = np.flatnonzero(face_means > threshold)
print("Face-selective channels:", face_selective_channels)


# Lesioned pass: same images, with the selected channels zeroed by the hook.
lesion_hook = ForwardHook(topo_resnet18.layer4, lesion_channels=face_selective_channels)
lesioned_activations = collect_activations(
    topo_resnet18, dataset, transform, lesion_hook, num_images
)
lesion_hook.close()


def run_pca_and_plot(data, label):
    """Fit a PCA on ``data`` and draw its cumulative explained-variance curve.

    Args:
        data: 2-D array (samples x features).
        label: legend label for the plotted curve.

    Returns:
        1-D array of cumulative explained-variance ratios; entry ``k`` covers the
        first ``k + 1`` components.
    """
    pca = PCA(n_components=min(512, *data.shape))
    pca.fit(data)  # only the fitted variance ratios are needed, not the projection
    cum_var = np.cumsum(pca.explained_variance_ratio_)
    # Start the x values at 1 so the curve lines up with a "number of components" axis.
    component_counts = np.arange(1, len(cum_var) + 1)
    plt.plot(component_counts, cum_var, label=label)
    return cum_var

# Overlay the cumulative explained-variance curves of the baseline and lesioned runs.
fig, ax = plt.subplots(figsize=(8, 5))
run_pca_and_plot(baseline_activations, label="Baseline")
run_pca_and_plot(lesioned_activations, label="Lesioned (Face-Selective Zeroed)")
ax.set_xlabel("Number of PCA Components")
ax.set_ylabel("Cumulative Explained Variance")
ax.set_title("PCA: Baseline vs Lesioned Face-Selective Channels")
ax.legend()
ax.grid(True)
plt.show()


# Graph Meaning

- The graph shows a drop in cumulative explained variance after the face-selective neurons were lesioned.
- This is like Kanwisher's fMRI studies, but in a neural network: localizing a “face area” and showing its importance by “virtually lesioning” it parallels how human brain areas such as the Fusiform Face Area (FFA) are studied, where damage to that area leads to face-perception deficits (prosopagnosia).
- Zeroing out the face-sensitive neurons simulates “brain damage” to the face-recognition part of the network.
- The drop in the orange curve (lesioned) vs. the blue curve (baseline) suggests those neurons mattered. Caveat: the PCA is computed on the same layer whose channels were zeroed, so part of the change is expected by construction; a drop in downstream face-recognition accuracy would be stronger evidence.




In [None]:
#try lesioning random neurons and see what happens

import numpy as np
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt


np.random.seed(42)
total_channels = flattened_activations.shape[1]
# Draw the control set only from channels that are NOT in the face-selective set;
# otherwise the "random" lesion can partly overlap the lesion it is a control for.
candidate_channels = np.setdiff1d(np.arange(total_channels), face_selective_channels)
random_channels = np.random.choice(
    candidate_channels,
    size=len(face_selective_channels),  # same lesion size as the face-selective lesion
    replace=False
)
print("Random control channels:", random_channels)

class LesionHook:
    """Forward hook that zeroes the given output channels and records the result.

    Args:
        module: the module to attach to via ``register_forward_hook``.
        lesion_channels: channel indices (dimension 1 of the output) to set to zero.

    Attributes:
        hook: removable handle returned by ``register_forward_hook``.
        lesion_channels: the indices passed to the constructor.
        output: detached lesioned output of the last forward pass, or ``None``
            before the first pass.
    """

    def __init__(self, module, lesion_channels):
        self.hook = module.register_forward_hook(self.hook_fn)
        self.lesion_channels = lesion_channels
        self.output = None

    def hook_fn(self, module, input, output):
        """Return a copy of ``output`` with the selected channels zeroed."""
        # Work on a clone: writing into the module's own output in place invalidates
        # tensors autograd saved for backward (e.g. a ReLU result). The returned
        # tensor replaces the module output for everything downstream.
        lesioned = output.clone()
        lesioned[:, self.lesion_channels, :, :] = 0  # zero-out selected channels
        self.output = lesioned.detach()
        return lesioned

    def close(self):
        """Remove the hook from the module."""
        self.hook.remove()

# Control run: same images and model, but with the random channel set zeroed.
# Reuses collect_activations (defined in the lesion cell above) instead of a copy
# of its loop, and removes the hook even if the pass fails part-way.
random_lesion_hook = LesionHook(topo_resnet18.layer4, lesion_channels=random_channels)
try:
    random_lesioned_activations = collect_activations(
        topo_resnet18, dataset, transform, random_lesion_hook, num_images
    )
finally:
    random_lesion_hook.close()


def run_pca_and_plot(data, label):
    """Fit a PCA on ``data`` and draw its cumulative explained-variance curve.

    Args:
        data: 2-D array (samples x features).
        label: legend label for the plotted curve.

    Returns:
        1-D array of cumulative explained-variance ratios; entry ``k`` covers the
        first ``k + 1`` components.
    """
    pca = PCA(n_components=min(512, *data.shape))
    pca.fit(data)  # only the fitted variance ratios are needed, not the projection
    cum_var = np.cumsum(pca.explained_variance_ratio_)
    # Start the x values at 1 so the curve lines up with a "number of components" axis.
    component_counts = np.arange(1, len(cum_var) + 1)
    plt.plot(component_counts, cum_var, label=label)
    return cum_var

# Overlay the cumulative explained-variance curves of the three conditions.
# NOTE(review): the baseline here is `flattened_activations` from the first
# collection cell, whereas the lesion cell plots `baseline_activations`.
fig, ax = plt.subplots(figsize=(8, 5))
run_pca_and_plot(flattened_activations, label="Baseline")
run_pca_and_plot(lesioned_activations, label="Face-Selective Lesioned")
run_pca_and_plot(random_lesioned_activations, label="Random Neurons Lesioned")
ax.set_xlabel("Number of PCA Components")
ax.set_ylabel("Cumulative Explained Variance")
ax.set_title("PCA: Baseline vs Lesioned (Face vs Random)")
ax.legend()
ax.grid(True)
plt.show()


In [None]:
# Take the first CelebA training image and preprocess it into a batch of one.
image = dataset["train"][0]["image"]
if not isinstance(image, Image.Image):
    image = Image.fromarray(image)
input_tensor = transform(image).unsqueeze(0)

# Run the image through the model with a recording hook on layer4.
# The pass runs under no_grad (no autograd graph is needed for a visualisation)
# and the hook is removed even if the forward pass or the indexing fails.
hook = ForwardHook(topo_resnet18.layer4)
try:
    with torch.no_grad():
        _ = topo_resnet18(input_tensor)

    # Spatial activation map of one face-selective channel for this image.
    channel_idx = face_selective_channels[0]
    activation_map = hook.output[0, channel_idx].numpy()
finally:
    hook.close()

plt.imshow(activation_map, cmap='viridis')
plt.title(f"Activation Map – Channel {channel_idx}")
plt.colorbar()
plt.show()


# Face vs Scene Image Selectivity Experiment

In [None]:
# --- Install dependencies ---
!pip install git+https://github.com/toponets/toponets.git
!pip install datasets einops torchvision matplotlib

# --- Imports ---
import torch
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from einops import reduce
from sklearn.decomposition import PCA
from torchvision import transforms
from torchvision.datasets import Places365
from datasets import load_dataset
import toponets

# --- Set device ---
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- Load TopoNets pretrained model ---
checkpoint_path = "resnet18_tau_10.pt"
topo_resnet18 = toponets.resnet18(tau=10.0)
# NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted source.
checkpoint = torch.load(checkpoint_path, map_location=device)
topo_resnet18.load_state_dict(checkpoint)
topo_resnet18.to(device)
topo_resnet18.eval()

# --- Preprocessing (ImageNet style) ---
# 224x224 resize, tensor conversion, then per-channel mean/std normalisation.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

# --- Forward hook for activation capture ---
class ForwardHook:
    """Forward hook that records a module's output and can zero ("lesion") channels.

    Args:
        module: the module to attach to via ``register_forward_hook``.
        lesion_channels: optional channel indices (dimension 1 of the output) to
            set to zero; ``None`` means the output is only recorded.

    Attributes:
        hook: removable handle returned by ``register_forward_hook``.
        lesion_channels: the indices passed to the constructor.
        output: detached copy of the (possibly lesioned) output of the last
            forward pass, or ``None`` before the first pass.
    """

    def __init__(self, module, lesion_channels=None):
        self.hook = module.register_forward_hook(self.hook_fn)
        self.lesion_channels = lesion_channels
        self.output = None

    def hook_fn(self, module, input, output):
        """Record the output; when lesioning, return a zeroed copy as the new output."""
        if self.lesion_channels is None:
            self.output = output.detach()
            return None

        # Lesion a clone rather than the module's own output tensor: writing into
        # the original in place invalidates tensors autograd saved for backward
        # (e.g. a ReLU result). The returned tensor replaces the module output.
        lesioned = output.clone()
        lesioned[:, self.lesion_channels, :, :] = 0
        self.output = lesioned.detach()
        return lesioned

    def close(self):
        """Remove the hook from the module."""
        self.hook.remove()

def collect_activations(model, dataset, transform, hook, num_images):
    """Return spatially averaged hooked activations for up to ``num_images`` samples.

    Each sample is fetched from ``dataset`` by integer index. Dict samples are read
    through their ``"image"`` key, other samples through element 0. The image is
    preprocessed with ``transform``, moved to ``device`` and passed through
    ``model`` as a batch of one; the tensor stored on ``hook.output`` is then
    averaged over height and width.

    Args:
        model: network to run.
        dataset: indexable dataset supporting ``len()``.
        transform: preprocessing applied to every image.
        hook: object whose ``output`` attribute holds the layer output after a pass.
        num_images: maximum number of samples to process.

    Returns:
        NumPy array with one row per processed image and one column per channel.
    """
    activations = []
    # Bound the loop up front instead of counting and breaking inside it.
    for i in range(min(num_images, len(dataset))):
        # Fetch the sample once: every dataset[i] access re-runs the dataset's own
        # loading code, and the original indexed it up to twice per image.
        sample = dataset[i]
        image = sample["image"] if isinstance(sample, dict) else sample[0]

        # Convert from tensor to PIL if needed.
        # NOTE(review): a tensor here that already went through `transform` inside
        # the dataset gets converted and transformed a second time — prefer
        # datasets that yield raw PIL images.
        if isinstance(image, torch.Tensor):
            image = transforms.ToPILImage()(image)

        if not isinstance(image, Image.Image):
            image = Image.fromarray(image)

        input_tensor = transform(image).unsqueeze(0).to(device)

        # The forward pass is only needed for its side effect on the hook.
        with torch.no_grad():
            _ = model(input_tensor)

        reduced = reduce(
            hook.output.cpu().numpy(),
            "batch channels height width -> batch channels",
            reduction="mean"
        )
        activations.append(reduced.squeeze(0))
    return np.array(activations)


# --- Load Face Dataset (CelebA) ---
face_dataset = load_dataset("flwrlabs/celeba", split="train")

# --- Load Scene Dataset (Places365) ---
# No `transform` is passed here on purpose: collect_activations applies `transform`
# itself. Passing it to the dataset as well made every scene image go through
# Resize/ToTensor/Normalize, then ToPILImage on the normalized tensor (which
# distorts out-of-range values), then the whole transform a second time.
scene_dataset = Places365(root='places365_data',
                          split='val',
                          small=True,
                          download=True)

# --- Collect activations from face images ---
num_images = 1000
face_hook = ForwardHook(topo_resnet18.layer4)
face_activations = collect_activations(
    topo_resnet18, face_dataset, transform, face_hook, num_images
)
face_hook.close()

# --- Identify face-selective neurons ---
# Channels whose mean response over the face images is above the 95th percentile.
face_means = face_activations.mean(axis=0)
threshold = np.percentile(face_means, 95)  # Top 5%
face_selective_channels = np.flatnonzero(face_means > threshold)
print(f"Identified {len(face_selective_channels)} face-selective channels.")

# --- Collect activations from scene images ---
scene_hook = ForwardHook(topo_resnet18.layer4)
scene_activations = collect_activations(
    topo_resnet18, scene_dataset, transform, scene_hook, num_images
)
scene_hook.close()

# --- Compare activation strengths ---
# Restrict both activation matrices to the selected channels.
face_selective_on_faces = face_activations[:, face_selective_channels]
face_selective_on_scenes = scene_activations[:, face_selective_channels]

# Grand mean over all images and all selected channels.
face_avg = face_selective_on_faces.mean()
scene_avg = face_selective_on_scenes.mean()

print(f"\nMean activation of face-selective neurons:")
print(f"- On face images:  {face_avg:.4f}")
print(f"- On scene images: {scene_avg:.4f}")

# --- PCA Plot ---
def run_pca_and_plot(data, label, color, pca=None):
    """Scatter ``data`` in a 2-D PCA space.

    Args:
        data: 2-D array (samples x features).
        label: legend label for the scatter.
        color: matplotlib colour of the points.
        pca: optional already-fitted PCA used to project ``data``. When omitted,
            a new 2-component PCA is fitted on ``data`` alone (original behaviour).
    """
    if pca is None:
        pca = PCA(n_components=2)
        pc = pca.fit_transform(data)
    else:
        pc = pca.transform(data)
    plt.scatter(pc[:, 0], pc[:, 1], label=label, alpha=0.4, s=10, c=color)

# Fit ONE PCA on faces and scenes together so both point clouds share the same
# PC1/PC2 axes; separately fitted PCAs give coordinates that cannot be compared.
shared_pca = PCA(n_components=2).fit(
    np.concatenate([face_selective_on_faces, face_selective_on_scenes], axis=0)
)

plt.figure(figsize=(8,6))
run_pca_and_plot(face_selective_on_faces, "Face Images", "blue", pca=shared_pca)
run_pca_and_plot(face_selective_on_scenes, "Scene Images", "green", pca=shared_pca)
plt.xlabel("PC1")
plt.ylabel("PC2")
plt.title("PCA on Face-Selective Neurons")
plt.legend()
plt.grid(True)
plt.show()


# Test: face vs scene vs random objects

We’ll compare the activation distributions of the face-selective neurons when shown:

- Face images (CelebA)
- Scene images (Places365)
- Random object images (CIFAR-10)

Goal:
If the neurons are really face-selective, their activations should be:

- High on face images
- Low on scenes and objects

We'll visualize this with a box plot (a violin plot would work equally well), one for each group.

In [None]:
# --- Load CIFAR10 (non-face object images) ---
from torchvision.datasets import CIFAR10

# No `transform` is passed here on purpose: collect_activations applies `transform`
# itself. Passing it to the dataset as well made every image go through the
# preprocessing, then ToPILImage on the normalized tensor (which distorts
# out-of-range values), then the preprocessing a second time.
cifar_dataset = CIFAR10(root="./data", train=False, download=True)

# --- Extract activations on CIFAR (object) images ---
cifar_hook = ForwardHook(topo_resnet18.layer4)
cifar_activations = collect_activations(topo_resnet18, cifar_dataset, transform, cifar_hook, num_images)
cifar_hook.close()

# --- Get activations from face-selective neurons ---
# One value per image: the mean over the selected channels.
face_vals = face_activations[:, face_selective_channels].mean(axis=1)
scene_vals = scene_activations[:, face_selective_channels].mean(axis=1)
cifar_vals = cifar_activations[:, face_selective_channels].mean(axis=1)

# --- Plotting ---
import seaborn as sns
import pandas as pd

# Long-format table for seaborn: one row per image, labelled with its category.
category_values = {"Face": face_vals, "Scene": scene_vals, "Object": cifar_vals}
df = pd.DataFrame({
    "Activation": np.concatenate(list(category_values.values())),
    "Category": [
        name
        for name, values in category_values.items()
        for _ in range(len(values))
    ],
})

# One box per category, drawn on an explicit axes object.
fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(data=df, x="Category", y="Activation", palette="pastel", ax=ax)
ax.set_title("Mean Activation of Face-Selective Neurons Across Categories")
ax.set_ylabel("Activation Magnitude")
ax.grid(True)
plt.show()


This boxplot illustrates the average activation magnitude of face-selective neurons in response to three different image categories: Face, Scene, and Object. The neurons exhibit the highest activation for face images, with a median around 1.8 and several data points exceeding 2, indicating strong and consistent responsiveness. In contrast, the activations for scene and object categories are much lower, with medians around 0.3–0.4 and a tighter spread. This suggests that these neurons are specifically tuned to face stimuli and are relatively unresponsive to other types of visual input, reinforcing their selectivity and potential importance in face recognition tasks. Caveat: the face-selective neurons were chosen as the top 5% by mean activation on the face images themselves, so a high response to faces is partly expected by construction; selecting them on one set of faces and evaluating on a held-out set would make the comparison more convincing.

In [None]:
!pip install torch-dreams

In [None]:
import torch
import matplotlib.pyplot as plt
from torch_dreams.dreamer import Dreamer

# Ensure the topo_resnet18 model is in evaluation mode before visualising features
model = topo_resnet18
model.eval()

# Initialize Dreamer (torch-dreams) with the model and the device selected earlier
dreamer = Dreamer(model=model, device=device)

# Select a face-selective channel and the layer it belongs to
target_neuron = face_selective_channels[0]  # first entry of the face-selective channel indices
target_layer = model.layer4  # same layer the activation hooks were attached to

# Define a custom loss function to maximize the activation of the target neuron
def custom_loss(layer_outputs):
    """Return the negated mean activation of channel ``target_neuron``.

    Args:
        layer_outputs: list of hooked layer outputs supplied by the renderer;
            entry 0 belongs to the single layer passed to ``render``.

    Returns:
        Scalar tensor; minimising it maximises the channel's mean activation.
    """
    layer_output = layer_outputs[0]
    # The channel axis depends on whether the output still has its batch dimension.
    # torch-dreams appears to pass un-batched (channels, height, width) outputs for
    # a single image parameter — TODO confirm against the installed version. With
    # such a tensor, `[:, target_neuron]` would index height instead of channel.
    if layer_output.dim() == 4:
        channel_activation = layer_output[:, target_neuron]
    else:
        channel_activation = layer_output[target_neuron]
    return -channel_activation.mean()

# Generate an image that maximally activates the selected neuron.
# All keyword arguments below are forwarded to Dreamer.render as written.
dream_image = dreamer.render(
    layers=[target_layer],  # Pass the target layer in a list
    custom_func=custom_loss,  # Use the custom loss function
    width=224,
    height=224,
    iters=100,
    lr=0.05,
    rotate_degrees=15,
    scale_max=1.2,
    scale_min=0.5,
    translate_x=0.2,
    translate_y=0.2,
    weight_decay=1e-2,
    grad_clip=1.0,
)

# Display the generated image directly using matplotlib
plt.imshow(dream_image)
plt.axis("off")
plt.title(f"Visualization of Neuron {target_neuron}")
plt.show()
