In [None]:
import numpy as np
from os import path
import matplotlib.pyplot as plt

In [None]:
### Load data

# imresps.npy has shape (1573, 2, 15363): 1573 images, 2 repeats of each, 15363 recorded neurons.
# stimids.npy holds, for each stimulus number, the id of the image that was shown
# (ids match the image dataset ~selection1866~). So to find out which image was
# presented for imresps[502], look up stimids[502].

PATH_TO_DATA = '../../data/neural'

# Load both arrays from the same folder, responses first and stimulus ids second
imresps, stimids = (
    np.load(path.join(PATH_TO_DATA, npy_name))
    for npy_name in ('imresps.npy', 'stimids.npy')
)

print(imresps.shape) # (1573, 2, 15363)
print(stimids.shape) # (1573,)

In [None]:
def compute_signal_related_variance(resp_a, resp_b, mean_center=True):
    """
    Compute the fraction of signal-related variance (SRV) for each neuron,
    as per Stringer et al Nature 2019. Cross-validated by splitting
    responses into two halves. Note, this only is "correct" if resp_a
    and resp_b are *not* averages of many trials.

    Args:
        resp_a (array-like): (n_stimuli, n_cells) responses from the first half.
            Inputs with more than two dimensions are flattened over every
            axis except the last one (cells).
        resp_b (array-like): responses from the second half; must have exactly
            the same shape as resp_a.
        mean_center (bool): if True (default), subtract each cell's mean
            response (computed separately per half) before computing
            covariance and variance.

    Returns:
        fraction_of_stimulus_variance: (n_cells,) cross-half covariance divided
            by the mean of the two halves' variances. 0 is non-stimulus-caring,
            1 is only-stimulus-caring neurons. NaN/inf where total variance is zero.
        stim_to_noise_ratio: (n_cells,) ratio of the stim-related variance to
            all other variance, i.e. fraction / (1 - fraction).

    Raises:
        ValueError: if the inputs have different shapes or fewer than 2 dimensions.
    """
    resp_a = np.asarray(resp_a)
    resp_b = np.asarray(resp_b)

    # Without this check mismatched halves such as (n, c) vs (n, 1) would
    # silently broadcast and produce meaningless numbers.
    if resp_a.shape != resp_b.shape:
        raise ValueError(
            f"resp_a and resp_b must have the same shape, got {resp_a.shape} and {resp_b.shape}"
        )
    if resp_a.ndim < 2:
        raise ValueError(
            f"responses must be at least 2-D (n_stimuli, n_cells), got shape {resp_a.shape}"
        )

    if resp_a.ndim > 2:
        # if the stimulus is multi-dimensional, flatten across all stimuli
        resp_a = resp_a.reshape(-1, resp_a.shape[-1])
        resp_b = resp_b.reshape(-1, resp_b.shape[-1])
    ns = resp_a.shape[0]

    if mean_center:
        # mean-center the activity of each cell
        resp_a = resp_a - resp_a.mean(axis=0)
        resp_b = resp_b - resp_b.mean(axis=0)

    # compute the cross-trial stimulus covariance of each cell:
    # dot-product each cell's (n_stim, ) vector from one half
    # with its own (n_stim, ) vector on the other half
    covariance = (resp_a * resp_b).sum(axis=0) / ns

    # compute the variance of each cell, averaged across both halves
    resp_a_variance = (resp_a**2).sum(axis=0) / ns
    resp_b_variance = (resp_b**2).sum(axis=0) / ns
    total_variance = (resp_a_variance + resp_b_variance) / 2

    near_zero = total_variance < 1e-12
    if np.any(near_zero):
        print(f"Warning: Near-zero total variance for neurons: {np.where(near_zero)[0]}")

    # compute the fraction of the total variance that is
    # captured in the covariance
    fraction_of_stimulus_variance = covariance / total_variance

    # if you want, you can compute SNR as well:
    stim_to_noise_ratio = fraction_of_stimulus_variance / (
        1 - fraction_of_stimulus_variance
    )

    return fraction_of_stimulus_variance, stim_to_noise_ratio

In [None]:
### Compute the null distribution of SRV values for all neurons

# imresps shape = (1573, 2, 15363)
# responses in imresps shape = (2, 15363)
num_stimuli = imresps.shape[0] # 1573
num_repeats = imresps.shape[1] # 2
num_neurons = imresps.shape[2] # 15363
n_shuffles = 100

# Seeded generator so the null distribution (and every threshold derived from it)
# is reproducible after Restart Kernel -> Run All.
null_rng = np.random.default_rng(0)

null_srv_all_neurons = [] # becomes an array of shape (n_shuffles, num_neurons)

for _ in range(n_shuffles):
    # Shuffle stimulus indices *twice* to create two independent splits!
    shuffled_indices_A = null_rng.permutation(num_stimuli)
    shuffled_indices_B = null_rng.permutation(num_stimuli)

    # Now for the splits, we can just use fixed repeat indices,
    # because for each split, at index N the responses correspond to different stimuli
    # e.g. split_A = [ stim_100_repeat_1, stim_2_repeat_1, stim_19_repeat_1, ... ]
    # e.g. split_B = [ stim_543_repeat_2, stim_345_repeat_2, stim_3_repeat_2, ... ]
    split_A = imresps[shuffled_indices_A, 0, :]
    split_B = imresps[shuffled_indices_B, 1, :]

    # Compute SRV for the shuffled data (the SNR output is not needed here)
    fraction_of_stimulus_variance, _ = compute_signal_related_variance(split_A, split_B)
    null_srv_all_neurons.append(fraction_of_stimulus_variance)

null_srv_all_neurons = np.array(null_srv_all_neurons)
# One row per shuffle, one column per neuron
assert null_srv_all_neurons.shape == (n_shuffles, num_neurons) # (100, 15363)

print(null_srv_all_neurons[0])
print(null_srv_all_neurons[33])

# e.g. if neuron_index = 0, it will plot the SRV value for neuron 0 across all shuffles
neuron_index = 0
plt.hist(null_srv_all_neurons[:, neuron_index], bins=100, color='blue', alpha=0.7)
plt.xlabel("Fraction of Stimulus-Related Variance (SRV)")
plt.ylabel("Number of Shuffles")
plt.title(f"Null Distribution of SRV for Neuron {neuron_index}")
plt.show()

In [None]:
### Compute the real SRV for each neuron

# For every stimulus, randomly choose which of its two repeats goes into split A;
# the other repeat goes into split B. Both splits therefore stay aligned on the
# same stimulus at every row, unlike the shuffled null splits.
# Seeded so the real SRV values are reproducible across runs.
real_rng = np.random.default_rng(1)
stimulus_rows = np.arange(imresps.shape[0])
repeat_for_split_A = real_rng.integers(0, 2, size=imresps.shape[0]) # 0 or 1 per stimulus

split_A = imresps[stimulus_rows, repeat_for_split_A, :]      # Shape: (n_stimuli, n_neurons)
split_B = imresps[stimulus_rows, 1 - repeat_for_split_A, :]  # Shape: (n_stimuli, n_neurons)

# Compute SRV for real data
real_srv_all_neurons, stim_to_noise_ratio = compute_signal_related_variance(split_A, split_B)

print(real_srv_all_neurons)
print(stim_to_noise_ratio)

print("Real SRV shape:", real_srv_all_neurons.shape) # Should be (15363,)

# This histogram is over neurons (one real SRV value each), not over shuffles
plt.hist(real_srv_all_neurons, bins=100, color='blue', alpha=0.7)
plt.xlabel("Fraction of Stimulus-Related Variance (SRV)")
plt.ylabel("Number of Neurons")
plt.title("Real SRV Distribution Across All Neurons")
plt.show()

In [None]:
### Filter neurons whose real SRV is at or above the 99th percentile of its null distribution

# Per-neuron reliability threshold: the 99th-percentile SRV value of that neuron's
# null distribution, taken across shuffles (axis=0). A neuron is considered
# reliable when its real SRV reaches or exceeds its own threshold.
top_99th_percentile_null = np.percentile(null_srv_all_neurons, 99, axis=0)
print(top_99th_percentile_null) # [0.03651716 0.03126347 0.03325775 ... 0.02738261 0.03546677 0.0333109 ]

# Get indices of reliable neurons
reliable_neuron_indices = np.where(real_srv_all_neurons >= top_99th_percentile_null)[0]

# Print results
print(f"Number of reliable neurons: {len(reliable_neuron_indices)}") # 5654
print(f"Indices of reliable neurons: {reliable_neuron_indices}") # [   14    29    48 ... 15357 15358 15360]

# Overlay: all neurons (red) versus the reliable subset (blue); counts are neurons
plt.hist(real_srv_all_neurons, bins=100, color='red', alpha=0.7, label='All neurons')
plt.hist(real_srv_all_neurons[reliable_neuron_indices], bins=100, color='blue', alpha=0.7, label='Reliable neurons')
plt.xlabel("Fraction of Stimulus-Related Variance (SRV)")
plt.ylabel("Number of Neurons")
plt.title("All Neurons: SRV all vs. SRV reliable")
plt.legend()
plt.show()

plt.hist(real_srv_all_neurons[reliable_neuron_indices], bins=100, color='blue', alpha=0.7)
plt.xlabel("Fraction of Stimulus-Related Variance (SRV)")
plt.ylabel("Number of Neurons")
plt.title("SRV Distribution for Reliable Neurons")
plt.show()

In [None]:
### Filter only the top Y neurons (SRV)

# Number of most-reliable neurons to keep.
# NOTE: this rebinds `num_neurons`, which the null-distribution cell set to the
# total number of recorded neurons (imresps.shape[2]).
num_neurons = 500

# Rank the reliable neurons by their real SRV, highest first, and keep the top ones
reliable_srv_scores = real_srv_all_neurons[reliable_neuron_indices]
sorted_indices = np.argsort(reliable_srv_scores)[::-1]
most_reliable_neurons = reliable_neuron_indices[sorted_indices[:num_neurons]]

# Fail fast (before copying the responses) if fewer reliable neurons exist than requested
assert most_reliable_neurons.shape[0] == num_neurons, "Mismatch in neuron selection!"

highest_srv_scores = real_srv_all_neurons[most_reliable_neurons]
neural_responses = imresps[:, :, most_reliable_neurons]
# Average the repeats (axis 1) to get one response per stimulus and neuron
neural_responses_mean = neural_responses.mean(axis=1)

print("Dimensionality of neural responses:", neural_responses_mean.shape)
print(f"Top {num_neurons} reliable neuron indices:", most_reliable_neurons[:10])
print("Corresponding SRV scores:", highest_srv_scores[:10])
print(f"Top {num_neurons} neural responses shape:", neural_responses.shape) # (1573, 2, 500)
print(f"Averaged top {num_neurons} neural responses shape:", neural_responses_mean.shape) # (1573, 500)

In [None]:
### Get the leading PCs of the neural data
from sklearn.decomposition import PCA

# Project the repeat-averaged responses (n_stimuli, n_selected_neurons) onto 100 PCs.
# random_state is fixed because, depending on the scikit-learn version, the default
# 'auto' solver may pick the randomized SVD for a matrix of this size, in which case
# the components would otherwise differ from run to run.
pca = PCA(n_components=100, random_state=0)
neural_data_pcs = pca.fit_transform(neural_responses_mean)

# Convenience handles for the first four PC score vectors (one value per stimulus)
pc1_neural_data = neural_data_pcs[:, 0]
pc2_neural_data = neural_data_pcs[:, 1]
pc3_neural_data = neural_data_pcs[:, 2]
pc4_neural_data = neural_data_pcs[:, 3]

print(pc1_neural_data.shape)

In [None]:
### Load and preprocess images

import os
from scipy.io import loadmat
import matplotlib.pyplot as plt
import numpy as np
from torchvision.transforms import Normalize, Compose, Resize, CenterCrop
import torch
from torch.utils.data import TensorDataset
from torchvision import utils as torch_utils
 
PATH_TO_DATA = '../../data/selection1866'

# Alphabetically sorted names of every .mat file found in the image folder
file_list = sorted([name for name in os.listdir(PATH_TO_DATA) if name.endswith('.mat')])
# Integer-typed copy of the stimulus ids that were loaded with the neural data
stim_ids = stimids.astype(int)

print(stim_ids)
print(stimids)

# Preprocessing pipeline applied to each (C, H, W) image tensor
transform = Compose([
    # Resize to size 96 (a single int is passed rather than an (h, w) pair)
    Resize(96),
    # Keep the central 96 x 96 window
    CenterCrop((96, 96)),
    # (x - 0.5) / 0.5 per channel; !! Normalize expects input that is already in the range [0, 1]
    Normalize(mean=[0.5] * 3, std=[0.5] * 3),
])

img_tensors, labels = [], []

print('List:', file_list)

# we have 1866 images here, but the neural response data only uses 1573 of them
# because some ~300 images didn't have two repeats, so were disposed
# therefore we filter the full set here so that we only use the relevant 1573
for stim_id in stim_ids:
    filename = 'img' + str(stim_id) + '.mat'
    data = loadmat(os.path.join(PATH_TO_DATA, filename))

    img = data['img'][:, :500] # Take leftmost part of the image
    rgb_img = np.stack([img] * 3, axis=-1) # Convert grayscale to RGB for SimCLR
    tensor = torch.tensor(rgb_img, dtype=torch.float32).permute(2, 0, 1) # Shape (C, H, W)

    # Min-max scale the tensor to [0, 1]
    tensor_min = tensor.min()
    tensor_max = tensor.max()
    value_range = tensor_max - tensor_min
    if value_range > 0:
        tensor = (tensor - tensor_min) / value_range
    else:
        # A constant image has zero range; dividing would fill the tensor with NaNs,
        # so map it to all zeros instead.
        tensor = torch.zeros_like(tensor)

    # Clamp to [0, 1] to ensure no outliers due to numerical precision
    tensor = torch.clamp(tensor, 0.0, 1.0)

    transformed_tensor = transform(tensor) # Normalize and resize for SimCLR
    img_tensors.append(transformed_tensor)
    labels.append(stim_id)

# Images and their stimulus ids, in the same order as stim_ids
image_dataset = TensorDataset(torch.stack(img_tensors), torch.tensor(labels))

# NOTE: from here on `labels` is the label tensor stored in the dataset, not the list built above
images, labels = image_dataset.tensors
print("Processed image labels (stim id):", labels[:30])
print("Stim IDs from neural data:", stim_ids[:30])
print("Processed dataset shape:", images.shape) # (N, C, 96, 96)
print(f"Min pixel value (processed): {torch.min(images)}")
print(f"Max pixel value (processed): {torch.max(images)}")

# Preview the first 12 processed images as a grid with 6 images per row.
# make_grid returns a (C, H, W) tensor; imshow wants (H, W, C), hence the permute.
img_grid = (
    torch_utils.make_grid(images[:12], nrow=6, normalize=True, pad_value=0.9)
    .permute(1, 2, 0)
    .numpy()
)

plt.figure(figsize=(10, 5))
plt.imshow(img_grid)
plt.title('Processed images: sample')
plt.axis('off')
plt.show()
plt.close()

# Render a single raw stimulus (img20) straight from its .mat file,
# keeping only the leftmost 500 columns and applying no further preprocessing
filename = 'img20.mat'
data = loadmat(os.path.join(PATH_TO_DATA, filename))
img = data['img'][:, :500]

import matplotlib.pyplot as plt

plt.figure(figsize=(8, 6))
plt.imshow(img, cmap='gray')  # Other colormaps ('viridis', 'jet', etc.) also work here
plt.colorbar(label="Pixel Intensity")
plt.axis("off")  # Hide the axes so only the image and colorbar are shown
plt.title("Rendered Image")
plt.show()

In [None]:
### Run images through a pretrained SimCLR model and extract features

import os
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torchvision
from tqdm.notebook import tqdm
from typing import Dict
from torch.utils.data import Dataset
import urllib.request
from urllib.error import HTTPError
from collections import defaultdict

class SimCLR(nn.Module):
    """
    ResNet-18 encoder with a SimCLR projection head, plus helpers to load a
    pretrained checkpoint and to extract final-layer and intermediate-layer
    features for a dataset of images.
    """

    def __init__(self, hidden_dim=128):
        """
        Args:
            hidden_dim (int): output size of the projection head; its hidden
                layer has 4 * hidden_dim units.
        """
        super().__init__()

        # Base ResNet18 backbone with randomly initialised weights, because we load
        # custom weights later from the SimCLR checkpoint file. Called without the
        # deprecated `pretrained` keyword; the default is "no pretrained weights".
        self.convnet = torchvision.models.resnet18()

        # This is the projection head, only needed during training. For downstream tasks it is
        # disposed of and the encoder output is used instead (Chen et al., 2020)
        self.convnet.fc = nn.Sequential(
            nn.Linear(self.convnet.fc.in_features, 4 * hidden_dim),
            nn.ReLU(inplace=True),
            nn.Linear(4 * hidden_dim, hidden_dim)
        )

        self.intermediate_layers_to_capture = []
        self.intermediate_layer_features = {}
        # Handles of the currently registered forward hooks, so they can be removed again
        self._hook_handles = []
        # os.cpu_count() may return None; fall back to loading data in the main process
        self.num_workers = os.cpu_count() or 0
        self.device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

    def load_pretrained(self):
        """
        Load pretrained SimCLR weights, downloading the checkpoint first if it
        is not already present under ../../models. Afterwards the model is
        moved to self.device and put in eval mode.

        Raises:
            FileNotFoundError: if the checkpoint is missing and the download
                fails with an HTTP error.
        """
        base_url = "https://raw.githubusercontent.com/phlippe/saved_models/main/tutorial17/"
        models_dir = "../../models"
        pretrained_simclr_filename = "SimCLR.ckpt"
        pretrained_simclr_path = os.path.join(models_dir, pretrained_simclr_filename)
        os.makedirs(models_dir, exist_ok=True)

        # Check whether the pretrained model file already exists locally. If not, try downloading it
        file_url = base_url + pretrained_simclr_filename
        if os.path.isfile(pretrained_simclr_path):
            print(f"Already downloaded pretrained model: {file_url}")
        else:
            print(f"Downloading pretrained SimCLR model {file_url}...")
            try:
                urllib.request.urlretrieve(file_url, pretrained_simclr_path)
            except HTTPError as e:
                print("Something went wrong. Please try to download the file from the GDrive folder, or contact the author with the full output including the following error:\n", e)
                # Stop here with a clear error instead of failing later inside torch.load
                raise FileNotFoundError(
                    f"Pretrained SimCLR checkpoint could not be downloaded to {pretrained_simclr_path}"
                ) from e

        # Load pretrained model
        # NOTE(review): torch.load unpickles the file, and this file comes from a
        # download - only use it with a checkpoint source you trust.
        checkpoint = torch.load(pretrained_simclr_path, map_location=self.device)
        self.load_state_dict(checkpoint['state_dict'])
        self.to(self.device)
        self.eval()

    def set_intermediate_layers_to_capture(self, layers):
        """
        Register forward hooks that capture the (detached) output of the given
        convnet layers on every forward pass. Hooks registered by a previous
        call are removed first, so calling this repeatedly does not stack hooks.

        Args:
            layers (list[str]): module names as reported by
                self.convnet.named_modules(), e.g. 'layer1'.

        Raises:
            KeyError: if a name is not a module of self.convnet.
        """
        # Just check the layers specified are actually in the convnet
        top_level_block_layers = [name for name, _ in self.convnet.named_children()]
        if not all(layer in top_level_block_layers for layer in layers):
            print('You have specified convnet layers that are not top-level blocks - make sure your layer names are valid')

        # Resolve every name before touching existing hooks, so an invalid name
        # leaves the current hook setup untouched
        named_modules = dict(self.convnet.named_modules())
        modules_to_hook = [(layer_name, named_modules[layer_name]) for layer_name in layers]

        for handle in getattr(self, '_hook_handles', []):
            handle.remove()

        intermediate_layer_features = {}

        def get_hook(layer_name):
            def hook(module, input, output):
                intermediate_layer_features[layer_name] = output.detach()
            return hook

        self._hook_handles = [
            module.register_forward_hook(get_hook(layer_name))
            for layer_name, module in modules_to_hook
        ]
        self.intermediate_layers_to_capture = layers
        self.intermediate_layer_features = intermediate_layer_features

    @torch.no_grad()
    def extract_features(self, dataset: Dataset) -> Dict[str, torch.Tensor]:
        """
        Run the model on the image data, and capture features from final layer and intermediate layers.

        Note: this permanently replaces the projection head (self.convnet.fc)
        with nn.Identity, so afterwards self.convnet outputs encoder features.

        Args:
            dataset (Dataset): A PyTorch Dataset yielding (image, label) pairs. The image data should have shape (N, C, H, W)

        Returns:
            Dict[str, torch.Tensor]: A dictionary containing:
                - Intermediate layer features as tensors, keyed by layer name.
                  These are NOT flattened: each keeps the layer's own output
                  shape with the batch dimension concatenated over all images.
                - Final layer features under 'final_layer'.
                - Labels under 'labels'.
        """
        self.convnet.fc = nn.Identity()  # Removing projection head g(.)
        self.eval()
        self.to(self.device)

        # Encode all images
        data_loader = DataLoader(dataset, batch_size=64, num_workers=self.num_workers, shuffle=False, drop_last=False)
        feats, labels, intermediate_features = [], [], {layer: [] for layer in self.intermediate_layers_to_capture}

        for batch_imgs, batch_labels in tqdm(data_loader):
            batch_imgs = batch_imgs.to(self.device)
            batch_feats = self.convnet(batch_imgs)

            feats.append(batch_feats.detach().cpu())
            labels.append(batch_labels)

            # Collect intermediate layer outputs written by the forward hooks for this batch.
            # DON'T FLATTEN - IT CAUSES PROBLEMS WHEN VISUALISING FEATURES LATER
            for layer in self.intermediate_layers_to_capture:
                intermediate_features[layer].append(self.intermediate_layer_features[layer].cpu())

        # Concatenate results for each layer
        feats = torch.cat(feats, dim=0)
        labels = torch.cat(labels, dim=0)
        intermediate_features = {layer: torch.cat(intermediate_features[layer], dim=0) for layer in self.intermediate_layers_to_capture}

        # Debugging log after concatenation
        print("✅ Feature extraction complete. Final feature shapes:")
        print(f"Final layer: {feats.shape}")
        for layer, feature in intermediate_features.items():
            print(f"{layer}: {feature.shape}")  # Check final stored shape

        return {**intermediate_features, 'final_layer': feats, 'labels': labels}

intermediate_layers = ['layer1', 'layer2', 'layer3', 'layer4']

# Build the model, load its pretrained weights, hook the four residual stages
# and run every image through it once
sim_clr = SimCLR()
sim_clr.load_pretrained()
sim_clr.set_intermediate_layers_to_capture(intermediate_layers)
feats = sim_clr.extract_features(image_dataset)

# Report the overall activation variance of each captured layer
for layer in intermediate_layers:
    if layer not in feats:
        continue
    variance = np.var(feats[layer].numpy())
    print(f"{layer} variance: {variance:.6f}")

# Our original images are grayscale, but SimCLR expects 3-channel RGB input.
# To meet this requirement, we duplicated the grayscale values across all three RGB channels.
# However, for PCA, we only need a single channel, so we extract just the first channel (Red)
# and flatten each image to one row.
# presumably (n_images, 96*96) given the 96x96 crop used in preprocessing - TODO confirm
flattened_images = images[:, 0, :, :].view(len(images), -1)

# Intermediate features are stored unflattened by extract_features; flatten before regression/PCA
layer1_feats, layer2_feats, layer3_feats, layer4_feats = (
    feats[layer_name] for layer_name in intermediate_layers
)
final_layer_feats = feats['final_layer'] # encoder output after the projection head was removed

print('flattened_images shape', flattened_images.shape)
print('layer1 shape', layer1_feats.shape)
print('final layer shape', final_layer_feats.shape)

In [None]:
### Regression from layer feats to PCs of neural data
from sklearn.linear_model import Ridge, RidgeCV

def regressor(X, Y):
    """
    Fit a ridge regression from features X to targets Y, selecting the
    regularisation strength with RidgeCV's built-in cross-validation, and
    print a few diagnostics.

    Args:
        X: (n_samples, n_features) feature matrix.
        Y: (n_samples,) or (n_samples, n_targets) regression targets.

    Returns:
        The fitted RidgeCV estimator.
    """
    alphas = np.logspace(1, 7, 20)
    # The keyword that stores per-sample CV values was renamed from
    # `store_cv_values` to `store_cv_results` in newer scikit-learn releases and
    # the old spelling was later removed. Try the new name first and fall back
    # to the old one so the cell runs on both old and new versions.
    try:
        ridge = RidgeCV(alphas=alphas, store_cv_results=True)
    except TypeError:
        ridge = RidgeCV(alphas=alphas, store_cv_values=True)
    ridge.fit(X, Y)

    Y_pred = ridge.predict(X)

    print("Best alpha:", ridge.alpha_)
    print("Ridge regression coefficients:", ridge.coef_.mean(axis=0))
    print("Ridge regression pred:", Y_pred)
    # NOTE: this score is computed on the same data the model was fitted on (in-sample)
    print("Ridge regression score:", ridge.score(X, Y))

    return ridge

def l2_penalty(img, lam=0.01):
    """Return `lam` times the sum of the squared entries of the tensor `img`."""
    squared_sum = img.pow(2).sum()
    return lam * squared_sum

def generate_synthetic_img(layer_name, ridge, iterations=200, regularise=True):
    """
    Optimise a 96x96 single-channel image so that the ridge readout of the
    features at `layer_name` of a freshly loaded pretrained SimCLR model is
    maximised (gradient ascent on the image with Adam).

    Args:
        layer_name (str): name of the convnet module whose output is read,
            as listed by convnet.named_modules() (e.g. 'layer1').
        ridge: fitted linear model; only its `coef_` is used (the intercept is
            ignored). `coef_` must hold one weight per flattened feature of
            the chosen layer for a single image.
        iterations (int): number of optimisation steps.
        regularise (bool): if True, add l2_penalty(image) to the loss.

    Returns:
        np.ndarray: (96, 96) image rescaled to [0, 1] (all zeros if the
        optimised image is constant).
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    sim_clr = SimCLR()
    sim_clr.load_pretrained()
    sim_clr.eval().to(device)

    # Only the image is optimised; freezing the weights avoids computing and
    # storing gradients for every network parameter on each step.
    for param in sim_clr.parameters():
        param.requires_grad_(False)

    # Hook the requested layer so its output is available after each forward pass
    intermediate_features = {}
    def hook_fn(module, input, output):
        intermediate_features[layer_name] = output

    layer = dict(sim_clr.convnet.named_modules())[layer_name]
    hook_handle = layer.register_forward_hook(hook_fn)

    try:
        # Convert ridge regressor weights to torch, as a (1, D) row vector
        # (reshape also accepts coef_ that is already (1, D))
        ridge_weights = torch.tensor(ridge.coef_, dtype=torch.float32, device=device).reshape(1, -1)

        synthetic_image = torch.randn(1, 1, 96, 96, device=device, requires_grad=True)
        optimizer = torch.optim.Adam([synthetic_image], lr=0.05, weight_decay=1e-6)

        for _ in range(iterations):
            optimizer.zero_grad()
            # The network expects 3 channels: repeat the grayscale channel
            input_img = synthetic_image.repeat(1, 3, 1, 1)
            _ = sim_clr.convnet(input_img)

            feats = intermediate_features[layer_name].view(1, -1)
            score = torch.matmul(feats, ridge_weights.t()).squeeze()
            # Minimising -score maximises the predicted PC value
            loss = -score + (l2_penalty(synthetic_image) if regularise else 0)

            loss.backward()
            optimizer.step()

            # Keep pixels in the [-1, 1] range used for the real images
            with torch.no_grad():
                synthetic_image.clamp_(-1, 1)
    finally:
        # Always detach the hook, even if optimisation fails part-way
        hook_handle.remove()

    img_np = synthetic_image.detach().cpu().squeeze().numpy()
    img_min = img_np.min()
    value_range = img_np.max() - img_min
    if value_range > 0:
        img_np = (img_np - img_min) / value_range
    else:
        # A constant image has zero range; avoid dividing by zero
        img_np = np.zeros_like(img_np)

    return img_np

In [None]:
# For layers 1 and 2, generate a synthetic image for every 10th neural PC
# (PC1, PC11, PC21, ...)

# Stop at the number of available PCs: neural_data_pcs has one column per
# component, so a hard-coded upper bound of 101 would index one past the end.
n_available_pcs = neural_data_pcs.shape[1]

for pc_index in range(0, n_available_pcs, 10):
    pc_neural_data = neural_data_pcs[:, pc_index]

    ridge_layer1 = regressor(layer1_feats.view(layer1_feats.size(0), -1), pc_neural_data)
    img_layer1 = generate_synthetic_img('layer1', ridge_layer1)

    ridge_layer2 = regressor(layer2_feats.view(layer2_feats.size(0), -1), pc_neural_data)
    img_layer2 = generate_synthetic_img('layer2', ridge_layer2)

    fig, axs = plt.subplots(1, 2, figsize=(10, 5))
    axs[0].imshow(img_layer1, cmap='gray')
    axs[0].axis('off')
    axs[0].set_title("Layer 1")

    axs[1].imshow(img_layer2, cmap='gray')
    axs[1].axis('off')
    axs[1].set_title("Layer 2")

    # pc_index is 0-based, PC numbering in the title is 1-based
    plt.suptitle(f"Synthetic Image Maximising PC{pc_index+1}", fontsize=16)
    plt.tight_layout(rect=[0, 0, 1, 0.95])
    plt.show()
    # Release the figure so figures do not pile up in memory across iterations
    plt.close(fig)

In [None]:
### Layer1: Compute cosine similarity between images to reveal variance

import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt

# One flattened synthetic image per neural PC (PC indices 0..99), driven through layer1
synthetic_images = []

for pc_index in range(100):
    pc_neural_data = neural_data_pcs[:, pc_index]
    ridge = regressor(layer1_feats.view(layer1_feats.size(0), -1), pc_neural_data)
    img_np = generate_synthetic_img('layer1', ridge)
    synthetic_images.append(img_np.reshape(-1)) # 1D copy of the image is made by np.stack below

# Rows are images, columns are pixels
synthetic_images = np.stack(synthetic_images, axis=0)

# Pairwise cosine similarity between all synthetic images
cosine_sim_matrix = cosine_similarity(synthetic_images)

# Mean of the off-diagonal entries: subtract the diagonal (self-similarity)
# from the total and divide by the number of ordered pairs
num_images = len(synthetic_images)
off_diagonal_total = cosine_sim_matrix.sum() - cosine_sim_matrix.trace()
avg_cosine_similarity = off_diagonal_total / (num_images * (num_images - 1))

print(f"Average Cosine Similarity Layer 1 (L2 Regularized): {avg_cosine_similarity:.4f}")

plt.imshow(cosine_sim_matrix, cmap='viridis')
plt.colorbar()
plt.title(f"Cosine Similarity Matrix Layer 1 (L2 Regularized, Avg: {avg_cosine_similarity:.4f})")
plt.show()

In [None]:
### Layer2: Compute cosine similarity between images to reveal variance

import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt

# One flattened synthetic image per neural PC (PC indices 0..99), driven through layer2
synthetic_images = []

for pc_index in range(100):
    pc_neural_data = neural_data_pcs[:, pc_index]
    ridge = regressor(layer2_feats.view(layer2_feats.size(0), -1), pc_neural_data)
    img_np = generate_synthetic_img('layer2', ridge)
    synthetic_images.append(img_np.reshape(-1)) # 1D copy of the image is made by np.stack below

# Rows are images, columns are pixels
synthetic_images = np.stack(synthetic_images, axis=0)

# Pairwise cosine similarity between all synthetic images
cosine_sim_matrix = cosine_similarity(synthetic_images)

# Mean of the off-diagonal entries: subtract the diagonal (self-similarity)
# from the total and divide by the number of ordered pairs
num_images = len(synthetic_images)
off_diagonal_total = cosine_sim_matrix.sum() - cosine_sim_matrix.trace()
avg_cosine_similarity = off_diagonal_total / (num_images * (num_images - 1))

print(f"Average Cosine Similarity Layer 2 (L2 Regularized): {avg_cosine_similarity:.4f}")

plt.imshow(cosine_sim_matrix, cmap='viridis')
plt.colorbar()
plt.title(f"Cosine Similarity Matrix Layer 2 (L2 Regularized, Avg: {avg_cosine_similarity:.4f})")
plt.show()