# FEV vs. Alpha Analysis

For each SimCLR layer, this notebook plots **mean FEV** vs. **alpha** (where alpha is the slope of the log–log PCA variance spectrum).

- **Mean FEV** is taken from the regression: SimCLR features (reduced to N PCs) → Neural data (original dimensionality)

- For each layer, we select the **best FEV** (i.e., highest across PC counts)

In [1]:
from cortexlib.power_law import PowerLawAlphaEstimator
from cortexlib.mouse import CortexlabMouse
from cortexlib.utils import file as futils
from cortexlib.utils.logging import Logger
from cortexlib.utils.random import set_global_seed, GLOBAL_SEED
from cortexlib.utils.plotting import simclr_colours
from sklearn.decomposition import PCA
from adjustText import adjust_text
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import torch

# Notebook-wide logger used for progress/info/success messages in the cells below.
logger = Logger()
# Seed once, up front, so stochastic steps are repeatable on a fresh "Run All".
# NOTE(review): presumably seeds the RNGs used later (e.g. torch) — confirm in
# cortexlib.utils.random.
set_global_seed()

In [2]:
# Identifier of the mouse this run analyses; reused below to tag every JSON
# result and to locate the matching SimCLR feature file.
MOUSE_ID = futils.get_mouse_id()
logger.info(f"This notebook is running for mouse {MOUSE_ID}")

[1;37m17:47:04 | INFO     | ℹ️ This notebook is running for mouse m02_d3[0m


In [3]:
# Estimator used for the neural data only; the SimCLR layers get their own
# instance further down.
neural_plae = PowerLawAlphaEstimator()
mouse = CortexlabMouse(mouse_id=MOUSE_ID)

# Per-neuron null values from 100 shuffles, then the corresponding real values.
logger.progress(f"Computing null distributions for all neurons in mouse {mouse.id}")
null_srv_all_neurons = mouse.compute_null_all_neurons(n_shuffles=100)
logger.success("Null distributions computed")

real_srv_all_neurons = mouse.compute_real_srv_all_neurons()

# Keep neurons judged reliable against the null at the 99th percentile
# (exact criterion lives in CortexlabMouse), then fetch responses for 500 of them.
reliable_neuron_indices = mouse.get_reliable_neuron_indices(
    null_srv_all_neurons,
    real_srv_all_neurons,
    percentile_threshold=99,
)
neural_responses_mean, neural_responses, _ = mouse.get_responses_for_reliable_neurons(
    reliable_neuron_indices,
    real_srv_all_neurons,
    num_neurons=500,
)

logger.info(f"Neural responses shape: {neural_responses.shape}")

# Power-law exponent of the neural data, computed on the mean responses.
neural_alpha = neural_plae.compute_alpha(neural_responses_mean)

[1;37m17:47:05 | INFO     | ⏳ Computing null distributions for all neurons in mouse m02_d3...[0m
[1;32m17:47:35 | SUCCESS  | ✅ Null distributions computed![0m
[1;37m17:47:35 | INFO     | ℹ️ Neural responses shape: (1585, 2, 500)[0m


xmin progress: 99%

In [4]:
# Tag the neural alpha result with the mouse it belongs to, then persist it
# next to the notebook.
neural_alpha['mouse_id'] = MOUSE_ID
futils.write_json_file(neural_alpha, './neural_dimensionality.json')

In [5]:
# Regression results written by the prediction step (one row per layer / PC count).
fev_results = futils.read_json_file_as_dataframe('./prediction.json')

# For every layer, find the row label with the highest mean FEV, then pull
# exactly those rows out of the full results table.
best_row_labels = fev_results.groupby("layer")["mean_fev"].idxmax()
best_fev_per_layer = fev_results.loc[best_row_labels]

best_fev_per_layer

Unnamed: 0,layer,n_pcs,test_r2,mean_fev
36,fc,,0.021191,0.169101
3,layer1,50.0,0.03069,0.162739
9,layer2,,0.03962,0.211574
18,layer3,,0.04412,0.193365
27,layer4,,0.027735,0.144667


In [6]:
# Load the saved SimCLR features for this mouse.
simclr_data = futils.load_model_features(futils.Model.SIMCLR, MOUSE_ID)
# Per-layer features; iterated below as a mapping of layer name -> tensor.
simclr_feats = simclr_data['features']

[1;37m17:47:35 | INFO     | ℹ️ Loading model features from /Users/callummessiter/workspace/msc-neuro/research-project/analysis/mouse_m02_d3/_model_features/simclr_features_mouse_m02_d3.pt[0m


In [7]:
import numpy as np
from sklearn.decomposition import PCA
import torch
import random

# Configuration for the explained-variance analysis of the SimCLR layers.
raw_features = simclr_feats
layer_names = list(raw_features.keys())
# Number of feature columns kept per subsample; also the default number of
# principal components requested by run_pca.
subsample_size = 500
# NOTE(review): `thresholds` is not referenced by the visible cells — the 0.80 /
# 0.90 levels are hard-coded where the results are assembled.
thresholds = [0.80, 0.90]
n_repeats = 20  # number of subsamples per layer
# NOTE(review): this seeds Python's `random` module only. The subsample indices
# in this notebook are drawn with torch.randperm, which this call does not seed.
random.seed(42)

def run_pca(data, n_components=subsample_size):
    """Fit a PCA to `data` and return its explained-variance spectrum.

    Args:
        data: 2-D array-like with samples in rows and features in columns.
        n_components: Number of principal components to request. An integer
            larger than min(n_samples, n_features) is clamped to that limit,
            because scikit-learn's PCA rejects such values with a ValueError.
            Non-integer values (e.g. a variance fraction) are passed through
            unchanged.

    Returns:
        Tuple ``(cumulative_ev, ev_ratio)``: the running sum of the explained
        variance ratio and the per-component explained variance ratio.
    """
    if isinstance(n_components, (int, np.integer)):
        # Narrow layers (fewer columns than requested) or small stimulus sets
        # would otherwise make PCA raise instead of returning a full spectrum.
        n_components = min(n_components, *np.shape(data))

    pca = PCA(n_components=n_components, random_state=GLOBAL_SEED)
    pca.fit(data)

    ev_ratio = pca.explained_variance_ratio_
    return np.cumsum(ev_ratio), ev_ratio

def num_components_for_variance(cumulative_ev, threshold):
    """Return how many leading components are needed to reach `threshold`.

    Args:
        cumulative_ev: 1-D sequence of cumulative explained variance, one
            entry per principal component.
        threshold: Target cumulative explained variance (e.g. 0.80).

    Returns:
        The 1-based count of components at which `cumulative_ev` first
        reaches or exceeds `threshold`.

    Raises:
        ValueError: If `cumulative_ev` never reaches `threshold` (including
            when it is empty). Without this check, argmax over an all-False
            mask would return 0 and the function would wrongly report that a
            single component suffices.
    """
    reached = np.asarray(cumulative_ev) >= threshold
    if not reached.any():
        raise ValueError(
            f"Cumulative explained variance never reaches threshold {threshold}"
        )
    # argmax on a boolean mask returns the index of the first True entry.
    return int(np.argmax(reached)) + 1

def subsample_features(feats, indices):
    """Select the feature columns given by `indices` from `feats`.

    Inputs with more than two dimensions are first flattened to
    ``(feats.shape[0], -1)``. If the flattened input has fewer columns than
    there are indices, it is returned whole instead of being indexed.
    """
    flat = feats if feats.ndim <= 2 else feats.reshape(feats.shape[0], -1)

    # Too few columns to honour the request: hand back everything.
    if flat.shape[1] < len(indices):
        return flat

    return flat[:, indices]

json_data = []

# Dedicated, explicitly seeded generator: the subsample indices are drawn with
# torch.randperm, so seeding Python's `random` module has no effect on them.
# Using a local generator makes the draws identical every time this cell runs.
subsample_rng = torch.Generator().manual_seed(GLOBAL_SEED)

for layer in layer_names:
    # Flatten once per layer so the true feature count is known:
    # (n_samples, ...) -> (n_samples, n_features).
    layer_feats = raw_features[layer]
    if layer_feats.ndim > 2:
        layer_feats = layer_feats.reshape(layer_feats.shape[0], -1)
    n_features = layer_feats.shape[1]
    # Layers narrower than `subsample_size` are used in full.
    n_subsampled = min(subsample_size, n_features)

    layer_data = {
        "mouse_id": MOUSE_ID,
        "model": futils.get_model(),
        "layer": layer,
        # Feature dimensionality of the layer (axis 1), not the sample count.
        "original_dimensions": n_features,
        "subsampled_dimensions": n_subsampled,
        "subsamples": []
    }

    for _ in range(n_repeats):
        # Draw from ALL feature columns. Permuting only range(subsample_size)
        # would pick the same first columns on every repeat, and PCA is
        # invariant to column order, so every "subsample" would be identical.
        rand_indices = torch.randperm(n_features, generator=subsample_rng)[:n_subsampled]
        feats = subsample_features(layer_feats, rand_indices)
        # PCA cannot return more components than min(n_samples, n_features).
        cum_ev, _ = run_pca(feats, n_components=min(n_subsampled, feats.shape[0]))

        layer_data["subsamples"].append({
            "num_principal_components_80pc_ev": int(num_components_for_variance(cum_ev, 0.80)),
            "num_principal_components_90pc_ev": int(num_components_for_variance(cum_ev, 0.90)),
            "cumulative_ev": cum_ev.tolist()
        })

    json_data.append(layer_data)

futils.write_json_file(json_data, './explained_variance.json')

In [8]:
# Explained-variance summary for the neural data, in the same record layout as
# the per-layer SimCLR entries.
# NOTE(review): assumes neural_responses_mean is (n_stimuli, n_neurons), which
# matches `original_dimensions` being read from axis 1 — confirm.
n_neurons = neural_responses_mean.shape[1]
n_subsampled = min(subsample_size, n_neurons)

neural_ev = {
    "mouse_id": MOUSE_ID,
    "model": "neural",
    "layer": "neural",
    "original_dimensions": n_neurons,
    "subsampled_dimensions": n_subsampled,
    "subsamples": []
}

# Subsample neurons (columns), mirroring how SimCLR feature columns are
# subsampled. Indexing rows here would instead discard stimuli and leave the
# dimensionality untouched, contradicting the "subsampled_dimensions" field.
rand_indices = torch.randperm(n_neurons)[:n_subsampled]
feats = neural_responses_mean[:, rand_indices.numpy()]
# PCA cannot return more components than min(n_samples, n_features).
cum_ev, _ = run_pca(feats, n_components=min(n_subsampled, feats.shape[0]))
neural_ev["subsamples"].append({
    "num_principal_components_80pc_ev": int(num_components_for_variance(cum_ev, 0.80)),
    "num_principal_components_90pc_ev": int(num_components_for_variance(cum_ev, 0.90)),
    "cumulative_ev": cum_ev.tolist()
})

futils.write_json_file(neural_ev, './neural_explained_variance.json')

In [None]:
# Estimate the power-law exponent (alpha) for every SimCLR layer and collect
# one record per layer.
power_law_alpha_estimator = PowerLawAlphaEstimator()

alpha_results = []

logger.progress("Computing α for SimCLR features across layers")
for layer, feats in tqdm(simclr_feats.items(), desc="α for all SimCLR layers"):
    logger.progress(f"SimCLR {layer}")

    # Flatten anything beyond 2-D to (n_samples, n_features). `reshape` is used
    # rather than `view` because `view` raises on non-contiguous tensors.
    feats = feats if feats.dim() <= 2 else feats.reshape(feats.size(0), -1)
    alpha = power_law_alpha_estimator.compute_alpha(feats)

    alpha_results.append({
        'layer': layer,
        'alpha': alpha['alpha'],
        'alpha_no_pc1': alpha['alpha_no_pc1'],
    })

logger.success("Alphas computed")

[1;37m17:47:45 | INFO     | ⏳ Computing α for SimCLR features across layers...[0m


α for all SimCLR layers:   0%|          | 0/5 [00:00<?, ?it/s]

[1;37m17:47:45 | INFO     | ⏳ SimCLR layer1...[0m


In [None]:
# Persist the per-layer alpha records (layer, alpha, alpha_no_pc1) next to the notebook.
futils.write_json_file(alpha_results, './dimensionality.json')

In [None]:
# One row per SimCLR layer with columns: layer, alpha, alpha_no_pc1.
alpha_results_df = pd.DataFrame(alpha_results)
alpha_results_df

In [None]:
# Attach each layer's alpha to its best-FEV row. This is pandas' default inner
# join on 'layer', so a layer missing from either table is dropped.
merged_results = best_fev_per_layer.merge(alpha_results_df, on='layer')
merged_results

In [None]:
# Scatter of best mean FEV against alpha, one labelled point per SimCLR layer.
colours = merged_results['layer'].map(simclr_colours)

fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(merged_results['alpha'], merged_results['mean_fev'], c=colours)

# One text label per point, coloured like its marker.
texts = [
    ax.text(alpha, mean_fev, layer, color=simclr_colours[layer], fontsize=10)
    for layer, alpha, mean_fev in zip(
        merged_results['layer'],
        merged_results['alpha'],
        merged_results['mean_fev'],
    )
]

# Nudge labels apart so they do not overlap, connecting each to its point.
adjust_text(texts, arrowprops=dict(arrowstyle='-', color='gray'))

ax.set_xlabel('Alpha (Power Law Exponent)')
ax.set_ylabel('Mean FEV')
ax.set_title('SimCLR Layers: Alpha vs. Predictive Power (Neural Data)')
ax.grid(True)
fig.tight_layout()
plt.show()