# FEV vs. Semanticity Analysis

For each VGG19 layer, this notebook plots **mean FEV** vs. **silhouette score**.

- **Mean FEV** is taken from the regression: VGG19 features (reduced to N PCs) → Neural data (original dimensionality)

- For each layer, we select the **best FEV** (i.e., the highest mean FEV across all PC counts)
  
- **Silhouette score** is calculated from t-SNE-transformed, PCA-reduced VGG19 features. The number of PCs is held constant across layers (50 for every layer).

- VGG19 features are extracted from 1,000 images from the STL10 dataset (10 classes, 100 images per class)

In [None]:
from cortexlib.stl10 import STL10FewShotDataset
from cortexlib.vgg19 import PreTrainedVGG19Model, PoolingMode
from cortexlib.tsne import TSNEVisualizer
from cortexlib.utils import file as futils
from cortexlib.utils.plotting import vgg19_colours
from cortexlib.utils.logging import Logger
import matplotlib.pyplot as plt
from adjustText import adjust_text
import pandas as pd

# Shared logger; later cells call its info/progress/success methods to report status
logger = Logger()

In [None]:
# Fetch the mouse ID via the project file utilities and log which mouse this run is for
MOUSE_ID = futils.get_mouse_id()
logger.info(f"This notebook is running for mouse {MOUSE_ID}")

In [None]:
# Load the prediction results; the selection below relies on the
# 'layer' and 'mean_fev' columns being present.
results = futils.read_json_file_as_dataframe('./prediction.json')

# Step 1: for every layer, find the index label of the row with the highest mean FEV.
# Step 2: pull exactly those rows out of the full results table.
best_row_labels = results.groupby("layer")["mean_fev"].idxmax()
best_fev_per_layer = results.loc[best_row_labels]

best_fev_per_layer

In [None]:
# Number of STL10 images to sample from each class
N_IMAGES_PER_CLASS = 100

vgg19 = PreTrainedVGG19Model(pooling_mode=PoolingMode.AVGPOOL)

# Image settings reported by the model wrapper; the dataset below is built
# from its 'size', 'channels', 'mean' and 'std' entries.
settings = vgg19.get_image_settings()

logger.progress("Loading STL10 images")
stl10_kwargs = {
    'n_per_class': N_IMAGES_PER_CLASS,
    'size': settings['size'],
    'channels': settings['channels'],
    'normalise_mean': settings['mean'],
    'normalise_std': settings['std'],
}
stl10 = STL10FewShotDataset(**stl10_kwargs)
logger.success("Loaded STL10 images")

In [None]:
# Run the STL10 dataset through VGG19; the call returns the extracted features
# (a mapping keyed by layer, iterated below) together with the image labels.
logger.progress("VGG19: extracting features from STL10 images")
vgg19_feats, labels = vgg19.extract_features_with_pooling(stl10.dataset)
logger.success("VGG19 features extracted")

# Log the shape of each layer's features as a quick sanity check
for layer, feats in vgg19_feats.items():
    logger.info(f"{layer} feats shape: {tuple(feats.shape)}")

In [None]:
# PC count handed to compute_tsne as n_pca_components (same value for every layer)
N_PCA_COMPONENTS = 50

tsne_visualiser = TSNEVisualizer()

# Translate every label into its class name by indexing stl10.class_names
class_labels = [stl10.class_names[class_idx] for class_idx in labels]

logger.progress("Computing TSNE features from VGG19 features")
tsne_feats = tsne_visualiser.compute_tsne(
    vgg19_feats,
    n_pca_components=N_PCA_COMPONENTS,
)
logger.success("TSNE features computed")

# Score the t-SNE features against the class names
silhouette_scores = tsne_visualiser.compute_silhouette_scores(
    tsne_feats,
    class_labels,
)

In [None]:
# NumPy float32 values are not JSON-serialisable by json.dump(), so cast every
# score to a built-in float before writing the file.
silhouette_scores = {
    layer_name: float(score)
    for layer_name, score in silhouette_scores.items()
}
futils.write_json_file(silhouette_scores, './semanticity.json')

In [None]:
# Turn the {layer: score} mapping into a two-column table (one row per layer),
# so it can be joined with the FEV results on 'layer'.
silhouette_df = pd.DataFrame({
    'layer': list(silhouette_scores.keys()),
    'silhouette_score': list(silhouette_scores.values()),
})
silhouette_df

In [None]:
# Legend order for the cluster plots: vehicles first, then animals
ordered_class_names = [
    'car', 'truck', 'ship', 'airplane',
    'bird', 'cat', 'dog', 'deer', 'horse', 'monkey',
]

tsne_visualiser.plot_clusters_all_layers(
    tsne_feats,
    class_labels,
    custom_legend_order=ordered_class_names,
)

In [None]:
# Join best-FEV rows with silhouette scores on 'layer'. This is an inner join
# (the pandas default), so only layers present in both tables are kept.
merged_results = best_fev_per_layer.merge(silhouette_df, how='inner', on='layer')
merged_results

In [None]:
# One colour per row, looked up from the VGG19 layer palette
colours = merged_results['layer'].map(vgg19_colours)

fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(merged_results['silhouette_score'], merged_results['mean_fev'], c=colours)

# Label every point with its layer name, drawn in that layer's colour
texts = [
    ax.text(x_pos, y_pos, layer_name,
            color=vgg19_colours[layer_name], fontsize=10)
    for x_pos, y_pos, layer_name in zip(
        merged_results['silhouette_score'],
        merged_results['mean_fev'],
        merged_results['layer'],
    )
]

# Let adjustText reposition the labels to reduce overlap; grey lines link
# each moved label back to its point.
adjust_text(texts, arrowprops=dict(arrowstyle='-', color='gray'))

ax.set_xlabel('Silhouette Score')
ax.set_ylabel('Mean FEV')
ax.set_title('VGG19 Layers: Semanticity vs. Predictive Power (Neural Data)')
ax.grid(True)
fig.tight_layout()
plt.show()