In [1]:
import os
import h5py
import cortex
import copy
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.stats.multitest import multipletests

In [None]:
P_VALUE_THRESHOLD = 0.05

In [2]:
surfaces = dict(
# F = 'fMRI_story_F',
# G = 'fMRI_story_G',
# H = 'fMRI_story_H',
I = 'fMRI_story_I',
# J = 'fMRI_story_J',
# K = 'fMRI_story_K',
# L = 'fMRI_story_L',
# M = 'fMRI_story_M',
# N = 'fMRI_story_N'
)

transforms = dict(
# F = 'F_ars',
# G = 'G_ars',
# H = 'H_ars',
I = 'I_ars',
# J = 'J_ars',
# K = 'K_ars',
# L = 'L_ars',
# M = 'M_ars',
# N = 'N_ars'
)

new_transforms = dict(
# F = 'F_ars_auto2',
# G = 'G_ars_auto2',
# H = 'H_ars_auto2',
I = 'I_ars_auto2',
# J = 'J_ars_auto2',
# K = 'K_ars_auto2',
# L = 'L_ars_auto2',
# M = 'M_ars_auto2',
# N = 'N_ars_auto2'
)

sub = 'I'

In [3]:
mask = cortex.db.get_mask(surfaces[sub], new_transforms[sub], 'thin')
print('num voxels in new_transform for I: {}'.format(np.sum(mask)))

num voxels in new_transform for I: 25263


## Setup results path

In [4]:
# base_path = "/Users/mosorio/Documents/CAJAL-NeuroAI/Project/cajal_llm_project/results/"
base_path = "/Users/camilakolling/work/git/cajal_llm_project/results/"

experiment_name = "shuffled_words/percentage0.25"
results_path = os.path.join(base_path, experiment_name)
# experiment_name = "shuffled_words/percentage0.5"
# experiment_name = "shuffled_words/percentage1.0"

# experiment_name = "shuffled_sentences/percentage0.25"
# experiment_name = "shuffled_sentences/percentage0.5"
# experiment_name = "shuffled_sentences/percentage1.0"

experiment_baseline_name = "original_code"
results_baseline_path = os.path.join(base_path, experiment_baseline_name)

## SHUFFLED

In [5]:
# Open in read mode
with h5py.File(os.path.join(results_path, "results_encoding_model.h5"), 'r') as f:
    # Read metadata
    model_name = f.attrs['model_name']
    print("Model name:", model_name)

    # List all groups
    print("Groups available:", list(f.keys()))

    # Choose your group
    representation_name = list(f.keys())[0]  # or set explicitly
    group = f[representation_name]

    # Read datasets
    predictions = group['predictions'][:]
    ground_truth = group['ground_truth'][:]
    correlations = group['correlations'][:]
    p_values = group['p_values'][:]
    coefficients = group['coefficients'][:]
    alphas = group['alphas'][:]

    print("Predictions shape:", predictions.shape)

Model name: Llama-3.2-1B-Full-Chapter
Groups available: ['layer -7']
Predictions shape: (300, 25263)


In [None]:
_, p_values, _, _ = multipletests(p_values, alpha=P_VALUE_THRESHOLD, method="fdr_bh")  # 'holm' for Holm-Bonferroni

In [7]:
correlations.mean()

np.float32(0.05976337)

In [8]:
np.sum(correlations > 0.2)

np.int64(2003)

In [None]:
# significant_voxels = np.where(p_values < P_VALUE_THRESHOLD, voxel_values_to_plot, 0.)
significant_voxels = copy.deepcopy(correlations)
significant_voxels[p_values > P_VALUE_THRESHOLD] = 0
significant_voxels.shape

(25263,)

## BASELINE

In [10]:
# Open in read mode
with h5py.File(os.path.join(results_baseline_path, "results_encoding_model.h5"), 'r') as f:
    # Read metadata
    model_name_baseline = f.attrs['model_name']
    print("Model name:", model_name_baseline)

    # List all groups
    print("Groups available:", list(f.keys()))

    # Choose your group
    representation_name_baseline = list(f.keys())[0]  # or set explicitly
    group = f[representation_name_baseline]

    # Read datasets
    predictions_baseline = group['predictions'][:]
    ground_truth_baseline = group['ground_truth'][:]
    correlations_baseline = group['correlations'][:]
    p_values_baseline = group['p_values'][:]
    coefficients_baseline = group['coefficients'][:]
    alphas_baseline = group['alphas'][:]

    print("Predictions shape:", predictions_baseline.shape)

Model name: Llama-3.2-1B-Full-Chapter
Groups available: ['layer -7']
Predictions shape: (300, 25263)


In [None]:
_, p_values_baseline, _, _ = multipletests(p_values_baseline, alpha=P_VALUE_THRESHOLD, method="fdr_bh")  # 'holm' for Holm-Bonferroni

In [12]:
correlations_baseline.mean(),

(np.float32(0.12602845),)

In [13]:
np.sum(correlations_baseline > 0.2)

np.int64(6243)

In [None]:
# significant_voxels = np.where(p_values < P_VALUE_THRESHOLD, voxel_values_to_plot, 0.)
significant_voxels_baseline = copy.deepcopy(correlations_baseline)
significant_voxels_baseline[p_values_baseline > P_VALUE_THRESHOLD] = 0
significant_voxels_baseline.shape

(25263,)

## Plot significant values

In [None]:
label_volume = np.zeros(significant_voxels.shape, dtype=np.int32)

only_current   = (p_values >= 0) & (p_values <= P_VALUE_THRESHOLD) & (p_values_baseline >= P_VALUE_THRESHOLD)
only_baseline  = (p_values_baseline >= 0) & (p_values_baseline <= P_VALUE_THRESHOLD) & (p_values >= P_VALUE_THRESHOLD)
intersection   = (p_values >= 0) & (p_values <= P_VALUE_THRESHOLD) & (p_values_baseline >= 0) & (p_values_baseline <= P_VALUE_THRESHOLD)

label_volume = label_volume.astype(float)
label_volume[label_volume == 0] = np.nan

label_volume[only_baseline]  = 1
label_volume[intersection]   = 2
label_volume[only_current]   = 3

# Create cortex.Volume
vol = cortex.Volume(
    data=label_volume,
    subject=surfaces[sub],                     
    xfmname=new_transforms[sub],              
    vmin=1,
    vmax=3,
    cmap='viridis'
)

# Show in browser
cortex.webshow(vol)

Started server on port 44627
Stopping server


<JS: window.viewer>