In [97]:
import numpy as np
import os
import re
from scipy.signal import resample

In [81]:
# Project root that every relative "results/..." and "data/..." path below resolves against.
# Set the BRIDGETOWER_BRAIN_ROOT environment variable to run the notebook on another
# machine; the original hard-coded location remains the fallback.
PROJECT_ROOT = os.environ.get("BRIDGETOWER_BRAIN_ROOT", "C:\\Users/Bridget Leonard/Desktop/results-Brain")
os.chdir(PROJECT_ROOT)

## Part 5: fMRI Prediction
- project features into opposite modal space with feature alignment
- predict fMRI activity using features

### 1 Feature alignment matrices

#### 1.1 Load the matrices
These matrices were calculated in Part 3. They capture the linear relationship between paired image and caption features. The image→caption matrix was estimated by predicting each language feature from the visual features; the caption→image matrix was estimated by predicting each visual feature from the language features. Thus we have two matrices of size [768, 768], in which each column holds the beta coefficients relating one language (or visual) feature to each of the 768 visual (or language) features, respectively.

In [82]:
# Read both feature-alignment matrices produced in Part 3 (caption->image first, then image->caption).
caption_to_image_matrices, image_to_caption_matrices = (
    np.load(matrix_path)
    for matrix_path in (
        "results/feature_alignment/caption_to_image_matrices.npy",
        "results/feature_alignment/image_to_caption_matrices.npy",
    )
)

In [83]:
# Sanity check: the section text above expects a (768, 768) alignment matrix.
caption_to_image_matrices.shape

(768, 768)

#### 1.2 Project data into the opposite modal space

In [85]:
# Feature vectors extracted for each stimulus, one .npy file per run/story.

# Movie runs: the held-out test run followed by the twelve training runs.
(
    test,
    train00, train01, train02, train03, train04, train05,
    train06, train07, train08, train09, train10, train11,
) = (
    np.load(movie_path)
    for movie_path in (
        "results/feature_vectors/movie/test_data.npy",
        "results/feature_vectors/movie/train_00_data.npy",
        "results/feature_vectors/movie/train_01_data.npy",
        "results/feature_vectors/movie/train_02_data.npy",
        "results/feature_vectors/movie/train_03_data.npy",
        "results/feature_vectors/movie/train_04_data.npy",
        "results/feature_vectors/movie/train_05_data.npy",
        "results/feature_vectors/movie/train_06_data.npy",
        "results/feature_vectors/movie/train_07_data.npy",
        "results/feature_vectors/movie/train_08_data.npy",
        "results/feature_vectors/movie/train_09_data.npy",
        "results/feature_vectors/movie/train_10_data.npy",
        "results/feature_vectors/movie/train_11_data.npy",
    )
)

# Stories: one array per narrated story.
(
    alternateithicatom, avatar, howtodraw, legacy, life,
    myfirstdaywiththeyankees, naked, odetostepfather, souls, undertheinfluence,
) = (
    np.load(story_path)
    for story_path in (
        "results/feature_vectors/story/alternateithicatom_data.npy",
        "results/feature_vectors/story/avatar_data.npy",
        "results/feature_vectors/story/howtodraw_data.npy",
        "results/feature_vectors/story/legacy_data.npy",
        "results/feature_vectors/story/life_data.npy",
        "results/feature_vectors/story/myfirstdaywiththeyankees_data.npy",
        "results/feature_vectors/story/naked_data.npy",
        "results/feature_vectors/story/odetostepfather_data.npy",
        "results/feature_vectors/story/souls_data.npy",
        "results/feature_vectors/story/undertheinfluence_data.npy",
    )
)

Visual to text: Movie data

In [88]:
# Project every movie (visual) feature array into the caption space by multiplying
# with the transposed image->caption matrix.
# NOTE(review): the text above says each *column* of the matrix holds one output
# feature's betas, which would suggest multiplying without `.T` — confirm against
# how Part 3 stored the matrices.
(
    test_transformed,
    train00_transformed, train01_transformed, train02_transformed,
    train03_transformed, train04_transformed, train05_transformed,
    train06_transformed, train07_transformed, train08_transformed,
    train09_transformed, train10_transformed, train11_transformed,
) = (
    np.dot(movie_features, image_to_caption_matrices.T)
    for movie_features in (
        test,
        train00, train01, train02, train03, train04, train05,
        train06, train07, train08, train09, train10, train11,
    )
)

Text to visual: Story data

In [86]:
# Project every story (language) feature array into the image space by multiplying
# with the transposed caption->image matrix.
# NOTE(review): the text above says each *column* of the matrix holds one output
# feature's betas, which would suggest multiplying without `.T` — confirm against
# how Part 3 stored the matrices.
(
    alternateithicatom_transformed,
    avatar_transformed,
    howtodraw_transformed,
    legacy_transformed,
    life_transformed,
    myfirstdaywiththeyankees_transformed,
    naked_transformed,
    odetostepfather_transformed,
    souls_transformed,
    undertheinfluence_transformed,
) = (
    np.dot(story_features, caption_to_image_matrices.T)
    for story_features in (
        alternateithicatom, avatar, howtodraw, legacy, life,
        myfirstdaywiththeyankees, naked, odetostepfather, souls, undertheinfluence,
    )
)

### 2 Load voxelwise encoding models
These matrices were calculated in part 4.

In [6]:
# Show the working directory that the relative paths in this notebook resolve against.
os.getcwd()

'C:\\Users\\Bridget Leonard\\Desktop\\BridgeTower-Brain'

In [65]:
def extract_start_number(filename):
    """Return the starting voxel number encoded at the end of a batch file name.

    The name is expected to end in ``<start>-<end>.npy`` or ``<start>_<end>.npy``;
    the hyphenated form is tried first. Names matching neither form yield 0.
    """
    for range_pattern in (r'(\d+)-\d+\.npy$', r'(\d+)_\d+\.npy$'):
        found = re.search(range_pattern, filename)
        if found is not None:
            return int(found.group(1))
    return 0

In [66]:
# Directory holding the per-batch coefficient files of the movie encoding model
directory_path = 'results/encoding_model/movie/coefficients'

# Collect the .npy files in that directory, ordered by the starting voxel number in each name
npy_files = sorted(
    (entry for entry in os.listdir(directory_path) if entry.endswith('.npy')),
    key=extract_start_number,
)

In [67]:
# Peek at the first ten file names to confirm they are ordered by starting voxel number.
npy_files[:10]

['coefficients_0_1599.npy',
 'coefficients_1600_3199.npy',
 'coefficients_3200_4799.npy',
 'coefficients_4800_6399.npy',
 'coefficients_6400_7999.npy',
 'coefficients_8000_9599.npy',
 'coefficients_9600_11199.npy',
 'coefficients_11200_12799.npy',
 'coefficients_12800_14399.npy',
 'coefficients_14400_15999.npy']

In [68]:
# Load every coefficient batch in sorted order, transposing each one so that
# its voxels run along the columns
data_arrays = [
    np.load(os.path.join(directory_path, batch_name)).T
    for batch_name in npy_files
]

# Join the transposed batches side by side: axis=1 is passed explicitly so that
# the batches are appended along the voxel (column) axis
final_matrix = np.concatenate(data_arrays, axis=1)

In [69]:
# Display the full sorted list of coefficient files.
npy_files

['coefficients_0_1599.npy',
 'coefficients_1600_3199.npy',
 'coefficients_3200_4799.npy',
 'coefficients_4800_6399.npy',
 'coefficients_6400_7999.npy',
 'coefficients_8000_9599.npy',
 'coefficients_9600_11199.npy',
 'coefficients_11200_12799.npy',
 'coefficients_12800_14399.npy',
 'coefficients_14400_15999.npy',
 'coefficients_16000_17599.npy',
 'coefficients_17600_19199.npy',
 'coefficients_19200_20799.npy',
 'coefficients_20800_22399.npy',
 'coefficients_22400_23999.npy',
 'coefficients_24000_25599.npy',
 'coefficients_25600_27199.npy',
 'coefficients_27200_28799.npy',
 'coefficients_28800_30399.npy',
 'coefficients_30400_31999.npy',
 'coefficients_32000_33599.npy',
 'coefficients_33600_35199.npy',
 'coefficients_35200_36799.npy',
 'coefficients_36800_38399.npy',
 'coefficients_38400_39999.npy',
 'coefficients_40000_41599.npy',
 'coefficients_41600_43199.npy',
 'coefficients_43200_44799.npy',
 'coefficients_44800_46399.npy',
 'coefficients_46400_47999.npy',
 'coefficients_48000_49599

In [70]:
def find_file_for_voxel(voxel_number):
    """Locate the coefficient file whose name range contains ``voxel_number``.

    Scans ``npy_files`` in order and parses the ``<start>_<end>.npy`` range from
    each name. Returns ``(filename, offset)`` for the first file whose inclusive
    range covers the voxel, where ``offset`` is the voxel's position within that
    file. Returns ``(None, None)`` when no file covers it.
    """
    for candidate in npy_files:
        found = re.search(r'(\d+)_(\d+)\.npy', candidate)
        if not found:
            continue
        first_voxel, last_voxel = (int(part) for part in found.groups())
        if not (first_voxel <= voxel_number <= last_voxel):
            continue
        # Position of the voxel relative to the first voxel stored in this file
        return candidate, voxel_number - first_voxel
    return None, None

In [71]:
def test_final_matrix(voxel):
    """Check that one column of ``final_matrix`` matches its source batch file.

    Looks up the batch file covering ``voxel``, loads it, and compares that
    voxel's row in the file with column ``voxel`` of ``final_matrix``.

    Only meaningful while the columns of ``final_matrix`` are still indexed by
    the original voxel number, i.e. before any columns are removed from it.

    Raises:
        ValueError: if no coefficient file covers ``voxel``.
        AssertionError: if the coefficients differ.
    """
    file_name, voxel_index = find_file_for_voxel(voxel)
    if file_name is None:
        # Without this guard os.path.join(directory_path, None) fails with an opaque TypeError.
        raise ValueError(f"No coefficient file covers voxel {voxel}")
    file = np.load(os.path.join(directory_path, file_name))
    assert np.array_equal(file[voxel_index], final_matrix[:, voxel]), "Voxel coefficients do not match"

In [72]:
# Spot-check two voxels (one in the first batch, one in a later batch) against their source files.
# Run this before all-zero columns are dropped from final_matrix; afterwards the column
# indices no longer equal the original voxel numbers.
test_final_matrix(0)
test_final_matrix(47800)

In [73]:
# Inspect the shape of the assembled coefficient matrix (voxels along the columns).
final_matrix.shape

(3072, 81600)

In [75]:
# Boolean flag per column: True when the column holds at least one non-zero coefficient
not_all_zeroes = (final_matrix != 0).any(axis=0)

# Keep only the flagged columns, discarding those that are entirely zero.
# NOTE(review): the remaining column count matches the non-NaN voxel count of the fMRI
# data loaded below, but nothing here verifies both selections pick the same voxels — confirm.
final_matrix = final_matrix[:, not_all_zeroes]

print(final_matrix.shape)

(3072, 81111)


### Using the vision encoding model to predict fMRI responses to stories
We'll be using the transformed story data above to predict the fMRI data found in `data/fmri_data/storydata/S1`.

Load fMRI data

In [76]:
# Subject S1 fMRI responses, one array per story.
(
    s1_alternateithicatom,
    s1_avatar,
    s1_howtodraw,
    s1_legacy,
    s1_life,
    s1_myfirstdaywiththeyankees,
    s1_naked,
    s1_odetostepfather,
    s1_souls,
) = (
    np.load(fmri_path)
    for fmri_path in (
        "data/fmri_data/storydata/S1/alternateithicatom.npy",
        "data/fmri_data/storydata/S1/avatar.npy",
        "data/fmri_data/storydata/S1/howtodraw.npy",
        "data/fmri_data/storydata/S1/legacy.npy",
        "data/fmri_data/storydata/S1/life.npy",
        "data/fmri_data/storydata/S1/myfirstdaywiththeyankees.npy",
        "data/fmri_data/storydata/S1/naked.npy",
        "data/fmri_data/storydata/S1/odetostepfather.npy",
        "data/fmri_data/storydata/S1/souls.npy",
    )
)

In [77]:
# Inspect the raw fMRI array shape; the first axis is used as the time axis below.
s1_alternateithicatom.shape

(363, 31, 100, 100)

In [78]:
# True wherever the fMRI array holds a real (non-NaN) value
mask = ~np.isnan(s1_alternateithicatom)

# Collapse the time axis: a voxel is kept only if it is non-NaN at every time point
voxel_mask = mask.all(axis=0)

# Flattening with the full 4-D mask and reshaping is only correct when every time
# point has the same NaN pattern; otherwise columns would silently stop
# corresponding to the same voxel. Fail loudly instead.
if np.count_nonzero(mask) != np.count_nonzero(voxel_mask) * mask.shape[0]:
    raise ValueError("NaN pattern differs across time points; cannot build a consistent voxel mask")

# Keep only the valid voxels, giving a (time, voxel) matrix with voxels in C order
s1_alternateithicatom_reshaped = s1_alternateithicatom[:, voxel_mask]

In [79]:
# Confirm the flattened fMRI matrix is (time points, non-NaN voxels).
s1_alternateithicatom_reshaped.shape

(363, 81111)

Resample to fMRI acquisition

In [87]:
# The projected story features have more rows than the fMRI run has time points, hence the resampling below.
alternateithicatom_transformed.shape

(2681, 768)

In [98]:
def resample_to_acq(feature_data, fmri_data, window=('kaiser', 14)):
    """Resample feature time courses to the number of fMRI acquisitions.

    Args:
        feature_data: 2-D array of shape (n_samples, n_features).
        fmri_data: array whose first axis length is the target number of
            time points.
        window: window specification forwarded to ``scipy.signal.resample``.

    Returns:
        float64 array of shape (fmri_data.shape[0], n_features).
    """
    # Take the target length from the fMRI data rather than a fixed 363, so
    # runs of any length are handled.
    n_acquisitions = fmri_data.shape[0]

    # Resample every feature column at once along the time axis.
    data_resampled = resample(np.asarray(feature_data), n_acquisitions, axis=0, window=window)
    data_resampled = np.asarray(data_resampled, dtype=np.float64)

    print("Final shape:", data_resampled.shape)
    return data_resampled

In [100]:
# Resample the projected story features to the number of time points in the masked fMRI matrix.
alternateithicatom_resampled = resample_to_acq(alternateithicatom_transformed, s1_alternateithicatom_reshaped)

Final shape: (363, 768)


Delay the features

In [103]:
def delay_features(features, delays=(2, 4, 6, 8), tr=2):
    """Build a design matrix of time-delayed copies of ``features``.

    Each delayed copy is shifted later in time by ``delay // tr`` rows. Rows
    with no earlier sample to draw from are filled with zeros, so samples from
    the end of the run never wrap around to the start.

    Args:
        features: 2-D array of shape (n_time_points, n_features).
        delays: non-negative delays, in seconds.
        tr: repetition time in seconds, used to convert delays to row shifts.

    Returns:
        Array of shape (n_time_points, n_features * len(delays)). Columns are
        ordered feature-major: all delays of feature 0, then all delays of
        feature 1, and so on.
        NOTE(review): confirm this column order matches the one used when the
        encoding model was fitted.

    Raises:
        ValueError: if any delay is negative.
    """
    features = np.asarray(features)
    n_time_points = features.shape[0]
    shifted_features_list = []

    for delay in delays:
        if delay < 0:
            raise ValueError(f"delays must be non-negative, got {delay}")
        shift_amount = delay // tr
        shifted = np.zeros_like(features)
        if shift_amount == 0:
            shifted[:] = features
        elif shift_amount < n_time_points:
            # Move samples later in time; the first `shift_amount` rows stay zero
            shifted[shift_amount:] = features[:-shift_amount]
        shifted_features_list.append(shifted)

    # Stack the shifted arrays to create a (time, feature, delay) array
    shifted_features_3d = np.stack(shifted_features_list, axis=-1)

    # Flatten the feature and delay axes for regression
    n_time_points, n_features, n_delays = shifted_features_3d.shape
    features_reshaped = shifted_features_3d.reshape(n_time_points, n_features * n_delays)

    return features_reshaped

In [104]:
# Build the delayed design matrix for this story and check its shape.
ai_resamp_delay = delay_features(alternateithicatom_resampled)
ai_resamp_delay.shape

(363, 3072)