In [2]:
import pandas as pd
import os
from embeds import fix_corrupt, multi_inner_align
import numpy as np
from hypertools.tools import align

In [4]:
def read_fmri(path: str) -> list:
    """Read every embedding file in a directory into word-indexed DataFrames.

    Each regular file directly inside ``path`` is parsed line by line: the
    first whitespace-separated token is taken as the word and the remaining
    tokens as its vector. Blank lines are ignored and sub-directories are
    skipped. If a word occurs more than once in a file, the last occurrence
    wins.

    Parameters
    ----------
    path : str
        Directory containing the files. A trailing path separator is
        optional.

    Returns
    -------
    list
        One float DataFrame per file (rows are words), in sorted file-name
        order, after being passed through ``multi_inner_align``.
    """
    dfs = []

    # os.listdir returns entries in arbitrary order; sorting keeps the order
    # of the returned list (and anything order-dependent downstream, such as
    # using dfs[0] as a reference) reproducible across machines and runs.
    for f_name in sorted(os.listdir(path)):
        # os.path.join works whether or not `path` ends with a separator.
        f_path = os.path.join(path, f_name)
        if not os.path.isfile(f_path):
            continue

        pulled = {}
        with open(f_path, 'r') as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Blank line: there is no word to unpack.
                    continue
                word, *vec = fields
                pulled[word] = vec
        # NOTE(review): fix_corrupt comes from the project's `embeds` module;
        # presumably it repairs or drops malformed vectors -- confirm there.
        pulled = fix_corrupt(pulled)
        # The dict maps word -> vector, so transpose to get one row per word.
        dfs.append(pd.DataFrame(pulled).T.astype(float))

    # NOTE(review): multi_inner_align presumably restricts all frames to the
    # words they share -- confirm against `embeds`.
    return list(multi_inner_align(dfs))

# Load the text and speech fMRI directories; read_fmri returns one DataFrame
# per file found in each directory.
fmris_text = read_fmri('../../data/fmri_text/')
fmris_speech = read_fmri('../../data/fmri_speech/')
# Display how many frames were loaded for each modality.
len(fmris_text), len(fmris_speech)

(8, 27)

# Hyperalignment approach

In [5]:
def hyper_align(dfs: list) -> pd.DataFrame:
    """Hyperalign a list of frames and average them element-wise.

    Parameters
    ----------
    dfs : list
        Frames to align. The row index of the first frame is reused as the
        index of the result.

    Returns
    -------
    pd.DataFrame
        Mean over the aligned datasets, labelled with ``dfs[0].index``.
    """
    aligned = align(dfs, align='hyper')
    # Average across datasets (axis 0 of the stacked aligned arrays).
    averaged = np.mean(aligned, axis=0)
    row_labels = dfs[0].index
    return pd.DataFrame(averaged, index=row_labels)
    
    
# Hyperalign the text fMRI frames and average them into a single frame.
fMRI_text_hyper_align = hyper_align(fmris_text)
# Bare last expression so the notebook renders the resulting frame.
fMRI_text_hyper_align

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,990,991,992,993,994,995,996,997,998,999
a,0.464084,0.387565,0.431895,0.457597,0.086167,0.264227,0.345685,0.373531,0.423295,0.400895,...,0.353663,0.462671,0.447322,0.478354,0.392698,0.402035,0.470536,0.463509,0.092744,0.476082
able,0.459373,0.393196,0.439450,0.454664,0.091402,0.245934,0.346807,0.350977,0.412431,0.399686,...,0.362708,0.458254,0.446402,0.478978,0.389035,0.400096,0.470638,0.465834,0.087155,0.480569
about,0.464301,0.384210,0.429191,0.453147,0.081254,0.250097,0.345036,0.368856,0.421964,0.400547,...,0.354067,0.457110,0.444739,0.479023,0.389685,0.399005,0.472751,0.459774,0.095464,0.475121
about-face,0.450426,0.375557,0.430167,0.455650,0.091144,0.287397,0.353477,0.347728,0.427618,0.386271,...,0.351591,0.455494,0.438416,0.483457,0.388926,0.403327,0.466395,0.473255,0.096007,0.474517
absolutely,0.450832,0.377347,0.433368,0.467116,0.073468,0.294677,0.353965,0.341486,0.418872,0.386467,...,0.356674,0.458573,0.450858,0.468963,0.386884,0.404914,0.462753,0.469128,0.068851,0.474151
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
your,0.466140,0.387067,0.432189,0.459336,0.086656,0.266432,0.345749,0.368474,0.421565,0.402813,...,0.355378,0.462313,0.450236,0.479089,0.394671,0.400960,0.471252,0.467253,0.088219,0.477468
yours,0.457045,0.395528,0.437630,0.450184,0.093678,0.257322,0.349754,0.352258,0.416850,0.413722,...,0.359377,0.465482,0.440950,0.477618,0.396491,0.405599,0.467720,0.465823,0.083639,0.475564
yourself,0.450986,0.386400,0.424025,0.437541,0.066503,0.215904,0.337662,0.359548,0.418897,0.384093,...,0.344363,0.445212,0.437134,0.463745,0.369252,0.395645,0.469369,0.451563,0.115523,0.463821
yourselves,0.462372,0.396114,0.434834,0.453882,0.095540,0.265798,0.346158,0.386940,0.427108,0.403373,...,0.361900,0.481757,0.448758,0.480284,0.394603,0.400879,0.479807,0.449720,0.093439,0.481386


In [6]:
# Hyperalign the speech fMRI frames and average them into a single frame.
fMRI_speech_hyper_align = hyper_align(fmris_speech)
# Bare last expression so the notebook renders the resulting frame.
fMRI_speech_hyper_align

Unnamed: 0,0,1,2,3,4,5
a,0.299275,0.299497,0.333906,0.518374,0.284720,0.319470
about,0.299104,0.299159,0.333923,0.518484,0.284723,0.319366
across,0.298245,0.297607,0.335321,0.518655,0.283519,0.319439
actually,0.298728,0.297458,0.335913,0.518517,0.283730,0.319609
advice,0.300962,0.301003,0.333112,0.518127,0.283872,0.320064
...,...,...,...,...,...,...
written,0.299410,0.298803,0.334143,0.519568,0.284951,0.318465
yes,0.299132,0.298223,0.334086,0.518875,0.285071,0.318717
you,0.299528,0.299563,0.333599,0.518597,0.285119,0.319220
your,0.300223,0.300699,0.333411,0.518352,0.285121,0.320015


# Saving

In [7]:
# Keep only the words that also appear in the index (first column) of the
# psychNorms table.
to_pull = set(
    pd.read_csv('../../data/psychNorms/psychNorms.zip', index_col=0, low_memory=False, compression='zip').index
)
# NOTE: these assignments rebind the frames displayed in the cells above, so
# those earlier outputs show the unfiltered versions.
fMRI_text_hyper_align = fMRI_text_hyper_align.loc[fMRI_text_hyper_align.index.isin(to_pull)].astype(float)
fMRI_speech_hyper_align = fMRI_speech_hyper_align.loc[fMRI_speech_hyper_align.index.isin(to_pull)].astype(float)

# Write the filtered frames to CSV (the word index is written as the first column).
fMRI_text_hyper_align.to_csv('../../data/embeds/fMRI_text_hyper_align.csv')
fMRI_speech_hyper_align.to_csv('../../data/embeds/fMRI_speech_hyper_align.csv')