# Set up DFs

Set up the model and beta DataFrames (DFs).

In [1]:
import os

# Move up one directory so that the `analysis` package imported below and the
# relative config/label paths resolve (assumes the notebook is started from a
# sub-directory of the directory that holds `analysis/` — TODO confirm).
# The guard makes the cell idempotent: an unconditional chdir('..') climbs one
# more level every time the cell is re-run, which then breaks those imports.
if not os.path.isdir("analysis"):
    os.chdir('..')

In [2]:
import json

import numpy as np
from tqdm import tqdm
import pandas as pd
from sklearn.linear_model import Ridge
import statsmodels.api as sm
import matplotlib.pyplot as plt

from joblib import Parallel, delayed
import multiprocessing

# Use half of the reported cores, but never fewer than one: on a single-core
# machine `cpu_count() // 2` evaluates to 0, which is not a usable worker count.
MAX_CPU = max(1, multiprocessing.cpu_count() // 2)

In [3]:
# Project-local helpers. Running these imports prints the config/label
# loading messages recorded in the cell output.
from analysis.fmri.analogy_rsa import (
    downsample_rdms_df_by_factor,
    create_models,
    models_to_df,
    plotmodels,
    roi_rdm,
    run_rsa_dfs,
    subject_rdms,
    plot_results,
)

from analysis.fmri.analogy_utils import (
    projectSettings,
    analysisSettings,
    contrastSettings,
    order,
    pu,
    pa,
    pv,
    compile_models,
    rsa,
    save_rois,
    load_rois,
    load_betas,
)

# Path table taken from the "hoffPaths" entry of the project settings.
paths = projectSettings["filepaths"]["hoffPaths"]



Loading JSON config from config/analyses.json
Loading JSON config from config/contrasts.json
Loading label file from: labels/trialorder_rsa_absorted.csv


In [4]:
# Build the `models` lookup; later cells index it as
# models[<model name>][<ABTag>] to get one feature vector per word pair.
# write=False presumably skips saving the compiled models — TODO confirm.
models = compile_models(write=False)


Loading label file from: labels/typicality.csv
Loading label file from: labels/word2vec_diffs.csv
Loading label file from: labels/humanratings.csv
Loading mat file...
Loading mat file...
Loading mat file...
Loading mat file...
Loading mat file...














In [10]:
# Sanity check: list the word-pair keys available for the
# "rstpostprob79norm" model.
models["rstpostprob79norm"].keys()

dict_keys(['big:large', 'boat:ship', 'car:auto', 'careful:cautious', 'couch:sofa', 'cute:adorable', 'house:home', 'kid:child', 'make:manufacture', 'option:choice', 'pants:trousers', 'pretty:beautiful', 'raise:elevate', 'run:sprint', 'spin:twirl', 'teach:instruct', 'book:magazine', 'chair:sofa', 'fence:hedge', 'hill:mountain', 'house:tent', 'ladder:stairs', 'paper:parchment', 'pencil:pen', 'picture:drawing', 'pillow:cushion', 'rake:fork', 'shovel:spoon', 'stairs:ladder', 'sword:knife', 'table:desk', 'wagon:trailer', 'acceleration:speed', 'darken:color', 'death:population', 'dim:light', 'discount:price', 'flood:water', 'force:pressure', 'heat:temperature', 'inflation:price', 'lower:volume', 'raise:salary', 'rise:tide', 'shorten:distance', 'soften:voice', 'speed:movement', 'terror:fear', 'accept:reject', 'big:small', 'black:white', 'bright:dark', 'dark:light', 'difficult:easy', 'dirty:clean', 'fast:slow', 'fat:thin', 'good:bad', 'hot:cold', 'old:young', 'pretty:ugly', 'rich:poor', 'tall:s

In [18]:
# Sanity check: look at the feature vector stored for a single word pair.
models["rstpostprob79norm"]["work:earn"]

array([3.24445129e-03, 1.49038667e-02, 1.13167213e-02, 1.89218944e-02,
       1.78755085e-02, 9.99849097e-03, 5.11483826e-03, 1.17571890e-02,
       4.95323165e-03, 2.05306081e-02, 1.81218284e-02, 1.45920903e-02,
       9.12636613e-03, 8.34669985e-05, 1.23028266e-03, 2.57566855e-02,
       2.05075729e-02, 1.76094546e-02, 2.32632304e-02, 1.83448602e-03,
       2.59963855e-02, 1.69601939e-02, 7.17630947e-03, 2.08097220e-02,
       2.71797000e-02, 2.43212207e-02, 2.55980383e-02, 1.91602173e-02,
       1.45656548e-02, 1.73103214e-02, 2.87609108e-04, 2.33085138e-02,
       4.79042859e-04, 3.10027201e-03, 1.46906070e-02, 2.66665557e-02,
       1.95562437e-02, 1.25949667e-02, 2.14931804e-03, 2.55144942e-02,
       1.51108598e-03, 9.33122208e-03, 3.41341327e-03, 2.07622599e-02,
       5.82529005e-03, 2.07486491e-02, 6.48136390e-04, 1.85468653e-02,
       8.05694604e-03, 4.93427015e-03, 4.16084382e-03, 1.05976933e-02,
       4.63022368e-03, 5.51161593e-03, 8.28412114e-03, 1.50961088e-02,
      

In [9]:


# Assemble one row of model features per A:B word pair and write the table,
# joined with its condition labels, to labels/raw_models.csv.

# Every second row of `order` is used; its ABTag values are the row labels.
ab_tags = order.ABTag.values[::2]

# (key in `models`, output column names), listed in the column order of the
# resulting table. Note that the "rstpostprob79norm"/"rstpostprob79power"
# models are written out under the "bart79norm"/"bart79power" column prefixes.
model_specs = [
    ("rel",
     ["mainrel_{}".format(i) for i in range(1, 4)]
     + ["subrel_{}".format(i) for i in range(1, 10)]),
    ("humanratings", ["humanratings_{}".format(i) for i in range(1, 10)]),
    ("w2vdiff", ["w2vdiff_{}".format(i) for i in range(1, 301)]),
    ("rstpostprob9", ["rstpostprob9_{}".format(i) for i in range(1, 10)]),
    ("rstpostprob79", ["rstpostprob79_{}".format(i) for i in range(1, 80)]),
    ("rstpostprob79norm", ["bart79norm_{}".format(i) for i in range(1, 80)]),
    ("rstpostprob79power", ["bart79power_{}".format(i) for i in range(1, 80)]),
    ("concatword", ["concatword_{}".format(i) for i in range(1, 601)]),
    ("typicality", ["typicality"]),
]

model_mat = []
names = []
for model_key, column_names in model_specs:
    block = np.array([models[model_key][tag] for tag in ab_tags])
    # Check each model's width against its own column names. Checking only
    # the concatenated total would let two compensating width errors slip
    # through and silently shift every later column name.
    if block.ndim != 2 or block.shape[1] != len(column_names):
        raise ValueError(
            "Model '{}' has shape {} but {} column name(s) were "
            "expected".format(model_key, block.shape, len(column_names))
        )
    model_mat.append(block)
    names += column_names

# Length of the tag string itself, i.e. both words plus the ':' separator.
model_mat.append(np.array([[len(tag)] for tag in ab_tags]))
names += ["numchar"]

models_df = pd.DataFrame(
    np.concatenate(model_mat, axis=1),
    columns=names,
    index=ab_tags,
)

# The condition labels are attached only to the frame written to disk;
# `models_df` itself is left without the MainCond/SubCond columns.
condition_labels = (
    order[::2][["ABTag", "ABMainRel", "ABSubRel"]]
    .set_index(keys="ABTag")
    .rename(columns={"ABMainRel": "MainCond", "ABSubRel": "SubCond"})
)
models_df.join(condition_labels).to_csv(
    os.path.join(paths["code"], "labels", "raw_models.csv"),
    index_label="ABTag",
)


In [12]:
# Model families referred to by name. "bart79norm"/"bart79power" are the
# column prefixes used in raw_models.csv, not the keys of `models`.
modelnames = [
    "rel",
    "humanratings",
    "w2vdiff",
    "rstpostprob9",
    "rstpostprob79",
    "bart79norm",
    "bart79power",
    "numchar",
    "concatword",
    "typicality",
]

In [10]:
# Re-read the CSV written by the model-building cell; this rebinds
# `models_df` to the table as stored on disk.
raw_models_csv = os.path.join(paths["code"], "labels", "raw_models.csv")
models_df = pu.load_labels(raw_models_csv)

Loading label file from: /u/project/monti/Analysis/Analogy/code/analogy-fmri/labels/raw_models.csv


In [14]:
# Preview the first rows of the table.
models_df.head()

Unnamed: 0,ABTag,mainrel_1,mainrel_2,mainrel_3,subrel_1,subrel_2,subrel_3,subrel_4,subrel_5,subrel_6,...,concatword_595,concatword_596,concatword_597,concatword_598,concatword_599,concatword_600,typicality,numchar,MainCond,SubCond
0,big:large,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,-0.09082,-0.0249,-0.0051,-0.07227,0.03735,-0.15625,31.5,9.0,1,1
1,boat:ship,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.08643,-0.16211,-0.21191,-0.00067,0.02161,-0.15723,13.6,9.0,1,1
2,car:auto,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,-0.1123,-0.02576,-0.04126,0.22852,-0.14941,-0.15039,35.7,8.0,1,1
3,careful:cautious,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,-0.19336,-0.2168,0.17578,0.03467,-0.17188,-0.08398,24.1,16.0,1,1
4,couch:sofa,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.30469,-0.41016,0.00117,0.07959,0.08545,-0.00183,22.4,10.0,1,1


In [8]:
# Preview the first rows of the table.
# NOTE(review): the recorded output below is indexed by ABTag and has no
# MainCond/SubCond columns, so it appears to come from the in-memory frame
# built before the CSV reload; on a top-to-bottom run this cell just repeats
# the preview above — confirm whether it is still needed.
models_df.head()

Unnamed: 0,mainrel_1,mainrel_2,mainrel_3,subrel_1,subrel_2,subrel_3,subrel_4,subrel_5,subrel_6,subrel_7,...,concatword_593,concatword_594,concatword_595,concatword_596,concatword_597,concatword_598,concatword_599,concatword_600,typicality,numchar
big:large,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.09668,-0.1167,-0.09082,-0.0249,-0.0051,-0.07227,0.03735,-0.15625,31.5,9.0
boat:ship,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.17383,-0.15918,0.08643,-0.16211,-0.21191,-0.00067,0.02161,-0.15723,13.6,9.0
car:auto,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.04736,0.06494,-0.1123,-0.02576,-0.04126,0.22852,-0.14941,-0.15039,35.7,8.0
careful:cautious,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.01233,-0.05786,-0.19336,-0.2168,0.17578,0.03467,-0.17188,-0.08398,24.1,16.0
couch:sofa,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.16113,-0.02417,0.30469,-0.41016,0.00117,0.07959,0.08545,-0.00183,22.4,10.0
