# Set up DFs

Set up Model and Beta DFs

In [1]:
import os
os.chdir('..')

In [2]:
import json

import numpy as np
from tqdm import tqdm
import pandas as pd
from sklearn.linear_model import Ridge
import statsmodels.api as sm
import matplotlib.pyplot as plt

from joblib import Parallel, delayed
import multiprocessing

MAX_CPU = multiprocessing.cpu_count() // 2

In [3]:
from analysis.fmri.analogy_rsa import \
downsample_rdms_df_by_factor, create_models, models_to_df, plotmodels,\
roi_rdm, run_rsa_dfs, subject_rdms, plot_results

from analysis.fmri.analogy_utils import \
    projectSettings, analysisSettings, contrastSettings, order, \
    pu, pa, pv, compile_models, rsa, save_rois, load_rois, load_betas

paths = projectSettings["filepaths"]["hoffPaths"]



Loading JSON config from config/analyses.json
Loading JSON config from config/contrasts.json
Loading label file from: labels/trialorder_rsa_absorted.csv


In [4]:
models = compile_models(write=False)


Loading label file from: labels/typicality.csv
Loading label file from: labels/word2vec_diffs.csv
Loading label file from: labels/humanratings.csv
Loading mat file...
Loading mat file...
Loading mat file...
Loading mat file...
Loading mat file...
Loading mat file...
Loading mat file...
Loading label file from: labels/group_accuracy.csv






















In [5]:
models["accuracy"].keys()

dict_keys(['acceleration:speed', 'accept:reject', 'accident:damage', 'advertise:promote', 'ahead:behind', 'anger:yell', 'bath:cleanliness', 'bathe:clean', 'below:above', 'big:large', 'big:small', 'black:white', 'boat:ship', 'book:magazine', 'breathe:live', 'bright:dark', 'bright:dull', 'burnish:shine', 'car:auto', 'careful:cautious', 'chair:sofa', 'climb:descend', 'coldness:shiver', 'cook:eat', 'couch:sofa', 'cute:adorable', 'danger:flee', 'dark:light', 'darken:color', 'day:evening', 'death:population', 'difficult:easy', 'dim:light', 'dirtiness:bathe', 'dirty:bathe', 'dirty:clean', 'discount:price', 'disease:sickness', 'drink:hydrate', 'east:west', 'enthusiastic:lazy', 'exercise:fitness', 'exercise:healthy', 'explosion:damage', 'fast:slow', 'fat:thin', 'fence:hedge', 'fire:burns', 'flee:escape', 'flood:water', 'force:pressure', 'forward:backward', 'fright:scream', 'front:back', 'fun:boring', 'funny:serious', 'germs:sickness', 'good:bad', 'good:wrong', 'happiness:smile', 'heat:sweat', '

In [6]:
models["accuracy"]

{'acceleration:speed': Group Acc    0.71875
 Name: acceleration:speed, dtype: float64,
 'accept:reject': Group Acc    0.96875
 Name: accept:reject, dtype: float64,
 'accident:damage': Group Acc    0.78125
 Name: accident:damage, dtype: float64,
 'advertise:promote': Group Acc    0.625
 Name: advertise:promote, dtype: float64,
 'ahead:behind': Group Acc    0.84375
 Name: ahead:behind, dtype: float64,
 'anger:yell': Group Acc    1.0
 Name: anger:yell, dtype: float64,
 'bath:cleanliness': Group Acc    0.96875
 Name: bath:cleanliness, dtype: float64,
 'bathe:clean': Group Acc    0.71875
 Name: bathe:clean, dtype: float64,
 'below:above': Group Acc    0.9375
 Name: below:above, dtype: float64,
 'big:large': Group Acc    0.84375
 Name: big:large, dtype: float64,
 'big:small': Group Acc    0.78125
 Name: big:small, dtype: float64,
 'black:white': Group Acc    0.96875
 Name: black:white, dtype: float64,
 'boat:ship': Group Acc    0.96875
 Name: boat:ship, dtype: float64,
 'book:magazine': Grou

In [13]:
trial_order = order.ABTag.values # [::2]

x = np.array(
        [models["accuracy"][v] for v in trial_order]
    )

y = np.array(
        [models["typicality"][v] for v in trial_order]
    )
x.shape, y.shape


((288, 1), (288, 1))

In [18]:
trial_order = order.ABTag.values # [::2]

model_mat = []
names = []
model_mat.append(
    np.array(
        [models["rel"][v] for v in trial_order]
    )
)

names += ["mainrel_1", "mainrel_2", "mainrel_3", 
          "subrel_1", "subrel_2", "subrel_3", 
          "subrel_4", "subrel_5", "subrel_6", 
          "subrel_7", "subrel_8", "subrel_9"]

model_mat.append(
    np.array(
        [models["humanratings"][v] for v in trial_order]
    )
)

names += ["humanratings_1", "humanratings_2", "humanratings_3", 
          "humanratings_4", "humanratings_5", "humanratings_6", 
          "humanratings_7", "humanratings_8", "humanratings_9"]

model_mat.append(
    np.array(
        [models["w2vdiff"][v] for v in trial_order]
    )
)

names += ["w2vdiff_{}".format(i) for i in range(1, 301)]

model_mat.append(
    np.array(
        [models["rstpostprob9"][v] for v in trial_order]
    )
)

names += ["rstpostprob9_{}".format(i) for i in range(1, 10)]

model_mat.append(
    np.array(
        [models["rstpostprob79"][v] for v in trial_order]
    )
)

names += ["rstpostprob79_{}".format(i) for i in range(1, 80)]

model_mat.append(
    np.array(
        [models["rstpostprob79thresh"][v] for v in trial_order]
    )
)

names += ["bart79thresh_{}".format(i) for i in range(1, 80)]

model_mat.append(
    np.array(
        [models["rstpostprob79norm"][v] for v in trial_order]
    )
)

names += ["bart79norm_{}".format(i) for i in range(1, 80)]

model_mat.append(
    np.array(
        [models["rstpostprob79power"][v] for v in trial_order]
    )
)

names += ["bart79power_{}".format(i) for i in range(1, 80)]

model_mat.append(
    np.array(
        [models["rstpostprob270"][v] for v in trial_order]
    )
)

names += ["rstpostprob270_{}".format(i) for i in range(1, 271)]

model_mat.append(
    np.array(
        [models["concatword"][v] for v in trial_order] # [::2]
    )
)

names += ["concatword_{}".format(i) for i in range(1, 601)]

model_mat.append(
    np.array(
        [models["typicality"][v] for v in trial_order] # [::2]
    )
)

names += ["typicality"]

model_mat.append(
    np.array(
        [models["accuracy"][v] for v in trial_order] # [::2]
    )
)

names += ["accuracy"]

model_mat.append(
    np.array([
        [len(v)] for v in trial_order # [::2]
    ])
)
    
names += ["numchar"]

models_df = pd.DataFrame(
    np.concatenate(model_mat, axis=1), 
    columns=names, 
    index=trial_order # [::2]
)
# models_df = models_df.join(
#     order[::2][["ABTag", "ABMainRel", "ABSubRel"]]
#     # .set_index(keys="ABTag")
#     .rename(columns={"ABMainRel": "MainCond", 
#                      "ABSubRel": "SubCond"}))

models_df.merge(
    order[::2][["ABTag", "ABMainRel", "ABSubRel"]].rename(columns={"ABMainRel": "MainCond", "ABSubRel": "SubCond"}), 
    left_index=True, right_on="ABTag")

write=True
if write:
    models_df.to_csv(
        os.path.join(paths["code"], "labels", "raw_models.csv"),
        index_label="ABTag"
    )


In [17]:
models_df["accuracy"].corr()

TypeError: corr() missing 1 required positional argument: 'other'

In [43]:
models_df.merge(order[::2][["ABTag", "ABMainRel", "ABSubRel"]].rename(columns={"ABMainRel": "MainCond", "ABSubRel": "SubCond"}), left_index=True, right_on="ABTag")

Unnamed: 0,mainrel_1,mainrel_2,mainrel_3,subrel_1,subrel_2,subrel_3,subrel_4,subrel_5,subrel_6,subrel_7,...,concatword_596,concatword_597,concatword_598,concatword_599,concatword_600,typicality,numchar,ABTag,MainCond,SubCond
0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.02490,-0.00510,-0.07227,0.03735,-0.15625,31.5,9.0,big:large,1,1
0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.02490,-0.00510,-0.07227,0.03735,-0.15625,31.5,9.0,big:large,1,1
2,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.16211,-0.21191,-0.00067,0.02161,-0.15723,13.6,9.0,boat:ship,1,1
2,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.16211,-0.21191,-0.00067,0.02161,-0.15723,13.6,9.0,boat:ship,1,1
4,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.02576,-0.04126,0.22852,-0.14941,-0.15039,35.7,8.0,car:auto,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
282,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.02881,-0.09229,0.05859,0.05957,-0.05298,22.0,12.0,trim:shorten,3,9
284,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.04688,-0.24512,-0.27734,0.00273,-0.02087,33.3,10.0,wash:clean,3,9
284,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.04688,-0.24512,-0.27734,0.00273,-0.02087,33.3,10.0,wash:clean,3,9
286,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.12158,0.06250,0.01202,0.13574,-0.09131,28.0,9.0,work:earn,3,9


In [18]:
models_df

Unnamed: 0,mainrel_1,mainrel_2,mainrel_3,subrel_1,subrel_2,subrel_3,subrel_4,subrel_5,subrel_6,subrel_7,...,concatword_595,concatword_596,concatword_597,concatword_598,concatword_599,concatword_600,typicality,numchar,MainCond,SubCond
acceleration:speed,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.07861,0.10254,0.00185,0.15234,-0.33398,-0.00705,68.0,18.0,1,3
acceleration:speed,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.07861,0.10254,0.00185,0.15234,-0.33398,-0.00705,68.0,18.0,1,3
accept:reject,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,-0.01624,0.23047,0.04932,0.04932,0.05737,0.05737,33.3,13.0,2,4
accept:reject,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,-0.01624,0.23047,0.04932,0.04932,0.05737,0.05737,33.3,13.0,2,4
accident:damage,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,-0.17285,0.03271,-0.13477,-0.25586,-0.08447,-0.08936,41.2,15.0,3,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
witty:dumb,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,-0.26562,0.07227,0.30664,-0.23828,-0.03857,0.11914,18.0,10.0,2,6
work:earn,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.16406,0.12158,0.06250,0.01202,0.13574,-0.09131,28.0,9.0,3,9
work:earn,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.16406,0.12158,0.06250,0.01202,0.13574,-0.09131,28.0,9.0,3,9
workout:sweat,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.28516,0.02026,-0.18262,0.06885,-0.10742,-0.24414,14.0,13.0,3,7


In [12]:
modelnames = ["rel", "humanratings", "w2vdiff", 
              "rstpostprob9", "rstpostprob79", "bart79norm", "bart79power",
              "numchar", "concatword", "typicality"]

In [10]:
models_df = pu.load_labels(os.path.join(paths["code"], "labels", "raw_models.csv"))

Loading label file from: /u/project/monti/Analysis/Analogy/code/analogy-fmri/labels/raw_models.csv


In [14]:
models_df.head()

Unnamed: 0,ABTag,mainrel_1,mainrel_2,mainrel_3,subrel_1,subrel_2,subrel_3,subrel_4,subrel_5,subrel_6,...,concatword_595,concatword_596,concatword_597,concatword_598,concatword_599,concatword_600,typicality,numchar,MainCond,SubCond
0,big:large,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,-0.09082,-0.0249,-0.0051,-0.07227,0.03735,-0.15625,31.5,9.0,1,1
1,boat:ship,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.08643,-0.16211,-0.21191,-0.00067,0.02161,-0.15723,13.6,9.0,1,1
2,car:auto,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,-0.1123,-0.02576,-0.04126,0.22852,-0.14941,-0.15039,35.7,8.0,1,1
3,careful:cautious,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,-0.19336,-0.2168,0.17578,0.03467,-0.17188,-0.08398,24.1,16.0,1,1
4,couch:sofa,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.30469,-0.41016,0.00117,0.07959,0.08545,-0.00183,22.4,10.0,1,1


In [8]:
models_df.head()

Unnamed: 0,mainrel_1,mainrel_2,mainrel_3,subrel_1,subrel_2,subrel_3,subrel_4,subrel_5,subrel_6,subrel_7,...,concatword_593,concatword_594,concatword_595,concatword_596,concatword_597,concatword_598,concatword_599,concatword_600,typicality,numchar
big:large,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.09668,-0.1167,-0.09082,-0.0249,-0.0051,-0.07227,0.03735,-0.15625,31.5,9.0
boat:ship,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.17383,-0.15918,0.08643,-0.16211,-0.21191,-0.00067,0.02161,-0.15723,13.6,9.0
car:auto,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.04736,0.06494,-0.1123,-0.02576,-0.04126,0.22852,-0.14941,-0.15039,35.7,8.0
careful:cautious,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.01233,-0.05786,-0.19336,-0.2168,0.17578,0.03467,-0.17188,-0.08398,24.1,16.0
couch:sofa,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.16113,-0.02417,0.30469,-0.41016,0.00117,0.07959,0.08545,-0.00183,22.4,10.0
