In [1]:
import sys
import os.path as op
from glob import glob
import shutil
import warnings
import datetime

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as stats
from scipy.interpolate import interp1d
import seaborn as sns

import statsmodels.api as sm
from statsmodels.stats.multitest import multipletests
import nibabel as nib

pd.options.display.max_rows = 100
pd.options.display.max_columns = 999
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error as mse

sys.path.append(op.join(op.expanduser("~"), "code/style"))
from colors import get_colors
co, palettes = get_colors()

sys.path.append(op.join(op.expanduser("~"), "code"))
from general.array import array_operations as aop
from general.basic.helper_funcs import *
from general.basic.str_methods import *
import general.nifti.nifti_ops as nops
import general.nifti.nifti_plotting as niiplot
colors = [
    "2E45B8",
    "3EBCD2",
    "FF4983",
    "1DC9A4",
    "F9C31F",
    "B38FE7",
    "F97A1F",
    "E3120B",
]
font = {"tick": 10, "label": 12, "title": 14, "annot": 12}
pad = {"tick": 2, "label": 5, "title": 8}
colws = {1: 2.05, 2: 3.125, 3: 7.28346}
lws = {"axis": 0.8, "line": 1.5, "marker": 0.8}

%matplotlib inline
mpl.rcParams["axes.axisbelow"] = True
mpl.rcParams["axes.formatter.offset_threshold"] = 2
mpl.rcParams["axes.grid"] = True
mpl.rcParams["axes.grid.which"] = "both"
mpl.rcParams["axes.labelpad"] = pad["label"]
mpl.rcParams["axes.labelsize"] = font["label"]
mpl.rcParams["axes.linewidth"] = lws["axis"]
mpl.rcParams["axes.prop_cycle"] = mpl.cycler("color", colors)
mpl.rcParams["axes.spines.right"] = False
mpl.rcParams["axes.spines.top"] = False
mpl.rcParams["axes.titlepad"] = pad["title"]
mpl.rcParams["axes.titlesize"] = font["title"]
mpl.rcParams["figure.autolayout"] = True
mpl.rcParams["figure.dpi"] = 300
mpl.rcParams["figure.figsize"] = (colws[3], colws[3]*0.618034)
mpl.rcParams["figure.labelsize"] = font["label"]
mpl.rcParams["figure.subplot.hspace"] = 0.2
mpl.rcParams["figure.subplot.wspace"] = 0.2
mpl.rcParams["figure.titlesize"] = font["title"]
mpl.rcParams['font.sans-serif'] = "Arial"
mpl.rcParams['font.serif'] = "Times New Roman"
mpl.rcParams['font.family'] = "sans-serif"
mpl.rcParams["grid.alpha"] = 0.75
mpl.rcParams["grid.color"] = "#B7C6CF"
mpl.rcParams["grid.linewidth"] = 0.1
mpl.rcParams["hist.bins"] = 30
mpl.rcParams["legend.borderaxespad"] = 0
mpl.rcParams["legend.fontsize"] = font["tick"]
mpl.rcParams["legend.frameon"] = False
mpl.rcParams["legend.handletextpad"] = 0.4
mpl.rcParams["legend.markerscale"] = 1
mpl.rcParams["legend.title_fontsize"] = font["label"]
mpl.rcParams["lines.color"] = colors[0]
mpl.rcParams["lines.linewidth"] = lws["line"]
mpl.rcParams["lines.markeredgewidth"]: lws["marker"]
mpl.rcParams["lines.markersize"] = 8
mpl.rcParams["patch.facecolor"] = colors[1]
mpl.rcParams["patch.linewidth"] = lws["marker"]
mpl.rcParams["pdf.fonttype"] = 42
mpl.rcParams["savefig.bbox"] = "tight"
mpl.rcParams["savefig.directory"] = "~/Downloads"
mpl.rcParams["savefig.format"] = "pdf"
mpl.rcParams["savefig.pad_inches"] = 0.05
mpl.rcParams["xtick.labelsize"] = font["tick"]
mpl.rcParams["xtick.major.size"] = 4
mpl.rcParams["xtick.major.width"] = lws["axis"]
mpl.rcParams["xtick.minor.ndivs"] = 2
mpl.rcParams["ytick.labelsize"] = font["tick"]
mpl.rcParams["ytick.major.size"] = 4
mpl.rcParams["ytick.major.width"] = lws["axis"]
mpl.rcParams["ytick.minor.ndivs"] = 2


In [4]:
proj_dir = op.join(op.expanduser("~"), "Box", "CVLT_analyses_Chinese")

cvlt_files = glob(
    op.join(proj_dir, "data_capture", "final_data_entry", "*", "*.xlsx"), recursive=True
)

print(len(cvlt_files))


131


In [16]:
free_rec = cvlt.iloc[:10, :7]


In [17]:
pd.melt(free_rec, id_vars=["subject", "session"], var_name="trial", value_name="score")


Unnamed: 0,item,trial1,trial2,trial3,trial4,brief_delay,10min_delay
0,hat,1,1,8,3,1,nr
1,strawberry,2,2,3,4,2,nr
2,ferry,3,3,4,5,3,1
3,sweater,nr,4,7,6,5,nr
4,pineapple,nr,5,nr,10,6,2
5,train,nr,nr,6,7,7,5
6,belt,nr,nr,5,8,8,4
7,star fruit,nr,nr,2,1,9,3
8,plane,4,6,1,2,4,6
9,intrusions#,0,0,0,0,0,0


In [15]:
cvlt


Unnamed: 0,item,trial1,trial2,trial3,trial4,brief_delay,10min_delay,cued_fruit,cued_clothing,cued_vehicles,recog,distractors,false _rec
0,hat,1,1,8,3,1,nr,na,3,na,1,apple,nr
1,strawberry,2,2,3,4,2,nr,3,na,na,1,car,nr
2,ferry,3,3,4,5,3,1,na,na,3,1,shirt,nr
3,sweater,nr,4,7,6,5,nr,na,nr,na,1,pants,nr
4,pineapple,nr,5,nr,10,6,2,2,na,na,1,helicopter,nr
5,train,nr,nr,6,7,7,5,na,na,1,1,banana,nr
6,belt,nr,nr,5,8,8,4,na,2,na,1,leather shoes,nr
7,star fruit,nr,nr,2,1,9,3,1,na,na,1,tangerine,nr
8,plane,4,6,1,2,4,6,na,na,2,1,motorcycle,nr
9,intrusions#,0,0,0,0,0,0,0,1,0,na,NP_total,0


In [35]:
ITEM_ORDER = {
    "hat": 1,
    "strawberry": 2,
    "ferry": 3,
    "sweater": 4,
    "pineapple": 5,
    "train": 6,
    "belt": 7,
    "star fruit": 8,
    "plane": 9,
}


In [38]:
free_recm = pd.melt(
    free_rec,
    id_vars=["subj", "test_language", "item"],
    var_name="trial",
    value_name="recall_position",
)
free_recm.insert(4, "study_position", free_recm["item"].apply(lambda x: ITEM_ORDER[x]))
free_recm


Unnamed: 0,subj,test_language,item,trial,study_position,recall_position
0,33514,mandarin,hat,trial1,1,1
1,33514,mandarin,strawberry,trial1,2,2
2,33514,mandarin,ferry,trial1,3,3
3,33514,mandarin,sweater,trial1,4,nr
4,33514,mandarin,pineapple,trial1,5,nr
5,33514,mandarin,train,trial1,6,nr
6,33514,mandarin,belt,trial1,7,nr
7,33514,mandarin,star fruit,trial1,8,nr
8,33514,mandarin,plane,trial1,9,4
9,33514,mandarin,hat,trial2,1,1


In [31]:
cvlt_file = cvlt_files[0]
cvlt = pd.read_excel(cvlt_file)

# Format column names
cvlt = cvlt.rename(columns={cvlt.columns[0]: "item"})
cvlt.columns = cvlt.columns.str.lower()

# Get subject-level info
subj = op.basename(cvlt_file).split("_")[0]
test_lang = op.dirname(cvlt_file).split("/")[-1].lower()
cvlt.insert(0, "subj", subj)
cvlt.insert(1, "test_language", test_lang)

# Get the free recall portion of the test
free_rec = cvlt.iloc[:9, :9]
