## Analyses

In [3]:
# imports
import numpy as np
import pandas as pd; pd.set_option('display.max_columns', None)
import cmlreaders as cml
from analyses import analyses_behavioral
import warnings; warnings.filterwarnings("ignore")

[nltk_data] Downloading package cmudict to
[nltk_data]     /home1/hherrema/nltk_data...
[nltk_data]   Package cmudict is already up-to-date!


In [4]:
# data index returned by cmlreaders
df = cml.get_data_index()

# sessions of the FR1, pyFR and IFR1 experiments
df_intrac = df[df.experiment.isin(['FR1', 'pyFR', 'IFR1'])]
# ltpFR2 sessions; NOTE(review): session 23 is dropped without a stated reason - confirm why
df_scalp = df[(df['experiment'] == 'ltpFR2') & (df['session'] != 23)]

# both selections stacked into one frame with a fresh 0..n-1 index
df_select = pd.concat([df_intrac, df_scalp], ignore_index=True)

#### Storing Results

In [5]:
def store_results_v1(data_tr_lst, data_sess, data_sub, nom, tr_lst_toggle, idx=''):
    """
    Save the three result tables of one analysis as CSV files under
    analyses/dataframes/, named '{nom}_H{idx}_<suffix>.csv'.

    Parameters
    ----------
    data_tr_lst : object with a to_csv method
        Finest-grained table. Written with suffix 'data_tr' (trial level)
        when tr_lst_toggle is truthy, otherwise 'data_lst' (list level).
    data_sess : object with a to_csv method
        Session-level table, suffix 'data'.
    data_sub : object with a to_csv method
        Subject-level table (between-subject average), suffix 'data_bsa'.
    nom : str
        Analysis name used as the file-name prefix.
    tr_lst_toggle : bool
        Selects the trial-level or list-level file name for data_tr_lst.
    idx : str, optional
        Text placed directly after the 'H' in the file name.
    """
    finest_path = (
        f'analyses/dataframes/{nom}_H{idx}_data_tr.csv'
        if tr_lst_toggle
        else f'analyses/dataframes/{nom}_H{idx}_data_lst.csv'
    )
    targets = (
        (data_tr_lst, finest_path),                                        # trial or list level
        (data_sess, f'analyses/dataframes/{nom}_H{idx}_data.csv'),        # session level
        (data_sub, f'analyses/dataframes/{nom}_H{idx}_data_bsa.csv'),     # between-subject average
    )
    # written in the same order as listed; the row index is never saved
    for table, path in targets:
        table.to_csv(path, index=False)

In [6]:
def store_results_v2(data_tr_lst, data_sess, data_sub, nom, idx, tr_lst_toggle):
    """
    Save the three result tables of one analysis as CSV files under
    analyses/dataframes/, named '{nom}_{idx}_<suffix>.csv'.

    Parameters
    ----------
    data_tr_lst : object with a to_csv method
        Finest-grained table. Written with suffix 'data_tr' (trial level)
        when tr_lst_toggle is truthy, otherwise 'data_lst' (list level).
    data_sess : object with a to_csv method
        Session-level table, suffix 'data'.
    data_sub : object with a to_csv method
        Subject-level table (between-subject average), suffix 'data_bsa'.
    nom : str
        Analysis name used as the file-name prefix.
    idx : str
        Metric label in the file name; only 'J' and 'JFL' are accepted.
    tr_lst_toggle : bool
        Selects the trial-level or list-level file name for data_tr_lst.

    Raises
    ------
    ValueError
        If idx is neither 'J' nor 'JFL'. Nothing is written in that case.
    """
    # reject unknown labels before touching the file system
    if idx not in ('J', 'JFL'):
        raise ValueError(f'{idx} not a valid index.')

    finest_path = (
        f'analyses/dataframes/{nom}_{idx}_data_tr.csv'
        if tr_lst_toggle
        else f'analyses/dataframes/{nom}_{idx}_data_lst.csv'
    )
    targets = (
        (data_tr_lst, finest_path),                                       # trial or list level
        (data_sess, f'analyses/dataframes/{nom}_{idx}_data.csv'),        # session level
        (data_sub, f'analyses/dataframes/{nom}_{idx}_data_bsa.csv'),     # between-subject average
    )
    # written in the same order as listed; the row index is never saved
    for table, path in targets:
        table.to_csv(path, index=False)

#### Parallel Computing

In [7]:
# client handed to every run_parallel_sessions* call in this notebook
# NOTE(review): the meaning of ('pcs', '5GB', 300) is not visible here - presumably job name, memory per job and job limit; confirm in analyses_behavioral.create_client
client = analyses_behavioral.create_client('pcs', '5GB', 300)

Unique port for hherrema is 51465
{'dashboard_address': ':51465'}
To view the dashboard, run: 
`ssh -fN hherrema@rhino2.psych.upenn.edu -L 8000:192.168.86.108:51465` in your local computer's terminal (NOT rhino) 
and then navigate to localhost:8000 in your browser


In [8]:
# five parallel argument sequences built from the selected sessions; they are passed together to every run_parallel_sessions* call below
sub_iter, exp_iter, sess_iter, loc_iter, mont_iter = analyses_behavioral.build_iterables(df_select)

0it [00:00, ?it/s]

In [7]:
# one row per entry of the iterables: subject, experiment, session
md = pd.DataFrame({'subj': sub_iter, 'exp': exp_iter, 'sess': sess_iter})

# number of distinct subjects: (ltpFR2, every other experiment)
(
    len(md.query("exp == 'ltpFR2'").subj.unique()),
    len(md.query("exp != 'ltpFR2'").subj.unique()),
)

(127, 421)

In [18]:
# NOTE(review): this only references the bound method (the cell output is its repr) and never calls it,
# so nothing is shut down; use `client.shutdown()`, and only once every analysis that needs the client has finished
client.shutdown

<bound method Client.shutdown of <Client: 'tcp://192.168.86.108:36449' processes=0 threads=0, memory=0 B>>

#### Phonetic Clustering Score

In [8]:
# method 'both': similar start or rhyme; same method string for every session
method_intr = ['both'] * len(sub_iter)
errors, results = analyses_behavioral.run_parallel_sessions(
    client, analyses_behavioral.pcs_parallel_v1,
    sub_iter, exp_iter, sess_iter, loc_iter, mont_iter,
    method_intr,
)

In [9]:
# `results` must still hold the output of the 'both' run directly above
# list level -> session average -> between-subject average
pcs_H_data_lst = pd.concat(results, ignore_index=True)
pcs_H_data = analyses_behavioral.pcs_sess_avg(pcs_H_data_lst)
pcs_H_data_bsa = analyses_behavioral.pcs_btwn_subj_avg(pcs_H_data)

# save all three tables (list-level file name, default empty idx)
store_results_v1(pcs_H_data_lst, pcs_H_data, pcs_H_data_bsa,
                 nom='pcs', tr_lst_toggle=False)

In [11]:
# method 'sim_start' only; same method string for every session
method_intr = ['sim_start'] * len(sub_iter)
errors, results = analyses_behavioral.run_parallel_sessions(
    client, analyses_behavioral.pcs_parallel_v1,
    sub_iter, exp_iter, sess_iter, loc_iter, mont_iter,
    method_intr,
)

In [12]:
# `results` must still hold the output of the 'sim_start' run directly above
# list level -> session average -> between-subject average
pcs_HS_data_lst = pd.concat(results, ignore_index=True)
pcs_HS_data = analyses_behavioral.pcs_sess_avg(pcs_HS_data_lst)
pcs_HS_data_bsa = analyses_behavioral.pcs_btwn_subj_avg(pcs_HS_data)

# save all three tables (list-level file name, idx 'S')
store_results_v1(pcs_HS_data_lst, pcs_HS_data, pcs_HS_data_bsa,
                 nom='pcs', tr_lst_toggle=False, idx='S')

In [13]:
# method 'rhyme' only; same method string for every session
method_intr = ['rhyme'] * len(sub_iter)
errors, results = analyses_behavioral.run_parallel_sessions(
    client, analyses_behavioral.pcs_parallel_v1,
    sub_iter, exp_iter, sess_iter, loc_iter, mont_iter,
    method_intr,
)

In [14]:
# `results` must still hold the output of the 'rhyme' run directly above
# list level -> session average -> between-subject average
pcs_HR_data_lst = pd.concat(results, ignore_index=True)
pcs_HR_data = analyses_behavioral.pcs_sess_avg(pcs_HR_data_lst)
pcs_HR_data_bsa = analyses_behavioral.pcs_btwn_subj_avg(pcs_HR_data)

# save all three tables (list-level file name, idx 'R')
store_results_v1(pcs_HR_data_lst, pcs_HR_data, pcs_HR_data_bsa,
                 nom='pcs', tr_lst_toggle=False, idx='R')

In [15]:
# Jaccard index: the same similarity function is handed to every session
psim_fxn_iter = [analyses_behavioral.phonetic_sim_J] * len(sub_iter)
errors, results = analyses_behavioral.run_parallel_sessions_fxn_arg(
    client, analyses_behavioral.pcs_parallel_v2,
    sub_iter, exp_iter, sess_iter, loc_iter, mont_iter,
    psim_fxn_iter,
)

In [16]:
# `results` must still hold the output of the Jaccard run directly above
# list level -> session average -> between-subject average
pcs_J_data_lst = pd.concat(results, ignore_index=True)
pcs_J_data = analyses_behavioral.pcs_sess_avg(pcs_J_data_lst)
pcs_J_data_bsa = analyses_behavioral.pcs_btwn_subj_avg(pcs_J_data)

# save all three tables (list-level file name, metric label 'J')
store_results_v2(pcs_J_data_lst, pcs_J_data, pcs_J_data_bsa,
                 nom='pcs', idx='J', tr_lst_toggle=False)

In [17]:
# Jaccard index (first and last phoneme): same similarity function for every session
psim_fxn_iter = [analyses_behavioral.phonetic_sim_JFL] * len(sub_iter)
errors, results = analyses_behavioral.run_parallel_sessions_fxn_arg(
    client, analyses_behavioral.pcs_parallel_v2,
    sub_iter, exp_iter, sess_iter, loc_iter, mont_iter,
    psim_fxn_iter,
)

In [18]:
# `results` must still hold the output of the JFL run directly above
# list level -> session average -> between-subject average
pcs_JFL_data_lst = pd.concat(results, ignore_index=True)
pcs_JFL_data = analyses_behavioral.pcs_sess_avg(pcs_JFL_data_lst)
pcs_JFL_data_bsa = analyses_behavioral.pcs_btwn_subj_avg(pcs_JFL_data)

# save all three tables (list-level file name, metric label 'JFL')
store_results_v2(pcs_JFL_data_lst, pcs_JFL_data, pcs_JFL_data_bsa,
                 nom='pcs', idx='JFL', tr_lst_toggle=False)

#### Temporal Clustering Score

In [19]:
# temporal clustering: dispatch tcs_parallel over the session iterables
errors, results = analyses_behavioral.run_parallel_sessions(
    client, analyses_behavioral.tcs_parallel,
    sub_iter, exp_iter, sess_iter, loc_iter, mont_iter,
)

In [20]:
# list level data: stack the per-session frames (`results` must still hold the tcs_parallel output from the cell above)
tcs_data_lst = pd.concat(results, ignore_index=True)
tcs_data_lst.to_csv('analyses/dataframes/tcs_data_lst.csv', index=False)

# session averages, derived from the list level table
tcs_data = analyses_behavioral.tcs_sess_avg(tcs_data_lst)
tcs_data.to_csv('analyses/dataframes/tcs_data.csv', index=False)

# between-subject average, derived from the session averages
tcs_data_bsa = analyses_behavioral.tcs_btwn_subj_avg(tcs_data)
tcs_data_bsa.to_csv('analyses/dataframes/tcs_data_bsa.csv', index=False)

#### Semantic Clustering Score

In [21]:
# semantic clustering: dispatch scs_parallel over the session iterables
errors, results = analyses_behavioral.run_parallel_sessions(
    client, analyses_behavioral.scs_parallel,
    sub_iter, exp_iter, sess_iter, loc_iter, mont_iter,
)

In [22]:
# list level data: stack the per-session frames (`results` must still hold the scs_parallel output from the cell above)
scs_data_lst = pd.concat(results, ignore_index=True)
scs_data_lst.to_csv('analyses/dataframes/scs_data_lst.csv', index=False)

# session averages, derived from the list level table
scs_data = analyses_behavioral.scs_sess_avg(scs_data_lst)
scs_data.to_csv('analyses/dataframes/scs_data.csv', index=False)

# between-subject average, derived from the session averages
scs_data_bsa = analyses_behavioral.scs_btwn_subj_avg(scs_data)
scs_data_bsa.to_csv('analyses/dataframes/scs_data_bsa.csv', index=False)

#### Correlation of Clustering Scores

In [23]:
# pcs_H vs tcs, from the two list-level tables
pcs_H_tcs_corrs = analyses_behavioral.find_correlated_sessions(pcs_H_data_lst, tcs_data_lst, 'tcs')

# scalp rows: (all, pearson_r > 0 with p_val < 0.05, pearson_r > 0 with p_val_fdr < 0.05)
(
    len(pcs_H_tcs_corrs.query("exp_type == 'scalp'")),
    len(pcs_H_tcs_corrs.query("exp_type == 'scalp' and pearson_r > 0 and p_val < 0.05")),
    len(pcs_H_tcs_corrs.query("exp_type == 'scalp' and pearson_r > 0 and p_val_fdr < 0.05")),
)

  0%|          | 0/3299 [00:00<?, ?it/s]

(2374, 65, 0)

In [24]:
# pcs_H vs scs, from the two list-level tables
pcs_H_scs_corrs = analyses_behavioral.find_correlated_sessions(pcs_H_data_lst, scs_data_lst, 'scs')

# scalp rows: (all, pearson_r > 0 with p_val < 0.05, pearson_r > 0 with p_val_fdr < 0.05)
(
    len(pcs_H_scs_corrs.query("exp_type == 'scalp'")),
    len(pcs_H_scs_corrs.query("exp_type == 'scalp' and pearson_r > 0 and p_val < 0.05")),
    len(pcs_H_scs_corrs.query("exp_type == 'scalp' and pearson_r > 0 and p_val_fdr < 0.05")),
)

  0%|          | 0/3299 [00:00<?, ?it/s]

(2374, 114, 0)

In [25]:
# pcs_JFL vs tcs, from the two list-level tables
pcs_JFL_tcs_corrs = analyses_behavioral.find_correlated_sessions(pcs_JFL_data_lst, tcs_data_lst, 'tcs')

# scalp rows: (all, pearson_r > 0 with p_val < 0.05, pearson_r > 0 with p_val_fdr < 0.05)
(
    len(pcs_JFL_tcs_corrs.query("exp_type == 'scalp'")),
    len(pcs_JFL_tcs_corrs.query("exp_type == 'scalp' and pearson_r > 0 and p_val < 0.05")),
    len(pcs_JFL_tcs_corrs.query("exp_type == 'scalp' and pearson_r > 0 and p_val_fdr < 0.05")),
)

  0%|          | 0/3301 [00:00<?, ?it/s]

(2374, 92, 2)

In [26]:
# pcs_JFL vs scs, from the two list-level tables
pcs_JFL_scs_corrs = analyses_behavioral.find_correlated_sessions(pcs_JFL_data_lst, scs_data_lst, 'scs')

# scalp rows: (all, pearson_r > 0 with p_val < 0.05, pearson_r > 0 with p_val_fdr < 0.05)
(
    len(pcs_JFL_scs_corrs.query("exp_type == 'scalp'")),
    len(pcs_JFL_scs_corrs.query("exp_type == 'scalp' and pearson_r > 0 and p_val < 0.05")),
    len(pcs_JFL_scs_corrs.query("exp_type == 'scalp' and pearson_r > 0 and p_val_fdr < 0.05")),
)

  0%|          | 0/3301 [00:00<?, ?it/s]

(2374, 109, 0)

In [27]:
# reliability of phonetic similarity metrics
# inputs are the list-level PCS tables of the H, J and JFL metrics computed in the cells above
pcs_corrs = analyses_behavioral.pcs_correlations(pcs_H_data_lst, pcs_J_data_lst, pcs_JFL_data_lst)
# between-subject average of those correlations; only this table is saved
pcs_corrs_bsa = analyses_behavioral.pcs_corr_btwn_subj_avg(pcs_corrs)
pcs_corrs_bsa.to_csv('analyses/dataframes/pcs_corrs_bsa.csv', index=False)

  0%|          | 0/3299 [00:00<?, ?it/s]

#### Phonetic-CRL

In [44]:
# phonetic-CRL: dispatch psim_crl_parallel_v1 over the session iterables
# NOTE(review): the saved output of this cell shows a distributed.scheduler "Couldn't gather keys" error - inspect `errors` before aggregating `results`
errors, results = analyses_behavioral.run_parallel_sessions(
    client, analyses_behavioral.psim_crl_parallel_v1,
    sub_iter, exp_iter, sess_iter, loc_iter, mont_iter,
)

2024-09-30 13:26:28,624 - distributed.scheduler - ERROR - Couldn't gather keys {'psim_crl_parallel_v1-1da758257b8f9c34cd2ef73bd5f6178c': ['tcp://192.168.86.133:46676'], 'psim_crl_parallel_v1-3fb62977e9631d36bba3b34eac415e4a': ['tcp://192.168.86.145:44556'], 'psim_crl_parallel_v1-f4ea6ef714f4434f29b2382dd9ae0f24': ['tcp://192.168.86.136:44666'], 'psim_crl_parallel_v1-159d3f752c8d282f8e36624b55c6d3e3': ['tcp://192.168.86.135:38065'], 'psim_crl_parallel_v1-19716e1baf14543e968ebb2ae10c49fe': ['tcp://192.168.86.136:44666'], 'psim_crl_parallel_v1-891ed6ecd9de7d41aa43bae53c90b796': ['tcp://192.168.86.135:35975'], 'psim_crl_parallel_v1-4e7377d494c16ebe0f2f3b36b42c71d7': ['tcp://192.168.86.137:40782'], 'psim_crl_parallel_v1-e72ed6949dc0bcc61147438ad3b43b40': ['tcp://192.168.86.144:44426'], 'psim_crl_parallel_v1-c08940b1b080199391d121843d0f0387': ['tcp://192.168.86.136:42325'], 'psim_crl_parallel_v1-49cc6f134416e7499acc85b28cac4ea8': ['tcp://192.168.86.133:40741'], 'psim_crl_parallel_v1-52f80453

In [49]:
# `results` must still hold the output of the psim_crl_parallel_v1 run directly above
# trial level -> session average -> between-subject average
psim_crl_H_data_tr = pd.concat(results, ignore_index=True)
psim_crl_H_data = analyses_behavioral.psim_crl_sess_avg_v1(psim_crl_H_data_tr)
psim_crl_H_data_bsa = analyses_behavioral.psim_crl_btwn_subj_avg_v1(psim_crl_H_data)

# save all three tables (trial-level file name, default empty idx)
store_results_v1(psim_crl_H_data_tr, psim_crl_H_data, psim_crl_H_data_bsa,
                 nom='psim_crl', tr_lst_toggle=True)

In [50]:
# bin phonetic similarities (within exp_type)
def bin_phonetic_similarities(psim_crl_v2_data_tr):
    """
    Add a 'bin' column that groups trials by phonetic similarity, computed
    separately for each exp_type.

    Within the 'intracranial' rows and within the 'scalp' rows:
      * rows with psim == 0 all get bin 0
      * the remaining rows are split into 5 quantile bins of psim,
        labelled 1 through 5

    Parameters
    ----------
    psim_crl_v2_data_tr : pd.DataFrame
        Trial-level data with at least the columns 'exp_type' and 'psim'.
        The frame is not modified.

    Returns
    -------
    pd.DataFrame
        Intracranial rows followed by scalp rows, with a fresh 0..n-1 index
        and the added 'bin' column. Rows with any other exp_type are not
        returned.

    Raises
    ------
    ValueError
        Propagated from pd.qcut when the non-zero psim values of a group do
        not yield 5 distinct quantile edges.
    """
    binned = []
    for exp_type in ('intracranial', 'scalp'):
        # explicit copy: the writes below go to an independent frame instead of
        # a selection of the caller's data (no chained-assignment warning)
        data = psim_crl_v2_data_tr[psim_crl_v2_data_tr['exp_type'] == exp_type].copy()
        if data.empty:
            # nothing to bin for this exp_type
            continue

        is_zero = data['psim'] == 0

        # psim = 0 all in one bin
        data.loc[is_zero, 'bin'] = 0

        # 5 quantiles over the non-zero values; skipped when there are none,
        # because quantile edges cannot be derived from an empty selection
        if (~is_zero).any():
            data.loc[~is_zero, 'bin'] = pd.qcut(data.loc[~is_zero, 'psim'], q=5, labels=False) + 1

        binned.append(data)

    if not binned:
        # neither exp_type present: empty result that still carries the 'bin' column
        return psim_crl_v2_data_tr.iloc[:0].assign(bin=float('nan'))

    # concatenate dataframes
    return pd.concat(binned, ignore_index=True)

In [51]:
# Jaccard index: the same similarity function is handed to every session
psim_fxn_iter = [analyses_behavioral.phonetic_sim_J] * len(sub_iter)
errors, results = analyses_behavioral.run_parallel_sessions_fxn_arg(
    client, analyses_behavioral.psim_crl_parallel_v2,
    sub_iter, exp_iter, sess_iter, loc_iter, mont_iter,
    psim_fxn_iter,
)

In [53]:
# `results` must still hold the output of the psim_crl_parallel_v2 run directly above
# trial level data with the similarity bins added in the same step
psim_crl_J_data_tr = bin_phonetic_similarities(pd.concat(results, ignore_index=True))
# session averages, then between-subject average
psim_crl_J_data = analyses_behavioral.psim_crl_sess_avg_v2(psim_crl_J_data_tr)
psim_crl_J_data_bsa = analyses_behavioral.psim_crl_btwn_subj_avg_v2(psim_crl_J_data)

# save all three tables (trial-level file name, metric label 'J')
store_results_v2(psim_crl_J_data_tr, psim_crl_J_data, psim_crl_J_data_bsa,
                 nom='psim_crl', idx='J', tr_lst_toggle=True)

#### Phonetic Intrusions

In [7]:
# binary metric, encoding list
psim_fxn_iter = [analyses_behavioral.phonetic_sim_H] * len(sub_iter)
# one seed per session: its position in the iterables
seed_iter = list(range(len(sub_iter)))
# NOTE(review): a function iterable is passed through run_parallel_sessions here, while the pcs J/JFL cells use run_parallel_sessions_fxn_arg for that - confirm this is intended
errors, results = analyses_behavioral.run_parallel_sessions(
    client, analyses_behavioral.psim_intr_parallel_l,
    sub_iter, exp_iter, sess_iter, loc_iter, mont_iter,
    psim_fxn_iter, seed_iter,
)

In [14]:
# `results` must still hold the output of the binary-metric encoding-list run directly above
# trial level -> session averages -> between-subject average
psim_intr_l_H_data_tr = pd.concat(results, ignore_index=True)
psim_intr_l_H_data = analyses_behavioral.psim_intr_sess_avg_l(psim_intr_l_H_data_tr)
psim_intr_l_H_data_bsa = analyses_behavioral.psim_intr_btwn_subj_avg_l(psim_intr_l_H_data)

# save all three tables (trial-level file name, default empty idx)
store_results_v1(psim_intr_l_H_data_tr, psim_intr_l_H_data, psim_intr_l_H_data_bsa,
                 nom='psim_intr_l', tr_lst_toggle=True)

In [16]:
# Jaccard index, encoding list
psim_fxn_iter = [analyses_behavioral.phonetic_sim_J] * len(sub_iter)
# one seed per session: its position in the iterables
seed_iter = list(range(len(sub_iter)))
# NOTE(review): a function iterable is passed through run_parallel_sessions here, while the pcs J/JFL cells use run_parallel_sessions_fxn_arg for that - confirm this is intended
errors, results = analyses_behavioral.run_parallel_sessions(
    client, analyses_behavioral.psim_intr_parallel_l,
    sub_iter, exp_iter, sess_iter, loc_iter, mont_iter,
    psim_fxn_iter, seed_iter,
)

In [20]:
# `results` must still hold the output of the Jaccard encoding-list run directly above
# trial level -> session averages -> between-subject average
psim_intr_l_J_data_tr = pd.concat(results, ignore_index=True)
psim_intr_l_J_data = analyses_behavioral.psim_intr_sess_avg_l(psim_intr_l_J_data_tr)
psim_intr_l_J_data_bsa = analyses_behavioral.psim_intr_btwn_subj_avg_l(psim_intr_l_J_data)

# save all three tables (trial-level file name, metric label 'J')
store_results_v2(psim_intr_l_J_data_tr, psim_intr_l_J_data, psim_intr_l_J_data_bsa,
                 nom='psim_intr_l', idx='J', tr_lst_toggle=True)

In [10]:
# binary metric, preceding recall
psim_fxn_iter = [analyses_behavioral.phonetic_sim_H] * len(sub_iter)
# NOTE(review): the Jaccard preceding-recall cell dispatches this same worker through run_parallel_sessions_fxn_arg - confirm which helper is intended
errors, results = analyses_behavioral.run_parallel_sessions(
    client, analyses_behavioral.psim_intr_parallel_r,
    sub_iter, exp_iter, sess_iter, loc_iter, mont_iter,
    psim_fxn_iter,
)

In [14]:
# `results` must still hold the output of the binary-metric preceding-recall run directly above
# trial level -> session averages -> between-subject average
psim_intr_r_H_data_tr = pd.concat(results, ignore_index=True)
psim_intr_r_H_data = analyses_behavioral.psim_intr_sess_avg_r(psim_intr_r_H_data_tr)
psim_intr_r_H_data_bsa = analyses_behavioral.psim_intr_btwn_subj_avg_r(psim_intr_r_H_data)

# save all three tables (trial-level file name, default empty idx)
store_results_v1(psim_intr_r_H_data_tr, psim_intr_r_H_data, psim_intr_r_H_data_bsa,
                 nom='psim_intr_r', tr_lst_toggle=True)

In [15]:
# Jaccard index, preceding recall
psim_fxn_iter = [analyses_behavioral.phonetic_sim_J] * len(sub_iter)
# NOTE(review): the binary-metric preceding-recall cell dispatches this same worker through run_parallel_sessions - confirm which helper is intended
errors, results = analyses_behavioral.run_parallel_sessions_fxn_arg(
    client, analyses_behavioral.psim_intr_parallel_r,
    sub_iter, exp_iter, sess_iter, loc_iter, mont_iter,
    psim_fxn_iter,
)

In [16]:
# show what failed in the run above; the saved output lists one row (LTP326_ltpFR2_19, FileNotFoundError), so `results` does not cover every session
errors

Unnamed: 0_level_0,exception,traceback_obj
param,Unnamed: 1_level_1,Unnamed: 2_level_1
LTP326_ltpFR2_19,FileNotFoundError('Unable to find the requeste...,<traceback object at 0x2aea4489a340>


In [17]:
# `results` must still hold the output of the Jaccard preceding-recall run above
# trial level -> session averages -> between-subject average
psim_intr_r_J_data_tr = pd.concat(results, ignore_index=True)
psim_intr_r_J_data = analyses_behavioral.psim_intr_sess_avg_r(psim_intr_r_J_data_tr)
psim_intr_r_J_data_bsa = analyses_behavioral.psim_intr_btwn_subj_avg_r(psim_intr_r_J_data)

# save all three tables (trial-level file name, metric label 'J')
store_results_v2(psim_intr_r_J_data_tr, psim_intr_r_J_data, psim_intr_r_J_data_bsa,
                 nom='psim_intr_r', idx='J', tr_lst_toggle=True)

#### Recall Probability

In [37]:
# recall probability: dispatch p_recall_parallel over the session iterables
errors, results = analyses_behavioral.run_parallel_sessions(
    client, analyses_behavioral.p_recall_parallel,
    sub_iter, exp_iter, sess_iter, loc_iter, mont_iter,
)

In [38]:
# list level data: stack the per-session frames (`results` must still hold the p_recall_parallel output from the cell above)
p_recall_data_lst = pd.concat(results, ignore_index=True)
p_recall_data_lst.to_csv('analyses/dataframes/p_recall_data_lst.csv', index=False)

# session averages, derived from the list level table
p_recall_data = analyses_behavioral.p_recall_sess_avg(p_recall_data_lst)
p_recall_data.to_csv('analyses/dataframes/p_recall_data.csv', index=False)

# between-subject average, derived from the session averages
p_recall_data_bsa = analyses_behavioral.p_recall_btwn_subj_avg(p_recall_data)
p_recall_data_bsa.to_csv('analyses/dataframes/p_recall_data_bsa.csv', index=False)

#### Correlations with Recall Probability

In [39]:
# combine the list-level tables of pcs (H and J), tcs, scs and recall probability; all five must already exist from the cells above
df_beh = analyses_behavioral.aggregate_data_beh(pcs_H_data_lst, pcs_J_data_lst, tcs_data_lst, scs_data_lst, p_recall_data_lst)
df_beh.to_csv('analyses/dataframes/cl_pr.csv', index=False)

# correlations computed from the combined table, then their between-subject average; only the average is saved
p_recall_corrs = analyses_behavioral.p_recall_correlations(df_beh)
p_recall_corrs_bsa = analyses_behavioral.p_recall_corr_btwn_subj_avg(p_recall_corrs)
p_recall_corrs_bsa.to_csv('analyses/dataframes/p_recall_corrs_bsa.csv', index=False)

  0%|          | 0/3299 [00:00<?, ?it/s]