## Analyses

In [1]:
# imports
import numpy as np
import pandas as pd; pd.set_option('display.max_columns', None)
import cmlreaders as cml
from analyses import analyses_behavioral
import warnings; warnings.filterwarnings("ignore")

[nltk_data] Downloading package cmudict to
[nltk_data]     /home1/hherrema/nltk_data...
[nltk_data]   Package cmudict is already up-to-date!


In [2]:
# Load the data index via cmlreaders.
df = cml.get_data_index()

# Intracranial subset: the three listed experiments.
df_intrac = df.loc[df['experiment'].isin(['FR1', 'pyFR', 'IFR1'])]

# Scalp subset: ltpFR2, leaving out session 23.
# NOTE(review): the reason for excluding session 23 is not recorded here.
df_scalp = df.loc[df['experiment'].eq('ltpFR2') & df['session'].ne(23)]

# One table with both subsets, re-indexed from 0.
df_select = pd.concat((df_intrac, df_scalp), ignore_index=True)

#### Parallel Computing

In [3]:
# Create the client that every run_parallel_sessions call below receives.
# NOTE(review): the positional arguments look like (job name, memory per job,
# max jobs) — confirm against analyses_behavioral.create_client.
client = analyses_behavioral.create_client(
    'pcs',
    '3GB',
    300,
)

Unique port for hherrema is 51465
{'dashboard_address': ':51465'}
To view the dashboard, run: 
`ssh -fN hherrema@rhino2.psych.upenn.edu -L 8000:192.168.86.106:51465` in your local computer's terminal (NOT rhino) 
and then navigate to localhost:8000 in your browser


In [4]:
# Build the five argument sequences that are passed, in this order, to every
# run_parallel_sessions call below.
(
    sub_iter,
    exp_iter,
    sess_iter,
    loc_iter,
    mont_iter,
) = analyses_behavioral.build_iterables(df_select)

0it [00:00, ?it/s]

In [15]:
# NOTE(review): this expression only displays the bound method; it does not call
# it, so nothing is shut down here. If the intent is to release the workers, call
# `client.shutdown()` in a cell placed after the last run_parallel_sessions call —
# calling it at this position would break the analysis cells below on Run All.
client.shutdown

<bound method Client.shutdown of <Client: 'tcp://192.168.86.107:43485' processes=0 threads=0, memory=0 B>>

#### Phonetic Clustering Score

In [6]:
# Phonetic clustering score (v1): hand pcs_parallel_v1 and the session
# iterables to run_parallel_sessions.
# NOTE(review): `errors` is never inspected in the visible cells.
errors, results = analyses_behavioral.run_parallel_sessions(
    client,
    analyses_behavioral.pcs_parallel_v1,
    sub_iter, exp_iter, sess_iter, loc_iter, mont_iter,
)

In [7]:
# Stack whatever is currently in `results` (expected: the pcs_parallel_v1 run
# from the previous cell) into a single table and write it to disk.
pcs_v1_data = pd.concat(objs=results, ignore_index=True)
pcs_v1_data.to_csv(
    'analyses/dataframes/pcs_v1_data.csv',
    index=False,
)

# Between-subject average, saved next to the raw table.
pcs_v1_data_bsa = analyses_behavioral.pcs_btwn_subj_avg(pcs_v1_data)
pcs_v1_data_bsa.to_csv(
    'analyses/dataframes/pcs_v1_data_bsa.csv',
    index=False,
)

In [8]:
# Phonetic clustering score (v2): hand pcs_parallel_v2 and the session
# iterables to run_parallel_sessions. This overwrites `errors` / `results`.
# NOTE(review): `errors` is never inspected in the visible cells.
errors, results = analyses_behavioral.run_parallel_sessions(
    client,
    analyses_behavioral.pcs_parallel_v2,
    sub_iter, exp_iter, sess_iter, loc_iter, mont_iter,
)

In [9]:
# Stack whatever is currently in `results` (expected: the pcs_parallel_v2 run
# from the previous cell) into a single table and write it to disk.
pcs_v2_data = pd.concat(objs=results, ignore_index=True)
pcs_v2_data.to_csv(
    'analyses/dataframes/pcs_v2_data.csv',
    index=False,
)

# Between-subject average, saved next to the raw table.
pcs_v2_data_bsa = analyses_behavioral.pcs_btwn_subj_avg(pcs_v2_data)
pcs_v2_data_bsa.to_csv(
    'analyses/dataframes/pcs_v2_data_bsa.csv',
    index=False,
)

#### Phonetic-CRL

In [10]:
# Phonetic-CRL (v1): hand psim_crl_parallel_v1 and the session iterables to
# run_parallel_sessions. This overwrites `errors` / `results`.
# NOTE(review): `errors` is never inspected in the visible cells.
errors, results = analyses_behavioral.run_parallel_sessions(
    client,
    analyses_behavioral.psim_crl_parallel_v1,
    sub_iter, exp_iter, sess_iter, loc_iter, mont_iter,
)

In [11]:
# Stack whatever is currently in `results` (expected: the psim_crl_parallel_v1
# run from the previous cell) into a single table and write it to disk.
psim_crl_v1_data = pd.concat(objs=results, ignore_index=True)
psim_crl_v1_data.to_csv(
    'analyses/dataframes/psim_crl_v1_data.csv',
    index=False,
)

# Between-subject average, saved next to the raw table.
psim_crl_v1_data_bsa = analyses_behavioral.psim_crl_btwn_subj_avg_v1(psim_crl_v1_data)
psim_crl_v1_data_bsa.to_csv(
    'analyses/dataframes/psim_crl_v1_data_bsa.csv',
    index=False,
)

In [12]:
# Phonetic-CRL (v2): hand psim_crl_parallel_v2 and the session iterables to
# run_parallel_sessions. This overwrites `errors` / `results`.
# NOTE(review): `errors` is never inspected in the visible cells.
errors, results = analyses_behavioral.run_parallel_sessions(
    client,
    analyses_behavioral.psim_crl_parallel_v2,
    sub_iter, exp_iter, sess_iter, loc_iter, mont_iter,
)

In [13]:
# Stack whatever is currently in `results` (expected: the psim_crl_parallel_v2
# run from the previous cell) and save the table before any binning is added.
psim_crl_v2_data = pd.concat(results, ignore_index=True)
psim_crl_v2_data.to_csv('analyses/dataframes/psim_crl_v2_data.csv', index=False)

# bin phonetic similarities (within exp_type)
# .copy() gives each subset its own data, so the 'bin' assignments below are
# plain column writes. Without it pandas flags them with SettingWithCopyWarning,
# which the notebook-wide warnings filter hides.
psim_crl_v2_data_intrac = psim_crl_v2_data.query("exp_type == 'intracranial'").copy()
psim_crl_v2_data_scalp = psim_crl_v2_data.query("exp_type == 'scalp'").copy()

mask_i = psim_crl_v2_data_intrac['psim'] == 0
mask_s = psim_crl_v2_data_scalp['psim'] == 0

# psim = 0 all in one bin
psim_crl_v2_data_intrac.loc[mask_i, 'bin'] = 0
psim_crl_v2_data_scalp.loc[mask_s, 'bin'] = 0

# 5 quantiles over the non-zero similarities, computed separately per exp_type;
# labels=False yields 0..4, shifted to 1..5 so bin 0 stays reserved for psim == 0
psim_crl_v2_data_intrac.loc[~mask_i, 'bin'] = pd.qcut(psim_crl_v2_data_intrac.loc[~mask_i, 'psim'], q=5, labels=False) + 1
psim_crl_v2_data_scalp.loc[~mask_s, 'bin'] = pd.qcut(psim_crl_v2_data_scalp.loc[~mask_s, 'psim'], q=5, labels=False) + 1

# concatenate dataframes
# NOTE(review): this rebinds psim_crl_v2_data to the binned table; rows whose
# exp_type is neither 'intracranial' nor 'scalp' (if any) are not carried over.
psim_crl_v2_data = pd.concat([psim_crl_v2_data_intrac, psim_crl_v2_data_scalp], ignore_index=True)

# between-subject average
psim_crl_v2_data_bsa = analyses_behavioral.psim_crl_btwn_subj_avg_v2(psim_crl_v2_data)
psim_crl_v2_data_bsa.to_csv('analyses/dataframes/psim_crl_v2_data_bsa.csv', index=False)

#### Phonetic Intrusions

In [15]:
# Phonetic intrusions (v1): hand psim_intr_parallel_v1 and the session
# iterables to run_parallel_sessions. This overwrites `errors` / `results`.
# NOTE(review): `errors` is never inspected in the visible cells.
errors, results = analyses_behavioral.run_parallel_sessions(
    client,
    analyses_behavioral.psim_intr_parallel_v1,
    sub_iter, exp_iter, sess_iter, loc_iter, mont_iter,
)

In [16]:
# Stack whatever is currently in `results` (expected: the psim_intr_parallel_v1
# run from the previous cell) into a single table and write it to disk.
psim_intr_v1_data = pd.concat(objs=results, ignore_index=True)
psim_intr_v1_data.to_csv(
    'analyses/dataframes/psim_intr_v1_data.csv',
    index=False,
)

# Between-subject average, saved next to the raw table.
psim_intr_v1_data_bsa = analyses_behavioral.psim_intr_btwn_subj_avg_v1(psim_intr_v1_data)
psim_intr_v1_data_bsa.to_csv(
    'analyses/dataframes/psim_intr_v1_data_bsa.csv',
    index=False,
)

In [17]:
# Phonetic intrusions (v2): hand psim_intr_parallel_v2 and the session
# iterables to run_parallel_sessions. This overwrites `errors` / `results`.
# NOTE(review): `errors` is never inspected in the visible cells.
errors, results = analyses_behavioral.run_parallel_sessions(
    client,
    analyses_behavioral.psim_intr_parallel_v2,
    sub_iter, exp_iter, sess_iter, loc_iter, mont_iter,
)

In [18]:
# Stack whatever is currently in `results` (expected: the psim_intr_parallel_v2
# run from the previous cell) into a single table and write it to disk.
psim_intr_v2_data = pd.concat(objs=results, ignore_index=True)
psim_intr_v2_data.to_csv(
    'analyses/dataframes/psim_intr_v2_data.csv',
    index=False,
)

# Between-subject average, saved next to the raw table.
psim_intr_v2_data_bsa = analyses_behavioral.psim_intr_btwn_subj_avg_v2(psim_intr_v2_data)
psim_intr_v2_data_bsa.to_csv(
    'analyses/dataframes/psim_intr_v2_data_bsa.csv',
    index=False,
)

#### Temporal Clustering Score

In [19]:
# Temporal clustering score: hand tcs_parallel and the session iterables to
# run_parallel_sessions. This overwrites `errors` / `results`.
# NOTE(review): `errors` is never inspected in the visible cells.
errors, results = analyses_behavioral.run_parallel_sessions(
    client,
    analyses_behavioral.tcs_parallel,
    sub_iter, exp_iter, sess_iter, loc_iter, mont_iter,
)

In [20]:
# Stack whatever is currently in `results` (expected: the tcs_parallel run
# from the previous cell) into a single table and write it to disk.
tcs_data = pd.concat(objs=results, ignore_index=True)
tcs_data.to_csv(
    'analyses/dataframes/tcs_data.csv',
    index=False,
)

# Between-subject average, saved next to the raw table.
tcs_data_bsa = analyses_behavioral.tcs_btwn_subj_avg(tcs_data)
tcs_data_bsa.to_csv(
    'analyses/dataframes/tcs_data_bsa.csv',
    index=False,
)

#### Semantic Clustering Score

In [21]:
# Semantic clustering score: hand scs_parallel and the session iterables to
# run_parallel_sessions. This overwrites `errors` / `results`.
# NOTE(review): `errors` is never inspected in the visible cells.
errors, results = analyses_behavioral.run_parallel_sessions(
    client,
    analyses_behavioral.scs_parallel,
    sub_iter, exp_iter, sess_iter, loc_iter, mont_iter,
)

In [22]:
# Stack whatever is currently in `results` (expected: the scs_parallel run
# from the previous cell) into a single table and write it to disk.
scs_data = pd.concat(objs=results, ignore_index=True)
scs_data.to_csv(
    'analyses/dataframes/scs_data.csv',
    index=False,
)

# Between-subject average, saved next to the raw table.
scs_data_bsa = analyses_behavioral.scs_btwn_subj_avg(scs_data)
scs_data_bsa.to_csv(
    'analyses/dataframes/scs_data_bsa.csv',
    index=False,
)

#### Recall Probability

In [23]:
# Recall probability: hand p_recall_parallel and the session iterables to
# run_parallel_sessions. This overwrites `errors` / `results`.
# NOTE(review): `errors` is never inspected in the visible cells.
errors, results = analyses_behavioral.run_parallel_sessions(
    client,
    analyses_behavioral.p_recall_parallel,
    sub_iter, exp_iter, sess_iter, loc_iter, mont_iter,
)

In [24]:
# Stack whatever is currently in `results` (expected: the p_recall_parallel run
# from the previous cell) into a single table and write it to disk.
p_recall_data = pd.concat(objs=results, ignore_index=True)
p_recall_data.to_csv(
    'analyses/dataframes/p_recall_data.csv',
    index=False,
)

# Between-subject average, saved next to the raw table.
p_recall_data_bsa = analyses_behavioral.p_recall_btwn_subj_avg(p_recall_data)
p_recall_data_bsa.to_csv(
    'analyses/dataframes/p_recall_data_bsa.csv',
    index=False,
)

#### First Phoneme Recall Probability

In [25]:
# First-phoneme recall probability: hand ph1_recall_parallel and the session
# iterables to run_parallel_sessions. This overwrites `errors` / `results`.
# NOTE(review): `errors` is never inspected in the visible cells.
errors, results = analyses_behavioral.run_parallel_sessions(
    client,
    analyses_behavioral.ph1_recall_parallel,
    sub_iter, exp_iter, sess_iter, loc_iter, mont_iter,
)

In [26]:
# Stack whatever is currently in `results` (expected: the ph1_recall_parallel
# run from the previous cell) into a single table and write it to disk.
ph1_recall_data = pd.concat(objs=results, ignore_index=True)
ph1_recall_data.to_csv(
    'analyses/dataframes/ph1_recall_data.csv',
    index=False,
)

# Between-subject average, saved next to the raw table.
ph1_recall_data_bsa = analyses_behavioral.ph1_recall_btwn_subj_avg(ph1_recall_data)
ph1_recall_data_bsa.to_csv(
    'analyses/dataframes/ph1_recall_data_bsa.csv',
    index=False,
)

#### Correlation with Phonetic Clustering Scores

In [34]:
# Feed both phonetic-clustering tables to pcs_correlations, average the result
# between subjects, and save only the averaged table.
pcs_corrs = analyses_behavioral.pcs_correlations(
    pcs_v1_data,
    pcs_v2_data,
)
pcs_corrs_bsa = analyses_behavioral.pcs_corr_btwn_subj_avg(pcs_corrs)
pcs_corrs_bsa.to_csv(
    'analyses/dataframes/pcs_corrs_bsa.csv',
    index=False,
)

  0%|          | 0/3284 [00:00<?, ?it/s]

#### Correlations with Recall Probability

In [35]:
# correlate with p_recall over lists within sessions
def p_recall_correlations(df):
    """
    Correlate each clustering score with recall probability, per session.

    Rows of `df` are grouped by session (subject, exp_type, experiment, session,
    localization, montage). Within every group that has more than one row, the
    Pearson r between p_recall and each of pcs_v1, pcs_v2, tcs and scs is
    computed across that group's rows.

    NOTE: the pipeline cell in this notebook calls
    analyses_behavioral.p_recall_correlations, not this notebook-local function.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain the six grouping columns listed above plus the columns
        'pcs_v1', 'pcs_v2', 'tcs', 'scs' and 'p_recall'.

    Returns
    -------
    pd.DataFrame
        One row per group with more than one row: the six grouping columns
        followed by 'r_pcs_v1', 'r_pcs_v2', 'r_tcs', 'r_scs'.
    """
    # Imported here because the notebook's import cell brings in neither scipy
    # nor tqdm; without this the function raises NameError on a fresh kernel.
    import scipy.stats
    try:
        from tqdm.auto import tqdm
    except ImportError:
        # The progress bar is cosmetic; fall back to plain iteration.
        def tqdm(iterable, *args, **kwargs):
            return iterable

    group_cols = ['subject', 'exp_type', 'experiment', 'session', 'localization', 'montage']
    metric_cols = ['pcs_v1', 'pcs_v2', 'tcs', 'scs']

    stats = []
    for group_key, data in tqdm(df.groupby(group_cols)):
        # a correlation needs at least two observations
        if len(data) > 1:
            # Only r is kept. The `alternative` argument affects just the
            # discarded p-value, so it is left at its default.
            r_values = []
            for metric in metric_cols:
                r, _ = scipy.stats.pearsonr(data[metric], data.p_recall)
                r_values.append(r)

            stats.append((*group_key, *r_values))

    # save results as dataframe
    return pd.DataFrame(stats, columns=group_cols + ['r_' + metric for metric in metric_cols])

In [36]:
# Combine the per-list clustering scores and recall probabilities into one table.
df_beh = analyses_behavioral.aggregate_data_beh(
    pcs_v1_data,
    pcs_v2_data,
    tcs_data,
    scs_data,
    p_recall_data,
)

# Correlations use the module's p_recall_correlations (not the notebook-local
# function of the same name); average between subjects and save that table.
p_recall_corrs = analyses_behavioral.p_recall_correlations(df_beh)
p_recall_corrs_bsa = analyses_behavioral.p_recall_corr_btwn_subj_avg(p_recall_corrs)
p_recall_corrs_bsa.to_csv(
    'analyses/dataframes/p_recall_corrs_bsa.csv',
    index=False,
)

  0%|          | 0/3275 [00:00<?, ?it/s]

In [37]:
# Display the aggregated behavioral table (pandas truncates the rendering to
# the first and last rows). Use df_beh.head() for a shorter preview.
df_beh

Unnamed: 0,subject,exp_type,experiment,session,localization,montage,list,pcs_v1,pcs_v2,tcs,scs,l_length,ncr,p_recall
0,R1001P,intracranial,FR1,0,0,0,3,0.818182,1.000000,0.400000,0.400000,12,2,0.166667
1,R1001P,intracranial,FR1,0,0,0,8,0.354545,0.588889,0.500000,0.722222,12,3,0.25
2,R1001P,intracranial,FR1,0,0,0,14,0.133418,0.750000,0.614286,0.408333,12,5,0.416667
3,R1001P,intracranial,FR1,0,0,0,15,-0.146465,0.332986,0.533532,0.830357,12,5,0.416667
4,R1001P,intracranial,FR1,0,0,0,16,-0.090909,0.475000,0.975000,0.666667,12,3,0.25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
65684,LTP393,scalp,ltpFR2,9,0,9,17,-0.173913,0.592045,0.675239,0.509091,24,4,0.166667
65685,LTP393,scalp,ltpFR2,9,0,9,19,-0.082352,0.625890,0.980921,0.423972,24,5,0.208333
65686,LTP393,scalp,ltpFR2,9,0,9,20,0.231738,0.404016,0.664905,0.522384,24,8,0.333333
65687,LTP393,scalp,ltpFR2,9,0,9,22,0.281435,0.713786,0.689741,0.688577,24,5,0.208333


In [38]:
# Save the aggregated behavioral table.
# NOTE(review): this writes to the current working directory, whereas every
# other output in this notebook goes under 'analyses/dataframes/' — confirm
# the location is intentional.
df_beh.to_csv(
    'df_cl_pr.csv',
    index=False,
)