## Analyses

In [1]:
# imports
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import scipy.stats
import analyses.analyses as analyses
import warnings; warnings.filterwarnings('ignore')

In [2]:
# all processed data
# Processed dataset that is passed to the analysis functions in the cells below.
# The path is relative, so it resolves against the kernel's working directory.
dataframe = pd.read_csv('data/data_storage/dataframe.csv')

# wordpool
# wordpool is read as strings and converted to a list; w2v_scores is a numeric array.
# presumably w2v_scores holds word2vec similarity scores aligned with wordpool — TODO confirm
wordpool = list(np.loadtxt('data/wordpool/wordpool_ltpFR3.txt', dtype=str))
w2v_scores = np.loadtxt('data/wordpool/w2v_scores_ltpFR3.txt')

# conditions
# Maps a float key to a condition label; passed to nearly every analyses.* call below.
# presumably labels read '<list length>-<presentation rate>' (cf. the 'l_length' and
# 'pres_rate' columns used in section C) — TODO confirm
condition_map = {
    0.0: '10-2',
    1.0: '20-1',
    2.0: '15-2',
    3.0: '20-2',
    4.0: '30-1',
    5.0: '40-1'
}

### (A) All Data Together

#### Standard Free Recall Analyses

1. Serial Position Curve
2. Primacy and Recency Effect
3. Probability of First Recall
4. Primacy and Recency Initiation Bias


#### Motivation for Recall Initiation Groups

5. Correlation of PFR and SPC
6. Within Session Recall Initiation Variance
7. Change in Recall Initiation Serial Position Across Sessions

#### A.1) Serial Position Curve

In [3]:
# Serial position curve computed on the full `dataframe` (no recall-initiation labels yet).
# presumably "btwn_subj_avg" denotes a between-subject average of the first table — TODO confirm
spc_data_all = analyses.spc_all(dataframe, condition_map)
spc_data_bsa_all = analyses.spc_btwn_subj_avg_all(spc_data_all)
# Save the averaged table; index=False omits the row index from the CSV.
spc_data_bsa_all.to_csv('analyses/dataframes/spc_data_bsa_all.csv', index=False)

  0%|          | 0/1660 [00:00<?, ?it/s]

  0%|          | 0/458 [00:00<?, ?it/s]

#### A.2) Primacy and Recency Effect

In [4]:
# Apply analyses.prim_rec_lr to every row (axis=1) of the averaged SPC table.
# presumably this derives primacy and recency effect measures per row — TODO confirm in analyses.py
spc_prim_rec_lr_all = spc_data_bsa_all.apply(analyses.prim_rec_lr, axis=1)
# Save the row-wise results; index=False omits the row index from the CSV.
spc_prim_rec_lr_all.to_csv('analyses/dataframes/spc_prim_rec_lr_all.csv', index=False)

#### A.3) Probability of First Recall

In [5]:
# all trials, including R1 intrusions
# (toggle=False; the toggle=True variant restricts to trials initiated with a correct recall)
pfr_data_all = analyses.pfr_all(dataframe, condition_map, toggle=False)
pfr_data_bsa_all = analyses.pfr_btwn_subj_avg_all(pfr_data_all)
# Save the averaged table; index=False omits the row index from the CSV.
pfr_data_bsa_all.to_csv('analyses/dataframes/pfr_data_bsa_all.csv', index=False)

  0%|          | 0/1660 [00:00<?, ?it/s]

  0%|          | 0/458 [00:00<?, ?it/s]

In [6]:
# only trials initiated with correct recall
# (toggle=True; the toggle=False variant keeps all trials, including R1 intrusions)
pfr_data_only_cr_all = analyses.pfr_all(dataframe, condition_map, toggle=True)
pfr_data_only_cr_bsa_all = analyses.pfr_btwn_subj_avg_all(pfr_data_only_cr_all)
# This averaged table is reused later for the R1 variance analysis and the group partition.
pfr_data_only_cr_bsa_all.to_csv('analyses/dataframes/pfr_data_only_cr_bsa_all.csv', index=False)

  0%|          | 0/1660 [00:00<?, ?it/s]

  0%|          | 0/458 [00:00<?, ?it/s]

#### A.4) Primacy and Recency Initiation Bias

In [7]:
# Primacy/recency initiation bias, derived from the all-trials PFR table (toggle=False variant).
# NOTE(review): how the bias is quantified is defined in analyses.pfr_primacy_recency_bias — not visible here.
prim_rec_pfr = analyses.pfr_primacy_recency_bias(pfr_data_bsa_all)
# Save the result; index=False omits the row index from the CSV.
prim_rec_pfr.to_csv('analyses/dataframes/prim_rec_pfr.csv', index=False)

0it [00:00, ?it/s]

#### A.5) Correlation of PFR and SPC

In [8]:
# Relate PFR to SPC using the two averaged tables; the PFR input is the all-trials (toggle=False) variant.
# NOTE(review): the correlation method is defined inside analyses.pfr_spc_correlation — not visible here.
pfr_spc_corrs = analyses.pfr_spc_correlation(pfr_data_bsa_all, spc_data_bsa_all)
# Save the result; index=False omits the row index from the CSV.
pfr_spc_corrs.to_csv('analyses/dataframes/pfr_spc_corrs.csv', index=False)

#### A.6) Within Session Recall Initiation Variance

In [9]:
# Within-session variance of the first recall (R1), per the section heading.
# The correct-recall-only PFR table is passed as a third input; its role is defined in
# analyses.r1_variance (not visible here).
r1_var_data = analyses.r1_variance(dataframe, condition_map, pfr_data_only_cr_bsa_all)
r1_var_data_bsa = analyses.r1_variance_btwn_subj_avg(r1_var_data)
# Save the averaged table; index=False omits the row index from the CSV.
r1_var_data_bsa.to_csv('analyses/dataframes/r1_var_data_bsa.csv', index=False)

  0%|          | 0/1660 [00:00<?, ?it/s]

  0%|          | 0/458 [00:00<?, ?it/s]

#### A.7) Change in Recall Initiation Serial Position Across Sessions

In [10]:
# Change in recall-initiation serial position across sessions (per the section heading),
# computed on the full `dataframe`.
# presumably "dec" refers to a decrease/decline measure — TODO confirm in analyses.py
r1_sp_dec_data = analyses.r1_sp_dec(dataframe, condition_map)
r1_sp_dec_data_bsa = analyses.r1_sp_dec_btwn_subj_avg(r1_sp_dec_data)
# Save the averaged table; index=False omits the row index from the CSV.
r1_sp_dec_data_bsa.to_csv('analyses/dataframes/r1_sp_dec_data_bsa.csv', index=False)

  0%|          | 0/1660 [00:00<?, ?it/s]

  0%|          | 0/458 [00:00<?, ?it/s]

### (B) Recall Initiation Groups

#### Partition by PFR and Apply Labels

#### Re-Run Serial Position Analyses
1. Serial Position Curve
2. Probability of First Recall

#### Recall Performance
3. Mean Words Recalled

#### Recall Initiation
4. Proportion of Lists Initiated with Intrusion
5. Probability of 2nd Recall Following Intrusion
6. Initial Response Times

#### Recall Dynamics
7. Intrusion Rates (ELI, PLI)
8. Inter-Response Times
9. Temporal Clustering Score
10. Lag-CRP
11. Semantic Clustering Score

In [11]:
# partition groups by PFR
# only trials initiated with correct recall
# analyses.r1_groups_partition is applied to each row (axis=1) of the correct-recall-only
# PFR table; the result carries the 'strat' label column that is counted in the next cell.
r1_groups = pfr_data_only_cr_bsa_all.apply(analyses.r1_groups_partition, axis=1)
# Save the group assignments; index=False omits the row index from the CSV.
r1_groups.to_csv('analyses/dataframes/r1_groups.csv', index=False)

In [12]:
# Group sizes: non-null rows per 'strat' label overall, and per ('condition', 'strat') pair.
# The cell's last expression is a tuple, so both Series are displayed together.
r1_groups.groupby('strat')['strat'].count(), r1_groups.groupby(['condition', 'strat'])['strat'].count()

(strat
 ns      137
 prim    175
 rec     146
 Name: strat, dtype: int64,
 condition  strat
 10-2       ns       21
            prim     29
            rec      20
 15-2       ns       32
            prim     39
            rec      22
 20-1       ns       23
            prim     24
            rec      24
 20-2       ns       19
            prim     32
            rec      24
 30-1       ns       22
            prim     25
            rec      29
 40-1       ns       20
            prim     26
            rec      27
 Name: strat, dtype: int64)

In [13]:
# apply recall initiation group labels to dataframe
# df_strat is the labelled dataset used as input by the analyses that follow.
# NOTE(review): how labels are joined onto trials is defined in
# analyses.apply_recall_initiation_labels — not visible here.
df_strat = analyses.apply_recall_initiation_labels(r1_groups, dataframe)
# Stored next to the processed data (not under analyses/dataframes); no index column.
df_strat.to_csv('data/data_storage/df_strat.csv', index=False)

#### B.1) Serial Position Curve

In [14]:
# Serial position curve re-run on the labelled dataset (df_strat).
# presumably "btwn_subj_avg" denotes a between-subject average of the first table — TODO confirm
spc_data = analyses.spc(df_strat, condition_map)
spc_data_bsa = analyses.spc_btwn_subj_avg(spc_data)
# Save the averaged table; index=False omits the row index from the CSV.
spc_data_bsa.to_csv('analyses/dataframes/spc_data_bsa.csv', index=False)

  0%|          | 0/1660 [00:00<?, ?it/s]

  0%|          | 0/458 [00:00<?, ?it/s]

#### B.2) Probability of First Recall

In [15]:
# all trials, including R1 intrusions
# Probability of first recall re-run on the labelled dataset (df_strat) with toggle=False.
pfr_data = analyses.pfr(df_strat, condition_map, toggle=False)
pfr_data_bsa = analyses.pfr_btwn_subj_avg(pfr_data)
# Save the averaged table; index=False omits the row index from the CSV.
pfr_data_bsa.to_csv('analyses/dataframes/pfr_data_bsa.csv', index=False)

  0%|          | 0/1660 [00:00<?, ?it/s]

  0%|          | 0/458 [00:00<?, ?it/s]

#### B.3) Mean Words Recalled

In [16]:
# Mean words recalled (per the section heading) on the labelled dataset.
# mwr_data_bsa (with its 'condition' and 'mwr' columns) is reused later to derive the
# per-condition recall-count window for the final-recalls IRT analysis.
mwr_data = analyses.mwr(df_strat, condition_map)
mwr_data_bsa = analyses.mwr_btwn_subj_avg(mwr_data)
# Save the averaged table; index=False omits the row index from the CSV.
mwr_data_bsa.to_csv('analyses/dataframes/mwr_data_bsa.csv', index=False)

  0%|          | 0/1660 [00:00<?, ?it/s]

  0%|          | 0/458 [00:00<?, ?it/s]

#### B.4) Proportion of Lists Initiated with Intrusion

In [17]:
# Proportion of lists initiated with an intrusion (per the section heading), on the labelled dataset.
# NOTE(review): which intrusion types count is defined in analyses.r1_intrusion — not visible here.
r1_intr_data = analyses.r1_intrusion(df_strat, condition_map)
r1_intr_data_bsa = analyses.r1_intr_btwn_subj_avg(r1_intr_data)
# Save the averaged table; index=False omits the row index from the CSV.
r1_intr_data_bsa.to_csv('analyses/dataframes/r1_intr_data_bsa.csv', index=False)

  0%|          | 0/1660 [00:00<?, ?it/s]

  0%|          | 0/458 [00:00<?, ?it/s]

#### B.5) Probability of 2nd Recall Following Intrusion

In [18]:
# Probability of second recall following an intrusion (per the section heading), on the labelled dataset.
# NOTE(review): the conditioning on the first response is defined in analyses.p2r_intr — not visible here.
p2r_intr_data = analyses.p2r_intr(df_strat, condition_map)
p2r_intr_data_bsa = analyses.p2r_intr_btwn_subj_avg(p2r_intr_data)
# Save the averaged table; index=False omits the row index from the CSV.
p2r_intr_data_bsa.to_csv('analyses/dataframes/p2r_intr_data_bsa.csv', index=False)

  0%|          | 0/1660 [00:00<?, ?it/s]

  0%|          | 0/458 [00:00<?, ?it/s]

#### B.6) Initial Response Times

In [19]:
# individual trials (histogram)
# only trials initiated with correct recall
# This table is saved without an averaging step and is reused as the input to
# analyses.rt_init_ns in section C.2.
rti_at_data_only_cr = analyses.rt_init_all_trials(df_strat, condition_map, toggle=True)
rti_at_data_only_cr.to_csv('analyses/dataframes/rti_at_data_only_cr.csv', index=False)

  0%|          | 0/1660 [00:00<?, ?it/s]

In [22]:
# averages
# only trials initiated with correct recall
# Initial response times (toggle=True), followed by the "btwn_subj_avg" step
# (presumably a between-subject average — TODO confirm).
rti_data_only_cr = analyses.rt_init(df_strat, condition_map, toggle=True)
rti_data_only_cr_bsa = analyses.rti_btwn_subj_avg(rti_data_only_cr)
# Save the averaged table; index=False omits the row index from the CSV.
rti_data_only_cr_bsa.to_csv('analyses/dataframes/rti_data_only_cr_bsa.csv', index=False)

  0%|          | 0/1660 [00:00<?, ?it/s]

  0%|          | 0/458 [00:00<?, ?it/s]

#### B.7) Intrusion Rates (ELI, PLI)

In [23]:
# only lists initiated with a correct recall
# Intrusion rates (ELI, PLI per the section heading) computed with toggle=True.
intr_data_only_cr = analyses.intrusion_rates(df_strat, condition_map, toggle=True)
intr_data_only_cr_bsa = analyses.intrusion_rates_btwn_subj_avg(intr_data_only_cr)
# Save the averaged table; index=False omits the row index from the CSV.
intr_data_only_cr_bsa.to_csv('analyses/dataframes/intr_data_only_cr_bsa.csv', index=False)

  0%|          | 0/1660 [00:00<?, ?it/s]

  0%|          | 0/458 [00:00<?, ?it/s]

#### B.8) Inter-Response Times

In [24]:
# joint function of number of correct recalls and output position
# Inter-response times on the labelled dataset.
# NOTE(review): the binning by recall count and output position happens inside
# analyses.irt / irt_btwn_subj_avg — not visible here.
irt_data = analyses.irt(df_strat, condition_map)
irt_data_bsa = analyses.irt_btwn_subj_avg(irt_data)
# Save the averaged table; index=False omits the row index from the CSV.
irt_data_bsa.to_csv('analyses/dataframes/irt_data_bsa.csv', index=False)

  0%|          | 0/1660 [00:00<?, ?it/s]

  0%|          | 0/4206 [00:00<?, ?it/s]

In [27]:
# final N recalls of trial
# only trials with mu +/- sig correct recalls
#
# For each condition in mwr_data_bsa, take the mean and the sample standard
# deviation (ddof=1) of the 'mwr' column. The rows are collected under their own
# name so that mwr_md is only ever a DataFrame, and the comprehension keeps the
# loop variables out of the notebook namespace.
mwr_md_rows = [
    (cond_label, cond_rows['mwr'].mean(), cond_rows['mwr'].std(ddof=1))
    for cond_label, cond_rows in mwr_data_bsa.groupby('condition')
]
mwr_md = pd.DataFrame(mwr_md_rows, columns=['condition', 'mwr', 'stdev'])

# Integer window around the mean: lb = floor(mean - sd), ub = ceil(mean + sd).
# NOTE(review): a condition with a single row has an undefined sample sd (NaN),
# which makes astype(int) raise — confirm every condition has at least two rows.
mwr_md['lb'] = np.floor(mwr_md['mwr'] - mwr_md['stdev']).astype(int)
mwr_md['ub'] = np.ceil(mwr_md['mwr'] + mwr_md['stdev']).astype(int)

# mwr_md is passed to both steps; presumably it restricts trials to those whose
# number of correct recalls lies within [lb, ub] — TODO confirm in analyses.irt_final.
irt_final_data = analyses.irt_final(df_strat, condition_map, mwr_md)
irt_final_data_bsa = analyses.irt_final_btwn_subj_avg(irt_final_data, mwr_md)
# Save the averaged table; index=False omits the row index from the CSV.
irt_final_data_bsa.to_csv('analyses/dataframes/irt_final_data_bsa.csv', index=False)

  0%|          | 0/1660 [00:00<?, ?it/s]

  0%|          | 0/458 [00:00<?, ?it/s]

In [28]:
# total time for each half of correct recalls
# NOTE(review): how the recall sequence is split into halves is defined in
# analyses.irt_tot — not visible here.
irt_tot_data = analyses.irt_tot(df_strat, condition_map)
irt_tot_data_bsa = analyses.irt_tot_btwn_subj_avg(irt_tot_data)
# Save the averaged table; index=False omits the row index from the CSV.
irt_tot_data_bsa.to_csv('analyses/dataframes/irt_tot_data_bsa.csv', index=False)

  0%|          | 0/1660 [00:00<?, ?it/s]

  0%|          | 0/3981 [00:00<?, ?it/s]

#### B.9) Temporal Clustering Score

In [29]:
# all recalls
# Temporal clustering score on the labelled dataset.
# presumably the third argument (0) is the number of initial recalls to exclude,
# i.e. none here — TODO confirm against analyses.tcl
tcl_data = analyses.tcl(df_strat, condition_map, 0)
tcl_data_bsa = analyses.tcl_btwn_subj_avg(tcl_data)
# Save the averaged table; index=False omits the row index from the CSV.
tcl_data_bsa.to_csv('analyses/dataframes/tcl_data_bsa.csv', index=False)

  0%|          | 0/1660 [00:00<?, ?it/s]

  0%|          | 0/458 [00:00<?, ?it/s]

In [30]:
# split half of recalls
# Temporal clustering score computed separately per half of the recall sequence.
# NOTE(review): where the split point falls is defined in analyses.tcl_h — not visible here.
tcl_h_data = analyses.tcl_h(df_strat, condition_map)
tcl_h_data_bsa = analyses.tcl_h_btwn_subj_avg(tcl_h_data)
# Save the averaged table; index=False omits the row index from the CSV.
tcl_h_data_bsa.to_csv('analyses/dataframes/tcl_h_data_bsa.csv', index=False)

  0%|          | 0/1660 [00:00<?, ?it/s]

  0%|          | 0/458 [00:00<?, ?it/s]

#### B.10) Lag-CRP

In [31]:
# full lag-CRP, exclude first 3 recalls
# The third argument (3) is the exclusion count referred to above.
lcrp_data = analyses.lag_crp(df_strat, condition_map, 3)
lcrp_data_bsa = analyses.lag_crp_btwn_subj_avg(lcrp_data)
# Save the averaged table; index=False omits the row index from the CSV.
lcrp_data_bsa.to_csv('analyses/dataframes/lcrp_data_bsa.csv', index=False)

  0%|          | 0/1660 [00:00<?, ?it/s]

  0%|          | 0/458 [00:00<?, ?it/s]

In [32]:
# conditioned on serial position
# presumably the third argument (0) is the number of initial recalls to exclude,
# i.e. none here — TODO confirm against analyses.lag_crp_sp
lcrp_sp_data = analyses.lag_crp_sp(df_strat, condition_map, 0)
lcrp_sp_data_bsa = analyses.lag_crp_sp_btwn_subj_avg(lcrp_sp_data)
# Save the averaged table; index=False omits the row index from the CSV.
lcrp_sp_data_bsa.to_csv('analyses/dataframes/lcrp_sp_data_bsa.csv', index=False)

  0%|          | 0/1660 [00:00<?, ?it/s]

  0%|          | 0/10215 [00:00<?, ?it/s]

#### B.11) Semantic Clustering Score

In [33]:
# Semantic clustering score; wordpool and w2v_scores are the arrays loaded near the top of the notebook.
# presumably the third argument (0) is the number of initial recalls to exclude,
# i.e. none here — TODO confirm against analyses.scl
scl_data = analyses.scl(df_strat, condition_map, 0, wordpool, w2v_scores)
scl_data_bsa = analyses.scl_btwn_subj_avg(scl_data)
# Save the averaged table; index=False omits the row index from the CSV.
scl_data_bsa.to_csv('analyses/dataframes/scl_data_bsa.csv', index=False)

  0%|          | 0/1660 [00:00<?, ?it/s]

  0%|          | 0/458 [00:00<?, ?it/s]

### (C) Other Group Analyses

Within the Other group, compare trials on which recall is initiated with a primacy item vs. a recency item.

1. Mean Words Recalled
2. Initial Response Times
3. Temporal Clustering Score

#### C.1) Mean Words Recalled

In [34]:
# Mean words recalled, split by how the trial was initiated (primacy vs. recency bin).
mwr_ns_data = analyses.mwr_ns(df_strat, condition_map)

# Average, filter and reshape in a single chain.
mwr_ns_data_bsa = (
    analyses.mwr_ns_btwn_subj_avg(mwr_ns_data)
    # only participants with data in both bins (rows containing any NaN are dropped)
    .dropna()
    # re-organize dataframe: one row per participant and bin instead of one column per bin
    .melt(
        id_vars=['subject', 'strategy', 'condition', 'l_length', 'pres_rate'],
        value_vars=['mwr_prim', 'mwr_rec'],
        var_name='r1_label',
        value_name='mwr',
    )
)

# Save the long-format table; index=False omits the row index from the CSV.
mwr_ns_data_bsa.to_csv('analyses/dataframes/mwr_ns_data_bsa.csv', index=False)

  0%|          | 0/489 [00:00<?, ?it/s]

  0%|          | 0/137 [00:00<?, ?it/s]

#### C.2) Initial Response Times

In [36]:
# Initial response times, split by how the trial was initiated (primacy vs. recency bin).
# Input is the trial-by-trial table of correct-recall-initiated trials computed in B.6.
rti_ns_data = analyses.rt_init_ns(rti_at_data_only_cr)

# Average, filter and reshape in a single chain.
rti_ns_data_bsa = (
    analyses.rti_ns_btwn_subj_avg(rti_ns_data)
    # only participants with data in both bins (rows containing any NaN are dropped)
    .dropna()
    # re-organize dataframe: one row per participant and bin instead of one column per bin
    .melt(
        id_vars=['subject', 'strategy', 'condition', 'l_length', 'pres_rate'],
        value_vars=['rt_prim', 'rt_rec'],
        var_name='r1_label',
        value_name='rt',
    )
)

# Save the long-format table; index=False omits the row index from the CSV.
rti_ns_data_bsa.to_csv('analyses/dataframes/rti_ns_data_bsa.csv', index=False)

  0%|          | 0/489 [00:00<?, ?it/s]

  0%|          | 0/137 [00:00<?, ?it/s]

#### C.3) Temporal Clustering Score

In [37]:
# Temporal clustering score, split by how the trial was initiated (primacy vs. recency bin).
tcl_ns_data = analyses.tcl_ns(df_strat, condition_map)

# Average, filter and reshape in a single chain.
tcl_ns_data_bsa = (
    analyses.tcl_ns_btwn_subj_avg(tcl_ns_data)
    # only participants with data in both bins (rows containing any NaN are dropped)
    .dropna()
    # re-organize dataframe: one row per participant and bin instead of one column per bin
    .melt(
        id_vars=['subject', 'strategy', 'condition', 'l_length', 'pres_rate'],
        value_vars=['tcl_prim', 'tcl_rec'],
        var_name='r1_label',
        value_name='tcl',
    )
)

# Save the long-format table; index=False omits the row index from the CSV.
tcl_ns_data_bsa.to_csv('analyses/dataframes/tcl_ns_data_bsa.csv', index=False)

  0%|          | 0/489 [00:00<?, ?it/s]

  0%|          | 0/137 [00:00<?, ?it/s]