## Meta Analyses

In [1]:
# imports
import pandas as pd
import numpy as np
import json

### Participant Counts

#### Completion of Each Session

In [2]:
# One events table per session (0 = first session, 1-4 = later sessions).
s0, s1, s2, s3, s4 = map(
    pd.read_csv,
    (
        'data/data_storage/events_df_0.csv',
        'data/data_storage/events_df_1.csv',
        'data/data_storage/events_df_2.csv',
        'data/data_storage/events_df_3.csv',
        'data/data_storage/events_df_4.csv',
    ),
)

# Number of distinct worker_id values appearing in each session's events.
tuple(len(events.worker_id.unique()) for events in (s0, s1, s2, s3, s4))

(1184, 547, 463, 401, 353)

#### Used for Analyses

Participants included in the analyses: those who passed all exclusion criteria and had the same condition in their return sessions.

In [3]:
# Table used for the analyses; count the distinct worker_id values it contains.
df = pd.read_csv('data/data_storage/dataframe.csv')
len(pd.unique(df['worker_id']))

456

#### Recall Initiation Groups

In [12]:
r1_groups = pd.read_csv('analyses/dataframes/r1_groups.csv')

# Non-null subject entries per strat, then per (condition, strat) pair.
n_by_strat = r1_groups.groupby(['strat'])['subject'].count()
n_by_condition_strat = r1_groups.groupby(['condition', 'strat'])['subject'].count()

n_by_strat, n_by_condition_strat

(strat
 ns      136
 prim    174
 rec     146
 Name: subject, dtype: int64,
 condition  strat
 10-2       ns       21
            prim     29
            rec      20
 15-2       ns       32
            prim     39
            rec      22
 20-1       ns       23
            prim     23
            rec      24
 20-2       ns       18
            prim     32
            rec      24
 30-1       ns       22
            prim     25
            rec      29
 40-1       ns       20
            prim     26
            rec      27
 Name: subject, dtype: int64)

### Ages

Mean and standard deviation of participant age (and the number of participants with a usable age) for each recall initiation group.

In [6]:
def _load_json(path):
    """Read `path` and return its parsed JSON content."""
    with open(path, 'r') as handle:
        return json.load(handle)


def _as_age_frame(age_dict):
    """Return a two-column frame: dict keys as 'subject', dict values as 'age'."""
    return pd.DataFrame(list(age_dict.items()), columns=['subject', 'age'])


# Only the age_dict files suffixed 0, 2 and 4 are read here.
ad0 = _load_json('data/data_storage/age_dict_0.json')
ad2 = _load_json('data/data_storage/age_dict_2.json')
ad4 = _load_json('data/data_storage/age_dict_4.json')

age_df_0 = _as_age_frame(ad0)
age_df_2 = _as_age_frame(ad2)
age_df_4 = _as_age_frame(ad4)

# Stack the three frames and drop rows that repeat an identical (subject, age) pair.
age_df = pd.concat([age_df_0, age_df_2, age_df_4], ignore_index=True).drop_duplicates()

In [7]:
def _parse_age(raw):
    """Convert one raw age response to an int, or NaN if it is unusable.

    A response is unusable when `int()` cannot convert it (non-numeric text,
    None, NaN, infinity, ...) or when the converted value is >= 100.
    """
    try:
        age = int(raw)
    except (ValueError, TypeError, OverflowError):
        # Only the errors int() raises for bad input are caught; catching
        # BaseException would also swallow KeyboardInterrupt / SystemExit.
        return np.nan
    return age if age < 100 else np.nan


# convert from strings
# remove values >= 100
age_df['age'] = [_parse_age(raw) for raw in age_df.age]

# multiple responses, take mean
age_df = age_df.groupby('subject')['age'].mean().reset_index()

# After the groupby each subject appears exactly once, so a keyed lookup gives
# the same result as filtering age_df per row: subjects without an entry get NaN.
r1_groups['age'] = r1_groups['subject'].map(age_df.set_index('subject')['age'])

In [8]:
# Build the grouping once, then report mean, sample SD (ddof=1) and the
# number of non-missing ages for each strat group.
age_by_strat = r1_groups.groupby('strat')['age']
age_by_strat.mean(), age_by_strat.std(ddof=1), age_by_strat.count()

(strat
 ns      38.451613
 prim    38.259036
 rec     37.738281
 Name: age, dtype: float64,
 strat
 ns      11.398521
 prim    11.702334
 rec     10.130442
 Name: age, dtype: float64,
 strat
 ns      124
 prim    166
 rec     128
 Name: age, dtype: int64)