# curbage_recall Analyses
## Aging shifts recall of naturalistic events from temporal to topic organization
#### Angelique I. Delarazan, Katherine March, Elena Markantonakis, June Dy, and Zach Reagh

## Set Up

### Import Packages

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import re
import seaborn as sns
from scipy import stats
import itertools
import quail
import statsmodels.api as sm 
from statsmodels.formula.api import ols 
from statsmodels.stats.anova import AnovaRM
from statsmodels.stats.multicomp import pairwise_tukeyhsd
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.graphics.regressionplots import plot_partregress_grid
import pingouin as pg
from pingouin import anova as pg_anova
from psifr import fr
import tensorflow as tf
import tensorflow_hub as hub
import rpy2
from scipy.spatial.distance import squareform
from scipy.stats import ttest_ind
from scipy.stats import mannwhitneyu
from scipy.stats import chisquare
from pingouin import mixed_anova
import statsmodels.api as sm 
from statsmodels.formula.api import ols 
from scipy.stats.stats import pearsonr
import ptitprince as pt

  **kwargs
  **kwargs


### Load Data

#### Read in Recall

In [5]:
recall = pd.read_csv('~/Box Sync/aidelarazan_box/Projects/curbage_recall/github/data/curbage_recall_sub-all_group-all_task-main_desc-recall.csv')
recall.sort_values(by=['subject', 'recall_order'], inplace=True)
recall = recall[['subject', 'group', 'version', 'event', 'recall_description', 'recall_order', 'scene', 'scene_location', 'theme', 'word_count']]
recall.sort_values(by=['subject', 'recall_order'], inplace=True)
recall['recall_order'] = recall.groupby('subject').cumcount() + 1
recall = recall.reset_index(drop=True)
recall = recall.rename(columns={'recall_description': 'description', 'recall_order': 'position', 'event':'item'})
recall = recall.assign(trial_type='recall')
recall = recall.assign(list=1)
recall['item'] = recall['item'].astype(str)
recall['position'] = recall['position'].astype(int) 
recall = recall[['subject', 'group', 'version', 'list', 'trial_type', 'item', 'scene', 'scene_location', 'theme', 'word_count', 'position', 'description']]
subjects = recall['subject'].unique().tolist()
recall.head()

Unnamed: 0,subject,group,version,list,trial_type,item,scene,scene_location,theme,word_count,position,description
0,101,older,exp2,1,recall,A1,A,Walking on the Street,Caterer,30,1,"Okay, this was an episode of Curb Your Enthusi..."
1,101,older,exp2,1,recall,A3,A,Walking on the Street,Caterer,10,2,that he was carrying that was actually tuna fi...
2,101,older,exp2,1,recall,A2,A,Walking on the Street,Chevy,22,3,and Jeff his friends said that he was going to...
3,101,older,exp2,1,recall,A3,A,Walking on the Street,Caterer,26,4,"and then larry, as they were they're walking d..."
4,101,older,exp2,1,recall,other,C,Jeff's Office,Other,42,5,and they go upstairs or they go to the office ...


#### Read in Encoding

In [7]:
encoding = pd.read_csv('~/Box Sync/aidelarazan_box/Projects/curbage_recall/github/data/curbage_recall_desc-annotations.csv')
encoding['event_num'] = encoding['event_num'].astype(int)
encoding.sort_values(by=['event_num'], inplace=True)
encoding = encoding.rename(columns={'event_description': 'description', 'event_num': 'position', 'event':'item'})
encoding = encoding.assign(trial_type='study')
encoding = encoding.assign(list=1)
encoding['item'] = encoding['item'].astype(str)
encoding.head()

Unnamed: 0,item,position,scene,scene_location,theme,description,trial_type,list
0,A1,1,A,Walking on the Street,Caterer,Larry and Jeff are walking down the street. La...,study,1
1,A2,2,A,Walking on the Street,Chevy,Jeff tells Larry that he is getting a 57 Chevy...,study,1
2,A3,3,A,Walking on the Street,Caterer,A homeless man holding a cup interrupts Larry ...,study,1
3,B1,4,B,The David's Back Patio,Other,Larry walks through his kitchen and into the p...,study,1
4,B2,5,B,The David's Back Patio,Dinner_Party,Julie gets up to leave and mentions the dinner...,study,1


In [8]:
encoding_tmp = []

for subject in subjects:
    current_df = encoding.copy()
    current_df['subject'] = subject
    encoding_tmp.append(current_df)

encoded_story = pd.concat(encoding_tmp)
encoded_story = encoded_story.dropna()
encoded_story['group'] = encoded_story['subject'].apply(lambda x: 'younger' if 200 < x < 400 else 'older')
encoded_story['version'] = encoded_story['subject'].apply(lambda x: 'exp1' if x > 300 else 'exp2')
encoded_story['word_count'] = encoded_story['description'].str.split().str.len()
encoded_story = encoded_story[['subject', 'group', 'version', 'list', 'trial_type', 'item', 'scene', 'scene_location', 'theme', 'position', 'description', 'word_count']]
encoded_story.head()

Unnamed: 0,subject,group,version,list,trial_type,item,scene,scene_location,theme,position,description,word_count
0,101,older,exp2,1,study,A1,A,Walking on the Street,Caterer,1,Larry and Jeff are walking down the street. La...,14
1,101,older,exp2,1,study,A2,A,Walking on the Street,Chevy,2,Jeff tells Larry that he is getting a 57 Chevy...,14
2,101,older,exp2,1,study,A3,A,Walking on the Street,Caterer,3,A homeless man holding a cup interrupts Larry ...,60
3,101,older,exp2,1,study,B1,B,The David's Back Patio,Other,4,Larry walks through his kitchen and into the p...,65
4,101,older,exp2,1,study,B2,B,The David's Back Patio,Dinner_Party,5,Julie gets up to leave and mentions the dinner...,13


In [9]:
story = pd.concat([encoded_story, recall])
story.head()

Unnamed: 0,subject,group,version,list,trial_type,item,scene,scene_location,theme,position,description,word_count
0,101,older,exp2,1,study,A1,A,Walking on the Street,Caterer,1,Larry and Jeff are walking down the street. La...,14
1,101,older,exp2,1,study,A2,A,Walking on the Street,Chevy,2,Jeff tells Larry that he is getting a 57 Chevy...,14
2,101,older,exp2,1,study,A3,A,Walking on the Street,Caterer,3,A homeless man holding a cup interrupts Larry ...,60
3,101,older,exp2,1,study,B1,B,The David's Back Patio,Other,4,Larry walks through his kitchen and into the p...,65
4,101,older,exp2,1,study,B2,B,The David's Back Patio,Dinner_Party,5,Julie gets up to leave and mentions the dinner...,13


In [11]:
story_df = fr.merge_free_recall(story, study_keys=['group', 'version', 'theme', 'scene_location'])
story_df.head()

Unnamed: 0,subject,list,item,input,output,study,recall,repeat,intrusion,group,version,theme,scene_location,prior_list,prior_input
0,101,1,A1,1.0,1.0,True,True,0,False,older,exp2,Caterer,Walking on the Street,,
1,101,1,A2,2.0,3.0,True,True,0,False,older,exp2,Chevy,Walking on the Street,,
2,101,1,A3,3.0,2.0,True,True,0,False,older,exp2,Caterer,Walking on the Street,,
3,101,1,A3,3.0,4.0,False,True,1,False,older,exp2,Caterer,Walking on the Street,,
4,101,1,B1,4.0,9.0,True,True,0,False,older,exp2,Other,The David's Back Patio,,


### Statistics

#### Recall Performance (Word Count)

In [12]:
story[story['trial_type']=='recall'].groupby(['subject', 'group', 'version'])['word_count'].sum().reset_index()

Unnamed: 0,subject,group,version,word_count
0,101,older,exp2,2562
1,102,older,exp2,452
2,107,older,exp2,1226
3,108,older,exp2,1157
4,113,older,exp2,2025
...,...,...,...,...
79,417,older,exp1,953
80,418,older,exp1,2752
81,419,older,exp1,1699
82,420,older,exp1,246


In [13]:
model = smf.mixedlm(
    data = story[story['trial_type']=='recall'], 
    formula = "word_count ~ group", 
    groups = story[story['trial_type']=='recall']['version'],
    )
result = model.fit(reml=True)  # Fit the model with REML (restricted maximum likelihood)

print(result.summary())


           Mixed Linear Model Regression Results
Model:              MixedLM Dependent Variable: word_count 
No. Observations:   2416    Method:             REML       
No. Groups:         2       Scale:              3039.8426  
Min. group size:    1074    Log-Likelihood:     -13114.0617
Max. group size:    1342    Converged:          Yes        
Mean group size:    1208.0                                 
-----------------------------------------------------------
                  Coef.  Std.Err.   z   P>|z| [0.025 0.975]
-----------------------------------------------------------
Intercept         51.622    8.860 5.827 0.000 34.258 68.986
group[T.younger]   1.149    2.272 0.505 0.613 -3.305  5.602
Group Var        151.152    3.942                          



#### Lag-CRP

In [14]:
lagcrp = fr.lag_crp(story_df).reset_index()
lagcrp['lag'] = lagcrp['lag'].astype(int)
lagcrp['group'] = lagcrp['subject'].apply(lambda x: 'younger' if 200 < x < 400 else 'older')
lagcrp['version'] = lagcrp['subject'].apply(lambda x: 'exp1' if x > 300 else 'exp2')
lagcrp['direction'] = lagcrp['lag'].apply(lambda x: 'forward' if x > 0 else 'backward')
lagcrp.head()

Unnamed: 0,subject,lag,prob,actual,possible,group,version,direction
0,101,-36,,0,0,older,exp2,backward
1,101,-35,,0,0,older,exp2,backward
2,101,-34,,0,0,older,exp2,backward
3,101,-33,,0,0,older,exp2,backward
4,101,-32,,0,0,older,exp2,backward


Forward Asymmetry (+1)

In [15]:
model = smf.mixedlm(
    data = lagcrp[lagcrp['lag']==1], 
    formula = "prob ~ group", 
    groups = lagcrp[lagcrp['lag']==1]['version'],
    )
result = model.fit(reml=True)

print(result.summary())


          Mixed Linear Model Regression Results
Model:               MixedLM  Dependent Variable:  prob   
No. Observations:    84       Method:              REML   
No. Groups:          2        Scale:               0.0205 
Min. group size:     41       Log-Likelihood:      39.3872
Max. group size:     43       Converged:           No     
Mean group size:     42.0                                 
----------------------------------------------------------
                 Coef. Std.Err.   z    P>|z| [0.025 0.975]
----------------------------------------------------------
Intercept        0.400    0.023 17.281 0.000  0.355  0.446
group[T.younger] 0.131    0.031  4.203 0.000  0.070  0.192
Group Var        0.000                                    





### Temporal Clustering

In [18]:
temporal_cluster = fr.lag_rank(story_df).reset_index()
temporal_cluster['group'] = temporal_cluster['subject'].apply(lambda x: 'younger' if 200 < x < 400 else 'older')
temporal_cluster['version'] = temporal_cluster['subject'].apply(lambda x: 'exp1' if x > 300 else 'exp2')
temporal_cluster.head()

Unnamed: 0,subject,rank,group,version
0,101,0.794559,older,exp2
1,102,0.652631,older,exp2
2,107,0.914802,older,exp2
3,108,0.870524,older,exp2
4,113,0.912836,older,exp2


In [19]:
model = smf.mixedlm(
    data = temporal_cluster, 
    formula = "rank ~ group", 
    groups = temporal_cluster['version'],
)
result = model.fit(reml=True)

print(result.summary())


          Mixed Linear Model Regression Results
Model:              MixedLM  Dependent Variable:  rank    
No. Observations:   84       Method:              REML    
No. Groups:         2        Scale:               0.0044  
Min. group size:    41       Log-Likelihood:      102.5315
Max. group size:    43       Converged:           Yes     
Mean group size:    42.0                                  
----------------------------------------------------------
                 Coef. Std.Err.   z    P>|z| [0.025 0.975]
----------------------------------------------------------
Intercept        0.851    0.011 78.478 0.000  0.829  0.872
group[T.younger] 0.073    0.015  4.900 0.000  0.044  0.102
Group Var        0.000    0.004                           





### Topic Clustering Scores

In [20]:
topic_cluster = fr.category_clustering(story_df, category_key='theme').reset_index()
topic_cluster['group'] = topic_cluster['subject'].apply(lambda x: 'younger' if 200 < x < 400 else 'older')
topic_cluster['version'] = topic_cluster['subject'].apply(lambda x: 'exp1' if x > 300 else 'exp2')
topic_cluster.head()

Unnamed: 0,subject,lbc,arc,group,version
0,101,5.1875,0.366906,older,exp2
1,102,2.166667,0.470588,older,exp2
2,107,1.958333,0.100977,older,exp2
3,108,5.5,0.315068,older,exp2
4,113,2.041667,0.077449,older,exp2


#### Adjustect Category Clustering
[Roenker et al., 1971]('https://web.p.ebscohost.com/ehost/pdfviewer/pdfviewer?vid=0&sid=41517a60-7967-4661-a691-113ac6ffddb2%40redis'):
The computational formula for the ARC score is as follows:

ARC = (R ~ E(R))/maxR - E(R)

* R = total number of observed category repetitions (i.e., the number of times a category item follows an item from the same category)
* maxR = maximum possible number of category repetitions
* E(R) = expected (chance) number of category repetitions


In [21]:
model = smf.mixedlm(
    data = topic_cluster, 
    formula = "arc ~ group", 
    groups = topic_cluster['version'],
)
result = model.fit(reml=True)

print(result.summary())


           Mixed Linear Model Regression Results
Model:              MixedLM   Dependent Variable:   arc    
No. Observations:   84        Method:               REML   
No. Groups:         2         Scale:                0.0278 
Min. group size:    41        Log-Likelihood:       26.7685
Max. group size:    43        Converged:            Yes    
Mean group size:    42.0                                   
-----------------------------------------------------------
                 Coef.  Std.Err.   z    P>|z| [0.025 0.975]
-----------------------------------------------------------
Intercept         0.256    0.028  9.159 0.000  0.201  0.310
group[T.younger] -0.085    0.037 -2.289 0.022 -0.157 -0.012
Group Var         0.000    0.006                           





### Narrative Centrality

#### Causal Centrality

In [159]:
centrality_ratings = pd.read_csv('/Users/aidelarazan/Box Sync/aidelarazan_box/Projects/curbage_recall/github/data/curbage_recall_sub-all_group-younger_task-narrative_centrality_desc-ratings.csv')

centrality_ratings['centrality'] = pd.qcut(
    centrality_ratings['cause_count'], 
    q=[0, 0.4, 0.6, 1.0],
    labels=['Low', 'Med', 'High']
)
centrality_ratings.sort_values(by='input').reset_index(drop=True)
centrality_ratings.head()

Unnamed: 0,input,cause_count,effect_count,total_count,centrality
0,20,0,0,0,Low
1,25,1,0,1,Low
2,1,4,0,4,Low
3,14,1,5,6,Low
4,3,3,4,7,Low


In [155]:
causal_centrality = pd.merge(centrality_ratings, story_df, on='input', how='inner')
causal_centrality['recall'] = causal_centrality['recall'].astype(int)
causal_centrality_avg = causal_centrality.groupby(['subject', 'centrality'])['recall'].mean().reset_index()
causal_centrality_avg['group'] = causal_centrality_avg['subject'].apply(lambda x: 'younger' if 200 < x < 400 else 'older')
causal_centrality_avg['version'] = causal_centrality_avg['subject'].apply(lambda x: 'exp1' if x > 300 else 'exp2')
causal_centrality_avg = causal_centrality_avg[causal_centrality_avg['centrality']!='Med']
causal_centrality_avg['centrality'] = causal_centrality_avg['centrality'].astype(str)
causal_centrality_avg.reset_index(drop=True, inplace=True)
causal_centrality_avg.tail()

Unnamed: 0,subject,centrality,recall,group,version
163,419,High,0.888889,older,exp1
164,420,Low,0.125,older,exp1
165,420,High,0.411765,older,exp1
166,421,Low,0.5,older,exp1
167,421,High,0.8,older,exp1


In [158]:
model = smf.mixedlm(
    formula="recall ~ group + centrality", 
    data=causal_centrality_avg, 
    groups=causal_centrality_avg['version']
)
result = model.fit(reml=True)

print(result.summary())


            Mixed Linear Model Regression Results
Model:                MixedLM   Dependent Variable:   recall 
No. Observations:     168       Method:               REML   
No. Groups:           2         Scale:                0.0227 
Min. group size:      82        Log-Likelihood:       72.0445
Max. group size:      86        Converged:            Yes    
Mean group size:      84.0                                   
-------------------------------------------------------------
                  Coef.  Std.Err.    z    P>|z| [0.025 0.975]
-------------------------------------------------------------
Intercept          0.758    0.021  36.105 0.000  0.717  0.799
group[T.younger]   0.074    0.024   3.044 0.002  0.026  0.121
centrality[T.Low] -0.278    0.023 -11.970 0.000 -0.324 -0.232
Group Var          0.000    0.005                            





#### Semantic Centrality

##### Universal Sentence Encoder Modules

In [188]:
module_url = "https://tfhub.dev/google/universal-sentence-encoder/4" #@param ["https://tfhub.dev/google/universal-sentence-encoder/4", "https://tfhub.dev/google/universal-sentence-encoder-large/5"]
model = hub.load(module_url)
print ("module %s loaded" % module_url)
def embed(input):
  return model(input)

module https://tfhub.dev/google/universal-sentence-encoder/4 loaded


In [189]:
encoding = pd.read_csv('~/Box Sync/aidelarazan_box/Projects/curbage_recall/github/data/curbage_recall_desc-annotations.csv')
encoding['event_num'] = encoding['event_num'].astype(int)
encoding.sort_values(by=['event_num'], inplace=True)
encoding = encoding.rename(columns={'event_description': 'description', 'event_num': 'position', 'event':'item'})
encoding = encoding.assign(trial_type='study')
encoding = encoding.assign(list=1)
encoding['item'] = encoding['item'].astype(str)
encoding.head()

Unnamed: 0,item,position,scene,scene_location,theme,description,trial_type,list
0,A1,1,A,Walking on the Street,Caterer,Larry and Jeff are walking down the street. La...,study,1
1,A2,2,A,Walking on the Street,Chevy,Jeff tells Larry that he is getting a 57 Chevy...,study,1
2,A3,3,A,Walking on the Street,Caterer,A homeless man holding a cup interrupts Larry ...,study,1
3,B1,4,B,The David's Back Patio,Other,Larry walks through his kitchen and into the p...,study,1
4,B2,5,B,The David's Back Patio,Dinner_Party,Julie gets up to leave and mentions the dinner...,study,1


In [223]:
threshold = 0.59
semantic_centrality = []

# Iterate through each pair of event descriptions
for i in range(len(encoding)):
    for j in range(i + 1, len(encoding)):
        position_i = encoding.loc[i, 'position']
        position_j = encoding.loc[j, 'position']

        scene_i = encoding.loc[i, 'scene']
        scene_j = encoding.loc[j, 'scene']

        scene_location_i = encoding.loc[i, 'scene_location']
        scene_location_j = encoding.loc[j, 'scene_location']

        theme_i = encoding.loc[i, 'theme']
        theme_j = encoding.loc[j, 'theme']

        description_i = encoding.loc[i, 'description']
        description_j = encoding.loc[j, 'description']
        
        # Compute embeddings for both event descriptions
        event_description_embedding_i = np.array(embed([description_i])).tolist()
        event_description_embedding_j = np.array(embed([description_j])).tolist()
        
        # Compute correlation between embeddings
        correlation = np.inner(event_description_embedding_i, event_description_embedding_j)[0][0]
        
        # Store the pair and their correlation
        curr_dict = {
            'eventA_position': position_i,
            'eventB_position': position_j,
            'eventA_scene': scene_i,
            'eventB_scene': scene_j,
            'eventA_scene_location': scene_location_i,
            'eventB_scene_location': scene_location_j,
            'eventA_theme': theme_i,
            'eventB_theme': theme_j,
            'eventA_description': description_i,
            'eventB_description': description_j,
            'correlation': correlation
        }
        semantic_centrality.append(curr_dict)

semantic_centrality = pd.DataFrame(semantic_centrality)
semantic_centrality = semantic_centrality[semantic_centrality['correlation'] > threshold].reset_index(drop=True)
semantic_centrality.head()


Unnamed: 0,eventA_position,eventB_position,eventA_scene,eventB_scene,eventA_scene_location,eventB_scene_location,eventA_theme,eventB_theme,eventA_description,eventB_description,correlation
0,1,11,A,C,Walking on the Street,Jeff's Office,Caterer,Caterer,Larry and Jeff are walking down the street. La...,Jeff and Larry are in Jeff's office and Jeff t...,0.623495
1,1,16,A,E,Walking on the Street,Dinner Party,Caterer,Dinner_Party,Larry and Jeff are walking down the street. La...,"At the dinner party, Larry talks to a couple w...",0.591348
2,1,30,A,I,Walking on the Street,Jeff's Kitchen,Caterer,Chevy,Larry and Jeff are walking down the street. La...,Jeff brings up a message that Larry left him a...,0.680375
3,2,12,A,C,Walking on the Street,Jeff's Office,Chevy,Chevy,Jeff tells Larry that he is getting a 57 Chevy...,Jeff moves to the couch and tells Larry that h...,0.659799
4,2,29,A,I,Walking on the Street,Jeff's Kitchen,Chevy,Caterer,Jeff tells Larry that he is getting a 57 Chevy...,Jeff is eating the leftover chicken when Larry...,0.596201


In [233]:
sorted_semantic_centrality = semantic_centrality.sort_values('correlation').reset_index(drop=True)
event_similarity_values = pd.concat([sorted_semantic_centrality['eventA_position'], 
                                      sorted_semantic_centrality['eventB_position']])
event_similarity_counts = event_similarity_values.value_counts().reset_index()
event_similarity_counts.columns = ['input', 'count']
event_similarity_counts.tail(60) 


Unnamed: 0,input,count
0,30,12
1,24,11
2,26,9
3,12,9
4,23,9
5,22,8
6,34,8
7,16,7
8,17,7
9,11,7


In [235]:
semantic_ratings = event_similarity_counts.copy()
semantic_ratings['centrality'] = pd.qcut(
    semantic_ratings['count'], 
    q=[0, 0.4, 0.6, 1.0], 
    labels=['Low', 'Med', 'High']
)
semantic_ratings.sort_values(by='input').reset_index(drop=True)
semantic_ratings.head(60)

Unnamed: 0,input,count,centrality
0,30,12,High
1,24,11,High
2,26,9,High
3,12,9,High
4,23,9,High
5,22,8,High
6,34,8,High
7,16,7,High
8,17,7,High
9,11,7,High


In [240]:
semantic_centrality = pd.merge(semantic_ratings, story_df, on='input', how='inner')
semantic_centrality['recall'] = semantic_centrality['recall'].astype(int)
semantic_centrality_avg = semantic_centrality.groupby(['subject', 'centrality'])['recall'].mean().reset_index()
semantic_centrality_avg['group'] = semantic_centrality_avg['subject'].apply(lambda x: 'younger' if 200 < x < 400 else 'older')
semantic_centrality_avg['version'] = semantic_centrality_avg['subject'].apply(lambda x: 'exp1' if x > 300 else 'exp2')
semantic_centrality_avg = semantic_centrality_avg[semantic_centrality_avg['centrality']!='Med']
semantic_centrality_avg['centrality'] = semantic_centrality_avg['centrality'].astype(str)
semantic_centrality_avg.reset_index(drop=True, inplace=True)
semantic_centrality_avg.tail()

Unnamed: 0,subject,centrality,recall,group,version
163,419,High,0.846154,older,exp1
164,420,Low,0.142857,older,exp1
165,420,High,0.384615,older,exp1
166,421,Low,0.533333,older,exp1
167,421,High,0.454545,older,exp1


In [241]:
model = smf.mixedlm(
    formula="recall ~ group + centrality", 
    data=semantic_centrality_avg, 
    groups=semantic_centrality_avg['version']
)
result = model.fit(reml=True)
print(result.summary())


           Mixed Linear Model Regression Results
Model:               MixedLM   Dependent Variable:   recall 
No. Observations:    168       Method:               REML   
No. Groups:          2         Scale:                0.0260 
Min. group size:     82        Log-Likelihood:       60.5737
Max. group size:     86        Converged:            No     
Mean group size:     84.0                                   
------------------------------------------------------------
                  Coef.  Std.Err.   z    P>|z| [0.025 0.975]
------------------------------------------------------------
Intercept          0.602    0.022 26.987 0.000  0.558  0.645
group[T.younger]   0.114    0.025  4.562 0.000  0.065  0.163
centrality[T.Low] -0.012    0.025 -0.482 0.630 -0.061  0.037
Group Var          0.000                                    





### Correlations: Clustering Scores and Recall Performance

In [25]:
story[story['trial_type']=='recall'].groupby(['subject'])['word_count'].sum().reset_index()

Unnamed: 0,subject,word_count
0,101,2562
1,102,452
2,107,1226
3,108,1157
4,113,2025
...,...,...
79,417,953
80,418,2752
81,419,1699
82,420,246


In [26]:
temporal_rank = fr.lag_rank(story_df).reset_index()
temporal_rank['rank_type'] = 'temporal'
temporal_rank['chance_rank'] = 0.5
temporal_rank.head()

Unnamed: 0,subject,rank,rank_type,chance_rank
0,101,0.794559,temporal,0.5
1,102,0.652631,temporal,0.5
2,107,0.914802,temporal,0.5
3,108,0.870524,temporal,0.5
4,113,0.912836,temporal,0.5


In [27]:
topic_rank = fr.category_clustering(story_df, category_key='theme').reset_index()
topic_rank = topic_rank.rename(columns={'arc': 'rank'})
topic_rank['rank_type'] = 'topic'
topic_rank['chance_rank'] = 0.0
topic_rank = topic_rank[['subject', 'rank', 'rank_type', 'chance_rank']]
topic_rank.head()

Unnamed: 0,subject,rank,rank_type,chance_rank
0,101,0.366906,topic,0.0
1,102,0.470588,topic,0.0
2,107,0.100977,topic,0.0
3,108,0.315068,topic,0.0
4,113,0.077449,topic,0.0


In [80]:
rank = pd.concat([temporal_rank, topic_rank]) 
rank.tail()

Unnamed: 0,subject,rank,rank_type,chance_rank
79,417,0.483692,semantic,0.5
80,418,0.511245,semantic,0.5
81,419,0.424944,semantic,0.5
82,420,0.583333,semantic,0.5
83,421,0.486977,semantic,0.5


In [81]:
cluster_recall = pd.merge(story[story['trial_type']=='recall'].groupby(['subject', 'group', 'version'])['word_count'].sum().reset_index(), rank, how='inner')
cluster_recall.head()

Unnamed: 0,subject,group,version,word_count,rank,rank_type,chance_rank
0,101,older,exp2,2562,0.794559,temporal,0.5
1,101,older,exp2,2562,0.366906,topic,0.0
2,101,older,exp2,2562,0.469324,semantic,0.5
3,102,older,exp2,452,0.652631,temporal,0.5
4,102,older,exp2,452,0.470588,topic,0.0


Temporal Clustering and Recall Performance

In [82]:
corr = pg.corr(
    x=cluster_recall[(cluster_recall['group']=='younger')&(cluster_recall['rank_type']=='temporal')]['rank'],
    y=cluster_recall[(cluster_recall['group']=='younger')&(cluster_recall['rank_type']=='temporal')]['word_count'],
    method='pearson'
)

pg.print_table(corr)

corr = pg.corr(
    x=cluster_recall[(cluster_recall['group']=='older')&(cluster_recall['rank_type']=='temporal')]['rank'],
    y=cluster_recall[(cluster_recall['group']=='older')&(cluster_recall['rank_type']=='temporal')]['word_count'],
    method='pearson'
)

pg.print_table(corr)


  n      r  CI95%          p-val    BF10    power
---  -----  -----------  -------  ------  -------
 46  0.364  [0.08 0.59]    0.013   3.704    0.715

  n      r  CI95%            p-val    BF10    power
---  -----  -------------  -------  ------  -------
 38  0.266  [-0.06  0.54]    0.106   0.710    0.371



Topic Clustering and Recall Performance

In [83]:
corr = pg.corr(
    x=cluster_recall[(cluster_recall['group']=='younger')&(cluster_recall['rank_type']=='topic')]['rank'],
    y=cluster_recall[(cluster_recall['group']=='younger')&(cluster_recall['rank_type']=='topic')]['word_count'],
    method='pearson'
)

pg.print_table(corr)

corr = pg.corr(
    x=cluster_recall[(cluster_recall['group']=='older')&(cluster_recall['rank_type']=='topic')]['rank'],
    y=cluster_recall[(cluster_recall['group']=='older')&(cluster_recall['rank_type']=='topic')]['word_count'],
    method='pearson'
)

pg.print_table(corr)


  n       r  CI95%            p-val    BF10    power
---  ------  -------------  -------  ------  -------
 46  -0.335  [-0.57 -0.05]    0.023   2.261    0.635

  n       r  CI95%            p-val    BF10    power
---  ------  -------------  -------  ------  -------
 38  -0.225  [-0.51  0.1 ]    0.174   0.492    0.278



Neuropsychological Tests

In [477]:
neuropsych = pd.read_csv('~/Box Sync/aidelarazan_box/Projects/curbage_recall/github/data/curbage_recall_sub-all_group-older_task-neuropsych.csv')
neuropsych['subject'] = neuropsych['subject'].astype(int)
neuropsych.head()

Unnamed: 0,subject,version,group,Craft21_immediate_verbatim,Craft21_immediate_paraphase,Craft21_immediate_total,MoCA_visuospatial/executive,MoCA_naming,MoCA_attention,MoCA_language,MoCA_abstraction,MoCA_delayed_recall,MoCA_orientation,MoCA_total,MINT_uncued,MINT_semantic,MINT_total,Craft21_delayed_verbatim,Craft21_delayed_paraphrase,Craft21_delayed_total
0,401,exp1,older,21,1,22,5,3,6,3,1,5,6,29,29,0,29,19,3,22
1,402,exp1,older,24,2,26,4,3,4,2,2,3,6,24,26,0,26,20,1,21
2,403,exp1,older,10,2,12,5,3,6,3,2,5,6,30,30,0,30,10,3,13
3,404,exp1,older,9,5,14,5,3,2,3,2,5,6,30,31,0,31,9,4,13
4,405,exp1,older,23,3,26,5,3,6,3,2,4,6,29,25,2,27,22,5,27


In [486]:
neuropsych['MoCA_total'].describe()
neuropsych['Craft21_immediate_total'].describe()
neuropsych['Craft21_delayed_total'].describe()
neuropsych['MINT_total'].describe()

count    37.000000
mean     30.459459
std       1.709363
min      26.000000
25%      29.000000
50%      31.000000
75%      32.000000
max      32.000000
Name: MINT_total, dtype: float64

In [294]:
cluster_recall_neuropsych = pd.merge(cluster_recall, neuropsych, on=['subject', 'group', 'version'], how='inner') 
cluster_recall_neuropsych.columns

Index(['subject', 'group', 'version', 'word_count', 'rank', 'rank_type',
       'chance_rank', 'Craft21_immediate_verbatim',
       'Craft21_immediate_paraphase', 'Craft21_immediate_total',
       'MoCA_visuospatial/executive', 'MoCA_naming', 'MoCA_attention',
       'MoCA_language', 'MoCA_abstraction', 'MoCA_delayed_recall',
       'MoCA_orientation', 'MoCA_total', 'MINT_uncued', 'MINT_semantic',
       'MINT_total', 'Craft21_delayed_verbatim', 'Craft21_delayed_paraphrase',
       'Craft21_delayed_total'],
      dtype='object')

In [414]:
corr = pg.corr(
    x=cluster_recall_neuropsych[(cluster_recall_neuropsych['group']=='older')&(cluster_recall_neuropsych['rank_type']=='temporal')]['MoCA_total'],
    y=cluster_recall_neuropsych[(cluster_recall_neuropsych['group']=='older')&(cluster_recall_neuropsych['rank_type']=='temporal')]['word_count'],
    method='pearson'
)

print('MocA and Recall:') 
pg.print_table(corr)

corr = pg.corr(
    x=cluster_recall_neuropsych[(cluster_recall_neuropsych['group']=='older')&(cluster_recall_neuropsych['rank_type']=='temporal')]['MoCA_total'],
    y=cluster_recall_neuropsych[(cluster_recall_neuropsych['group']=='older')&(cluster_recall_neuropsych['rank_type']=='temporal')]['rank'],
    method='pearson'
)

print('MocA and Temporal Cluster:') 
pg.print_table(corr)

corr = pg.corr(
    x=cluster_recall_neuropsych[(cluster_recall_neuropsych['group']=='older')&(cluster_recall_neuropsych['rank_type']=='semantic')]['MoCA_total'],
    y=cluster_recall_neuropsych[(cluster_recall_neuropsych['group']=='older')&(cluster_recall_neuropsych['rank_type']=='semantic')]['rank'],
    method='pearson'
)

print('MocA and Semantic Cluster:') 
pg.print_table(corr)

corr = pg.corr(
    x=cluster_recall_neuropsych[(cluster_recall_neuropsych['group']=='older')&(cluster_recall_neuropsych['rank_type']=='topic')]['MoCA_total'],
    y=cluster_recall_neuropsych[(cluster_recall_neuropsych['group']=='older')&(cluster_recall_neuropsych['rank_type']=='topic')]['rank'],
    method='pearson'
)

print('MocA and Topic Cluster:') 
pg.print_table(corr)

MocA and Recall:
  n      r  CI95%            p-val    BF10    power
---  -----  -------------  -------  ------  -------
 37  0.148  [-0.19  0.45]    0.384   0.295    0.141

MocA and Temporal Cluster:
  n       r  CI95%            p-val    BF10    power
---  ------  -------------  -------  ------  -------
 37  -0.082  [-0.4   0.25]    0.630   0.229    0.077

MocA and Semantic Cluster:
  n       r  CI95%            p-val    BF10    power
---  ------  -------------  -------  ------  -------
 37  -0.072  [-0.39  0.26]    0.672   0.223    0.071

MocA and Topic Cluster:
  n       r  CI95%            p-val    BF10    power
---  ------  -------------  -------  ------  -------
 37  -0.066  [-0.38  0.26]    0.698   0.220    0.067



In [415]:
corr = pg.corr(
    x=cluster_recall_neuropsych[(cluster_recall_neuropsych['group']=='older')&(cluster_recall_neuropsych['rank_type']=='temporal')]['Craft21_immediate_total'],
    y=cluster_recall_neuropsych[(cluster_recall_neuropsych['group']=='older')&(cluster_recall_neuropsych['rank_type']=='temporal')]['word_count'],
    method='pearson'
)

print('Craft21 Immediate and Recall:') 
pg.print_table(corr)

corr = pg.corr(
    x=cluster_recall_neuropsych[(cluster_recall_neuropsych['group']=='older')&(cluster_recall_neuropsych['rank_type']=='temporal')]['Craft21_immediate_total'],
    y=cluster_recall_neuropsych[(cluster_recall_neuropsych['group']=='older')&(cluster_recall_neuropsych['rank_type']=='temporal')]['rank'],
    method='pearson'
)

print('Craft21 Immediate and Temporal Cluster:') 
pg.print_table(corr)

corr = pg.corr(
    x=cluster_recall_neuropsych[(cluster_recall_neuropsych['group']=='older')&(cluster_recall_neuropsych['rank_type']=='semantic')]['Craft21_immediate_total'],
    y=cluster_recall_neuropsych[(cluster_recall_neuropsych['group']=='older')&(cluster_recall_neuropsych['rank_type']=='semantic')]['rank'],
    method='pearson'
)

print('Craft21 Immediate and Semantic Cluster:') 
pg.print_table(corr)

corr = pg.corr(
    x=cluster_recall_neuropsych[(cluster_recall_neuropsych['group']=='older')&(cluster_recall_neuropsych['rank_type']=='topic')]['Craft21_immediate_total'],
    y=cluster_recall_neuropsych[(cluster_recall_neuropsych['group']=='older')&(cluster_recall_neuropsych['rank_type']=='topic')]['rank'],
    method='pearson'
)

print('Craft21 Immediate and Topic Cluster:') 
pg.print_table(corr)

Craft21 Immediate and Recall:
  n      r  CI95%            p-val    BF10    power
---  -----  -------------  -------  ------  -------
 37  0.322  [-0.    0.58]    0.052   1.261    0.504

Craft21 Immediate and Temporal Cluster:
  n      r  CI95%            p-val    BF10    power
---  -----  -------------  -------  ------  -------
 37  0.152  [-0.18  0.45]    0.370   0.301    0.147

Craft21 Immediate and Semantic Cluster:
  n       r  CI95%            p-val    BF10    power
---  ------  -------------  -------  ------  -------
 37  -0.084  [-0.4   0.25]    0.620   0.230    0.079

Craft21 Immediate and Topic Cluster:
  n       r  CI95%            p-val    BF10    power
---  ------  -------------  -------  ------  -------
 37  -0.053  [-0.37  0.28]    0.755   0.214    0.061



In [416]:
corr = pg.corr(
    x=cluster_recall_neuropsych[(cluster_recall_neuropsych['group']=='older')&(cluster_recall_neuropsych['rank_type']=='temporal')]['Craft21_delayed_total'],
    y=cluster_recall_neuropsych[(cluster_recall_neuropsych['group']=='older')&(cluster_recall_neuropsych['rank_type']=='temporal')]['word_count'],
    method='pearson'
)

print('Craft21 Delayed and Recall:') 
pg.print_table(corr)

corr = pg.corr(
    x=cluster_recall_neuropsych[(cluster_recall_neuropsych['group']=='older')&(cluster_recall_neuropsych['rank_type']=='temporal')]['Craft21_delayed_total'],
    y=cluster_recall_neuropsych[(cluster_recall_neuropsych['group']=='older')&(cluster_recall_neuropsych['rank_type']=='temporal')]['rank'],
    method='pearson'
)

print('Craft21 Delayed and Temporal Cluster:') 
pg.print_table(corr)

corr = pg.corr(
    x=cluster_recall_neuropsych[(cluster_recall_neuropsych['group']=='older')&(cluster_recall_neuropsych['rank_type']=='semantic')]['Craft21_delayed_total'],
    y=cluster_recall_neuropsych[(cluster_recall_neuropsych['group']=='older')&(cluster_recall_neuropsych['rank_type']=='semantic')]['rank'],
    method='pearson'
)

print('Craft21 Delayed and Semantic Cluster:') 
pg.print_table(corr)

corr = pg.corr(
    x=cluster_recall_neuropsych[(cluster_recall_neuropsych['group']=='older')&(cluster_recall_neuropsych['rank_type']=='topic')]['Craft21_delayed_total'],
    y=cluster_recall_neuropsych[(cluster_recall_neuropsych['group']=='older')&(cluster_recall_neuropsych['rank_type']=='topic')]['rank'],
    method='pearson'
)

print('Craft21 Delayed and Topic Cluster:') 
pg.print_table(corr)

Craft21 Delayed and Recall:
  n      r  CI95%            p-val    BF10    power
---  -----  -------------  -------  ------  -------
 37  0.315  [-0.01  0.58]    0.058   1.157    0.485

Craft21 Delayed and Temporal Cluster:
  n      r  CI95%            p-val    BF10    power
---  -----  -------------  -------  ------  -------
 37  0.085  [-0.25  0.4 ]    0.619   0.231    0.079

Craft21 Delayed and Semantic Cluster:
  n       r  CI95%            p-val    BF10    power
---  ------  -------------  -------  ------  -------
 37  -0.153  [-0.45  0.18]    0.367   0.303    0.148

Craft21 Delayed and Topic Cluster:
  n       r  CI95%            p-val    BF10    power
---  ------  -------------  -------  ------  -------
 37  -0.140  [-0.44  0.19]    0.410   0.284    0.131



In [417]:
corr = pg.corr(
    x=cluster_recall_neuropsych[(cluster_recall_neuropsych['group']=='older')&(cluster_recall_neuropsych['rank_type']=='temporal')]['MINT_total'],
    y=cluster_recall_neuropsych[(cluster_recall_neuropsych['group']=='older')&(cluster_recall_neuropsych['rank_type']=='temporal')]['word_count'],
    method='pearson'
)

print('MINT and Recall:') 
pg.print_table(corr)

corr = pg.corr(
    x=cluster_recall_neuropsych[(cluster_recall_neuropsych['group']=='older')&(cluster_recall_neuropsych['rank_type']=='temporal')]['MINT_total'],
    y=cluster_recall_neuropsych[(cluster_recall_neuropsych['group']=='older')&(cluster_recall_neuropsych['rank_type']=='temporal')]['rank'],
    method='pearson'
)

print('MINT and Temporal Cluster:') 
pg.print_table(corr)

corr = pg.corr(
    x=cluster_recall_neuropsych[(cluster_recall_neuropsych['group']=='older')&(cluster_recall_neuropsych['rank_type']=='semantic')]['MINT_total'],
    y=cluster_recall_neuropsych[(cluster_recall_neuropsych['group']=='older')&(cluster_recall_neuropsych['rank_type']=='semantic')]['rank'],
    method='pearson'
)

print('MINT and Semantic Cluster:') 
pg.print_table(corr)

corr = pg.corr(
    x=cluster_recall_neuropsych[(cluster_recall_neuropsych['group']=='older')&(cluster_recall_neuropsych['rank_type']=='topic')]['MINT_total'],
    y=cluster_recall_neuropsych[(cluster_recall_neuropsych['group']=='older')&(cluster_recall_neuropsych['rank_type']=='topic')]['rank'],
    method='pearson'
)

print('MINT and Topic Cluster:') 
pg.print_table(corr)

MINT and Recall:
  n      r  CI95%            p-val    BF10    power
---  -----  -------------  -------  ------  -------
 37  0.013  [-0.31  0.34]    0.941   0.205    0.050

MINT and Temporal Cluster:
  n      r  CI95%            p-val    BF10    power
---  -----  -------------  -------  ------  -------
 37  0.138  [-0.19  0.44]    0.415   0.282    0.129

MINT and Semantic Cluster:
  n       r  CI95%            p-val    BF10    power
---  ------  -------------  -------  ------  -------
 37  -0.028  [-0.35  0.3 ]    0.869   0.207    0.053

MINT and Topic Cluster:
  n       r  CI95%            p-val    BF10    power
---  ------  -------------  -------  ------  -------
 37  -0.103  [-0.41  0.23]    0.544   0.244    0.093



### Word Count and Verifiable Details

In [404]:
verifiable_recall = pd.read_csv('/Users/aidelarazan/Box Sync/aidelarazan_box/Projects/curbage_recall/github/data/curbage_recall_sub-all_group-older_task-main_desc-recall_verifiable_details.csv')
verifiable_recall = verifiable_recall[verifiable_recall['subject'] != 999]
verifiable_recall['group'] = verifiable_recall['subject'].apply(lambda x: 'younger' if x >= 300 else 'older')
verifiable_recall.loc[verifiable_recall['group'] == 'older', 'subject'] += 200
verifiable_recall['subject'] = verifiable_recall['subject'].astype(int)
print(verifiable_recall['subject'].unique())


[401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 301 302 303
 304 305 306 307 308 309 310 311 312 313 314 315 417 418 419 316 317 318
 319 420 421 320 416]


[Calculate Intraclass Correlation Coeffifient (ICC)](https://pingouin-stats.org/build/html/generated/pingouin.intraclass_corr.html)

In [405]:
ratings_df = verifiable_recall[['subject', 'NarrativeVerifiableDetails_JD', 'NarrativeVerifibleDetails_EM']]
reshaped_df = pd.melt(ratings_df, id_vars='subject', var_name='Rater', value_name='Rating')
icc_result = pg.intraclass_corr(data=reshaped_df, targets='subject', raters='Rater', ratings='Rating')
icc_result


Unnamed: 0,Type,Description,ICC,F,df1,df2,pval,CI95%
0,ICC1,Single raters absolute,0.835835,11.182828,40,41,1.721831e-12,"[0.71, 0.91]"
1,ICC2,Single random raters,0.843563,28.086741,40,40,1.9496589999999998e-19,"[0.13, 0.95]"
2,ICC3,Single fixed raters,0.93124,28.086741,40,40,1.9496589999999998e-19,"[0.87, 0.96]"
3,ICC1k,Average raters absolute,0.910577,11.182828,40,41,1.721831e-12,"[0.83, 0.95]"
4,ICC2k,Average random raters,0.915144,28.086741,40,40,1.9496589999999998e-19,"[0.24, 0.98]"
5,ICC3k,Average fixed raters,0.964396,28.086741,40,40,1.9496589999999998e-19,"[0.93, 0.98]"


In [406]:
verifiable_recall_total = verifiable_recall.groupby(['subject'])[['NarrativeVerifiableDetails_JD', 'NarrativeVerifibleDetails_EM']].sum().reset_index()
verifiable_recall_total['NarrativeVerifiableDetails_Avg'] = verifiable_recall_total[['NarrativeVerifiableDetails_JD', 'NarrativeVerifibleDetails_EM']].mean(axis=1)
verifiable_recall_total['subject'] = verifiable_recall_total['subject'].astype(int)
verifiable_recall_edited = verifiable_recall_total[['subject', 'NarrativeVerifiableDetails_Avg']]
word_count_df = cluster_recall[['subject', 'group', 'version', 'word_count']].drop_duplicates().reset_index(drop=True)
word_count_df['subject'] = word_count_df['subject'].astype(int)
verifiable_recall_performance = verifiable_recall_edited.merge(word_count_df, on=['subject'], how='inner')
verifiable_recall_performance.head()

Unnamed: 0,subject,NarrativeVerifiableDetails_Avg,group,version,word_count
0,301,455.0,younger,exp1,5680
1,302,103.0,younger,exp1,973
2,303,104.5,younger,exp1,1247
3,304,312.5,younger,exp1,3468
4,305,61.0,younger,exp1,726


In [413]:
corr = pg.corr(
    x=verifiable_recall_performance['NarrativeVerifiableDetails_Avg'],
    y=verifiable_recall_performance['word_count'],
    method='pearson'
)

print('Average Verifiable Details and Recall Performance (Word_Count):') 
pg.print_table(corr)

corr = pg.corr(
    x=verifiable_recall_performance[verifiable_recall_performance['group']=='younger']['NarrativeVerifiableDetails_Avg'],
    y=verifiable_recall_performance[verifiable_recall_performance['group']=='younger']['word_count'],
    method='pearson'
)

print('Younger Adults Average Verifiable Details and Recall Performance (Word_Count):') 
pg.print_table(corr)

corr = pg.corr(
    x=verifiable_recall_performance[verifiable_recall_performance['group']=='older']['NarrativeVerifiableDetails_Avg'],
    y=verifiable_recall_performance[verifiable_recall_performance['group']=='older']['word_count'],
    method='pearson'
)

print('Older Adults Average Verifiable Details and Recall Performance (Word_Count):') 
pg.print_table(corr)

Average Verifiable Details and Recall Performance (Word_Count):
  n      r  CI95%          p-val                      BF10    power
---  -----  -----------  -------  ------------------------  -------
 41  0.960  [0.92 0.98]    0.000  50050000000000000000.000    1.000

Younger Adults Average Verifiable Details and Recall Performance (Word_Count):
  n      r  CI95%          p-val              BF10    power
---  -----  -----------  -------  ----------------  -------
 20  0.983  [0.96 0.99]    0.000  195100000000.000    1.000

Older Adults Average Verifiable Details and Recall Performance (Word_Count):
  n      r  CI95%          p-val        BF10    power
---  -----  -----------  -------  ----------  -------
 21  0.897  [0.76 0.96]    0.000  349300.000    1.000



### Demographics

In [420]:
recall[recall['version']=='exp2']['subject'].unique()

array([101, 102, 107, 108, 113, 114, 119, 125, 140, 145, 146, 151, 153,
       160, 163, 164, 171, 201, 202, 207, 208, 212, 214, 219, 220, 225,
       226, 231, 233, 237, 238, 243, 244, 249, 256, 261, 262, 264, 266,
       267, 276, 277, 278])

In [463]:
demographics = pd.read_csv('~/Box Sync/aidelarazan_box/Projects/curbage_recall/github/data/curbage_recall_sub-all_group-all_task-main_desc-demographics.csv')
demographics

Unnamed: 0,subject,group,version,age,education,handedness,gender,hispanic,ethnicity
0,101,older,exp2,69,,,Male,No,Caucasian / White
1,102,older,exp2,79,,,Male,No,Caucasian / White
2,107,older,exp2,65,,,Female,No,Caucasian / White
3,108,older,exp2,71,,,Female,No,Caucasian / White
4,113,older,exp2,67,,,Female,No,Caucasian / White
...,...,...,...,...,...,...,...,...,...
77,417,older,exp1,79,21.0,Right,Male,No,Caucasian / White
78,418,older,exp1,69,25.0,Right,Male,No,Caucasian / White
79,419,older,exp1,68,19.0,Right,Male,No,Caucasian / White
80,420,older,exp1,93,20.0,Right,Male,No,Caucasian / White


In [464]:
print('\nOlder, Expierment 1:\n', demographics[(demographics['version']=='exp1')&(demographics['group']=='older')]['age'].describe())
print('\nYounger, Expierment 1:\n', demographics[(demographics['version']=='exp1')&(demographics['group']=='younger')]['age'].describe())


Older, Expierment 1:
 count    21.000000
mean     72.952381
std       7.432874
min      61.000000
25%      68.000000
50%      70.000000
75%      79.000000
max      93.000000
Name: age, dtype: float64

Younger, Expierment 1:
 count    22.000000
mean     20.045455
std       1.812057
min      18.000000
25%      19.000000
50%      20.000000
75%      21.000000
max      25.000000
Name: age, dtype: float64


In [465]:
print('\nOlder, Expierment 2:\n', demographics[(demographics['version']=='exp2')&(demographics['group']=='older')]['age'].describe())
print('\nYounger, Expierment 2:\n', demographics[(demographics['version']=='exp2')&(demographics['group']=='younger')]['age'].describe())


Older, Expierment 2:
 count    15.000000
mean     71.200000
std       6.258708
min      60.000000
25%      67.500000
50%      71.000000
75%      75.000000
max      81.000000
Name: age, dtype: float64

Younger, Expierment 2:
 count    24.000000
mean     19.875000
std       1.361345
min      18.000000
25%      19.000000
50%      20.000000
75%      21.000000
max      23.000000
Name: age, dtype: float64


In [466]:
print('\nOlder, Expierment All:\n', demographics[(demographics['group']=='older')]['age'].describe())
print('\nYounger, Expierment All:\n', demographics[(demographics['group']=='younger')]['age'].describe())


Older, Expierment All:
 count    36.000000
mean     72.222222
std       6.928661
min      60.000000
25%      67.750000
50%      70.500000
75%      79.000000
max      93.000000
Name: age, dtype: float64

Younger, Expierment All:
 count    46.000000
mean     19.956522
std       1.577009
min      18.000000
25%      19.000000
50%      20.000000
75%      21.000000
max      25.000000
Name: age, dtype: float64


In [467]:
print('\nOlder, Expierment 1:\n', demographics[(demographics['version']=='exp1')&(demographics['group']=='older')]['gender'].value_counts())
print('\nYounger, Expierment 1:\n', demographics[(demographics['version']=='exp1')&(demographics['group']=='younger')]['gender'].value_counts())


Older, Expierment 1:
 Male      11
Female    10
Name: gender, dtype: int64

Younger, Expierment 1:
 Female    21
Male       1
Name: gender, dtype: int64


In [468]:
print('\nOlder, Expierment 2:\n', demographics[(demographics['version']=='exp2')&(demographics['group']=='older')]['gender'].value_counts())
print('\nYounger, Expierment 2:\n', demographics[(demographics['version']=='exp2')&(demographics['group']=='younger')]['gender'].value_counts())


Older, Expierment 2:
 Female    10
Male       5
Name: gender, dtype: int64

Younger, Expierment 2:
 Female    13
Male      11
Name: gender, dtype: int64


In [469]:
print('\nOlder, Expierment All:\n', demographics[(demographics['group']=='older')]['gender'].value_counts())
print('\nYounger, Expierment All:\n', demographics[(demographics['group']=='younger')]['gender'].value_counts())


Older, Expierment All:
 Female    20
Male      16
Name: gender, dtype: int64

Younger, Expierment All:
 Female    34
Male      12
Name: gender, dtype: int64


Narrative Centrality Raters

In [473]:
demographics_narrative_centrality = pd.read_csv('~/Box Sync/aidelarazan_box/Projects/curbage_recall/github/data/curabge_recall_sub-all_group-younger_task-narrative_centrality_desc-demographics.csv')
demographics_narrative_centrality

Unnamed: 0,subject,group,version,age,education,handedness,gender,hispanic,ethnicity
0,520,younger,narrative_centrality,,,,Prefer Not To Report,Prefer Not To Report,Prefer Not To Report
1,513,younger,narrative_centrality,,,,Prefer Not To Report,Prefer Not To Report,Prefer Not To Report
2,525,younger,narrative_centrality,19.0,,,Female,No,Asian
3,501,younger,narrative_centrality,26.0,,,Female,No,Caucasian / White
4,502,younger,narrative_centrality,19.0,,,Female,No,Caucasian / White
5,503,younger,narrative_centrality,,,,Prefer Not To Report,Prefer Not To Report,Prefer Not To Report
6,504,younger,narrative_centrality,20.0,,,Male,No,Asian
7,505,younger,narrative_centrality,18.0,,,Female,No,Asian
8,506,younger,narrative_centrality,19.0,,,Female,No,Asian
9,507,younger,narrative_centrality,19.0,,,Female,No,Asian


In [474]:
print('\nNarrative Centrality:\n', demographics_narrative_centrality['age'].describe())


Narrative Centrality:
 count    15.00000
mean     20.40000
std       2.02837
min      18.00000
25%      19.00000
50%      20.00000
75%      21.50000
max      26.00000
Name: age, dtype: float64


In [476]:
print('\nNarrative Centrality:\n', demographics_narrative_centrality['gender'].value_counts())


Narrative Centrality:
 Female                  12
Prefer Not To Report    10
Male                     3
Name: gender, dtype: int64
