## Correlations between social interactions and theory of mind ratings

Zizhuang Miao

This notebook calculates how similar the group-level ratings for social interactions and theory of mind are. Pearson and Spearman correlations are computed separately for the group mean and the group median ratings, both within each narrative and pooled across all narratives.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

#### Calculate correlations

In [None]:
socialDataDir = 'C:\\'
tomDataDir = 'C:\\'


def _read_ratings(dataDir, n, stat, maskCols):
    """Read one narrative's group-level rating file and mask excluded rows.

    Parameters
    ----------
    dataDir : str
        Directory that holds the ``narrative{n}_{stat}.csv`` files.
    n : int
        Narrative number used in the file name.
    stat : str
        Statistic suffix of the file name (``'mean'`` or ``'median'`` here).
    maskCols : list of str
        Columns set to NaN on every row where ``included == 0``.

    Returns
    -------
    pd.DataFrame
        The file contents with the excluded rows masked.
    """
    data = pd.read_csv(os.path.join(dataDir, f"narrative{n}_{stat}.csv"))
    data.loc[data['included'] == 0, maskCols] = np.nan
    return data


narratives = list(range(1, 9))
stats = ('mean', 'median')

# Accumulate in plain lists and build the pandas objects once after the loop.
# Growing a DataFrame/Series with pd.concat on every iteration is quadratic and
# relies on how pandas treats empty, dtype-less objects during concatenation.
corr_records = {stat: [] for stat in stats}
social_parts = {stat: [] for stat in stats}
tom_parts = {stat: [] for stat in stats}

for n in narratives:
    for stat in stats:
        # read data for one narrative and blank out the excluded rows
        # NOTE(review): the median-only cell further down masks 'mad' (not 'std')
        # for the theory-of-mind median file -- confirm which dispersion column
        # the median CSVs actually contain. Only the `stat` column is correlated.
        social_data = _read_ratings(socialDataDir, n, stat, [stat, 'std'])
        tom_data = _read_ratings(tomDataDir, n, stat, [stat, 'std'])

        # calculate correlation for this narrative (NaN pairs are dropped by .corr)
        pearson = social_data[stat].corr(tom_data[stat], method='pearson')
        spearman = social_data[stat].corr(tom_data[stat], method='spearman')
        corr_records[stat].append({'narrative': n, 'pearson': pearson, 'spearman': spearman})

        # keep this narrative's ratings for the pooled correlation
        social_parts[stat].append(social_data[stat])
        tom_parts[stat].append(tom_data[stat])

corr_columns = ['narrative', 'pearson', 'spearman']
corr_narratives_mean = pd.DataFrame(corr_records['mean'], columns=corr_columns)
corr_narratives_median = pd.DataFrame(corr_records['median'], columns=corr_columns)

# concatenate data from all narratives; `keys` yields a unique (narrative, row)
# index so that .corr() pairs each social rating with the ToM rating of the same
# narrative and row instead of aligning on repeated 0..k-1 row labels.
mean_social = pd.concat(social_parts['mean'], keys=narratives)
mean_tom = pd.concat(tom_parts['mean'], keys=narratives)
median_social = pd.concat(social_parts['median'], keys=narratives)
median_tom = pd.concat(tom_parts['median'], keys=narratives)

# calculate correlation for all narratives
pearson_mean = mean_social.corr(mean_tom, method='pearson')
spearman_mean = mean_social.corr(mean_tom, method='spearman')
pearson_median = median_social.corr(median_tom, method='pearson')
spearman_median = median_social.corr(median_tom, method='spearman')

# print results
print('Correlation between social and tom data for each narrative (mean):')
print(corr_narratives_mean)
print()
print('Correlation between social and tom data for each narrative (median):')
print(corr_narratives_median)
print()
print('Correlation for all narratives (mean data):')
print(f'Pearson correlation: {pearson_mean}')
print(f'Spearman correlation: {spearman_mean}')
print()
print('Correlation for all narratives (median data):')
print(f'Pearson correlation: {pearson_median}')
print(f'Spearman correlation: {spearman_median}')

In [4]:
# calculate correlation for only medians
socialDataDir = 'C:\\'
tomDataDir = 'C:\\'

narratives = list(range(1, 9))

# Collect per-narrative results in lists and build the pandas objects once after
# the loop, instead of growing them with pd.concat from empty objects each pass.
corr_records = []
social_parts = []
tom_parts = []

for n in narratives:
    # read median data for one narrative; rows flagged included == 0 are masked
    social_data = pd.read_csv(os.path.join(socialDataDir, f"narrative{n}_median.csv"))
    social_data.loc[social_data['included']==0, ['median', 'std']] = np.nan
    tom_data = pd.read_csv(os.path.join(tomDataDir, f"narrative{n}_median.csv"))
    tom_data.loc[tom_data['included']==0, ['median', 'mad']] = np.nan

    # calculate correlation for this narrative (NaN pairs are dropped by .corr)
    pearson = social_data['median'].corr(tom_data['median'], method='pearson')
    spearman = social_data['median'].corr(tom_data['median'], method='spearman')
    corr_records.append({'narrative': n, 'pearson': pearson, 'spearman': spearman})

    # keep this narrative's medians for the pooled correlation
    social_parts.append(social_data['median'])
    tom_parts.append(tom_data['median'])

corr_narratives_median = pd.DataFrame(corr_records, columns=['narrative', 'pearson', 'spearman'])

# concatenate data from all narratives; `keys` gives a unique (narrative, row)
# index so the pooled .corr() pairs values from the same narrative and row rather
# than aligning on row labels that repeat across narratives.
median_social = pd.concat(social_parts, keys=narratives)
median_tom = pd.concat(tom_parts, keys=narratives)

# calculate correlation for all narratives
pearson_median = median_social.corr(median_tom, method='pearson')
spearman_median = median_social.corr(median_tom, method='spearman')

# print results
print('Correlation between social and tom data for each narrative (median):')
print(corr_narratives_median)
print()
print('Correlation for all narratives (median data):')
print(f'Pearson correlation: {pearson_median}')
print(f'Spearman correlation: {spearman_median}')

Correlation between social and tom data for each narrative (median):
  narrative   pearson  spearman
0         1 -0.108763 -0.142516
1         2  0.502130  0.615102
2         3  0.621432  0.623383
3         4  0.358679  0.363846
4         5  0.474134  0.504193
5         6  0.246612  0.294862
6         7  0.753914  0.745873
7         8  0.221399  0.300874

Correlation for all narratives (median data):
Pearson correlation: 0.3226134923930169
Spearman correlation: 0.33405817407008953


In [12]:
# save the correlations by narratives results to csv
# Each table gets its own file inside the output directory: writing both frames
# to one identical path would let the second overwrite the first, and a bare
# directory path is not a writable CSV target.
outputDir = 'C:\\'
corr_narratives_mean.to_csv(os.path.join(outputDir, 'corr_narratives_mean.csv'), index=False)
corr_narratives_median.to_csv(os.path.join(outputDir, 'corr_narratives_median.csv'), index=False)