Goals:
* PCA on political/values measures
* Estimate 'quality' of participants as representatives of conservatives/liberals (i.e. check correspondence between ideology slider measure during prescreen and long ideology value surveys during experiment)

In [None]:
import os
import numpy as np
import pandas as pd
import scipy as sp
import scipy.spatial
import scipy.stats
import matplotlib.pyplot as plt
import seaborn as sns
import re

##### Load data

In [None]:
# Resolve the project root (two directory levels above the working
# directory) and derive the data folder from it.
base_dir = os.path.realpath(os.path.join('..', '..'))
data_dir = '%s/Data' % base_dir
print(base_dir)

In [None]:
# Load the cleaned survey table. The first CSV column becomes the row index
# and subject IDs are read as strings.
id_dat_path = data_dir + '/Cleaned/Surveys/ID_dat.csv'
ID_dat = pd.read_csv(id_dat_path, index_col=0, dtype={'SubID': str})
print(ID_dat.shape)

##### Show

In [None]:
ID_dat.head()

In [None]:
# Comma-joined string of all column names; later cells run regex searches
# over this string to collect survey item columns.
colnames = ','.join(ID_dat.columns)
colnames

## Does IUS correlate with ideology or extremism?

In [None]:
metric = 'IUS_mean'

In [None]:
# Exclusions
# Only subject 34 has to be excluded here, because this person misrepresented
# their ideology in the screening survey. The remaining exclusions mostly
# matter for the brain data (e.g. exclusions due to motion).
exclusion_table = pd.read_csv(
    data_dir + '/Subjects_and_exclusions/exclude_ideology.csv', index_col=0)
# Subject IDs in ID_dat are strings, so format the excluded IDs the same way.
exclude = ['%i' % sub for sub in exclusion_table.values.flatten().tolist()]
print(exclude)
keep_rows = ~ID_dat['SubID'].isin(exclude)
tmp_dat = ID_dat.loc[keep_rows, :].copy()
print(tmp_dat.shape)

In [None]:
sns.set(context = 'notebook', style = 'whitegrid', font = 'arial')

In [None]:
plt.scatter(tmp_dat['IdeologyScale_1'],tmp_dat[metric])

In [None]:
scipy.stats.pearsonr(tmp_dat['IdeologyScale_1'],tmp_dat[metric])

In [None]:
plt.scatter(np.abs(tmp_dat['IdeologyScale_1']-50),tmp_dat[metric])

In [None]:
scipy.stats.pearsonr(np.abs(tmp_dat['IdeologyScale_1']-50),tmp_dat[metric])

## Comments

In [None]:
# Collect the free-text debrief comments keyed by subject ID, skipping rows
# where the ID or the comment is missing, and display them as a dict.
debrief_dat = (ID_dat[['SubID', 'DebriefComments']]
               .dropna()
               .set_index('SubID'))
debrief_dat.to_dict()

## Ideology scale

In [None]:
# Histogram (with rug) of the ideology slider, drawn separately for
# participants below and above the scale midpoint of 50.
sns.set(context='talk', font='arial', style='ticks')
ic_palette = sns.color_palette('RdBu_r', 7)
sns.set_palette([ic_palette[0], ic_palette[6]])
precision = 5
# Bin edges are offset by half a bin so that bins are centred on multiples
# of `precision`.
hist_bins = np.arange(0 - precision/2, 105 + precision/2, precision)
fig, ax = plt.subplots(1, 1, figsize=[6, 4])
ideology_groups = [('Liberal', 'IdeologyScale_1 < 50'),
                   ('Conservative', 'IdeologyScale_1 > 50')]
# NOTE(review): sns.distplot is deprecated in recent seaborn releases.
for group_label, group_filter in ideology_groups:
    sns.distplot(ID_dat.query(group_filter)['IdeologyScale_1'],
                 bins=hist_bins, rug=True, kde=False,
                 label=group_label, ax=ax)
ax.set_xlim([0 - precision/2, 100 + precision/2])
ax.legend(frameon=False)
for spine in ax.spines.values():
    spine.set_linewidth(1)
    spine.set_color('k')
ax.set(ylabel='Frequency', xlabel='Ideology self-report')
# Dashed marker at the scale midpoint.
ax.plot([50, 50], [0, 5], 'k--', lw=1)
plt.savefig(base_dir + '/Results/Individual_differences/Ideology_self_report.pdf',
            transparent=True, bbox_inches='tight')
plt.savefig(base_dir + '/Results/Individual_differences/Ideology_self_report.png',
            transparent=True, bbox_inches='tight', dpi=500)

# The following sections are used to compare responses on the longer-form ideology surveys (SECS, SDO etc) to the ideology self-report in the prescreener

## SECS

SECS by political identity:

In [None]:
sns.set(context='notebook', font = 'arial')

In [None]:
# Diverging palette over the identity options, with 'Other' shown in grey
# and 'Moderate' in green.
ic_options = ['Progressive', 'Liberal', 'Other', 'Moderate', 'Libertarian', 'Conservative']
ic_palette = sns.color_palette('RdBu_r', len(ic_options))
ic_palette[ic_options.index('Other')] = [.7, .7, .7]
ic_palette[ic_options.index('Moderate')] = sns.color_palette('Greens', 1)[0]
sns.set_palette(ic_palette)

In [None]:
# SECS total and subscale scores against the prescreen ideology slider,
# one panel per score, coloured by political identity choice.
cols = ['SECS_total', 'SECS_social', 'SECS_economic']
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=[14, 4])
for panel, score_col in zip(ax, cols):
    sns.scatterplot(data=ID_dat, x='IdeologyScale_1', y=score_col, ax=panel,
                    s=50, hue='IdentityChoice', hue_order=ic_options,
                    edgecolor='k')
    # Keep the legend entries but drop the per-panel legend; a single shared
    # legend is drawn next to the last panel below.
    hand, lab = panel.get_legend_handles_labels()
    panel.get_legend().remove()
    panel.set(xlabel='Self-reported conservatism\n(prescreening survey)',
              title=score_col, ylabel='Score')
ax[2].legend(hand, lab, loc=[1.1, 0],
             title='Political identity choice\n(prescreening survey)')
plt.tight_layout()

By party affiliation:

In [None]:
# Diverging palette over the party options, with 'Independent' shown in
# green and 'Other' in grey.
pc_options = [
    'Democrat: Strong',
    'Democrat: Not very strong',
    'Independent: Leaning Democrat',
    'Independent',
    'Other',
    'Independent: Leaning Republican',
    'Republican: Not very strong',
    'Republican: Strong',
]
pc_palette = sns.color_palette('RdBu_r', len(pc_options))
pc_palette[pc_options.index('Independent')] = sns.color_palette('Greens', 1)[0]
pc_palette[pc_options.index('Other')] = [.7, .7, .7]
sns.set_palette(pc_palette)

In [None]:
# SECS total and subscale scores against the prescreen ideology slider,
# coloured by party choice.
cols = ['SECS_total', 'SECS_social', 'SECS_economic']
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=[14, 4])
for panel, score_col in zip(ax, cols):
    sns.scatterplot(data=ID_dat, x='IdeologyScale_1', y=score_col, ax=panel,
                    s=50, hue='PartyChoice', hue_order=pc_options,
                    edgecolor='k')
    # Remember the legend entries, then hide the per-panel legend.
    hand, lab = panel.get_legend_handles_labels()
    panel.get_legend().remove()
# One shared legend to the right of the last panel.
ax[2].legend(hand, lab, loc=[1.1, 0])
plt.tight_layout()

By Social Conservatism identity:

In [None]:
# Identity options without 'Other'; 'Moderate' is shown in green.
ic_options = ['Progressive', 'Liberal', 'Moderate', 'Libertarian', 'Conservative']
ic_palette = sns.color_palette('RdBu_r', len(ic_options))
ic_palette[ic_options.index('Moderate')] = sns.color_palette('Greens', 1)[0]
sns.set_palette(ic_palette)

In [None]:
# SECS scores against the ideology slider, coloured by the
# 'ConservatismSocial' column.
cols = ['SECS_total', 'SECS_social', 'SECS_economic']
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=[14, 4])
for panel, score_col in zip(ax, cols):
    sns.scatterplot(data=ID_dat, x='IdeologyScale_1', y=score_col, ax=panel,
                    s=50, hue='ConservatismSocial', hue_order=ic_options,
                    edgecolor='k')
    # Remember the legend entries, then hide the per-panel legend.
    hand, lab = panel.get_legend_handles_labels()
    panel.get_legend().remove()
# One shared legend to the right of the last panel.
ax[2].legend(hand, lab, loc=[1.1, 0])
plt.tight_layout()

In [None]:
# SECS scores against the ideology slider, coloured by the
# 'ConservatismEconomic' column.
cols = ['SECS_total', 'SECS_social', 'SECS_economic']
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=[14, 4])
for panel, score_col in zip(ax, cols):
    sns.scatterplot(data=ID_dat, x='IdeologyScale_1', y=score_col, ax=panel,
                    s=50, hue='ConservatismEconomic', hue_order=ic_options,
                    edgecolor='k')
    # Remember the legend entries, then hide the per-panel legend.
    hand, lab = panel.get_legend_handles_labels()
    panel.get_legend().remove()
# One shared legend to the right of the last panel.
ax[2].legend(hand, lab, loc=[1.1, 0])
plt.tight_layout()

## SECS items specifically referenced in video stimuli

In [None]:
# Party options and their palette for the item-level plots:
# 'Independent' in green, 'Other' in grey, the rest on a diverging scale.
pc_options = [
    'Democrat: Strong',
    'Democrat: Not very strong',
    'Independent: Leaning Democrat',
    'Independent',
    'Other',
    'Independent: Leaning Republican',
    'Republican: Not very strong',
    'Republican: Strong',
]
pc_palette = sns.color_palette('RdBu_r', len(pc_options))
pc_palette[pc_options.index('Independent')] = sns.color_palette('Greens', 1)[0]
pc_palette[pc_options.index('Other')] = [.7, .7, .7]
sns.set_palette(pc_palette)

In [None]:
# The three SECS items referenced in the video stimuli, plotted against the
# prescreen ideology slider and coloured by party choice.
cols = ['SECS_1', 'SECS_6', 'SECS_3']
col_labels = ['Abortion', 'Gun ownership', 'Military and national security']
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=[14, 4])
for panel, item_col, item_label in zip(ax, cols, col_labels):
    sns.scatterplot(data=ID_dat, x='IdeologyScale_1', y=item_col, ax=panel,
                    s=50, hue='PartyChoice', hue_order=pc_options,
                    edgecolor='k')
    # Remember the legend entries, then hide the per-panel legend.
    hand, lab = panel.get_legend_handles_labels()
    panel.get_legend().remove()
    panel.set(title=item_label,
              ylabel='Positivity of feeling toward issue\n(lab session)',
              xlabel='Self-reported conservatism\n(prescreening survey)')
# One shared legend to the right of the last panel.
ax[2].legend(hand, lab, loc=[1.1, 0], title='Party choice in prescreening survey')
plt.tight_layout()

In [None]:
# Identity options and palette for the item-level plots:
# 'Other' in grey, 'Moderate' in green, the rest on a diverging scale.
ic_options = ['Progressive', 'Liberal', 'Other', 'Moderate',
              'Libertarian', 'Conservative']
ic_palette = sns.color_palette('RdBu_r', len(ic_options))
ic_palette[ic_options.index('Other')] = [.7, .7, .7]
ic_palette[ic_options.index('Moderate')] = sns.color_palette('Greens', 1)[0]
sns.set_palette(ic_palette)

In [None]:
# The three SECS items referenced in the video stimuli, plotted against the
# prescreen ideology slider and coloured by political identity choice.
cols = ['SECS_1', 'SECS_6', 'SECS_3']
col_labels = ['Abortion', 'Gun ownership', 'Military and national security']
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=[14, 4])
for panel, item_col, item_label in zip(ax, cols, col_labels):
    sns.scatterplot(data=ID_dat, x='IdeologyScale_1', y=item_col, ax=panel,
                    s=50, hue='IdentityChoice', hue_order=ic_options,
                    edgecolor='k')
    # Remember the legend entries, then hide the per-panel legend.
    hand, lab = panel.get_legend_handles_labels()
    panel.get_legend().remove()
    panel.set(title=item_label,
              ylabel='Positivity of feeling toward issue\n(lab session)',
              xlabel='Self-reported conservatism\n(prescreening survey)')
# One shared legend to the right of the last panel.
ax[2].legend(hand, lab, loc=[1.1, 0],
             title='Political identity choice\n(prescreening survey)')
plt.tight_layout()

In [None]:
ID_dat.query('SECS_6 < 10')[['SubID','IdeologyScale_1','SECS_6']]

In [None]:
ID_dat.query('SECS_1 > 90')[['SubID','IdeologyScale_1','SECS_1']]

So subject 34 is liberal on abortion and gun control.

## S-SVS

In [None]:
# Collect the SSVS column names by searching the comma-joined column string.
# Each hit is extended by three characters past the base name and stripped
# of commas, which captures suffixes such as '_1' or '_10'.
survey_basename = 'SSVS'
survey_dtype = int
survey_cols = [colnames[hit.start():hit.end() + 3].strip(',')
               for hit in re.finditer(survey_basename, colnames)]
for survey_col in survey_cols:
    print(survey_col, end='\t')

In [None]:
ID_dat[survey_cols].head()

In [None]:
sns.heatmap(np.corrcoef(ID_dat[survey_cols], rowvar=False), square = True,
           xticklabels = survey_cols, yticklabels = survey_cols, vmin = -1, vmax = 1)

In [None]:
from sklearn.decomposition import PCA
from matplotlib.colors import ListedColormap
RdBu_r_cmap = ListedColormap(sns.color_palette('RdBu_r',10).as_hex())

In [None]:
pca = PCA(n_components=2)
SSVS_reduced = pca.fit_transform(ID_dat[survey_cols])

In [None]:
# Scatter of the first two SSVS principal components, coloured by the
# prescreen ideology slider and labelled with subject IDs.
sns.set(context = 'notebook', style = 'whitegrid', font = 'arial')
fig, ax  = plt.subplots(1,1,figsize=[6,5])
im = ax.scatter(x = SSVS_reduced[:,0], y = SSVS_reduced[:,1],
                c = ID_dat['IdeologyScale_1'],
                s = 100, cmap = RdBu_r_cmap, edgecolor = None,
                alpha = .6)
# Rows of SSVS_reduced follow the row order of ID_dat, so pair scores and
# subject IDs positionally. Looking up `ID_dat.loc[i, ...]` with a positional
# counter only works when the index happens to be 0..n-1.
for (pc1, pc2), sub in zip(SSVS_reduced[:, :2], ID_dat['SubID'].values):
    ax.text(pc1, pc2, sub)
fig.colorbar(im, label = 'Conservatism')
ax.set_xlabel('PC1 values\nOpenness to change <--> Conservation');
# Spelling of "transcendence" corrected in the axis label.
ax.set_ylabel('PC2 values\nSelf-transcendence <--> Self-enhancement');
ax.set_title('First 2 principal components\n%.1f%% of SSVS variance explained'%(
    np.sum(pca.explained_variance_ratio_[0:2])*100));
plt.savefig(base_dir + '/Results/Individual_differences/SVSS_PCA.pdf',
           bbox_inches='tight', transparent = True)

In [None]:
# First SSVS principal component against the prescreen ideology slider,
# labelled with subject IDs.
plt.scatter(x = ID_dat['IdeologyScale_1'], y = SSVS_reduced[:,0],
            c = ID_dat['IdeologyScale_1'],
            s = 100, cmap = RdBu_r_cmap, edgecolor = None,
            alpha = .6)
# Pair rows positionally: SSVS_reduced follows the row order of ID_dat, and
# label-based `ID_dat.loc[i, ...]` would only work for a 0..n-1 index.
for ideology, pc1, sub in zip(ID_dat['IdeologyScale_1'].values,
                              SSVS_reduced[:, 0],
                              ID_dat['SubID'].values):
    plt.text(ideology, pc1, sub)
plt.colorbar(label = 'Conservatism')
plt.xlabel('Conservatism')
plt.ylabel('PC1 values')
plt.title('First principal component\n%.1f%% of SSVS variance explained'%(
    pca.explained_variance_ratio_[0]*100));

In [None]:
# Row positions (not index labels) of participants below / above the scale
# midpoint. These lists index rows of the NumPy array SSVS_reduced, so they
# must be positional; index labels only coincide with positions when the
# DataFrame index is 0..n-1.
ideology_values = ID_dat['IdeologyScale_1'].values
dem_indices = np.flatnonzero(ideology_values < 50).tolist()
rep_indices = np.flatnonzero(ideology_values > 50).tolist()

In [None]:
sns.distplot(SSVS_reduced[dem_indices,0], 20, color = 'b',
        kde = True, rug=True)
sns.distplot(SSVS_reduced[rep_indices,0], 20, color = 'r',
        kde = True, rug = True)

In [None]:
# Side-by-side bars of the PC1 and PC2 loadings on the ten SSVS values.
sns.set_context('talk')
values_list = 'Power Achievement Hedonism Stimulation Self-direction Universalism Benevolence Tradition Conformism Security'.split(' ')
item_positions = np.arange(1, 11)
# Shift the two bar series left/right of each tick so they do not overlap.
for pc_index, bar_offset in enumerate([-0.25, 0.25]):
    plt.bar(x=bar_offset + item_positions, height=pca.components_[pc_index],
            width=.5, label='PC%i' % (pc_index + 1))
plt.xticks(item_positions, values_list, rotation=90);
plt.legend(loc='lower right');
plt.savefig(base_dir + '/Results/Individual_differences/SVSS_PCA_loadings.pdf',
            bbox_inches='tight', transparent=True)

##### So it looks like the first component that the S-SVS picks up on is actually conservatism.

## RWA

In [None]:
# Correlation matrix of the RWA subscales and total score.
subscale_cols = ['RWA_Aggression', 'RWA_Submission',
                 'RWA_Conventionalism', 'RWA_total']
# rowvar=False: columns are the variables, rows are participants.
rwa_corr = np.corrcoef(ID_dat[subscale_cols], rowvar=False)
sns.heatmap(rwa_corr, square=True, vmin=-1, vmax=1,
            xticklabels=subscale_cols, yticklabels=subscale_cols)

In [None]:
# RWA total score against the prescreen ideology slider, labelled with
# subject IDs.
sns.set(context = 'notebook', style = 'whitegrid', font = 'arial')
fig, ax  = plt.subplots(1,1,figsize=[6,5])
im = ax.scatter(x = ID_dat['IdeologyScale_1'],
                y = ID_dat['RWA_total'],
                c = ID_dat['IdeologyScale_1'],
                s = 100, cmap = RdBu_r_cmap, edgecolor = None,
                alpha = .6)
# Pass scalar coordinates to ax.text. The previous per-subject query handed
# one-element Series to ax.text, which relies on implicit float(Series)
# conversion (deprecated in pandas) and re-filtered the table per subject.
# Assumes one row per subject in ID_dat.
for sub, ideology, score in zip(ID_dat['SubID'],
                                ID_dat['IdeologyScale_1'],
                                ID_dat['RWA_total']):
    ax.text(ideology, score, sub)
fig.colorbar(im, label = 'Conservatism')
ax.set_xlabel('Conservatism');
ax.set_ylabel('Right-wing authoritarianism');
ax.set(title = 'RWA')

## LWA

In [None]:
sns.set(context = 'notebook', style = 'whitegrid', font = 'arial')

subscale_cols = ['RWA_total','LWA_total']
sns.relplot(data = ID_dat, x = 'RWA_total', y = 'LWA_total')

In [None]:
# LWA total score against the prescreen ideology slider, labelled with
# subject IDs.
sns.set(context = 'notebook', style = 'whitegrid', font = 'arial')
fig, ax  = plt.subplots(1,1,figsize=[6,5])
im = ax.scatter(x = ID_dat['IdeologyScale_1'],
                y = ID_dat['LWA_total'],
                c = ID_dat['IdeologyScale_1'],
                s = 100, cmap = RdBu_r_cmap, edgecolor = None,
                alpha = .6)
# Pass scalar coordinates to ax.text. The previous per-subject query handed
# one-element Series to ax.text, which relies on implicit float(Series)
# conversion (deprecated in pandas) and re-filtered the table per subject.
# Assumes one row per subject in ID_dat.
for sub, ideology, score in zip(ID_dat['SubID'],
                                ID_dat['IdeologyScale_1'],
                                ID_dat['LWA_total']):
    ax.text(ideology, score, sub)
fig.colorbar(im, label = 'Conservatism')
ax.set_xlabel('Conservatism');
ax.set_ylabel('Left-wing authoritarianism');
ax.set(title = 'LWA')

## SDO-7(s)

In [None]:
# SDO total score against the prescreen ideology slider, labelled with
# subject IDs.
sns.set(context = 'notebook', style = 'whitegrid', font = 'arial')
fig, ax  = plt.subplots(1,1,figsize=[6,5])
im = ax.scatter(x = ID_dat['IdeologyScale_1'],
                y = ID_dat['SDO_total'],
                c = ID_dat['IdeologyScale_1'],
                s = 100, cmap = RdBu_r_cmap, edgecolor = None,
                alpha = .6)
# Pass scalar coordinates to ax.text. The previous per-subject query handed
# one-element Series to ax.text, which relies on implicit float(Series)
# conversion (deprecated in pandas) and re-filtered the table per subject.
# Assumes one row per subject in ID_dat.
for sub, ideology, score in zip(ID_dat['SubID'],
                                ID_dat['IdeologyScale_1'],
                                ID_dat['SDO_total']):
    ax.text(ideology, score, sub)
fig.colorbar(im, label = 'Conservatism')
ax.set_xlabel('Conservatism');
ax.set_ylabel('Social dominance orientation');
ax.set(title = 'SDO')

In [None]:
scipy.stats.pearsonr(ID_dat['IdeologyScale_1'], ID_dat['NFC_mean'])

## PCA on political survey items

Select data

In [None]:
base_names = ['SECS','SSVS','RWA','LWA','SDO']

In [None]:
# Gather the item columns of every political survey, in base_names order.
# Each regex hit in the comma-joined column string is extended by three
# characters and stripped of commas (captures suffixes like '_1' / '_10').
# NOTE(review): only the first 1000 characters of `colnames` are searched;
# presumably this keeps later aggregate/subscale columns out of the result,
# which makes the selection depend on column order - confirm.
poli_cols = [colnames[hit.start():hit.end() + 3].strip(',')
             for base_name in base_names
             for hit in re.finditer(base_name, colnames[:1000])]

In [None]:
print(len(poli_cols))

In [None]:
# Choose the subjects entering the PCA and pull out their survey items.
# The exclusion list is currently empty; put e.g. "34" in it to drop a subject.
exclude = []
in_pca = ~ID_dat['SubID'].isin(exclude)
pca_subs = ID_dat.loc[in_pca, 'SubID'].values
pca_dat = ID_dat.loc[in_pca, poli_cols].copy()
pca_dat.shape

z-score measures

In [None]:
# Column-wise z-score (axis=0) of every survey item before running the PCA.
pca_dat = scipy.stats.zscore(pca_dat, axis = 0)

Run PCA

In [None]:
pca = PCA(n_components=10)
poli_cols_reduced = pca.fit_transform(pca_dat)

In [None]:
pca.explained_variance_ratio_

In [None]:
# Bar chart of the percentage of variance explained by each component.
variance_pct = 100 * pca.explained_variance_ratio_
component_numbers = np.arange(1, 1 + len(pca.explained_variance_ratio_))
plt.bar(x=component_numbers, height=variance_pct)
plt.xticks(np.arange(1, 11));
plt.title('PCA explained variance per component');
plt.ylabel('%')
plt.xlabel('Component');

Plot first 2 components:

In [None]:
# First two components of the political-survey PCA, coloured by the
# prescreen ideology slider and labelled with subject IDs.
sns.set(context='notebook', style='whitegrid', font='arial')
fig, ax = plt.subplots(1, 1, figsize=[6, 5])
pc1_scores = poli_cols_reduced[:, 0]
pc2_scores = poli_cols_reduced[:, 1]
# Ideology values for exactly the subjects that entered the PCA.
pca_ideology = ID_dat.loc[ID_dat['SubID'].isin(pca_subs), 'IdeologyScale_1']
im = ax.scatter(x=pc1_scores, y=pc2_scores, c=pca_ideology,
                s=100, cmap=RdBu_r_cmap, edgecolor=None, alpha=.6)
for pc1, pc2, sub in zip(pc1_scores, pc2_scores, pca_subs):
    ax.text(pc1, pc2, sub)
fig.colorbar(im, label='Conservatism')
ax.set_xlabel('PC1 values');
ax.set_ylabel('PC2 values');
variance_pct = np.sum(pca.explained_variance_ratio_[0:2])*100
ax.set_title('First 2 principal components\n%.1f%% of political survey variance explained'%(
    variance_pct));
plt.savefig(base_dir + '/Results/Individual_differences/Political_surveys_PCA.pdf',
            bbox_inches='tight', transparent=True)

So 34 is clearly an outlier.

Plot first component only against conservatism self-report:

In [None]:
# First political-survey principal component against the prescreen ideology
# slider, labelled with subject IDs.
sns.set(context = 'notebook', font = 'arial', style = 'whitegrid')
fig, ax  = plt.subplots(1,1,figsize=[6,5])
# Use only the rows that entered the PCA (same filter as the two-component
# plot) so ideology values and component scores always have matching length
# and order, also when `exclude` is non-empty.
pca_rows = ID_dat.loc[ID_dat['SubID'].isin(pca_subs), ['SubID', 'IdeologyScale_1']]
pca_ideology = pca_rows['IdeologyScale_1'].values
im = ax.scatter(x = pca_ideology, y = poli_cols_reduced[:,0],
                c = pca_ideology,
                s = 100, cmap = RdBu_r_cmap, edgecolor = None,
                alpha = .6)
# Pair rows positionally instead of using a positional counter as an index
# label (`ID_dat.loc[i, ...]`), which only works for a 0..n-1 index.
for ideology, pc1, sub in zip(pca_ideology, poli_cols_reduced[:, 0],
                              pca_rows['SubID'].values):
    ax.text(ideology, pc1, sub)
fig.colorbar(im, label = 'Conservatism')
ax.set_xlabel('Conservatism');
ax.set_ylabel('PC1 values');
ax.set_title('First principal component\n%.1f%% of political survey variance explained'%(
    pca.explained_variance_ratio_[0]*100));
plt.savefig(base_dir + '/Results/Individual_differences/Political_surveys_PCA_PC1.pdf',
           bbox_inches='tight', transparent = True)

Plot the distribution of PC1 scores - it looks bimodal (two separate peaks), which is good:

In [None]:
sns.distplot(poli_cols_reduced[:,0], bins = 20)

Plot PC loadings onto survey items:

In [None]:
survey_colors = sns.color_palette('tab10',len(base_names))

In [None]:
# Loadings of the first three principal components on every survey item,
# one panel per component, with bars grouped and coloured by survey.
sns.set(context = 'notebook', style = 'whitegrid', font = 'arial')
fig, ax = plt.subplots(ncols=1,nrows=3,figsize=[14,12])
# Positions of each survey's items within poli_cols; identical for every
# component, so compute them once.
survey_item_indices = [
    [i for i, item_col in enumerate(poli_cols) if survey_name in item_col]
    for survey_name in base_names
]
for pci in [0,1,2]:
    start_x = 1
    for surveyi, survey_name in enumerate(base_names):
        item_indices = survey_item_indices[surveyi]
        ax[pci].bar(x = start_x + np.arange(len(item_indices)),
                    height = pca.components_[pci][item_indices],
                    width = .75, label = survey_name, color = survey_colors[surveyi])
        start_x += len(item_indices)
    # Extra room on the right leaves space for the legend.
    ax[pci].set(xlim = [0,start_x + 7], title = 'PC%i'%(pci+1))
    ax[pci].legend(loc = 'right');
values_list = 'Power Achievement Hedonism Stimulation Self-direction Universalism Benevolence Tradition Conformism Security'.split(' ')
SECS_length = len([item_col for item_col in poli_cols if 'SECS' in item_col])
# Label the SSVS bars, which start right after the SECS bars (SECS is the
# first survey in base_names). One tick per label: the previous range
# produced 11 tick positions for 10 labels.
ssvs_tick_positions = SECS_length + 1 + np.arange(len(values_list))
plt.xticks(ssvs_tick_positions, values_list, rotation = 90);
plt.savefig(base_dir + '/Results/Individual_differences/Political_surveys_PCA_loadings.pdf',
           bbox_inches='tight', transparent = True)

Comparing these loadings to the loadings of the SSVS only (scroll up a bit), it looks like PC1 is the 'conservatism' component which is also found in the SSVS data (see Lindeman & Verkasalo, 2005). PC2 corresponds closely to the 'self-transcendence' factor identified in the same paper. So overall, the SSVS alone may have done just as well at pulling out the main factors - or even just the responses on the prescreen.

##### Store components in ID_dat

In [None]:
# Store the first three component scores in ID_dat. Scores are written only
# to the rows that entered the PCA; any other rows get NaN. With an empty
# exclusion list this is identical to assigning the full columns, but it
# keeps working (and stays aligned) when subjects are excluded from the PCA.
pca_row_mask = ID_dat['SubID'].isin(pca_subs)
for comp_idx in range(3):
    comp_col = 'PCA_comp%i' % (comp_idx + 1)
    # Reset first so stale values loaded from the CSV cannot survive.
    ID_dat[comp_col] = np.nan
    ID_dat.loc[pca_row_mask, comp_col] = poli_cols_reduced[:, comp_idx]

In [None]:
# Write the table, now including the PCA component columns, back to the same
# CSV path it was loaded from (this overwrites the input file).
ID_dat.to_csv(data_dir +'/Cleaned/Surveys/ID_dat.csv')

In [None]:
[a for a in ID_dat.columns if 'IUS' in a]

##### Relationship between ideology and PCA component 1

In [None]:
# Correlate the ideology slider with the first PCA component, leaving out
# subject 34.
not_sub_34 = ID_dat['SubID'] != '34'
tmp = ID_dat[not_sub_34]
scipy.stats.pearsonr(tmp['IdeologyScale_1'], tmp['PCA_comp1'])

In [None]:
tmp.shape

##### How much of an outlier is sub 34?

In [None]:
# z-score the first PCA component within the participants above the scale
# midpoint, then show its distribution and the lowest-scoring subjects.
is_conservative = ID_dat['IdeologyScale_1'] > 50
tmp = (ID_dat.loc[is_conservative, ['SubID', 'PCA_comp1']]
       .assign(PCA_comp1=lambda d: scipy.stats.zscore(d['PCA_comp1'])))
plt.hist(tmp['PCA_comp1']);
plt.xlabel('PCA conservatism component (z-scored)')
plt.show();
tmp.sort_values(by='PCA_comp1').head()

So again, subject 34 is clearly an outlier, as they are more than 3 standard deviations from the mean of the 'conservatives' (the group mean includes subject 34) on the conservatism component of the PCA.

## Check: does IUS _multivariate_ similarity relate to ideology?

In [None]:
# Item-level IUS columns for the multivariate (response-pattern) similarity.
# This notebook reads two aggregate columns, 'IUS_mean' and 'IUS_sum'; both
# contain 'IUS', so dropping only the last match (`[:-1]`) left one aggregate
# score in the item vector. Exclude both by name instead.
# NOTE(review): assumes every other column containing 'IUS' is a single item
# - confirm against the cleaned table.
IUS_aggregate_cols = {'IUS_mean', 'IUS_sum'}
IUS_cols = [col for col in ID_dat.columns
            if 'IUS' in col and col not in IUS_aggregate_cols]

In [None]:
# Build a long table with one row per ordered subject pair, holding
#   ideology_similarity  : 100 - |difference on the ideology slider|
#   IUS_sim_multivariate : Pearson r between the two subjects' IUS item
#                          responses, using only items both answered.
# Rows are collected in a list and turned into a DataFrame once, because
# DataFrame.append was removed in pandas 2.0 (and re-copied the table on
# every pair). The numeric columns now have a float dtype instead of object.

# One row per subject (first occurrence), in order of first appearance.
subject_rows = ID_dat.drop_duplicates(subset='SubID', keep='first')
subject_ids = subject_rows['SubID'].tolist()
ideology_values = subject_rows['IdeologyScale_1'].to_numpy(dtype=float)
ius_items = subject_rows[IUS_cols].to_numpy(dtype=float)
ius_answered = ~np.isnan(ius_items)

pair_rows = []
for i, sub1 in enumerate(subject_ids):
    print(sub1, end = ',')
    for j, sub2 in enumerate(subject_ids):
        if i == j:
            continue
        # Response pattern similarity with NaNs ignored
        shared_items = ius_answered[i] & ius_answered[j]
        if shared_items.sum() >= 2:
            ius_similarity = scipy.stats.pearsonr(
                ius_items[i, shared_items], ius_items[j, shared_items])[0]
        else:
            # A correlation is undefined with fewer than two shared items.
            ius_similarity = np.nan
        pair_rows.append({
            'SubID1': sub1,
            'SubID2': sub2,
            'ideology_similarity': 100 - np.abs(ideology_values[i] - ideology_values[j]),
            'IUS_sim_multivariate': ius_similarity,
        })
RDMs = pd.DataFrame(pair_rows, columns = ['SubID1', 'SubID2',
                                          'ideology_similarity',
                                          'IUS_sim_multivariate'])

In [None]:
# Drop duplicate pairs
RDMs['lowersub'] = np.min(RDMs[['SubID1','SubID2']],axis=1)
RDMs['highersub'] = np.max(RDMs[['SubID1','SubID2']],axis=1)

print(RDMs.shape)
RDMs.drop_duplicates(subset = ['lowersub','highersub'], inplace = True)
print(RDMs.shape)

In [None]:
RDMs = RDMs.query('SubID1 != "34"').query('SubID2 != "34"').copy()

In [None]:
plt.scatter(RDMs['ideology_similarity'],RDMs['IUS_sim_multivariate'])

In [None]:
# Correlate ideology similarity with IUS response-pattern similarity across
# subject pairs that have both values.
# NOTE(review): every subject contributes to many pairs, so the pairs are not
# independent observations; the printed DF and the p-value treat them as if
# they were - consider a permutation test.
similarity_cols = ['ideology_similarity', 'IUS_sim_multivariate']
corrdat = RDMs.dropna(subset=similarity_cols)
print('DF = %i' % (len(corrdat) - 2))
scipy.stats.pearsonr(corrdat[similarity_cols[0]], corrdat[similarity_cols[1]])

So ideology does not capture similarity in answering on the IUS survey.

## Does IUS correlate with NFC (need for closure)?

In [None]:
# Correlation between IUS sum score and NFC mean score, excluding subject 34.
sns.set(context = 'talk', style = 'whitegrid', font = 'arial')
# Drop rows missing either score before testing: pearsonr does not skip
# NaNs, and the degrees of freedom reported in the next cell
# (tmp_dat.shape[0] - 2) should count only complete pairs. This matches the
# dropna() used for the later pairwise correlations.
tmp_dat = (ID_dat.query('SubID != "34"')
           .dropna(subset = ['IUS_sum', 'NFC_mean'])
           .copy())
print(tmp_dat.shape)
plt.scatter(tmp_dat['IUS_sum'],tmp_dat['NFC_mean'])
stats = scipy.stats.pearsonr(tmp_dat['IUS_sum'],tmp_dat['NFC_mean'])
plt.show()
print(stats)

In [None]:
# Regression plot of NFC on IUS; the title reports the Pearson test computed
# in the previous cell (`stats`) with N - 2 degrees of freedom.
sns.set(context='talk', style='whitegrid', font='arial')
fig, ax = plt.subplots(1, 1, figsize=[6, 6])
# Open black circles for the points, black regression line.
point_style = {'color': 'none', 'lw': 2, 'edgecolor': 'k'}
sns.regplot(data=tmp_dat, x='IUS_sum', y='NFC_mean', color='k', ax=ax,
            scatter_kws=point_style, line_kws={'lw': 2})
degrees_of_freedom = tmp_dat.shape[0] - 2
ax.set(title='r(%i) = %.3f, p = %.4f'%(degrees_of_freedom, stats[0], stats[1]),
       xlabel='Intolerance of uncertainty (IUS)',
       ylabel='Need for closure (NFC)',
#       xlim = [27-1,5*27+1], ylim = [1,6]
       )

## IUS, NFC, IRI

In [None]:
# Quick look at the pairwise correlations between IUS, NFC and IRI scores.
cols = ['IUS_sum', 'NFC_mean', 'IRI_total']
score_corr = ID_dat[cols].corr()
sns.heatmap(score_corr, square=True, vmin=-1, vmax=1,
            xticklabels=cols, yticklabels=cols)

In [None]:
# Mask the diagonal and every correlation below 0.45.
# `plot_dat` was previously only defined in a later cell, so this cell
# raised a NameError when the notebook was run top to bottom. Compute the
# correlation matrix here instead.
# NOTE(review): this uses the matrix shown in the heatmap directly above
# (ID_dat[cols].corr()); confirm that is the matrix this cell was meant to
# threshold.
plot_dat = ID_dat[cols].corr()
annot_dat = plot_dat.replace({1:np.nan})
annot_dat[annot_dat<0.45] = np.nan
annot_dat

In [None]:
base_dir

In [None]:
# Annotated correlation heatmap of the individual-difference scores
# (subject 34 excluded), followed by a printout of every pairwise test.
cols = ['IUS_sum','NFC_mean','IRI_total','SDO_total']
fancynames = ['IUS','NFC','IRI','SDO']
sns.set(context = 'talk', style = 'whitegrid', font = 'arial')
# tmp_dat keeps its original column names and is not overwritten below, so it
# stays usable after this cell.
tmp_dat = ID_dat.query('SubID != "34"').copy()
plot_dat = tmp_dat[cols].rename(columns = dict(zip(cols,fancynames))).corr()

# Pearson test for every ordered pair of distinct scores, on the rows where
# both scores are present (the same pairwise-complete rows that .corr() uses).
pair_stats = {}
for cola in cols:
    for colb in cols:
        if cola != colb:
            pair_dat = tmp_dat[[cola,colb]].dropna()
            pair_stats[(cola,colb)] = scipy.stats.pearsonr(pair_dat[cola],pair_dat[colb])

# Cell labels: r to two decimals, starred when the pairwise test has p < 0.05.
# The star previously depended on a hard-coded range of r values rather than
# on the p-value. Labels live in an object-dtype frame so that strings are
# not written into a float frame.
annot_dat = pd.DataFrame('', index = plot_dat.index, columns = plot_dat.columns,
                         dtype = object)
for i,cola in enumerate(cols):
    for j,colb in enumerate(cols):
        sigmark = '*' if (cola != colb) and (pair_stats[(cola,colb)][1] < 0.05) else ''
        annot_dat.iloc[i,j] = '%.2f%s'%(plot_dat.iloc[i,j],sigmark)
sns.heatmap(plot_dat, annot = annot_dat, square = True, vmin = -1, vmax = 1, cmap = "RdBu", fmt = '',
            linewidths = 1, linecolor='w', cbar_kws = {'label':'Pearson\'s r'})
plt.savefig(base_dir + '/Results/Individual_differences/ID_corrs.pdf',
            bbox_inches='tight', transparent = True)

for (cola,colb),stats in pair_stats.items():
    sigmark = '*' if stats[1] < 0.05 else ''
    print('%s and %s: r = %.3f, p = %.4f%s'%(cola,colb,stats[0],stats[1],sigmark))

In [None]:
# Regression plot of IRI on IUS (subject 34 excluded), titled with the
# Pearson test and N - 2 degrees of freedom.
sns.set(context = 'talk', style = 'whitegrid', font = 'arial')
# Build the plotting data and the test BEFORE drawing: the plot previously
# used whatever `tmp_dat` the preceding cell left behind, which does not
# contain an 'IUS_sum' column.
tmp_dat = ID_dat.query('SubID != "34"').copy()[['IUS_sum','IRI_total']].dropna()
stats = scipy.stats.pearsonr(tmp_dat['IUS_sum'],tmp_dat['IRI_total'])
fig,ax = plt.subplots(1,1, figsize = [6,6])
sns.regplot(data = tmp_dat, x = 'IUS_sum', y = 'IRI_total', color = 'k', ax = ax,
           scatter_kws = {'color':'none', 'lw':2, 'edgecolor':'k'},
           line_kws = {'lw':2})
ax.set(
    title = 'r(%i) = %.3f, p = %.4f'%(tmp_dat.shape[0]-2, stats[0], stats[1]),
      xlabel = 'Intolerance of uncertainty (IUS)', ylabel = 'Interpersonal Reactivity Index (IRI)',
#       xlim = [27-1,5*27+1], ylim = [1,6]
      )