In [None]:
import os
import numpy as np
import pandas as pd
import scipy as sp
import scipy.spatial
import scipy.stats
import matplotlib.pyplot as plt
import seaborn as sns
import re

## Directories

In [None]:
# Project root: two directory levels above the current working directory.
base_dir = os.path.realpath('../..')
# NOTE(review): other cells read from base_dir + '/Data/...' (capital D) while
# this folder is '/data' -- confirm both resolve on a case-sensitive filesystem.
data_dir = ''.join([base_dir, '/data'])
print(base_dir)

## Subjects

In [None]:
# Full subject roster and the per-subject ideology survey.
# NOTE(review): the roster/exclusion files are read from '<root>/Data' while
# data_dir points at '<root>/data' -- confirm the intended capitalisation.
exclusions_dir = base_dir + '/Data/Subjects_and_exclusions'
all_subs = pd.read_csv(exclusions_dir + '/all_subjects.csv')['sub'].values.tolist()
ID_dat = pd.read_csv(data_dir +'/Cleaned/Surveys/ID_dat.csv',index_col=0)

# For each video run: drop that run's excluded subjects, then split the rest
# into liberals (ideology score below 50) and conservatives (above 50).
# Subjects scoring exactly 50 end up in neither group.
subIDs = {}
for vid in [2,3]:
    exclude_path = exclusions_dir + '/exclude_video-watching_aggregate_run-%i.csv'%vid
    excluded = set(pd.read_csv(exclude_path)['sub'].values.tolist())
    retained = [sub for sub in all_subs if sub not in excluded]
    retained_ID_dat = ID_dat.loc[ID_dat['SubID'].isin(retained),:].copy()
    ideology = retained_ID_dat['IdeologyScale_1']
    subIDs['vid_%i'%vid] = {
        'libs': retained_ID_dat.loc[ideology < 50, 'SubID'].unique(),
        'cons': retained_ID_dat.loc[ideology > 50, 'SubID'].unique(),
    }

## Load data

In [None]:
# Judgment survey table; the first CSV column is used as the row index.
judgment_path = data_dir + '/Cleaned/Surveys/Judgment_dat.csv'
judgment_dat = pd.read_csv(judgment_path, index_col = 0)

In [None]:
# Number of distinct subject IDs in the judgment data (missing IDs count as a value).
judgment_dat['SubID'].nunique(dropna = False)

It's OK that we don't exclude subjects at this point: we want to compute judgment similarity scores for all possible subject pairs, and subject exclusions differ per video. The relevant subjects are therefore excluded later — at the analysis stage in R, and separately for each plot below.

## Select data columns

In [None]:
# List every available column on one comma-separated line.
for column_name in judgment_dat.columns:
    print(column_name, end=',')

In [None]:
# Columns holding the four abortion-statement items used for video 2.
video_2_cols = ['StatementsAbortion_%i' % item for item in range(1, 5)]
video_2_cols

In [None]:
# Columns used for video 3. Order matters: later cells select from this list
# by position.
video_3_cols = [
    # statement agreement items
    'StatementsPolice_1', 'StatementsPolice_2',
    'StatementsImmigratio_1', 'StatementsImmigratio_2',
    'StatementsFaith_1', 'StatementsFaith_2',
    'StatementsOverall_1', 'StatementsOverall_2',
    # intentions items
    'IntentionsPenceCop_1', 'IntentionsKaineMothe_1',
    'IntentionsKaineGuns_1', 'IntentionsPenceCity_1',
    # liking of politicians
    'LikeKaine', 'LikePence', 'LikeTrump', 'LikeClinton',
    # overall debate judgments
    'DebateQuestionBias', 'DebateAgreed', 'DebateWon',
]
video_3_cols

## Plotting function

In [None]:
def plot_Likert_responses(judgment_dat, data_cols, question_labels,
                          libs, cons,
                          Likert_range = [1,7], Likert_label = 'Agreement',
                          ncols = 3, palette = sns.color_palette('RdBu_r',9)):
    """Plot liberal vs. conservative response distributions, one panel per question.

    Parameters
    ----------
    judgment_dat : DataFrame with a 'SubID' column and every column in `data_cols`.
    data_cols : list of column names; each one gets its own panel.
    question_labels : panel titles, in the same order as `data_cols`.
    libs, cons : collections of SubID values defining the two groups.
    Likert_range : [lowest, highest] response value of the scale (inclusive).
    Likert_label : x-axis label.
    ncols : number of panel columns in the grid.
    palette : colour sequence; the first entry is used for liberals and the
        last for conservatives.

    Returns
    -------
    The matplotlib Figure holding the panel grid.
    """
    nrows = int(np.ceil(len(data_cols)/ncols))
    # squeeze=False keeps `axes` two-dimensional even for a 1x1 grid, where
    # plt.subplots would otherwise hand back a bare Axes without .ravel().
    fig,axes = plt.subplots(nrows = nrows, ncols = ncols,
                        figsize = [ncols*4, nrows*4], sharey = False, sharex = False,
                        squeeze = False)
    panels = axes.ravel()

    # Unit-width bins centred on each integer response. With distplot's default
    # KDE the histogram is density-normalised, so bar heights read as
    # proportions and the 0-1 y-axis can be labelled in percent.
    # NOTE(review): sns.distplot is deprecated in recent seaborn releases.
    bins = np.arange(Likert_range[0],Likert_range[1]+2)-.5
    xticks = np.arange(Likert_range[0],Likert_range[1]+1)
    yticks = np.arange(0,1.01,.1)
    yticklabels = ['%d%%' % pct for pct in range(0, 101, 10)]
    groups = [('Liberals', libs, palette[0]),
              ('Conservatives', cons, palette[-1])]

    for ci,col in enumerate(data_cols):
        ax = panels[ci]
        for group_label, group_subs, group_color in groups:
            sns.distplot(judgment_dat.loc[judgment_dat['SubID'].isin(group_subs),col],
                         bins = bins, color = group_color, ax = ax,
                         label = group_label)
        ax.set(xticks = xticks,
               xlim = [Likert_range[0]-1,Likert_range[1]+1], xlabel = Likert_label,
               title = question_labels[ci], ylabel = 'Frequency',
               ylim = [0,1], yticks = yticks,
              )
        ax.set_yticklabels(yticklabels)
        ax.legend()

    # Hide grid cells that have no question assigned instead of drawing empty axes.
    for unused_ax in panels[len(data_cols):]:
        unused_ax.set_visible(False)
    plt.tight_layout()

    return fig

In [None]:
# Folder that receives the figures and the similarity table written below.
out_dir = base_dir + '/Results/Video_judgment'
# Create it up front so the later savefig / to_csv calls do not fail on a
# fresh checkout where the results folder does not exist yet.
os.makedirs(out_dir, exist_ok = True)

## Plot judgment results video 2

In [None]:
# Agreement with the abortion statements, using the video 2 subject groups.
libscons = subIDs['vid_2']
data_cols = video_2_cols
question_labels = [
    'Northam legal infanticide',
    '3rd trimester exceptional',
    'Abortion for any reason',
    'Political correctness > truth',
]

fig = plot_Likert_responses(judgment_dat, data_cols, question_labels,
                            libs = libscons['libs'], cons = libscons['cons'])

category_name = 'News_abortion_agreement'
fig_path = out_dir + '/%s.pdf'%category_name
fig.savefig(fig_path, bbox_inches = 'tight', transparent = True)

## Plot judgment results video 3

In [None]:
# Show the video 3 column list; the cells below pick columns from it by position.
video_3_cols

##### Agreement questions 

In [None]:
# Agreement with debate statements: the police, immigration and overall items
# (positions 0-3 and 6-7 of video_3_cols), using the video 3 subject groups.
libscons = subIDs['vid_3']
agreement_positions = [0,1,2,3,6,7]
data_cols = [video_3_cols[pos] for pos in agreement_positions]

question_labels = [
    'Kaine: OK to bring up issues',
    'Pence: Force for good',
    'Kaine: Trump plan = Deportation nation',
    'Pence: Clinton plan = Amnesty',
    'Kaine: Insult-driven campaign',
    'Pence: Small potatoes',
]

fig = plot_Likert_responses(judgment_dat, data_cols, question_labels,
                            libs = libscons['libs'], cons = libscons['cons'])

category_name = 'Debate_agreement'
fig_path = out_dir + '/%s.pdf'%category_name
fig.savefig(fig_path, bbox_inches = 'tight', transparent = True)

##### Intentions questions

In [None]:
# Perceived sincerity of four debate moments (positions 8-11 of video_3_cols),
# plotted on a -5..5 scale with the video 3 subject groups.
libscons = subIDs['vid_3']
data_cols = video_3_cols[8:12]

question_labels = [
    'Pence: Uncle cop',
    'Kaine: Wife & mother',
    'Kaine: Second amendment',
    'Pence: Inner cities',
]

fig = plot_Likert_responses(judgment_dat, data_cols, question_labels,
                            libs = libscons['libs'], cons = libscons['cons'],
                            Likert_range = [-5,5], Likert_label = 'Sincerity')

category_name = 'Debate_sincerity'
fig_path = out_dir + '/%s.pdf'%category_name
fig.savefig(fig_path, bbox_inches = 'tight', transparent = True)

##### Liking politicians

In [None]:
# Liking of the four politicians (positions 12-15 of video_3_cols), plotted on
# a -5..5 scale with the video 3 subject groups.
libscons = subIDs['vid_3']
data_cols = video_3_cols[12:16]

question_labels = [
    'Tim Kaine',
    'Mike Pence',
    'Donald Trump',
    'Hillary Clinton',
]

fig = plot_Likert_responses(judgment_dat, data_cols, question_labels,
                            libs = libscons['libs'], cons = libscons['cons'],
                            Likert_range = [-5,5],
                            Likert_label = '<- Dislike   ...      Like ->')

category_name = 'Liking_politicians'
fig_path = out_dir + '/%s.pdf'%category_name
fig.savefig(fig_path, bbox_inches = 'tight', transparent = True)

##### Personal judgment of debate

In [None]:
# Overall debate judgments on a -3..3 scale whose poles are labelled Kaine and
# Pence, using the video 3 subject groups.
libscons = subIDs['vid_3']
data_cols = ['DebateQuestionBias', 'DebateAgreed', 'DebateWon']

question_labels = [
    'Debate question bias',
    'I agreed more with...',
    'The debate winner was...',
]

fig = plot_Likert_responses(judgment_dat, data_cols, question_labels,
                            libs = libscons['libs'], cons = libscons['cons'],
                            Likert_range = [-3,3],
                            Likert_label = '<- Tim Kaine ... Mike Pence ->')

category_name = 'Debate_overall_judgment'
fig_path = out_dir + '/%s.pdf'%category_name
fig.savefig(fig_path, bbox_inches = 'tight', transparent = True)

## Video bias

In [None]:
# Perceived bias in the video selection, on a -3..3 scale.
data_cols = ['DebateVideoBias']

question_labels = ['Video selection bias']

# Restrict each group to subjects retained for BOTH video 2 and video 3.
# These intersections were previously computed but never passed on: the plot
# silently reused `libscons` left over from the preceding cell.
tmplibs = np.intersect1d(subIDs['vid_2']['libs'], subIDs['vid_3']['libs'])
tmpcons = np.intersect1d(subIDs['vid_2']['cons'], subIDs['vid_3']['cons'])
fig = plot_Likert_responses(judgment_dat, data_cols, question_labels, tmplibs, tmpcons,
                            Likert_range=[-3,3],
                            Likert_label='<- Towards Kaine ... Towards Pence ->')

category_name = 'Video_selection_bias'
fig.savefig(out_dir + '/%s.pdf'%category_name, bbox_inches = 'tight', transparent = True)

## Inter-subject cosine similarity on responses per category

##### First recode agreement to be negative-to-positive

In [None]:
# Agreement items to shift by 4 so that responses run from negative to positive
# (presumably centring a 1-7 scale on 0 -- confirm against the survey).
# NOTE: this overwrites judgment_dat in place, so running the cell twice shifts
# the columns twice; re-load judgment_dat before re-running.
recode = [
    'StatementsAbortion_1', 'StatementsAbortion_2',
    'StatementsAbortion_3', 'StatementsAbortion_4',
    'StatementsPolice_1', 'StatementsPolice_2',
    'StatementsImmigratio_1', 'StatementsImmigratio_2',
    'StatementsOverall_1', 'StatementsOverall_2',
]
judgment_dat[recode] = judgment_dat[recode] - 4

In [None]:
# Data cols for each category
categories = {'agreement_v2':['StatementsAbortion_1', 'StatementsAbortion_2',
                              'StatementsAbortion_3', 'StatementsAbortion_4'],
              'agreement_v3':['StatementsPolice_1','StatementsPolice_2','StatementsImmigratio_1',
                           'StatementsImmigratio_2','StatementsOverall_1','StatementsOverall_2'],
              'intentions_v3':['IntentionsPenceCop_1','IntentionsKaineMothe_1',
                            'IntentionsKaineGuns_1','IntentionsPenceCity_1'],
              'likingpoliticians':['LikeKaine','LikePence','LikeTrump','LikeClinton'],
              'debate_v3':['DebateQuestionBias','DebateAgreed','DebateWon']
              }

# Loop over categories
all_data_cols = []
all_judgment_ISC = pd.DataFrame()
for ci,cat_name in enumerate(categories.keys()):
    print(cat_name)
    data_cols = categories[cat_name]
    all_data_cols.extend(data_cols)
    ISC = (pd.DataFrame(1 - scipy.spatial.distance.squareform(
                        scipy.spatial.distance.pdist(judgment_dat[data_cols].values, metric = 'cosine')),
                        columns = judgment_dat['SubID'], index = judgment_dat['SubID'])
                        .reset_index().melt(id_vars = 'SubID', var_name = 'SubID2')
                        .rename(columns = {'SubID':'SubID1','value':'%s_sim'%cat_name})
                        .sort_values(by=['SubID1','SubID2']).reset_index(drop=True))
    if ci == 0:
        all_judgment_ISC = ISC.copy()
    else:
        all_judgment_ISC = all_judgment_ISC.merge(ISC, on = ['SubID1','SubID2'])

# # Compute video 3 composite judgment similarity score
vid3_data_cols = categories['agreement_v3'] + categories['intentions_v3'] + categories['debate_v3']
vid3_composite_ISC = (pd.DataFrame(1 - scipy.spatial.distance.squareform(
                        scipy.spatial.distance.pdist(judgment_dat[vid3_data_cols].values, metric = 'cosine')),
                        columns = judgment_dat['SubID'], index = judgment_dat['SubID'])
                        .reset_index().melt(id_vars = 'SubID', var_name = 'SubID2')
                        .rename(columns = {'SubID':'SubID1','value':'vid3_composite_judgment_sim'})
                        .sort_values(by=['SubID1','SubID2']).reset_index(drop=True))

# Compute overall judgment similarity
overall_ISC = (pd.DataFrame(1 - scipy.spatial.distance.squareform(
                        scipy.spatial.distance.pdist(judgment_dat[all_data_cols].values, metric = 'cosine')),
                        columns = judgment_dat['SubID'], index = judgment_dat['SubID'])
                        .reset_index().melt(id_vars = 'SubID', var_name = 'SubID2')
                        .rename(columns = {'SubID':'SubID1','value':'overall_judgment_sim'})
                        .sort_values(by=['SubID1','SubID2']).reset_index(drop=True))
all_judgment_ISC = all_judgment_ISC.merge(vid3_composite_ISC, on = ['SubID1','SubID2']).merge(overall_ISC, on = ['SubID1','SubID2'])
all_judgment_ISC.head()

In [None]:
# Distribution of the pairwise video 3 agreement similarity scores.
agreement_v3_sim = all_judgment_ISC['agreement_v3_sim'].values
sns.distplot(agreement_v3_sim)

In [None]:
# Write the wide pairwise similarity table to the results folder, without the row index.
similarity_path = out_dir + '/all_judgment_similarity.csv'
all_judgment_ISC.to_csv(similarity_path, index = False)

In [None]:
# Pairwise regression plots between all similarity scores (every column after
# the two subject-ID columns), dropping pairs whose agreement_v3_sim is exactly 0.
judgment_pairplot_dat = all_judgment_ISC.query('agreement_v3_sim != 0').iloc[:,2:]
scatter_style = {'scatter_kws':{'alpha':.1}}
sns.pairplot(data = judgment_pairplot_dat, kind = 'reg', plot_kws = scatter_style)

## Compare to ideology similarity

In [None]:
# Ideology similarity for every ordered pair of subjects (self-pairs included):
# 100 minus the absolute difference of their IdeologyScale_1 scores.
# Built in one vectorised step rather than appending one row per pair, which
# copied the growing frame on every iteration and relied on DataFrame.append
# (removed in pandas 2.0).
sub_ids = ID_dat['SubID'].values
# If a SubID occurs on several rows, use the score from its first row for all of them.
first_ideology = ID_dat.drop_duplicates(subset = 'SubID').set_index('SubID')['IdeologyScale_1']
ideology = ID_dat['SubID'].map(first_ideology).values

n_subs = len(sub_ids)
# Row-major flattening of the n x n matrix gives SubID1 as the slow-varying and
# SubID2 as the fast-varying identifier.
ideology_sim = 100 - np.abs(ideology[:, None] - ideology[None, :])
ideo_ISC = pd.DataFrame({'SubID1': np.repeat(sub_ids, n_subs),
                         'SubID2': np.tile(sub_ids, n_subs),
                         'ideology_sim': ideology_sim.ravel()},
                        columns = ['SubID1','SubID2','ideology_sim'])

In [None]:
# Attach ideology similarity to the judgment similarity table, then drop the
# self-pairs. The 'same' flag is kept as the last column; the next cell slices
# it off by position.
plot_dat = all_judgment_ISC.merge(ideo_ISC, on = ['SubID1','SubID2'])
plot_dat['same'] = plot_dat['SubID1'] == plot_dat['SubID2']
plot_dat = plot_dat.loc[~plot_dat['same']]
plot_dat.head()

In [None]:
# Pairwise regression plots of all similarity scores including ideology,
# leaving out the two subject-ID columns and the trailing 'same' flag.
ideology_pairplot_dat = plot_dat.iloc[:,2:-1]
scatter_style = {'scatter_kws':{'alpha':.1}}
sns.pairplot(data = ideology_pairplot_dat, kind = 'reg', plot_kws = scatter_style)