This script loads, cleans, and parses the video judgment survey data, then stores the cleaned result as a CSV file.

In [None]:
import os
import numpy as np
import pandas as pd
import scipy as sp
import scipy.spatial
import scipy.stats
import matplotlib.pyplot as plt
import seaborn as sns
import re

## Load data

In [None]:
# Resolve the directory two levels above the working directory; the data
# folder is located directly beneath it.
base_dir = os.path.realpath('../..')
print(base_dir)
data_dir = f'{base_dir}/data'

In [None]:
# Read the raw survey export. SubID is kept as a string at this stage so that
# non-numeric entries can be detected and filtered out during cleaning.
survey_file = 'DUMP_Political_Polarization_Comprehension-Judgment_October+30%2C+2019_18.15.csv'
CJ_dat = pd.read_csv(
    data_dir + '/Raw/Surveys/' + survey_file,
    dtype={'SubID': str},
)
# The first two rows after the header are extra header rows, not responses.
CJ_dat = CJ_dat.iloc[2:]

In [None]:
# Preview the first five raw subject IDs.
CJ_dat['SubID'].head(n=5)

## Clean data

##### Remove responses by video raters and empty lines

In [None]:
# Keep only rows whose SubID parses as a number; anything else becomes NaN
# under errors='coerce' and is dropped (per the section heading, these are
# video-rater responses and empty lines).
subid_as_number = pd.to_numeric(CJ_dat['SubID'], errors='coerce')
CJ_dat = CJ_dat[subid_as_number.notna()].reset_index(drop=True)

In [None]:
# Convert the remaining (all-numeric) SubID strings to a numeric dtype.
CJ_dat = CJ_dat.assign(SubID=pd.to_numeric(CJ_dat['SubID']))

In [None]:
# Keep only subject IDs below 90.
# NOTE(review): presumably IDs of 90 and above are not study participants
# (e.g. the video raters mentioned in the heading) - confirm.
CJ_dat = CJ_dat.query(expr='SubID < 90', inplace=False)

In [None]:
# Subject IDs that are dropped from the analysis outright.
exclusions = [1, 3]
is_excluded = CJ_dat['SubID'].isin(exclusions)
CJ_dat = CJ_dat.loc[~is_excluded].reset_index(drop=True)

In [None]:
# List the distinct subject IDs that survived cleaning.
CJ_dat.loc[:, 'SubID'].unique()

In [None]:
# Preview the first five cleaned subject IDs.
CJ_dat.loc[:, 'SubID'].head()

##### Remove columns of no interest, including comprehension

In [None]:
# Keep column 17 plus every column from position 39 to the end, selected by
# position. NOTE(review): column 17 is presumably SubID (it is used by name
# further down) - confirm against the raw export.
# Uses numpy's r_ directly: the pandas.np alias was deprecated in pandas 1.0
# and removed in pandas 2.0.
columns_to_keep = np.r_[17, 39:CJ_dat.shape[1]]
CJ_dat = CJ_dat.iloc[:, columns_to_keep].reset_index(drop=True).copy()

In [None]:
# Preview the first five rows of the reduced frame.
CJ_dat.head(n=5)

## Explore data

In [None]:
# Collapse all remaining column names into one comma-separated string so the
# question groups below can be copied from it.
colnames = ','.join(CJ_dat.columns.tolist())
colnames

##### Parse Likert scale agree/disagree items

In [None]:
# Column names of the agree/disagree statement items, grouped by topic.
questions_Likert = [
    'StatementsAbortion_1',
    'StatementsAbortion_2',
    'StatementsAbortion_3',
    'StatementsAbortion_4',
    'StatementsPolice_1',
    'StatementsPolice_2',
    'StatementsImmigratio_1',
    'StatementsImmigratio_2',
    'StatementsFaith_1',
    'StatementsFaith_2',
    'StatementsOverall_1',
    'StatementsOverall_2',
]
questions_Likert

In [None]:
# Response label -> score, 1 (strongest disagreement) to 7 (strongest
# agreement). Labels are listed in ascending score order.
Likert_mapping = {
    label: score
    for score, label in enumerate(
        (
            'Strongly disagree',
            'Disagree',
            'Somewhat disagree',
            'Neither agree nor disagree',
            'Somewhat agree',
            'Agree',
            'Strongly agree',
        ),
        start=1,
    )
}

In [None]:
# Recode the Likert response labels to their numeric scores; values that are
# not keys of the mapping are left as they are.
CJ_dat[questions_Likert] = CJ_dat.loc[:, questions_Likert].replace(
    to_replace=Likert_mapping)

In [None]:
# Preview the recoded Likert items.
CJ_dat.loc[:, questions_Likert].head()

##### Parse intention items

In [None]:
# Column names of the four intention items.
questions_intention = [
    'IntentionsPenceCop_1',
    'IntentionsKaineMothe_1',
    'IntentionsKaineGuns_1',
    'IntentionsPenceCity_1',
]
questions_intention

In [None]:
# Cast the intention items to integers.
CJ_dat = CJ_dat.astype({column: int for column in questions_intention})

In [None]:
# Flip the sign of two of the intention items.
# NOTE(review): presumably this puts all four items on a common direction -
# confirm against the survey wording.
sign_flipped_items = ['IntentionsPenceCop_1', 'IntentionsKaineMothe_1']
CJ_dat[sign_flipped_items] = -CJ_dat[sign_flipped_items]

In [None]:
# Preview the parsed intention items.
CJ_dat.loc[:, questions_intention].head()

##### Parse affiliation questions

In [None]:
# Column names of the four politician-liking items.
questions_affiliation = [
    'LikePoliticians_1',
    'LikePoliticians_2',
    'LikePoliticians_3',
    'LikePoliticians_4',
]
questions_affiliation

In [None]:
# Cast the politician-liking items to integers.
CJ_dat[questions_affiliation] = CJ_dat.loc[:, questions_affiliation].astype(dtype=int)

In [None]:
# Readable replacement names, paired by position with questions_affiliation.
questions_rename = 'LikeKaine,LikePence,LikeTrump,LikeClinton'.split(',')

In [None]:
# Old column name -> new column name, paired by position.
rename_dict = {
    old_name: new_name
    for old_name, new_name in zip(questions_affiliation, questions_rename)
}

In [None]:
# Apply the readable names to the politician-liking columns.
CJ_dat = CJ_dat.rename(rename_dict, axis='columns')

In [None]:
# Display the column index to confirm the rename took effect.
CJ_dat.columns

In [None]:
# Preview the renamed politician-liking columns.
CJ_dat.loc[:, questions_rename].head()

##### Parse bias questions

In [None]:
# Column names of the two perceived-bias items.
questions_bias = ['DebateQuestionBias', 'DebateVideoBias']
questions_bias

In [None]:
# Response label -> score on a -3..3 scale: negative values favor Tim Kaine,
# positive values favor Mike Pence, 0 is neutral.
bias_mapping = dict(zip(
    (
        'Strongly biased in favor of Tim Kaine',
        'Biased in favor of Tim Kaine',
        'Somewhat biased in favor of Tim Kaine',
        'Neutral',
        'Somewhat biased in favor of Mike Pence',
        'Biased in favor of Mike Pence',
        'Strongly biased in favor of Mike Pence',
    ),
    range(-3, 4),
))

In [None]:
# Recode the bias response labels to their signed scores.
CJ_dat[questions_bias] = CJ_dat.loc[:, questions_bias].replace(
    to_replace=bias_mapping)

In [None]:
# Preview the recoded bias items.
CJ_dat.loc[:, questions_bias].head()

##### Parse personal judgment of debate

In [None]:
# Column names of the two personal-judgment items: whom the respondent
# agreed with, and who they thought won the debate.
question_agree, question_win = 'DebateAgreed', 'DebateWon'

In [None]:
# Response label -> score on a -3..3 scale: negative values mean more
# agreement with Tim Kaine, positive values more with Mike Pence.
agree_mapping = {
    label: score
    for score, label in enumerate(
        (
            'Agreed entirely with Tim Kaine',
            'Agreed a lot more with Tim Kaine',
            'Agreed somewhat more with Tim Kaine',
            'Agreed with both about equally',
            'Agreed somewhat more with Mike Pence',
            'Agreed a lot more with Mike Pence',
            'Agreed entirely with Mike Pence',
        ),
        start=-3,
    )
}

In [None]:
# Recode the agreement response labels to their signed scores.
CJ_dat[question_agree] = CJ_dat[question_agree].replace(to_replace=agree_mapping)

In [None]:
# Response label -> score on a -3..3 scale: negative values are wins for
# Tim Kaine, positive values wins for Mike Pence, 0 is a tie.
win_mapping = dict(zip(
    (
        'Total win for Tim Kaine',
        'Large win for Tim Kaine',
        'Slight win for Tim Kaine',
        'It was a tie',
        'Slight win for Mike Pence',
        'Large win for Mike Pence',
        'Total win for Mike Pence',
    ),
    range(-3, 4),
))

In [None]:
# Recode the debate-winner response labels to their signed scores.
CJ_dat[question_win] = CJ_dat.loc[:, question_win].replace(to_replace=win_mapping)

In [None]:
# Preview the two recoded personal-judgment items.
CJ_dat.loc[:, [question_agree, question_win]].head()

##### Check out some free responses

In [None]:
# Print each subject ID next to that subject's free-text response.
# The two columns are walked in lockstep by position, so IDs and responses
# stay paired regardless of what the row index looks like, and no throwaway
# list of None values is built just to trigger the prints.
for sub_id, free_response in zip(CJ_dat['SubID'], CJ_dat['FreeResponse']):
    print(sub_id, free_response)

## Store

In [None]:
# Write the cleaned survey data to disk; index=True keeps the row index as
# the first column of the CSV.
output_path = data_dir + '/Cleaned/Surveys/Judgment_dat.csv'
CJ_dat.to_csv(output_path, index=True)