# 11-experiment-three-cogsci

## Integrity

In [84]:
# Analytical Tools
import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns

# General Utilities
import itertools
import json
import pprint

# Custom Utilities
import utilities.plotting as plotting
import utilities.describe as describe

# Some settings
# `log` is a short alias for the pretty-printer.
log = pprint.pprint
# Draw matplotlib figures inline in the notebook, using the 'retina' figure format.
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
# Turn off pandas' chained-assignment check, so no SettingWithCopyWarning is
# emitted when a filtered frame is later modified.
pd.options.mode.chained_assignment = None

### Reading Data

In [85]:
# Design dimensions: every (group, question) pair gets one score column, one
# index column and JUDGMENTS rating columns in the parsed table.
GROUPS, QUESTIONS, JUDGMENTS = 3, 4, 4

# Cut-off used by _get_group: a non-empty score strictly below this value maps
# to group 0, any other non-empty score to group 2.
SCORE_THRESHOLD = 240.9

# Raw response files; each one is read line by line, one JSON document per line.
FILE_NAMES = ['data/raw.json']

# Collect every participant record from the raw files; each non-blank line is
# parsed as one JSON document.
master_responses = []
for name in FILE_NAMES:
    with open(name) as file:
        # Lines read from a file keep their trailing newline, so a bare
        # `if line` is always true. Strip first so blank lines (e.g. a trailing
        # empty line) are skipped instead of making json.loads raise.
        master_responses.extend(
            json.loads(line) for line in file if line.strip()
        )

In [86]:
def _get_group(score):
    if not score:
        return 1
    if score < SCORE_THRESHOLD:
        return 0
    return 2

### Parsing Data

In [87]:
# Column store for the response table: one list per column, appended to once
# per participant. Insertion order here determines the column order.
data = dict(id=[], consent=[], attention=[])

for g_num, q_num in itertools.product(range(GROUPS), range(QUESTIONS)):
    prefix = 'g{}_q{}'.format(g_num, q_num)
    data[prefix + '_score'] = []
    data[prefix + '_index'] = []
    for j_num in range(JUDGMENTS):
        data['{}_j{}'.format(prefix, j_num)] = []
In [88]:
ANSWER_KEYS = ['Q0', 'Q1', 'Q2', 'Q3']

# Positions within each participant's trial list. They are named here, and the
# rating range is derived from GROUPS/QUESTIONS, so the loop below cannot
# silently drift from the design constants.
LIKERT_START = 2                                 # first rating trial
LIKERT_STOP = LIKERT_START + GROUPS * QUESTIONS  # one rating trial per question
ATTENTION_TRIAL = 8                              # rating trial that also holds the attention check

for person in master_responses:
    # Grab data & ID. Trial 0 carries the participant ID, the question and
    # judgment orderings, and the consent answer.
    person_data = person['data']
    first_trial = person_data[0]
    data['id'].append(first_trial['participantID'])

    # Fill in question labels: each question becomes 'g<group>_q<k>', where k
    # counts the questions already seen for that group.
    question_scores = first_trial['questionScores']
    question_indices = first_trial['questionIndices']

    counters = [0] * GROUPS
    question_labels = []
    for score in question_scores:
        g_num = _get_group(score)
        question_labels.append('g{}_q{}'.format(g_num, counters[g_num]))
        counters[g_num] += 1

    for q_label, score in zip(question_labels, question_scores):
        data['{}_score'.format(q_label)].append(score)
    for q_label, index in zip(question_labels, question_indices):
        data['{}_index'.format(q_label)].append(index)

    # Get consent response: 1 when the answer starts with 'I consent', else 0.
    consent_answer = json.loads(first_trial['responses'])
    consent_value = int(consent_answer['Q0'].startswith('I consent'))
    data['consent'].append(consent_value)

    # Fill in judgments. The rating trials are paired with the questions in
    # order; judgmentIndices gives, for each answer key in ANSWER_KEYS order,
    # the judgment number whose column receives that rating.
    judgment_indices = first_trial['judgmentIndices']

    likert_trials = range(LIKERT_START, LIKERT_STOP)
    for q_label, likert_index in zip(question_labels, likert_trials):
        answer = json.loads(person_data[likert_index]['responses'])
        for j_label, key in zip(judgment_indices, ANSWER_KEYS):
            rating = int(answer[key])
            data['{}_j{}'.format(q_label, j_label)].append(rating)

    # Attention check: passed (1) only when item 'Q4' was answered '6'.
    attention_answer = json.loads(person_data[ATTENTION_TRIAL]['responses'])
    attention_value = int(attention_answer['Q4'] == '6')
    data['attention'].append(attention_value)

### Reading Responses into Data

In [89]:
# Turn the per-column lists into a table and record the number of
# participants before any exclusion is applied.
data = pd.DataFrame(data)
sample_size = data.shape[0]
data.head()

Unnamed: 0,id,consent,attention,g0_q0_score,g0_q0_index,g0_q0_j0,g0_q0_j1,g0_q0_j2,g0_q0_j3,g0_q1_score,...,g2_q2_j0,g2_q2_j1,g2_q2_j2,g2_q2_j3,g2_q3_score,g2_q3_index,g2_q3_j0,g2_q3_j1,g2_q3_j2,g2_q3_j3
0,c3l44ptjm8gwybzu7l50glec089buutn,1,1,25,20,4,4,5,3,33,...,3,4,4,6,2404,14,4,3,5,3
1,ssbqs437srxmda94319o8dj70afva4zo,1,1,33,22,5,2,3,2,18,...,1,3,2,2,2404,38,4,2,2,2
2,lrrjk5ftnpf2hhab51ztzrafocpt0ah3,1,1,14,37,5,1,5,4,48,...,3,0,1,1,2385,17,4,2,3,3
3,zak0ozs6f2cxxeulwryc2puypfsk4coe,1,1,31,8,1,2,1,1,11,...,1,3,1,1,2407,7,1,2,1,1
4,pkgqdho721m55cg70rxef9fqkr03pg5q,1,1,24,0,4,0,4,3,32,...,3,0,2,2,2395,13,4,0,4,2


In [90]:
# Apply the exclusion criteria one at a time, printing the row count and cell
# count before the first filter and after each one.
print(len(data), data.size)
for column in ('consent', 'attention'):
    data = data.loc[data[column] == 1]
    print(len(data), data.size)

# Share of the original sample that remains after both filters.
inclusion_rate = len(data) / sample_size * 100
print('Inclusion: {:.4}% '.format(inclusion_rate))

80 6000
80 6000
77 5775
Inclusion: 96.25% 


### Saving Data

In [91]:
# Persist the filtered table (rows with consent == 1 and attention == 1) as a pickle file.
data.to_pickle('data/responses.pkl')