In [None]:
%matplotlib inline

import os


def resolve_proj_path(default, env_var="HIDDEN_SINGLES_PROJ_PATH"):
    """Return the project root directory, always ending in a path separator.

    Parameters
    ----------
    default : str
        Path used when the environment variable is unset or empty.
    env_var : str
        Name of the environment variable that may override ``default``.

    Returns
    -------
    str
        The chosen path with a trailing separator.
    """
    chosen = os.environ.get(env_var) or default
    # Other cells build paths with plain string concatenation (proj_path + "python",
    # proj_path + 'data/...'), so the trailing separator is required.
    return os.path.join(chosen, "")


# Machine-specific default; set HIDDEN_SINGLES_PROJ_PATH to run the notebook elsewhere.
proj_path = resolve_proj_path("/Users/andrew/Desktop/projects/hidden_singles/")

In [None]:
# Directory appended to sys.path below; the hiddensingles imports presumably resolve from it.
src_path = proj_path + "python"

import sys
# Must run before the hiddensingles imports further down.
sys.path.append(src_path)

import pandas as pd
# NOTE(review): the star import hides which names the rest of the notebook takes from
# data_wrangler (e.g. load_raw_data, get_puzzle_results, and presumably tqdm, which is
# used later but never imported by name) — consider explicit imports.
from hiddensingles.experiment.data_wrangler import *
from hiddensingles.misc import utils

import warnings
# Suppresses every warning for the rest of the session, including any pandas warnings.
warnings.filterwarnings('ignore')
# Display up to 300 rows when a DataFrame is rendered.
pd.set_option('display.max_rows', 300)

In [None]:
# Load and wrangle the data
# The helpers called here are not defined in this notebook (presumably they come from the
# data_wrangler star import), so their return schemas are not visible from this cell.
# What the rest of the notebook relies on: raw_data is indexed by subject_id and each
# entry has 'worker_id' and 'experimentDetails'; the per-subject frames are merged on
# 'subject_id'; results and response_types are merged on ['subject_id', 'phase', 'trial'].

raw_data = load_raw_data(proj_path + 'data/raw/round1')

dtest_results = diagnostic_test_results(raw_data)
results = get_puzzle_results(raw_data)
questionnaire_responses = get_questionnaire_responses(raw_data)
# Unlike the other helpers, this one also takes the puzzle results frame.
response_types = get_response_types(raw_data, results)
# Derived from the puzzle results rather than from raw_data directly.
tutorial_house = get_tutorial_house(results)
survey_responses = get_survey_responses(raw_data)

In [None]:
def make_df_trial_puzzle(trial):
    """Flatten one trial's puzzle specification into a long-format DataFrame.

    Parameters
    ----------
    trial : dict
        Must provide ``trial['coordinates']`` (mapping cell names to ``{'x', 'y'}``
        dicts, with ``'occupied'`` and ``'emptyBox'`` holding at least three such
        dicts each) and ``trial['digits']`` with ``'target'``, ``'distractor'`` and
        an ``'occupied'`` sequence of at least three digits.

    Returns
    -------
    pandas.DataFrame
        Fifteen rows with columns ``type``, ``key``, ``row``, ``column``, ``number``.
        ``row`` is the cell's ``x`` plus 1 and ``column`` is its ``y`` plus 1
        (presumably converting 0-based indices to 1-based). Empty cells get number 0.
    """
    coords = trial['coordinates']
    digits = trial['digits']

    def cell(kind, key, position, number):
        # One output record; dict key order fixes the DataFrame column order.
        return {'type': kind,
                'key': key,
                'row': position['x'],
                'column': position['y'],
                'number': number}

    records = []
    for key in ('goal', 'targetSingle', 'targetDouble', 'targetBox'):
        records.append(cell('target', key, coords[key], digits['target']))
    for key in ('distractorSingle', 'distractorDouble', 'distractorBox'):
        records.append(cell('distractor', key, coords[key], digits['distractor']))
    # Exactly the first three occupied cells are used, each paired with its own digit.
    for idx in range(3):
        records.append(cell('inhouse', f'inhouse{idx}',
                            coords['occupied'][idx], digits['occupied'][idx]))
    for key in ('emptySingle', 'emptyDouble'):
        records.append(cell('empty', key, coords[key], 0))
    # Exactly the first three empty box cells are used.
    for idx in range(3):
        records.append(cell('empty', f'emptyBox{idx}', coords['emptyBox'][idx], 0))

    puzzle = pd.DataFrame(records)
    puzzle['row'] = puzzle['row'] + 1
    puzzle['column'] = puzzle['column'] + 1
    return puzzle


def make_df_subject_puzzles(subject_id, data):
    """Build the puzzle table for every trial of one subject.

    Parameters
    ----------
    subject_id
        Value written to the ``subject_id`` column of every returned row.
    data : dict
        ``data['experimentDetails']`` must hold a single ``'questionnaire'`` trial
        plus iterables of trials under ``'phase1'`` and ``'phase2'``.

    Returns
    -------
    pandas.DataFrame
        The concatenated output of ``make_df_trial_puzzle`` for the questionnaire
        trial (phase ``'questionnaire'``, trial 1), then each ``phase1`` trial
        (phase ``'practice'``) and each ``phase2`` trial (phase ``'test'``), with
        trials numbered from 1 within a phase. Columns ``phase``, ``trial`` and
        ``subject_id`` are appended. The index is not reset, so it repeats per trial.
    """
    details = data['experimentDetails']

    def tagged(trial, phase, number):
        # Puzzle table for one trial, labelled with its phase and 1-based trial number.
        frame = make_df_trial_puzzle(trial)
        frame['phase'] = phase
        frame['trial'] = number
        return frame

    frames = [tagged(details['questionnaire'], 'questionnaire', 1)]
    for phase, source_key in (('practice', 'phase1'), ('test', 'phase2')):
        for number, trial in enumerate(details[source_key], 1):
            frames.append(tagged(trial, phase, number))

    combined = pd.concat(frames)
    combined['subject_id'] = subject_id
    return combined

In [None]:
# Combine the dataframes into just subject data and trial data

# One row per subject: outer merges keep subjects that are missing from any single table.
subject_data = pd.merge(dtest_results, survey_responses, on='subject_id', how='outer')
subject_data = pd.merge(subject_data, questionnaire_responses, on='subject_id', how='outer')
subject_data = pd.merge(subject_data, tutorial_house, on='subject_id', how='outer')
# Outer merges sort rows by the join key, so subject_data's row order need not match
# raw_data's iteration order. Look up each row's worker_id through its own subject_id
# so every hash lands on the right subject (raw_data is indexed by subject_id).
# Assumes utils.short_hash returns one hash per input, in input order — TODO confirm.
worker_ids = [raw_data[sid]['worker_id'] for sid in subject_data['subject_id']]
subject_data['sid_hash'] = utils.short_hash(worker_ids, 6)
# One row per (subject, phase, trial); the default inner merge keeps only trials present in both.
trial_data = pd.merge(results, response_types, on=['subject_id', 'phase', 'trial'])

In [None]:
# Builds the table of individual puzzles for every subject that has trial data
subjects_with_trials = set(trial_data.subject_id)
df = subject_data.loc[subject_data.subject_id.isin(subjects_with_trials), ['subject_id', 'sid_hash']]
subject_frames = []
# NOTE(review): tqdm is never imported by name in this notebook; presumably it arrives
# through the data_wrangler star import — confirm.
for sid, sid_hash in tqdm(df.values):
    # Tag each subject's puzzle rows with that subject's hash.
    subject_frames.append(make_df_subject_puzzles(sid, raw_data[sid]).assign(sid_hash=sid_hash))
df_puzzles = pd.concat(subject_frames)

In [None]:
# Builds the table of each puzzle's house type, one row per (subject, phase, trial)
df = subject_data[subject_data.subject_id.isin(set(trial_data.subject_id))][['subject_id', 'sid_hash']]
rows = []
for sid, sid_hash in zip(df.subject_id, df.sid_hash):
    details = raw_data[sid]['experimentDetails']
    # The questionnaire is a single trial; wrapping it in a list lets all three
    # phases share one loop. Trials are numbered from 1 within each phase.
    phase_trials = [('questionnaire', [details['questionnaire']]),
                    ('practice', details['phase1']),
                    ('test', details['phase2'])]
    for phase, trials in phase_trials:
        for i, trial in enumerate(trials, 1):
            rows.append({'subject_id': sid,
                         'sid_hash': sid_hash,
                         'phase': phase,
                         'trial': i,
                         'house_type': trial['houseType']})
# Explicit columns keep the schema (and the TSV header) even when there are no rows.
df_house_types = pd.DataFrame(rows, columns=['subject_id', 'sid_hash', 'phase', 'trial', 'house_type'])

In [None]:
# Save the dataframes as TSV

path = proj_path + 'data/processed/pre_R/'
utils.mkdir(path)
# File name -> frame, written in this order as tab-separated files without the index.
tsv_outputs = {
    'subjects.tsv': subject_data,
    'trials.tsv': trial_data,
    'puzzles.tsv': df_puzzles,
    'house_types.tsv': df_house_types,
}
for filename, frame in tsv_outputs.items():
    frame.to_csv(path + filename, sep='\t', index=False)