In [49]:
import ast
import csv
import sys
import pandas as pd

In [19]:
def summarize_subject(subjectString):
    '''
    Flatten one subject's data file into one row per trial.

    subjectString is the path to a text file containing a single Python
    literal: a dictionary that may hold a 'blockStruct' list (blocks that each
    carry their own 'trialStruct' list), a top-level 'trialStruct' list, or
    neither.

    Returns a list of lists, where each nested list is one trial (or row):
    the subject-level values, then any 'counterBalance_keys' values, then the
    block-level values (blocked data only), then the trial's own values.
    When the file has neither structure, a single row holding only the
    subject-level values is returned.

    NOTE(review): values are emitted in each dictionary's own iteration order,
    so rows only line up with a header row if every dictionary yields its keys
    in the same order -- confirm for multi-subject runs.
    '''
    ## literal_eval only accepts Python literals; the with-block makes sure the
    ## file handle is closed even if parsing fails
    with open(subjectString, 'r') as subjectFile:
        e = ast.literal_eval(subjectFile.read())

    nestedKeys = ('trialStruct', 'blockStruct', 'counterBalance_keys')

    ## grab the top level data
    subjectLevel = [e[x] for x in e if x not in nestedKeys]
    ## grab counter balance keys if they exist
    if 'counterBalance_keys' in e:
        subjectLevel += list(e['counterBalance_keys'].values())

    trials = []

    if 'blockStruct' in e:
        for block in e['blockStruct']:
            ## exact comparison: a substring test against 'trialStruct' would
            ## also throw away block keys such as 'trial' or 'Struct'
            blockLevel = [block[x] for x in block if x != 'trialStruct']
            for trial in block['trialStruct']:
                trials.append(subjectLevel + blockLevel + list(trial.values()))

    elif 'trialStruct' in e:
        for trial in e['trialStruct']:
            ## list(...) so the concatenation also works when values() is a view
            trials.append(subjectLevel + list(trial.values()))

    else:
        trials.append(subjectLevel)

    return trials

In [22]:
def compute_headers(data):
    '''
    Build the flat list of column names for one parsed data dictionary.

    data is the nested dictionary for a single file. The result starts with
    the top-level keys, followed by the keys of 'counterBalance_keys' when
    present. If 'blockStruct' is present, the keys of the first block and of
    that block's first trial are appended; otherwise, if 'trialStruct' is
    present, the keys of the first trial are appended. The container names
    'trialStruct', 'counterBalance_keys' and 'blockStruct' are removed from
    the result.

    Only the first block / first trial is inspected, so an empty 'blockStruct'
    or 'trialStruct' list raises IndexError.
    '''
    nestedKeys = ['trialStruct', 'counterBalance_keys', 'blockStruct']

    ## make the headers equal to the first layer of keys in the data
    ## (list(...) makes a real list, so the concatenations below work whether
    ## keys() returns a list or a view object)
    headers = list(data.keys())

    ## if counter balance keys are in the first layer, add them to the headers
    if 'counterBalance_keys' in data:
        headers += list(data['counterBalance_keys'].keys())

    ## if it's rvts data
    if 'blockStruct' in data:
        ## add on top of the existing headers all the keys from the block level and the trial level
        firstBlock = data['blockStruct'][0]
        headers += list(firstBlock.keys()) + list(firstBlock['trialStruct'][0].keys())
    ## if it's cued data
    elif 'trialStruct' in data:
        ## only add in the trial level headers
        headers += list(data['trialStruct'][0].keys())

    ## remove all the nested headers from the var names
    return [x for x in headers if x not in nestedKeys]

In [38]:
def summarize_data(args):
    '''
    Group subject files by experiment phase and flatten each group to rows.

    args is a collection of file paths. A path belongs to a phase when the
    phase name ('cued', 'rvts' or 'demo') occurs anywhere in the path string,
    so a path can land in more than one phase if it contains several names.

    Returns a dictionary keyed by phase name; phases with no matching file are
    left out. Each value is a list of rows whose first row is the header list
    computed from the first matching file, followed by the rows produced by
    summarize_subject for every matching file, in input order.
    '''
    ## run a different procedure for each portion of the experiment
    fTypes = ['cued', 'rvts', 'demo']
    out = {}

    for proc in fTypes:
        ## grab only the data we're interested in (from same phases of the experiment)
        relArgs = [x for x in args if proc in x]
        if not relArgs:
            continue

        ## extract the headers from the first file to use for the whole dataset;
        ## the with-block closes the handle instead of leaking it
        with open(relArgs[0], 'r') as firstFile:
            data = ast.literal_eval(firstFile.read())

        final_data = [compute_headers(data)]

        ## pass each subject to the summarize_subject function, which returns
        ## a list of lists where each inner list is a trial
        for subject in relArgs:
            final_data += summarize_subject(subject)

        out[proc] = final_data

    return out


In [29]:
## Sanity check: show whether each path would be picked up as 'rvts' data.
## NOTE: assignmentID is only assigned in a later cell of this notebook, so on
## a fresh kernel that cell has to be run before this one.
for e in assignmentID:
    ## parenthesised form runs as a statement on Python 2 and a call on Python 3
    print('rvts' in e)

True


In [43]:
## Paths of the subject files to summarise (a single rvts file here).
assignmentID = [
    '../subject_data/308XBLVESJO5HGO8ZJXJW16P4MARBC_rvts.txt',
]

## Keep only the rvts rows: the header row followed by one row per trial.
final_data = summarize_data(assignmentID)['rvts']

In [46]:
## Preview the first five rows (rows 0-4: the header plus four trials).
for count, line in enumerate(final_data):
    ## parenthesised form runs as a statement on Python 2 and a call on Python 3
    print(line)
    if count > 3:
        break

['screen_width', 'window_height', 'workerId', 'window_width', 'curId', 'curTime', 'counterBalance_order', 'userAgent', 'runTime', 'screen_height', 'circle_key', 'tri_key', 'blue_key', 'red_key', 'color', 'shape', 'blockTime', 'Block', 'rt', 'totalPoints', 'stim_color', 'response_location', 'transition', 'rightpoints', 'trial', 'stim_shape', 'leftpoints', 'task_attempt', 'error', 'response_key']
[1760, 8, 'A2JDJL3FQ56GO1', 1699, '308XBLVESJO5HGO8ZJXJW16P4MARBC', '10/10/2018@13:51:57', 'cued_first', 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:62.0) Gecko/20100101 Firefox/62.0', 61.059, 990, '70', '68', '75', '74', 'top_right', 'top_left', 12.49, 1, 1360, 2, 'blue', 'left', 'StartBlock', 2, 1, 'triangle', 2, 'shape', 0, 'tri_key']
[1760, 8, 'A2JDJL3FQ56GO1', 1699, '308XBLVESJO5HGO8ZJXJW16P4MARBC', '10/10/2018@13:51:57', 'cued_first', 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:62.0) Gecko/20100101 Firefox/62.0', 61.059, 990, '70', '68', '75', '74', 'top_right', 'top_left', 12.49, 1, 898, 

In [48]:
## Dump every row (header first) to a comma-separated file.
with open('example.csv', 'w') as fp:
    a = csv.writer(fp, delimiter=',')
    a.writerows(final_data)

In [51]:
## Read the exported rows back before using them: df has to exist before it
## can be written out again, otherwise a top-to-bottom run raises NameError.
df = pd.read_csv('example.csv')
df.to_csv('example1.csv')
df.head()

Unnamed: 0,screen_width,window_height,workerId,window_width,curId,curTime,counterBalance_order,userAgent,runTime,screen_height,...,stim_color,response_location,transition,rightpoints,trial,stim_shape,leftpoints,task_attempt,error,response_key
0,1760,8,A2JDJL3FQ56GO1,1699,308XBLVESJO5HGO8ZJXJW16P4MARBC,10/10/2018@13:51:57,cued_first,Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:62....,61.059,990,...,blue,left,StartBlock,2,1,triangle,2,shape,0,tri_key
1,1760,8,A2JDJL3FQ56GO1,1699,308XBLVESJO5HGO8ZJXJW16P4MARBC,10/10/2018@13:51:57,cued_first,Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:62....,61.059,990,...,blue,left,Repeat,2,2,circle,2,shape,0,circle_key
2,1760,8,A2JDJL3FQ56GO1,1699,308XBLVESJO5HGO8ZJXJW16P4MARBC,10/10/2018@13:51:57,cued_first,Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:62....,61.059,990,...,blue,right,Switch,3,3,triangle,1,color,0,blue_key
3,1760,8,A2JDJL3FQ56GO1,1699,308XBLVESJO5HGO8ZJXJW16P4MARBC,10/10/2018@13:51:57,cued_first,Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:62....,61.059,990,...,red,right,Repeat,3,4,circle,2,color,0,red_key
4,1760,8,A2JDJL3FQ56GO1,1699,308XBLVESJO5HGO8ZJXJW16P4MARBC,10/10/2018@13:51:57,cued_first,Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:62....,61.059,990,...,blue,right,Repeat,3,5,triangle,2,color,0,blue_key
