In [1]:
import json
import pandas as pd
import urllib.parse
from collections import Counter
import numpy as np

In [None]:
### PARAMETERS
# ------------
# Input and output locations used by the cells below: jsonPath is opened for
# reading and csvPath is the target of the final DataFrame.to_csv call.
# NOTE(review): both are absolute paths on one machine - edit them (or derive
# them from a configurable data directory) before running elsewhere.

# Path to the raw JSON export - NB: this should be unedited data straight from the server
jsonPath = "C:/Users/agrog/Documents/Oxford/analysis/effugium/y2/rw_24_all.json"
# Path where the per-round CSV should be saved
csvPath = "C:/Users/agrog/Documents/Oxford/analysis/effugium/y2/rw_24_all.csv"


In [2]:
# First decode: the result is used as a str below (it is passed to str.replace),
# so the file is expected to hold a JSON-encoded string rather than a JSON array/object.
with open(jsonPath, 'rb') as h:
    qs = json.load(h)

# Second decode: swap every single quote for a double quote, then parse the text as JSON.
# NOTE(review): the replace is global, so an apostrophe inside any value would also be
# rewritten - presumably the export never contains one; confirm against the server format.
jsonData = qs.replace("'", '"')
data = json.loads(jsonData)

In [19]:
def clean(rawData, completeTrialGame=80):
    """Reduce each raw server record to a summary built from its best attempt.

    Parameters
    ----------
    rawData : iterable of dict
        Records with a ``'pk'`` entry and a ``'fields'`` dict. ``fields['rawData']``
        is a JSON string decoding to a mapping of attempt timestamp -> attempt
        dict; each attempt's ``'sdata'`` is itself a JSON string (or missing/None).
    completeTrialGame : int or float, optional
        A user counts as having completed the game once any ``trial_game`` value
        in the selected attempt reaches this number (default 80).

    Returns
    -------
    dict
        Maps each record's ``pk`` to a dict with the keys ``pk``, ``userId``
        (URL-quoted), ``rawData`` (decoded attempts), ``sdata`` (decoded data of
        the selected attempt, or None), ``edata``, ``parameters``,
        ``totalAttempts``, ``completed``, ``lastCompletedRound``,
        ``lastTrialGame``, ``finalRooms``, ``userIP``, ``urlParameters``,
        ``timestamps``, ``timeCreated`` and ``lastModified``.

    Notes
    -----
    * With several attempts, the one with the longest ``expt_index`` list is
      used (the first one wins ties); attempts whose ``sdata`` cannot be
      decoded count as length 0.
    * A user with no attempts, or whose selected attempt has no decodable
      ``sdata``, is reported with ``sdata=None``, ``completed=False`` and
      ``lastCompletedRound=0``.
    * ``fields['completeAttempt']`` is read but always replaced by the
      completion status computed here.
    """
    # What "this attempt has no usable sdata" looks like: a missing key, a None or
    # non-string payload, or malformed JSON (json.JSONDecodeError is a ValueError).
    unusable = (KeyError, TypeError, ValueError)

    cleanData = {}
    for userAll in rawData:
        pk = userAll['pk']
        user = userAll['fields']
        tmp = {
            'pk' : pk,
            'userId' : urllib.parse.quote(user['userId']),
            'rawData' : json.loads(user['rawData']),
            'sdata' : None,
            'edata' : user['edata'],
            'parameters' : user['parameters'],
            'totalAttempts' : None,
            'completed' : user['completeAttempt'],
            'lastCompletedRound' : None,
            'lastTrialGame' : None,
            'finalRooms' : [],
            'userIP' : user['userIP'],
            'urlParameters' : user['urlParameters'],
            'timestamps' : [],
            'timeCreated' : user['timeCreated'],
            'lastModified' : user['lastModified']
        }
        attempts = tmp['rawData']
        # Check how many attempts the user has had
        tmp['totalAttempts'] = len(attempts)
        # Store timestamp/s
        tmp['timestamps'] = list(attempts.keys())

        # Pick the attempt to analyse. `timestamp` is assigned on every path so a
        # user without attempts can neither crash nor inherit the previous user's value.
        if tmp['totalAttempts'] == 0:
            timestamp = None
        elif tmp['totalAttempts'] == 1:
            timestamp = tmp['timestamps'][0]
        else:
            # For multiple attempts, find the occurrence with the highest number of
            # recorded rounds (length of the expt_index array)
            indAttempts = [] # individual attempts
            for attemptStamp in tmp['timestamps']:
                try:
                    indAttempts.append(len(json.loads(attempts[attemptStamp]['sdata'])['expt_index']))
                except unusable:
                    indAttempts.append(0)
            # np.argmax returns the first index holding the largest value
            timestamp = tmp['timestamps'][int(np.argmax(indAttempts))]

        # Use the timestamp to add sdata to tmp
        if timestamp is not None:
            try:
                tmp['sdata'] = json.loads(attempts[timestamp]['sdata'])
            except unusable:
                tmp['sdata'] = None

        sdata = tmp['sdata']
        if sdata is not None:
            trialGames = sdata['trial_game']
            # None entries become NaN under the float dtype; drop them so the
            # maximum does not depend on where in the list they occur.
            played = np.array(trialGames, dtype=np.float64)
            played = played[~np.isnan(played)]
            if played.size > 0 and played.max() >= completeTrialGame:
                tmp['completed'] = True
                tmp['lastCompletedRound'] = len(trialGames)
            else:
                tmp['completed'] = False
                # Number of distinct layouts seen
                tmp['lastCompletedRound'] = len(Counter(sdata['trial_layout']))
            # Store the last trial_game they've seen (None if nothing was recorded)
            lastGame = trialGames[-1] if len(trialGames) > 0 else None
            tmp['lastTrialGame'] = int(lastGame) if lastGame is not None else None
        else:
            tmp['completed'] = False
            tmp['lastCompletedRound'] = 0

        cleanData[tmp['pk']] = tmp
    print("Dataset ready.")
    return cleanData

In [12]:
def getRounds(cleanData):
    """Flatten the cleaned per-user data into one table row per round.

    Parameters
    ----------
    cleanData : dict
        Mapping of key -> user summary dict as produced by ``clean``. Users
        whose ``'sdata'`` is None contribute no rows.

    Returns
    -------
    pandas.DataFrame
        One row per entry of each user's ``sdata['expt_index']``, labelled
        '0', '1', ... in processing order. Columns: user identifiers
        (``pk``, ``id``, URL-quoted ``iv`` and ``tag``), the per-round
        ``expt_*`` / ``trial_*`` values, ``round_start_time``,
        ``round_end_time``, ``last_room``, ``roundAttempted`` and
        ``gameComplete``.
    """

    def quotedParam(params, key):
        # None when there are no url parameters or the value is null, else URL-quoted
        if params is None or params[key] == None:
            return None
        return urllib.parse.quote(params[key])

    def roomIndex(loc):
        # Scale is 0->10 and includes 2 walls; a coordinate that falls in none
        # of the three open intervals yields no room index
        if loc < 3:
            return 0
        if 3 < loc < 7:
            return 1
        if 7 < loc < 11:
            return 2
        return None

    table = {}
    rowNumber = 0
    for key in cleanData:
        record = cleanData[key]
        sdata = record['sdata']
        if sdata is None:
            continue

        for idx in range(len(record['sdata']['expt_index'])):
            row = {
                "pk" : record['pk'],
                "id" : record['userId'],
                "iv" : quotedParam(record['urlParameters'], 'iv'),
                "tag" : quotedParam(record['urlParameters'], 'tag'),
                "expt_index": sdata['expt_index'][idx],
                "expt_trial": sdata['expt_trial'][idx],
                "trial_layout" : sdata['trial_layout'][idx],
                "trial_level" : sdata['trial_level'][idx],
                "trial_solved" : sdata['trial_solved'][idx],
                "trial_attempts" : sdata['trial_attempts'][idx],
                "trial_game" : sdata['trial_game'][idx],
                "trial_transfer" : sdata['trial_transfer'][idx],
                "trial_test" : sdata['trial_test'][idx],
                "round_start_time" : None,
                "round_end_time" : None,
                "last_room" : None,
                "roundAttempted" : None,
                "gameComplete" : record["completed"],
            }
            resp = sdata["resp"][str(idx)]

            # A round counts as attempted when at least one response timestamp exists
            stamps = resp["timestamp"]
            attempted = len(stamps) != 0
            if attempted:
                # Start = first response timestamp minus its reaction time
                row["round_start_time"] = stamps[0] - resp["reactiontime"][0]
                row["round_end_time"] = stamps[-1]
            row["roundAttempted"] = attempted

            # Final room as a "[x- y]" string, derived from the last recorded location
            if len(resp["xloc"]) != 0 and len(resp["yloc"]) != 0:
                lastX = resp["xloc"][-1]
                lastY = resp["yloc"][-1]
                room = [r for r in (roomIndex(lastX), roomIndex(lastY)) if r is not None]
                row["last_room"] = str(room).replace(",", "-")

            table[str(rowNumber)] = row
            rowNumber += 1

    # Store as dataframe
    return pd.DataFrame.from_dict(table, orient="index")


In [23]:
# Run the pipeline: summarise every user record, then flatten to one row per round
cleanData = clean(data)
rounds = getRounds(cleanData)

Dataset ready.


In [25]:
# Save the per-round table to csvPath; index=False keeps the row labels out of the file
rounds.to_csv(csvPath, index=False)

In [26]:
# Show the column labels of the per-round table
rounds.columns

Index(['pk', 'id', 'iv', 'tag', 'expt_index', 'expt_trial', 'trial_layout',
       'trial_level', 'trial_solved', 'trial_attempts', 'trial_game',
       'trial_transfer', 'trial_test', 'round_start_time', 'round_end_time',
       'last_room', 'roundAttempted', 'gameComplete'],
      dtype='object')

In [31]:
# Show the rounds of users whose game was not flagged complete (gameComplete is False)
rounds.loc[rounds["gameComplete"] == False]

Unnamed: 0,pk,id,iv,tag,expt_index,expt_trial,trial_layout,trial_level,trial_solved,trial_attempts,trial_game,trial_transfer,trial_test,round_start_time,round_end_time,last_room,roundAttempted,gameComplete
0,19300,UeU551H7FP4q7Z8jBLkXiu90Q3iiwCG94U7RrOKll8skCi...,IJ%252BfGHZ0KV/uY0Rz%250A,kQWbg5J2SiuN8cmpO1Kw6A%253D%253D%250A,0,1,r1_1_23,1.0,1.0,1.0,1.0,False,False,1.694617e+12,1.694617e+12,[1- 0],True,False
1,19300,UeU551H7FP4q7Z8jBLkXiu90Q3iiwCG94U7RrOKll8skCi...,IJ%252BfGHZ0KV/uY0Rz%250A,kQWbg5J2SiuN8cmpO1Kw6A%253D%253D%250A,1,2,r1_2_15,1.0,1.0,1.0,2.0,False,False,1.694617e+12,1.694617e+12,[0- 2],True,False
2,19300,UeU551H7FP4q7Z8jBLkXiu90Q3iiwCG94U7RrOKll8skCi...,IJ%252BfGHZ0KV/uY0Rz%250A,kQWbg5J2SiuN8cmpO1Kw6A%253D%253D%250A,2,3,r1_3_19,1.0,1.0,1.0,3.0,False,False,1.694617e+12,1.694617e+12,[2- 2],True,False
3,19300,UeU551H7FP4q7Z8jBLkXiu90Q3iiwCG94U7RrOKll8skCi...,IJ%252BfGHZ0KV/uY0Rz%250A,kQWbg5J2SiuN8cmpO1Kw6A%253D%253D%250A,3,4,r1_4_20,1.0,1.0,1.0,4.0,False,False,1.694617e+12,1.694617e+12,[1- 2],True,False
4,19300,UeU551H7FP4q7Z8jBLkXiu90Q3iiwCG94U7RrOKll8skCi...,IJ%252BfGHZ0KV/uY0Rz%250A,kQWbg5J2SiuN8cmpO1Kw6A%253D%253D%250A,4,5,r1_5_23,1.0,1.0,1.0,5.0,False,False,1.694617e+12,1.694617e+12,[0- 2],True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
913419,32137,i7BEOZ3h3MOuGky27Qco7ZHCmg8Nj030gIBC9kSQ1kYvIy...,ANUk1rceNvljlGJf%250A,c/HyOUlUr%252BB8pteRw7zi9A%253D%253D%250A,94,95,transfer_16,2.0,0.0,1.0,76.0,True,False,1.705525e+12,1.705525e+12,[2- 1],True,False
913420,32137,i7BEOZ3h3MOuGky27Qco7ZHCmg8Nj030gIBC9kSQ1kYvIy...,ANUk1rceNvljlGJf%250A,c/HyOUlUr%252BB8pteRw7zi9A%253D%253D%250A,95,96,transfer_15,2.0,0.0,1.0,77.0,True,False,1.705525e+12,1.705525e+12,[1- 1],True,False
913421,32137,i7BEOZ3h3MOuGky27Qco7ZHCmg8Nj030gIBC9kSQ1kYvIy...,ANUk1rceNvljlGJf%250A,c/HyOUlUr%252BB8pteRw7zi9A%253D%253D%250A,96,97,transfer_12,2.0,0.0,1.0,78.0,True,False,1.705525e+12,1.705525e+12,[1- 1],True,False
913422,32137,i7BEOZ3h3MOuGky27Qco7ZHCmg8Nj030gIBC9kSQ1kYvIy...,ANUk1rceNvljlGJf%250A,c/HyOUlUr%252BB8pteRw7zi9A%253D%253D%250A,97,98,transfer_2,2.0,0.0,1.0,79.0,True,False,1.705525e+12,1.705525e+12,[0- 0],True,False
