In [1]:
import json
import pandas as pd
import urllib.parse
from collections import Counter
import numpy as np

In [2]:
### PARAMETERS
# -------------

# jsonPath = "C:/Users/agrog/Documents/Oxford/analysis/effugium/studies/jan24Pilot/oxfordRoomworld24.json"
# csvPath = "C:/Users/agrog/Documents/Oxford/analysis/effugium/studies/jan24Pilot/roomworldPilotRounds.csv"
jsonPath = "20240207T1257_studies.json"
csvPath = "roomworldPilotRounds.csv"
# -------------

In [3]:
with open(jsonPath, 'rb') as h:
    qs = json.load(h)

data = [i['fields'] for i in qs]

In [4]:
dataDf = pd.DataFrame.from_dict(data)
df = dataDf.sort_values(by="lastModified", ascending=False)

In [5]:
df.iloc[0]["gameParameters"]["curriculumType"]

'strategy'

In [22]:
df = dataDf.loc[dataDf["UserId"].isin([
    "raddus",
    "testtesttest"
])]

In [6]:
def getRounds(df, save=False):
    # Expects a filtered dataframe from server json
    # Convert back to a dict to operate on
    data = pd.DataFrame.to_dict(df, orient="index")
    # Iterate over each participant
    roundLevelData = []
    for user in data:
        common = {'userId' : data[user]['UserId'], "lastModified" : data[user]["lastModified"], "timeCreated" : data[user]["timeCreated"], "debriefAnswers" : data[user]["riseTracking"], "studyId" : data[user]["gameParameters"]["study_id"], "sessionId" : data[user]["gameParameters"]["session_id"], "curriculumType" : data[user]["gameParameters"]["curriculumType"]}
        for attempt in data[user]['data']:
            tmp = {**common, **attempt}
            # The end game looping thing at the end was also spamming final round requests, so we need to remove these duplicates
            if len(roundLevelData) != 0 and roundLevelData[-1]["roundIndex"] == 94 and attempt["roundIndex"] == 94:
                pass
            else:
                roundLevelData.append(tmp)

    if save:
        rldf = pd.DataFrame.from_dict(roundLevelData)
        rldf.to_csv(csvPath, index=False)
    
    return roundLevelData

In [7]:
rld = getRounds(df, save=True)
# create dataframe
rldf = pd.DataFrame.from_dict(rld)

In [12]:
rldf["userId"].nunique()

101

In [26]:
rldf.to_csv("roomworldPilotRounds.csv", index=False)

In [8]:
Counter(rldf["curriculumType"])

Counter({'strategy': 4758, 'concept': 4701})

In [9]:
rldf.query("userId == 'raddus' & roundType == 'transfer'")

Unnamed: 0,userId,lastModified,timeCreated,debriefAnswers,studyId,sessionId,curriculumType,resp,level,layout,...,currLevel,roundType,startTime,attemptNum,fromReload,numCorrect,roundIndex,stratLevel,globalRoundNum,numCorrectPerLevel


In [29]:
rldf.query("userId == 'mtest'")

Unnamed: 0,userId,resp,level,layout,endTime,layoutId,completed,currLevel,roundType,startTime,attemptNum,fromReload,numCorrect,roundIndex,stratLevel,globalRoundNum,numCorrectPerLevel
125,mtest,"{'tool': ['none', 'k7', 'none', 'none', 'd7', ...",1,", , ,-, , , ,-, , , ,-, , , \n , , ,-, , , ,-...",1706868492364,level-c1-s1_49,True,1,train,1706868488280,0,False,1,0,1,0,1
126,mtest,"{'tool': ['none', 'skip'], 'xloc': [10, 10], '...",1,", , ,-, , , ,-, , , ,-, , , \n , , ,-, , , ,-...",1706868495850,level-c1-s1_4,False,1,train,1706868492976,0,False,1,1,1,1,0
127,mtest,"{'tool': ['k10', 'none', 'none', 'none', 'skip...",1,", , ,-, , , ,-, , , ,-, , , \n , , ,-, , , ,-...",1706868499105,level-c1-s1_31,False,1,train,1706868496461,0,False,1,2,1,2,0
128,mtest,"{'tool': ['none', 'none', 't10', 't10', 'none'...",1,", , ,d3, ,i, ,-, , , ,-,t1, , \n , , ,-,t2, ,...",1706868502615,level-c2-s1_45,True,1,train,1706868499716,0,False,2,3,1,3,1
129,mtest,"{'tool': ['t5', 't5', 'skip'], 'xloc': [6, 6, ...",1,", , ,-, , , ,-, , , ,-,t11, , \n , , ,-, , , ...",1706868505178,level-c2-s1_9,False,1,train,1706868503224,0,False,2,4,1,4,0
130,mtest,"{'tool': ['none', 'none', 't5', 't5', 'none', ...",1,", , ,-, , , ,-, , , ,-, , , \n , , ,-,t5,t1,k...",1706868510088,level-c2-s1_10,True,1,train,1706868505791,0,False,3,5,1,5,1
131,mtest,"{'tool': ['none', 'none', 'none', 'none', 'ski...",1,", , ,-,i,p4, ,-, , , ,-, , , \n , , ,-, ,p11,...",1706868513375,level-c3-s1_12,False,1,train,1706868510699,0,False,3,6,1,6,0
132,mtest,"{'tool': ['none', 'skip'], 'xloc': [2, 2], 'yl...",1,", , ,-, , , ,-, , , ,-,x, , \n , , ,-, , , ,-...",1706868515122,level-c3-s1_37,False,1,train,1706868513988,0,False,3,7,1,7,0
133,mtest,"{'tool': ['none', 'skip'], 'xloc': [9, 9], 'yl...",1,", , ,-, , , ,-, , , ,-, , , \n , , ,-, , , ,-...",1706868516700,level-c3-s1_23,False,1,train,1706868515733,0,False,3,8,1,8,0
134,mtest,"{'tool': ['none', 'skip'], 'xloc': [2, 2], 'yl...",1,", , ,-, , , ,-, , , ,-, , , \n , , ,-, , , ,-...",1706868517965,level-c4-s1_43,False,1,train,1706868517311,0,False,3,9,1,9,0


In [41]:
# Some summary statistics
accs = rldf.groupby(by="userId")["completed"].mean()
print(accs)
print(f"Mean accuracy: {np.mean(list(accs))}%")

userId
5e58fa19fc38d6104f798878    0.915789
5ee386d305dd19000acbe9eb    0.821053
Name: completed, dtype: float64
Mean accuracy: 0.868421052631579%
