In [1]:
import glob
import itertools
import json
import os
import pandas
import scipy
from scipy import stats
import matplotlib.pyplot as plt


In [2]:
path = "ExperimentData"
dir_list = os.listdir(path)
 
print("Files and directories in '", path, "' :")
 
# prints all files
print(dir_list)

exp_paths = []

session_map = {}
for filename in dir_list:
    if filename.isdigit():
        exp_paths.append(path + "/" + filename + "/ResponsiveAR/Experiment/")
        #session_map[filename] = user_id
        files = os.listdir(path + "/" + filename + "/ResponsiveAR/Experiment/")
        name = files[0]
        index = name.find(".")
        user_id = name[index - 13: index]
        session_map[user_id] = int(filename)

print(exp_paths)

Files and directories in ' ExperimentData ' :
['59', '68', '32', '35', '56', '51', '58', '60', '34', '33', '.DS_Store', '45', '44', '43', '36', '31', '30', '37', '39', '46', '41', '70', '49', '40', '78']
['ExperimentData/59/ResponsiveAR/Experiment/', 'ExperimentData/68/ResponsiveAR/Experiment/', 'ExperimentData/32/ResponsiveAR/Experiment/', 'ExperimentData/35/ResponsiveAR/Experiment/', 'ExperimentData/56/ResponsiveAR/Experiment/', 'ExperimentData/51/ResponsiveAR/Experiment/', 'ExperimentData/58/ResponsiveAR/Experiment/', 'ExperimentData/60/ResponsiveAR/Experiment/', 'ExperimentData/34/ResponsiveAR/Experiment/', 'ExperimentData/33/ResponsiveAR/Experiment/', 'ExperimentData/45/ResponsiveAR/Experiment/', 'ExperimentData/44/ResponsiveAR/Experiment/', 'ExperimentData/43/ResponsiveAR/Experiment/', 'ExperimentData/36/ResponsiveAR/Experiment/', 'ExperimentData/31/ResponsiveAR/Experiment/', 'ExperimentData/30/ResponsiveAR/Experiment/', 'ExperimentData/37/ResponsiveAR/Experiment/', 'ExperimentDa

In [3]:
def parseFileNumber(fileName):
    fragments = fileName.split('_')
    return float(fragments[1])

In [4]:
responsiveData = []
experimentEvents = []

def parseFrame(frames, user_id):
    frames_list = []
    for frame in frames:
        frameData = {
            "userID": float,
            "frameNum": int,
            "unixTime": float,
            "timestamp": float,
            "hPos": {"x":float,"y":float,"z":float},
            "hDir": {"x":float,"y":float,"z":float},
            "hRot": {"i":float,"j":float,"k":float},
            "hAngl": {"x":float,"y":float,"z":float},
            "gazeOrigin": {"x":float,"y":float,"z":float},
            "gazeDirection": {"x":float,"y":float,"z":float},
            "rightHandRay": {"x":float,"y":float,"z":float},
            "leftHandRay": {"x":float,"y":float,"z":float},
            "experimentEvents": [],
            "responsiveData": []
        }
        responsiveEvent = frame["responsiveData"]
        experimentEvent = frame["experimentEvents"]
        frameData["userID"] = user_id
        frameData["frameNum"] = frame["frameNum"]
        frameData["unixTime"] = frame["unixTime"]
        frameData["timestamp"] = frame["timestamp"]
        frameData["hPos"] = frame["hPos"]
        frameData["hDir"] = frame["hDir"]
        frameData["hRot"] = frame["hRot"]
        frameData["hAngl"] = frame["hAngl"]
        frameData["gazeOrigin"] = frame["gazeOrigin"]
        frameData["gazeDirection"] = frame["gazeDirection"]
        frameData["rightHandRay"] = frame["rightHandRay"]
        frameData["leftHandRay"] = frame["leftHandRay"]
        frameData["experimentEvents"] = experimentEvent
        frameData["responsiveData"] = responsiveEvent
        frames_list.append(frameData)
        if responsiveEvent:
            responsiveData.append(responsiveEvent)
        if experimentEvent:
            experimentEvents.append(experimentEvent)
    return frames_list

def parseSession(session, user_id):
    sessionData = {
    "userID": float,
    "numFrames": int,
    "task": str,
    "start_time": float,
    "end_time": float,
    "total_time": float,
    "sessionNumber": int,
    "isResponsive": int,
    "frames": []
    }
    sessionData["userID"] = user_id
    sessionData["numFrames"] = session["numFrames"]
    sessionData["task"] = session["task"]
    sessionData["start_time"] = session["start_time"]
    sessionData["end_time"] = session["end_time"]
    sessionData["total_time"] = session["total_time"]
    sessionData["sessionNumber"] = session["sessionNumber"]
    sessionData["isResponsive"] = session["isResponsive"]
    return sessionData


In [5]:
data = []
frames = []
allsession = []
user_id = 0

for exp in exp_paths:
    experiment_data = {
    "path" : str,
    "sessions": {}
    }
    sessions = {}
    experiment = os.listdir(exp)
    current_path = exp
    experiment_data["path"] = current_path
    experiment.sort(key=parseFileNumber)
    for filename in experiment:
        if filename.endswith(".json"):
            # Prints only text file present in My Folder
            print(filename)
            firstFileFlag = False
            if("Session_0" in filename):
                firstFileFlag = True
            with open(current_path + filename, 'r') as f:
                json_data = json.load(f)
                if(firstFileFlag):
                    user_id = session_map[json_data["obj"]["userID"]]
                    session = parseSession(json_data["obj"]["sessionRecordings"][0], user_id)
                    frame = parseFrame(json_data["obj"]["sessionRecordings"][0]["frames"], user_id)
                    session["frames"] = frame
                    num = session["sessionNumber"]
                    sessions[num] = session
                    frames.append(frame)
                    allsession.append(session)
                else:
                    session = parseSession(json_data, user_id)
                    frame = parseFrame(json_data["frames"], user_id)
                    session["frames"] = frame
                    num = session["sessionNumber"]
                    sessions[num] = session
                    frames.append(frame)
                    allsession.append(session)
    experiment_data["sessions"] = sessions
    data.append(experiment_data)

#print(data)


ExperimentSession_0_1652910130460.json
ExperimentSession_1_1652910130460.json
ExperimentSession_2_1652910130460.json
ExperimentSession_3_1652910130460.json
ExperimentSession_4_1652910130460.json
ExperimentSession_5_1652910130460.json
ExperimentSession_6_1652910130460.json
ExperimentSession_7_1652910130460.json
ExperimentSession_8_1652910130460.json
ExperimentSession_9_1652910130460.json
ExperimentSession_0_1653072453927.json
ExperimentSession_1_1653072453927.json
ExperimentSession_2_1653072453927.json
ExperimentSession_3_1653072453927.json
ExperimentSession_4_1653072453927.json
ExperimentSession_5_1653072453927.json
ExperimentSession_6_1653072453927.json
ExperimentSession_7_1653072453927.json
ExperimentSession_8_1653072453927.json
ExperimentSession_9_1653072453927.json
ExperimentSession_0_1652736949521.json
ExperimentSession_1_1652736949521.json
ExperimentSession_2_1652736949521.json
ExperimentSession_3_1652736949521.json
ExperimentSession_4_1652736949521.json
ExperimentSession_5_16527

In [6]:
allsession

sessionsUnpacked = list(itertools.chain(allsession))
sessionsDf = pandas.DataFrame(sessionsUnpacked)
sessionsDf = sessionsDf.sort_values(by='sessionNumber')

sessionsDf = sessionsDf.loc[sessionsDf["sessionNumber"] < 9 ]
sessionsDf.head(1)

Unnamed: 0,userID,numFrames,task,start_time,end_time,total_time,sessionNumber,isResponsive,frames
0,59,137,The task is to click on the 2nd article title....,1652910156140,1652910160750,4610,0,True,"[{'userID': 59, 'frameNum': 1, 'unixTime': 165..."


USER 56 was unable to finish task 7 (when starting from 0) remove their task 8 from data

In [7]:
session_to_drop = sessionsDf.loc[(sessionsDf["userID"] == 56) & (sessionsDf["sessionNumber"] == 7)]
sessionsDf.drop(session_to_drop.index)

session_to_drop

Unnamed: 0,userID,numFrames,task,start_time,end_time,total_time,sessionNumber,isResponsive,frames
47,56,1434,The task is to find the current location on th...,1652899626599,1652899774967,148368,7,False,"[{'userID': 56, 'frameNum': 1741, 'unixTime': ..."


In [8]:
framesUnpacked = list(itertools.chain(*sessionsDf["frames"]))
framesDf = pandas.DataFrame(framesUnpacked)
print(framesDf.shape)
framesDf.head(1)

(76199, 14)


Unnamed: 0,userID,frameNum,unixTime,timestamp,hPos,hDir,hRot,hAngl,gazeOrigin,gazeDirection,rightHandRay,leftHandRay,experimentEvents,responsiveData
0,59,1,1652910146930,16470,"{'x': -0.01730290986597538, 'y': -0.0262664388...","{'x': 0.0704965889453888, 'y': -0.144727870821...","{'x': 0.07085785269737244, 'y': 0.038738552480...","{'x': 8.321520805358887, 'y': 4.08559703826904...","{'x': -0.016758747398853302, 'y': -0.026269029...","{'x': 0.07254093885421753, 'y': -0.14517901837...","{'x': -0.04117072373628616, 'y': 1.03524494171...","{'x': 0.0, 'y': 0.0, 'z': 0.0}","[{'unixTime': 1652910146830, 'systemTime': '14...","{'name': '', 'scale': {'x': 0.0, 'y': 0.0, 'z'..."


In [9]:
users = framesDf.loc[framesDf["experimentEvents"].str.len() != 0]
print(users.shape)

experimentUnpacked = list(itertools.chain(*users["experimentEvents"]))
experimentDf = pandas.DataFrame(experimentUnpacked)
experimentDf.head(1)
print(experimentDf.shape)
experimentDf.shape

experimentDf["userID"] = users["userID"]

for i in users.index:
    eventDF = pandas.DataFrame(users["experimentEvents"][i])

    for j in eventDF.index:
        time = eventDF["unixTime"][j]
        userID = users["userID"][i]
        mask = experimentDf["unixTime"] == time
        experimentDf.loc[mask, "userID"] = userID

experimentDf.head(1)

(1293, 14)
(1739, 10)


Unnamed: 0,unixTime,systemTime,isResponsive,eventName,task_number,task_type,object_scale,object_position,correct_answer,guess,userID
0,1652910146830,14-42-26-83,True,instruction,0,Article Display,"{'x': 0.25, 'y': 0.25, 'z': 0.25}","{'x': -0.015964265912771225, 'y': 0.0036757867...",2nd Title,,59.0


In [10]:
startEvents = experimentDf.loc[experimentDf["eventName"] == "start"]
startEvents.head(3)

Unnamed: 0,unixTime,systemTime,isResponsive,eventName,task_number,task_type,object_scale,object_position,correct_answer,guess,userID
1,1652910156190,14-42-36-19,True,start,0,Article Display,"{'x': 0.25, 'y': 0.25, 'z': 0.25}","{'x': -0.015964265912771225, 'y': 0.0036757867...",2nd Title,,59.0
3,1652894765511,10-26-05-51,False,start,0,Article Display,"{'x': 0.25, 'y': 0.25, 'z': 0.25}","{'x': 0.05424434691667557, 'y': 0.044334668666...",2nd Title,,58.0
5,1652980907262,10-21-47-26,False,start,0,Article Display,"{'x': 0.25, 'y': 0.25, 'z': 0.25}","{'x': 0.030493268743157387, 'y': 0.01259822584...",2nd Title,,70.0


In [11]:
sessionsDf

Unnamed: 0,userID,numFrames,task,start_time,end_time,total_time,sessionNumber,isResponsive,frames
0,59,137,The task is to click on the 2nd article title....,1652910156140,1652910160750,4610,0,True,"[{'userID': 59, 'frameNum': 1, 'unixTime': 165..."
60,58,121,The task is to click on the 2nd article title....,1652894765461,1652894769851,4390,0,False,"[{'userID': 58, 'frameNum': 1, 'unixTime': 165..."
200,70,150,The task is to click on the 2nd article title....,1652980907206,1652980911836,4630,0,False,"[{'userID': 70, 'frameNum': 1, 'unixTime': 165..."
70,60,103,The task is to click on the 2nd article title....,1653093121501,1653093124821,3320,0,False,"[{'userID': 60, 'frameNum': 1, 'unixTime': 165..."
30,35,159,The task is to click on the 2nd article title....,1652807292687,1652807301206,8519,0,True,"[{'userID': 35, 'frameNum': 1, 'unixTime': 165..."
...,...,...,...,...,...,...,...,...,...
88,34,1464,The task is to find the low temperature on th...,1652747802011,1652747952843,150832,8,False,"[{'userID': 34, 'frameNum': 2625, 'unixTime': ..."
168,37,123,The task is to find the low temperature on th...,1652818570763,1652818580223,9460,8,True,"[{'userID': 37, 'frameNum': 4619, 'unixTime': ..."
68,58,157,The task is to find the low temperature on th...,1652894972394,1652894983335,10941,8,False,"[{'userID': 58, 'frameNum': 2059, 'unixTime': ..."
188,46,183,The task is to find the low temperature on th...,1653088667391,1653088681928,14537,8,False,"[{'userID': 46, 'frameNum': 2404, 'unixTime': ..."


In [12]:
sessionsDf["Actual_Start_Time"] = sessionsDf["start_time"]

for i in startEvents.index:
    userID = startEvents["userID"][i]
    task_num = startEvents["task_number"][i]
    mask = (sessionsDf["userID"] == userID) & (sessionsDf["sessionNumber"] == task_num)
    sessionsDf.loc[mask, "Actual_Start_Time"] = startEvents["unixTime"][i]

sessionsDf.head(1)

Unnamed: 0,userID,numFrames,task,start_time,end_time,total_time,sessionNumber,isResponsive,frames,Actual_Start_Time
0,59,137,The task is to click on the 2nd article title....,1652910156140,1652910160750,4610,0,True,"[{'userID': 59, 'frameNum': 1, 'unixTime': 165...",1652910156190


In [13]:
endEvents = experimentDf.loc[experimentDf["eventName"] == "complete"]
endEvents.head(1)

Unnamed: 0,unixTime,systemTime,isResponsive,eventName,task_number,task_type,object_scale,object_position,correct_answer,guess,userID
51,1653008142298,17-55-42-29,True,complete,0,Article Display,"{'x': 0.25, 'y': 0.25, 'z': 0.25}","{'x': 0.020655199885368347, 'y': 0.01999785564...",2nd Title,2nd Title,51.0


In [14]:
sessionsDf["Actual_End_Time"] = sessionsDf["end_time"]

for i in endEvents.index:
    userID = endEvents["userID"][i]
    task_num = endEvents["task_number"][i]
    mask = (sessionsDf["userID"] == userID) & (sessionsDf["sessionNumber"] == task_num)
    sessionsDf.loc[mask, "Actual_End_Time"] = endEvents["unixTime"][i]

sessionsDf.head(1)

Unnamed: 0,userID,numFrames,task,start_time,end_time,total_time,sessionNumber,isResponsive,frames,Actual_Start_Time,Actual_End_Time
0,59,137,The task is to click on the 2nd article title....,1652910156140,1652910160750,4610,0,True,"[{'userID': 59, 'frameNum': 1, 'unixTime': 165...",1652910156190,1652910160750


In [15]:
end_time_diff = sessionsDf["end_time"].to_numpy() - sessionsDf["Actual_End_Time"].to_numpy()
end_time_diff

array([  0,   0,   0, -10, -11, -11, -10,   0,   0,   0,   0,  -4,  -5,
       -10,   0, -11, -10,   0, -10, -11,   0, -10,   0, -10,   0,   0,
         0,   0, -11,   0, -10, -10,   0, -10,   0,   0, -10, -10,   0,
         0,   0, -10,  -2,   0,   0, -10,   0,   0,   0, -10,  -9,  -8,
         0,   0,   0, -10,   0,  -9,   0,   0,   0, -10,  -4,   0,   0,
         0, -10, -11,   0,   0,  -6,   0,  -9, -10,   0, -10, -10,   0,
         0,   0, -11, -10,  -4,  -6,   0,  -9,   0,   0, -10,   0, -10,
        -7, -10,   0, -10, -10,   0,  -7, -10,   0,   0,   0,   0,   0,
         0, -11,  -9,   0,  -8,   0,   0,   0,   0,  -5,  -5,   0,   0,
         0, -10,  -3,  -9,   0,   0,   0,   0,   0,   0,   0,   0,  -9,
       -10,   0,   0,   0,  -9,  -5,   0,   0,   0,  -9,  -9,  -5,  -5,
       -11, -11, -10,   0,   0,  -4,   0,   0,   0,   0,  -5, -10, -10,
         0,   0, -10, -15,   0,   0,   0, -10,   0,   0,  -9,   0,   0,
        -6, -10,  -9,   0,   0, -10, -11, -10,   0,   0,   0,   

In [16]:
guessEvents = experimentDf.loc[experimentDf["eventName"] == "answer"]
guessEvents.head(3)

sessionsDf["Guesses"] = 0

In [17]:
for i in guessEvents.index:
    userID = guessEvents["userID"][i]
    task_num = guessEvents["task_number"][i]
    mask = (sessionsDf["userID"] == userID) & (sessionsDf["sessionNumber"] == task_num)
    sessionsDf.loc[mask, "Guesses"] += 1

In [18]:
start_time_diff = sessionsDf["start_time"].to_numpy() - sessionsDf["Actual_Start_Time"].to_numpy()
start_time_diff

array([-50, -50, -56, -60, -50, -61, -60, -50, -50, -60, -50, -60, -60,
       -60, -49, -56, -51, -50, -59, -50, -60, -60, -45, -50, -41, -40,
       -40, -40, -40, -51, -28, -50, -39, -43, -46, -40, -40, -40, -40,
       -40, -40, -40, -40, -30, -39, -30, -45, -40, -40, -41, -40, -46,
       -40, -41, -41, -40, -39, -40, -40, -40, -40, -40, -30, -39, -40,
       -40, -40, -40, -40, -41, -41, -39, -40, -40, -45, -39, -41, -40,
       -40, -40, -50, -40, -40, -40, -37, -46, -40, -40, -39, -41, -41,
       -41, -40, -40, -40, -40, -20, -16, -20, -20, -19, -20, -19, -21,
       -20, -20, -21, -30, -22, -10, -19, -21, -10, -21, -10, -20, -19,
       -20, -10, -21, -10,  -4,  -3, -10, -10,   0, -10,  -4,  -6,   0,
         0,   0, -10, -10, -10,   0,   0, -10, -11, -10, -10, -10, -11,
       -10, -10, -11, -20, -20, -20,  -9, -10, -10, -16, -10, -19, -15,
       -10, -10, -10, -20,  -4, -11, -20,  -8, -11, -10, -10, -11, -11,
        -5, -10, -10,   0,  -8,   0, -10,   0,  -5, -10, -10, -1

In [19]:
sessionsDf["Actual_Total_Time"] = sessionsDf["Actual_End_Time"].to_numpy() - sessionsDf["Actual_Start_Time"].to_numpy()
sessionsDf.head(1)

Unnamed: 0,userID,numFrames,task,start_time,end_time,total_time,sessionNumber,isResponsive,frames,Actual_Start_Time,Actual_End_Time,Guesses,Actual_Total_Time
0,59,137,The task is to click on the 2nd article title....,1652910156140,1652910160750,4610,0,True,"[{'userID': 59, 'frameNum': 1, 'unixTime': 165...",1652910156190,1652910160750,1,4560


In [20]:
start_time_diff = sessionsDf["total_time"].to_numpy() - sessionsDf["Actual_Total_Time"].to_numpy()
start_time_diff

array([ 50,  50,  56,  50,  39,  50,  50,  50,  50,  60,  50,  56,  55,
        50,  49,  45,  41,  50,  49,  39,  60,  50,  45,  40,  41,  40,
        40,  40,  29,  51,  18,  40,  39,  33,  46,  40,  30,  30,  40,
        40,  40,  30,  38,  30,  39,  20,  45,  40,  40,  31,  31,  38,
        40,  41,  41,  30,  39,  31,  40,  40,  40,  30,  26,  39,  40,
        40,  30,  29,  40,  41,  35,  39,  31,  30,  45,  29,  31,  40,
        40,  40,  39,  30,  36,  34,  37,  37,  40,  40,  29,  41,  31,
        34,  30,  40,  30,  30,  20,   9,  10,  20,  19,  20,  19,  21,
        20,   9,  12,  30,  14,  10,  19,  21,  10,  16,   5,  20,  19,
        20,   0,  18,   1,   4,   3,  10,  10,   0,  10,   4,   6,  -9,
       -10,   0,  10,  10,   1,  -5,   0,  10,  11,   1,   1,   5,   6,
        -1,  -1,   1,  20,  20,  16,   9,  10,  10,  16,   5,   9,   5,
        10,  10,   0,   5,   4,  11,  20,  -2,  11,  10,   1,  11,  11,
        -1,   0,   1,   0,   8, -10,  -1, -10,   5,  10,  10,  1

In [21]:
responsiveDf = sessionsDf.loc[sessionsDf["isResponsive"] == True]
responsiveDf = responsiveDf.sort_values("Actual_Total_Time")
responsiveDf

Unnamed: 0,userID,numFrames,task,start_time,end_time,total_time,sessionNumber,isResponsive,frames,Actual_Start_Time,Actual_End_Time,Guesses,Actual_Total_Time
91,33,53,The task is to click the 5th article title.\n\...,1652743242306,1652743244466,2160,1,True,"[{'userID': 33, 'frameNum': 95, 'unixTime': 16...",1652743242346,1652743244466,1,2120
161,37,44,The task is to click the 5th article title.\n\...,1652818011143,1652818013353,2210,1,True,"[{'userID': 37, 'frameNum': 53, 'unixTime': 16...",1652818011183,1652818013353,1,2170
141,31,67,The task is to click the 5th article title.\n\...,1652726021550,1652726023839,2289,1,True,"[{'userID': 31, 'frameNum': 189, 'unixTime': 1...",1652726021589,1652726023839,1,2250
171,39,95,The task is to click the 5th article title.\n\...,1652829269506,1652829272446,2940,1,True,"[{'userID': 39, 'frameNum': 221, 'unixTime': 1...",1652829269546,1652829272456,1,2910
190,41,80,The task is to click on the 2nd article title....,1652904601496,1652904604747,3251,0,True,"[{'userID': 41, 'frameNum': 1, 'unixTime': 165...",1652904601546,1652904604747,1,3201
...,...,...,...,...,...,...,...,...,...,...,...,...,...
178,39,1429,The task is to find the low temperature on th...,1652829423309,1652829574206,150897,8,True,"[{'userID': 39, 'frameNum': 1712, 'unixTime': ...",1652829423319,1652829574206,10,150887
147,31,1467,The task is to find the current location on th...,1652726388185,1652726544687,156502,7,True,"[{'userID': 31, 'frameNum': 3527, 'unixTime': ...",1652726388185,1652726544687,24,156502
145,31,1719,The task is to type phone number 805-UCSB-EDU ...,1652726084621,1652726272953,188332,5,True,"[{'userID': 31, 'frameNum': 806, 'unixTime': 1...",1652726084621,1652726272953,4,188332
125,43,2221,The task is to type phone number 805-UCSB-EDU ...,1652990824367,1652991077151,252784,5,True,"[{'userID': 43, 'frameNum': 1430, 'unixTime': ...",1652990824377,1652991077162,3,252785


In [22]:
nonresponsiveDf = sessionsDf.loc[sessionsDf["isResponsive"] == False]
nonresponsiveDf = nonresponsiveDf.sort_values("Actual_Total_Time")
nonresponsiveDf

Unnamed: 0,userID,numFrames,task,start_time,end_time,total_time,sessionNumber,isResponsive,frames,Actual_Start_Time,Actual_End_Time,Guesses,Actual_Total_Time
22,32,76,The task is to click the Author of the 4th art...,1652736991131,1652736994072,2941,2,False,"[{'userID': 32, 'frameNum': 223, 'unixTime': 1...",1652736991161,1652736994076,1,2915
182,46,77,The task is to click the Author of the 4th art...,1653088327966,1653088331007,3041,2,False,"[{'userID': 46, 'frameNum': 715, 'unixTime': 1...",1653088328006,1653088331007,1,3001
112,44,100,The task is to click the Author of the 4th art...,1652995937411,1652995940511,3100,2,False,"[{'userID': 44, 'frameNum': 340, 'unixTime': 1...",1652995937451,1652995940511,1,3060
130,36,120,The task is to click on the 2nd article title....,1652812829574,1652812832864,3290,0,False,"[{'userID': 36, 'frameNum': 1, 'unixTime': 165...",1652812829624,1652812832874,1,3250
70,60,103,The task is to click on the 2nd article title....,1653093121501,1653093124821,3320,0,False,"[{'userID': 60, 'frameNum': 1, 'unixTime': 165...",1653093121561,1653093124831,1,3270
...,...,...,...,...,...,...,...,...,...,...,...,...,...
187,46,722,The task is to find the current location on th...,1653088505254,1653088645361,140107,7,False,"[{'userID': 46, 'frameNum': 1682, 'unixTime': ...",1653088505259,1653088645367,37,140108
47,56,1434,The task is to find the current location on th...,1652899626599,1652899774967,148368,7,False,"[{'userID': 56, 'frameNum': 1741, 'unixTime': ...",1652899626604,1652899774967,45,148363
88,34,1464,The task is to find the low temperature on th...,1652747802011,1652747952843,150832,8,False,"[{'userID': 34, 'frameNum': 2625, 'unixTime': ...",1652747802021,1652747952843,45,150822
78,60,2011,The task is to find the low temperature on th...,1653093267183,1653093491077,223894,8,False,"[{'userID': 60, 'frameNum': 1445, 'unixTime': ...",1653093267194,1653093491077,55,223883


In [23]:
#task 0
i = 0
taskDf = sessionsDf.loc[sessionsDf["sessionNumber"] == i]
taskDf
task_r = taskDf.loc[taskDf["isResponsive"] == True]
task_n = taskDf.loc[taskDf["isResponsive"] == False]

taskRAvgTime = task_r['Actual_Total_Time'].mean()
taskRStd = task_r['Actual_Total_Time'].std()
print(f"responsive task {str(i)} Time: {taskRAvgTime:.3f} ± {taskRStd:.3f}")

taskNAvgTime = task_n['Actual_Total_Time'].mean()
taskNStd = task_n['Actual_Total_Time'].std()
print(f"normal task {str(i)} Time: {taskNAvgTime:.3f} ± {taskNStd:.3f}")

print(scipy.stats.ttest_ind(task_r['Actual_Total_Time'], task_n['Actual_Total_Time']))


responsive task 0 Time: 6082.909 ± 2091.542
normal task 0 Time: 10686.077 ± 12046.905
Ttest_indResult(statistic=-1.247318371986381, pvalue=0.22539606233801399)


In [24]:
#Task 1
i = 1
taskDf = sessionsDf.loc[sessionsDf["sessionNumber"] == i]
taskDf
task_r = taskDf.loc[taskDf["isResponsive"] == True]
task_n = taskDf.loc[taskDf["isResponsive"] == False]

taskRAvgTime = task_r['Actual_Total_Time'].mean()
taskRStd = task_r['Actual_Total_Time'].std()
print(f"responsive task {str(i)} Time: {taskRAvgTime:.3f} ± {taskRStd:.3f}")

taskNAvgTime = task_n['Actual_Total_Time'].mean()
taskNStd = task_n['Actual_Total_Time'].std()
print(f"normal task {str(i)} Time: {taskNAvgTime:.3f} ± {taskNStd:.3f}")

print(scipy.stats.ttest_ind(task_r['Actual_Total_Time'], task_n['Actual_Total_Time']))

responsive task 1 Time: 4145.455 ± 2265.859
normal task 1 Time: 17486.769 ± 15627.594
Ttest_indResult(statistic=-2.797167271659591, pvalue=0.010502896066303355)


In [25]:
#Task 2
i = 2
taskDf = sessionsDf.loc[sessionsDf["sessionNumber"] == i]
taskDf
task_r = taskDf.loc[taskDf["isResponsive"] == True]
task_n = taskDf.loc[taskDf["isResponsive"] == False]

taskRAvgTime = task_r['Actual_Total_Time'].mean()
taskRStd = task_r['Actual_Total_Time'].std()
print(f"responsive task {str(i)} Time: {taskRAvgTime:.3f} ± {taskRStd:.3f}")

taskNAvgTime = task_n['Actual_Total_Time'].mean()
taskNStd = task_n['Actual_Total_Time'].std()
print(f"normal task {str(i)} Time: {taskNAvgTime:.3f} ± {taskNStd:.3f}")

print(scipy.stats.ttest_ind(task_r['Actual_Total_Time'], task_n['Actual_Total_Time']))

taskDf = taskDf.sort_values("Actual_Total_Time")
taskDf

responsive task 2 Time: 15173.182 ± 24063.659
normal task 2 Time: 11130.308 ± 7523.474
Ttest_indResult(statistic=0.5754632102689233, pvalue=0.5708178145671166)


Unnamed: 0,userID,numFrames,task,start_time,end_time,total_time,sessionNumber,isResponsive,frames,Actual_Start_Time,Actual_End_Time,Guesses,Actual_Total_Time
22,32,76,The task is to click the Author of the 4th art...,1652736991131,1652736994072,2941,2,False,"[{'userID': 32, 'frameNum': 223, 'unixTime': 1...",1652736991161,1652736994076,1,2915
182,46,77,The task is to click the Author of the 4th art...,1653088327966,1653088331007,3041,2,False,"[{'userID': 46, 'frameNum': 715, 'unixTime': 1...",1653088328006,1653088331007,1,3001
112,44,100,The task is to click the Author of the 4th art...,1652995937411,1652995940511,3100,2,False,"[{'userID': 44, 'frameNum': 340, 'unixTime': 1...",1652995937451,1652995940511,1,3060
212,49,88,The task is to click the Author of the 4th art...,1652916271940,1652916275511,3571,2,True,"[{'userID': 49, 'frameNum': 237, 'unixTime': 1...",1652916271980,1652916275520,2,3540
162,37,65,The task is to click the Author of the 4th art...,1652818016233,1652818019923,3690,2,True,"[{'userID': 37, 'frameNum': 97, 'unixTime': 16...",1652818016273,1652818019923,1,3650
192,41,74,The task is to click the Author of the 4th art...,1652904616827,1652904620987,4160,2,True,"[{'userID': 41, 'frameNum': 167, 'unixTime': 1...",1652904616867,1652904620987,1,4120
142,31,79,The task is to click the Author of the 4th art...,1652726027400,1652726031870,4470,2,True,"[{'userID': 31, 'frameNum': 256, 'unixTime': 1...",1652726027439,1652726031870,1,4431
82,34,127,The task is to click the Author of the 4th art...,1652747575188,1652747579718,4530,2,False,"[{'userID': 34, 'frameNum': 466, 'unixTime': 1...",1652747575228,1652747579727,1,4499
172,39,109,The task is to click the Author of the 4th art...,1652829278777,1652829283567,4790,2,True,"[{'userID': 39, 'frameNum': 316, 'unixTime': 1...",1652829278816,1652829283567,1,4751
92,33,92,The task is to click the Author of the 4th art...,1652743246906,1652743253766,6860,2,True,"[{'userID': 33, 'frameNum': 148, 'unixTime': 1...",1652743246947,1652743253766,1,6819


In [26]:
#task 3
i = 3
taskDf = sessionsDf.loc[sessionsDf["sessionNumber"] == i]
taskDf
task_r = taskDf.loc[taskDf["isResponsive"] == True]
task_n = taskDf.loc[taskDf["isResponsive"] == False]

taskRAvgTime = task_r['Actual_Total_Time'].mean()
taskRStd = task_r['Actual_Total_Time'].std()
print(f"responsive task {str(i)} Time: {taskRAvgTime:.3f} ± {taskRStd:.3f}")

taskNAvgTime = task_n['Actual_Total_Time'].mean()
taskNStd = task_n['Actual_Total_Time'].std()
print(f"normal task {str(i)} Time: {taskNAvgTime:.3f} ± {taskNStd:.3f}")

print(scipy.stats.ttest_ind(task_r['Actual_Total_Time'], task_n['Actual_Total_Time']))

responsive task 3 Time: 7834.364 ± 6250.077
normal task 3 Time: 10985.462 ± 6058.189
Ttest_indResult(statistic=-1.2514712543106266, pvalue=0.22390672819390758)


In [27]:
#task 4
i = 4
taskDf = sessionsDf.loc[sessionsDf["sessionNumber"] == i]
taskDf
task_r = taskDf.loc[taskDf["isResponsive"] == True]
task_n = taskDf.loc[taskDf["isResponsive"] == False]

taskRAvgTime = task_r['Actual_Total_Time'].mean()
taskRStd = task_r['Actual_Total_Time'].std()
print(f"responsive task {str(i)} Time: {taskRAvgTime:.3f} ± {taskRStd:.3f}")

taskNAvgTime = task_n['Actual_Total_Time'].mean()
taskNStd = task_n['Actual_Total_Time'].std()
print(f"normal task {str(i)} Time: {taskNAvgTime:.3f} ± {taskNStd:.3f}")

print(scipy.stats.ttest_ind(task_r['Actual_Total_Time'], task_n['Actual_Total_Time']))

responsive task 4 Time: 28433.545 ± 12561.233
normal task 4 Time: 24613.154 ± 8343.192
Ttest_indResult(statistic=0.8904084857539926, pvalue=0.38288084383409005)


In [28]:
#task 5
i = 5
taskDf = sessionsDf.loc[sessionsDf["sessionNumber"] == i]
taskDf
task_r = taskDf.loc[taskDf["isResponsive"] == True]
task_n = taskDf.loc[taskDf["isResponsive"] == False]

taskRAvgTime = task_r['Actual_Total_Time'].mean()
taskRStd = task_r['Actual_Total_Time'].std()
print(f"responsive task {str(i)} Time: {taskRAvgTime:.3f} ± {taskRStd:.3f}")

taskNAvgTime = task_n['Actual_Total_Time'].mean()
taskNStd = task_n['Actual_Total_Time'].std()
print(f"normal task {str(i)} Time: {taskNAvgTime:.3f} ± {taskNStd:.3f}")

print(scipy.stats.ttest_ind(task_r['Actual_Total_Time'], task_n['Actual_Total_Time']))

responsive task 5 Time: 63137.000 ± 79383.601
normal task 5 Time: 22833.923 ± 9126.820
Ttest_indResult(statistic=1.8237436049484217, pvalue=0.08180645392103386)


In [29]:
#task 6
i = 6
taskDf = sessionsDf.loc[sessionsDf["sessionNumber"] == i]
taskDf
task_r = taskDf.loc[taskDf["isResponsive"] == True]
task_n = taskDf.loc[taskDf["isResponsive"] == False]

taskRAvgTime = task_r['Actual_Total_Time'].mean()
taskRStd = task_r['Actual_Total_Time'].std()
print(f"responsive task {str(i)} Time: {taskRAvgTime:.3f} ± {taskRStd:.3f}")

taskNAvgTime = task_n['Actual_Total_Time'].mean()
taskNStd = task_n['Actual_Total_Time'].std()
print(f"normal task {str(i)} Time: {taskNAvgTime:.3f} ± {taskNStd:.3f}")

print(scipy.stats.ttest_ind(task_r['Actual_Total_Time'], task_n['Actual_Total_Time']))

responsive task 6 Time: 67130.091 ± 116602.743
normal task 6 Time: 22913.846 ± 9993.546
Ttest_indResult(statistic=1.3669138532609222, pvalue=0.18545908011269885)


In [30]:
#task 7
i = 7
taskDf = sessionsDf.loc[sessionsDf["sessionNumber"] == i]
taskDf
task_r = taskDf.loc[taskDf["isResponsive"] == True]
task_n = taskDf.loc[taskDf["isResponsive"] == False]

taskRAvgTime = task_r['Actual_Total_Time'].mean()
taskRStd = task_r['Actual_Total_Time'].std()
print(f"responsive task {str(i)} Time: {taskRAvgTime:.3f} ± {taskRStd:.3f}")

taskNAvgTime = task_n['Actual_Total_Time'].mean()
taskNStd = task_n['Actual_Total_Time'].std()
print(f"normal task {str(i)} Time: {taskNAvgTime:.3f} ± {taskNStd:.3f}")

print(scipy.stats.ttest_ind(task_r['Actual_Total_Time'], task_n['Actual_Total_Time']))

responsive task 7 Time: 65731.727 ± 39121.515
normal task 7 Time: 72177.000 ± 88483.062
Ttest_indResult(statistic=-0.223250518251281, pvalue=0.8254004563208638)


In [31]:
#task 8
i = 8
taskDf = sessionsDf.loc[sessionsDf["sessionNumber"] == i]
taskDf
task_r = taskDf.loc[taskDf["isResponsive"] == True]
task_n = taskDf.loc[taskDf["isResponsive"] == False]

taskRAvgTime = task_r['Actual_Total_Time'].mean()
taskRStd = task_r['Actual_Total_Time'].std()
print(f"responsive task {str(i)} Time: {taskRAvgTime:.3f} ± {taskRStd:.3f}")

taskNAvgTime = task_n['Actual_Total_Time'].mean()
taskNStd = task_n['Actual_Total_Time'].std()
print(f"normal task {str(i)} Time: {taskNAvgTime:.3f} ± {taskNStd:.3f}")

print(scipy.stats.ttest_ind(task_r['Actual_Total_Time'], task_n['Actual_Total_Time']))

responsive task 8 Time: 38329.182 ± 41198.082
normal task 8 Time: 48980.615 ± 65756.934
Ttest_indResult(statistic=-0.4647256848475711, pvalue=0.6466945150636145)


In [32]:
#each task in a loop
for i in range(0,9):
    print(f"TASK {str(i)}:")
    taskDf = sessionsDf.loc[sessionsDf["sessionNumber"] == i]
    taskDf
    task_r = taskDf.loc[taskDf["isResponsive"] == True]
    task_n = taskDf.loc[taskDf["isResponsive"] == False]

    taskRAvgTime = task_r['Actual_Total_Time'].mean()
    taskRStd = task_r['Actual_Total_Time'].std()
    print(f"responsive task {str(i)} Time: {taskRAvgTime:.3f} ± {taskRStd:.3f}")

    taskNAvgTime = task_n['Actual_Total_Time'].mean()
    taskNStd = task_n['Actual_Total_Time'].std()
    print(f"normal task {str(i)} Time: {taskNAvgTime:.3f} ± {taskNStd:.3f}")

    print(scipy.stats.ttest_ind(task_r['Actual_Total_Time'], task_n['Actual_Total_Time']))

TASK 0:
responsive task 0 Time: 6082.909 ± 2091.542
normal task 0 Time: 10686.077 ± 12046.905
Ttest_indResult(statistic=-1.247318371986381, pvalue=0.22539606233801399)
TASK 1:
responsive task 1 Time: 4145.455 ± 2265.859
normal task 1 Time: 17486.769 ± 15627.594
Ttest_indResult(statistic=-2.797167271659591, pvalue=0.010502896066303355)
TASK 2:
responsive task 2 Time: 15173.182 ± 24063.659
normal task 2 Time: 11130.308 ± 7523.474
Ttest_indResult(statistic=0.5754632102689233, pvalue=0.5708178145671166)
TASK 3:
responsive task 3 Time: 7834.364 ± 6250.077
normal task 3 Time: 10985.462 ± 6058.189
Ttest_indResult(statistic=-1.2514712543106266, pvalue=0.22390672819390758)
TASK 4:
responsive task 4 Time: 28433.545 ± 12561.233
normal task 4 Time: 24613.154 ± 8343.192
Ttest_indResult(statistic=0.8904084857539926, pvalue=0.38288084383409005)
TASK 5:
responsive task 5 Time: 63137.000 ± 79383.601
normal task 5 Time: 22833.923 ± 9126.820
Ttest_indResult(statistic=1.8237436049484217, pvalue=0.08180645

In [33]:
for i in range(0,9):
    print(f"TASK {str(i)}:")
    taskDf = sessionsDf.loc[sessionsDf["sessionNumber"] == i]
    taskDf
    task_r = taskDf.loc[taskDf["isResponsive"] == True]
    task_n = taskDf.loc[taskDf["isResponsive"] == False]

    taskRAvgTime = task_r['Guesses'].mean()
    taskRStd = task_r['Guesses'].std()
    print(f"responsive task {str(i)} Time: {taskRAvgTime:.3f} ± {taskRStd:.3f}")

    taskNAvgTime = task_n['Guesses'].mean()
    taskNStd = task_n['Guesses'].std()
    print(f"normal task {str(i)} Time: {taskNAvgTime:.3f} ± {taskNStd:.3f}")

    print(scipy.stats.ttest_ind(task_r['Guesses'], task_n['Guesses']))

TASK 0:
responsive task 0 Time: 1.091 ± 0.302
normal task 0 Time: 1.077 ± 0.277
Ttest_indResult(statistic=0.11830008851000817, pvalue=0.9069033903170751)
TASK 1:
responsive task 1 Time: 1.091 ± 0.302
normal task 1 Time: 1.692 ± 0.947
Ttest_indResult(statistic=-2.014864918232127, pvalue=0.056300862927160786)
TASK 2:
responsive task 2 Time: 1.909 ± 2.386
normal task 2 Time: 2.000 ± 1.871
Ttest_indResult(statistic=-0.10465571473861787, pvalue=0.9175972949235279)
TASK 3:
responsive task 3 Time: 1.182 ± 0.405
normal task 3 Time: 1.538 ± 0.967
Ttest_indResult(statistic=-1.138326861441063, pvalue=0.26722892681185895)
TASK 4:
responsive task 4 Time: 1.091 ± 0.302
normal task 4 Time: 1.077 ± 0.277
Ttest_indResult(statistic=0.11830008851000817, pvalue=0.9069033903170751)
TASK 5:
responsive task 5 Time: 1.455 ± 1.036
normal task 5 Time: 1.000 ± 0.000
Ttest_indResult(statistic=1.5889359691706568, pvalue=0.12634390199381382)
TASK 6:
responsive task 6 Time: 9.455 ± 15.293
normal task 6 Time: 5.000 ±

In [34]:
usersDf = sessionsDf[["userID"]]
usersDf = usersDf.drop_duplicates()
usersDf = usersDf.sort_values("userID")
usersDf

Unnamed: 0,userID
150,30
140,31
20,32
90,33
80,34
30,35
130,36
160,37
170,39
220,40


In [35]:
allTasksDf = usersDf

In [36]:
for i in range(0,9):
    allTasksDf[f"task{i}"] = i

allTasksDf["all_tasks"] = 0
allTasksDf["group"] = "A"
allTasksDf

Unnamed: 0,userID,task0,task1,task2,task3,task4,task5,task6,task7,task8,all_tasks,group
150,30,0,1,2,3,4,5,6,7,8,0,A
140,31,0,1,2,3,4,5,6,7,8,0,A
20,32,0,1,2,3,4,5,6,7,8,0,A
90,33,0,1,2,3,4,5,6,7,8,0,A
80,34,0,1,2,3,4,5,6,7,8,0,A
30,35,0,1,2,3,4,5,6,7,8,0,A
130,36,0,1,2,3,4,5,6,7,8,0,A
160,37,0,1,2,3,4,5,6,7,8,0,A
170,39,0,1,2,3,4,5,6,7,8,0,A
220,40,0,1,2,3,4,5,6,7,8,0,A


In [37]:
for i in allTasksDf.index:
    user = allTasksDf["userID"][i]
    userSessions = sessionsDf.loc[sessionsDf["userID"] == user]
    total = 0
    task = userSessions.loc[userSessions["sessionNumber"] == 0]
    allTasksDf.loc[i, "task0"] = task["Actual_Total_Time"].to_numpy()[0]
    total = int(task["Actual_Total_Time"])
    task = userSessions.loc[userSessions["sessionNumber"] == 1]
    allTasksDf.loc[i, "task1"] = task["Actual_Total_Time"].to_numpy()[0]
    total += int(task["Actual_Total_Time"])
    task = userSessions.loc[userSessions["sessionNumber"] == 2]
    allTasksDf.loc[i, "task2"] = task["Actual_Total_Time"].to_numpy()[0]
    total += int(task["Actual_Total_Time"])
    task = userSessions.loc[userSessions["sessionNumber"] == 3]
    allTasksDf.loc[i, "task3"] = task["Actual_Total_Time"].to_numpy()[0]
    total += int(task["Actual_Total_Time"])
    task = userSessions.loc[userSessions["sessionNumber"] == 4]
    allTasksDf.loc[i, "task4"] = task["Actual_Total_Time"].to_numpy()[0]
    total += int(task["Actual_Total_Time"])
    task = userSessions.loc[userSessions["sessionNumber"] == 5]
    allTasksDf.loc[i, "task5"] = task["Actual_Total_Time"].to_numpy()[0]
    total += int(task["Actual_Total_Time"])
    task = userSessions.loc[userSessions["sessionNumber"] == 6]
    allTasksDf.loc[i, "task6"] = task["Actual_Total_Time"].to_numpy()[0]
    total += int(task["Actual_Total_Time"])
    task = userSessions.loc[userSessions["sessionNumber"] == 7]
    allTasksDf.loc[i, "task7"] = task["Actual_Total_Time"].to_numpy()[0]
    total += int(task["Actual_Total_Time"])
    task = userSessions.loc[userSessions["sessionNumber"] == 8]
    allTasksDf.loc[i, "task8"] = task["Actual_Total_Time"].to_numpy()[0]
    total += int(task["Actual_Total_Time"])
    allTasksDf.loc[i, "all_tasks"] = total
    if(task["isResponsive"].to_numpy()[0] == True):
        allTasksDf.loc[i, "group"] = "B"

#allTasksDf = allTasksDf.drop(columns=["task{i}"])

In [75]:
allTasksDf = allTasksDf.sort_values("all_tasks")
allTasksDf

Unnamed: 0,userID,task0,task1,task2,task3,task4,task5,task6,task7,task8,all_tasks,group
130,36,3250,7150,9881,5910,13390,12400,17100,17020,10820,96921,A
20,32,3994,4471,2915,7051,16741,28440,10321,39910,7950,121793,A
200,70,4574,10060,12190,6031,25960,15810,18989,27610,10460,131684,A
60,58,4340,11560,11071,16220,22540,38590,20421,20420,10928,156090,A
150,30,17191,12730,17180,9970,21701,16280,16921,41531,9373,162877,A
90,33,4600,2120,6819,4950,21896,17790,11970,54381,55260,179786,B
10,68,5242,8141,15202,4435,14831,13650,32120,38646,49680,181947,A
190,41,3201,6310,4120,6479,36301,26860,15320,72051,13641,184283,B
230,78,9690,8510,27099,5949,32371,33200,25280,21980,28561,192640,A
0,59,4560,6420,10361,10390,30771,32559,12940,52901,35621,196523,B


In [39]:
groupA = allTasksDf.loc[allTasksDf["group"] == "A"]
groupB = allTasksDf.loc[allTasksDf["group"] == "B"]

groupATime = groupA['all_tasks'].mean()
groupAStd = groupA['all_tasks'].std()
print(f"group A {str(i)} Time: {groupATime:.3f} ± {groupAStd:.3f}")

groupBTime = groupB['all_tasks'].mean()
groupBStd = groupB['all_tasks'].std()
print(f"group B {str(i)} Time: {groupBTime:.3f} ± {groupBStd:.3f}")

print(scipy.stats.ttest_ind(groupB['all_tasks'], groupA['all_tasks']))

group A 230 Time: 241807.154 ± 128591.865
group B 230 Time: 295997.455 ± 141723.123
Ttest_indResult(statistic=0.9818697568402492, pvalue=0.33683935596842807)


In [40]:
sessionsDf.loc[(sessionsDf["userID"] == 30) & (sessionsDf["sessionNumber"] == 0)]

Unnamed: 0,userID,numFrames,task,start_time,end_time,total_time,sessionNumber,isResponsive,frames,Actual_Start_Time,Actual_End_Time,Guesses,Actual_Total_Time
150,30,240,The task is to click on the 2nd article title....,1652721816735,1652721833975,17240,0,False,"[{'userID': 30, 'frameNum': 1, 'unixTime': 165...",1652721816794,1652721833985,1,17191


In [41]:
groupA


Unnamed: 0,userID,task0,task1,task2,task3,task4,task5,task6,task7,task8,all_tasks,group
130,36,3250,7150,9881,5910,13390,12400,17100,17020,10820,96921,A
70,60,3270,6630,6981,8949,16890,17030,25051,28271,223883,336955,A
20,32,3994,4471,2915,7051,16741,28440,10321,39910,7950,121793,A
40,56,4110,33331,21496,10404,24967,19651,14430,148363,38121,314873,A
60,58,4340,11560,11071,16220,22540,38590,20421,20420,10928,156090,A
200,70,4574,10060,12190,6031,25960,15810,18989,27610,10460,131684,A
10,68,5242,8141,15202,4435,14831,13650,32120,38646,49680,181947,A
110,44,5831,8560,3060,10120,35321,12710,48740,34145,67541,226028,A
230,78,9690,8510,27099,5949,32371,33200,25280,21980,28561,192640,A
80,34,13890,19251,4499,12880,33459,26190,24821,50521,150822,336333,A


In [72]:
surveyDf = pandas.read_csv("PostStudy.csv")
surveyDf["group"] = surveyDf["What is your Participant Id?"].to_numpy() % 2
surveyDf

surveyDf = surveyDf.drop_duplicates(subset=['What is your Participant Id?'])
surveyDf

Unnamed: 0,Timestamp,What is your Participant Id?,Describe the task you found the easiest? Why?,Describe the task you found the most difficult? Why?,Did you find anything confusing about the interface?,"When the application was small, was all the content easy to read? Please explain",Did you notice any changes in the application as the size of the application changed?,"If yes, please explain what you noticed.",What would you change about the experiment?,It was easy to read with the applications.,It was easy to interact with the applications.,It was enjoyable to interact with the applications.,The tasks were easy to understand,It was easy to complete the tasks.,The content of the applications was easy to see.,Less application content would make the tasks easier.,I enjoyed the overall experience in terms of enjoyment.,The augmented reality (AR) headset was comfortable to wear.,I experienced fatigue during the experience.,group
0,2022/05/16 11:35:53 AM MDT,30,Click 2nd paper tile. Easy to click.,Type Isla Vista. Virtual keyboard is not easy.,"The scroll bar, should I press? Should I drag?",No. Get too close and text will be blurry again.,Yes,The weather window is very small.,It would be nice to avoid the scroll bar.,1,2,1,1,1,1,1,1,1,7,0
1,2022/05/16 12:45:28 PM MDT,31,"clicking on the links, they were pretty big an...",the keyboard typing,no. not really,"yes, but maybe not for everyone.",Yes,"there was more detail as I came closer, like l...","Nothing, it was really cool and fun.",2,2,3,2,2,2,6,2,2,6,1
2,2022/05/16 3:45:14 PM MDT,32,"Selecting titles/author names, they were clear...","The weather-related tasks, the text was very s...",Searching for information in the smaller displ...,"No, when the application was small the content...",No,,"I don't have any suggestions, it was an intere...",5,5,4,6,6,4,5,6,6,7,0
3,2022/05/16 5:27:07 PM MDT,33,It was easiest to click on the title of things,using the keyboard was super annoying,"Nope, it seems straight forward","No, it was super far and I had to go super clo...",Yes,the temperature app became smaller,"I did not like typing on the keyboard in VR, i...",2,4,4,1,1,1,1,1,1,1,1
4,2022/05/16 6:29:53 PM MDT,34,The second time in Mode A was the easiest. I r...,The first time (in Mode A) was the hardest. I ...,No.,No. I could only distinguish letters within te...,No,,Have it be stated that each code is split up a...,3,4,2,1,3,3,4,2,2,7,0
6,2022/05/17 11:20:33 AM MDT,35,"I don't remember the exact task name, there we...",Typing isla vista was very difficult. It was h...,I thought '<' represented delete but it was to...,"No, I had to move a little closer.",No,,The buttons are very hard to press. Typing on ...,2,4,4,1,1,3,5,5,3,6,1
7,2022/05/17 12:47:50 PM MDT,36,Clicking on the article title was easiest beca...,Using the number keypad on the keyboard was th...,The scrollbar feels a bit clunky... as if it's...,"When the application was small, like the tempe...",No,,Nothing I can think of.,2,3,5,1,3,5,5,5,3,3,0
8,2022/05/17 2:19:27 PM MDT,37,clicking the articles,i guess trying to pin the right letter or numb...,not really,"yes, everything was clear and legible",Yes,they were better calibrated when the app was s...,nothing!,2,1,2,1,1,1,4,1,1,4,1
9,2022/05/17 5:22:37 PM MDT,39,selecting an article. needed only a click,entering text. It is hard to press letters,No,No. I had to get really close to them,No,,I would reduce the lag in the VR,3,6,3,2,3,3,2,6,6,1,1
10,2022/05/17 7:34:07 PM MDT,40,entering numbers,type a location. I made a mistake and backspac...,no,not really .,No,,improving keyboard. I found it hard to work with,6,4,4,1,3,6,4,3,5,7,0


In [73]:
groupA = surveyDf.loc[surveyDf["group"] == 0]
groupA = groupA[groupA.columns[9]]

groupA

0     1
2     5
4     3
7     2
10    6
11    6
12    3
16    6
18    1
21    1
22    7
23    6
24    1
Name: It was easy to read with the applications., dtype: int64

In [74]:
for i in range(9,19):
    groupA = surveyDf.loc[surveyDf["group"] == 0]
    groupB = surveyDf.loc[surveyDf["group"] == 1]

    groupAresponseDf = groupA[groupA.columns[i]]
    groupBresponseDf = groupB[groupB.columns[i]]

    groupBResponse = groupBresponseDf.mean()
    groupBStd = groupBresponseDf.std()
    print(f"group B {str(i)}: {groupBResponse:.3f} ± {groupBStd:.3f}")

    groupAResponse = groupAresponseDf.mean()
    groupAStd = groupAresponseDf.std()
    print(f"group A {str(i)}: {groupAResponse:.3f} ± {groupAStd:.3f}")

    print(scipy.stats.ttest_ind(groupBresponseDf, groupAresponseDf))

group B 9: 2.636 ± 1.206
group A 9: 3.692 ± 2.359
Ttest_indResult(statistic=-1.3406955111499796, pvalue=0.19369995144719723)
group B 10: 3.182 ± 1.471
group A 10: 3.923 ± 1.977
Ttest_indResult(statistic=-1.0249671192478496, pvalue=0.3165174595673361)
group B 11: 3.182 ± 1.168
group A 11: 3.462 ± 1.854
Ttest_indResult(statistic=-0.4323560684250719, pvalue=0.6696910874440272)
group B 12: 1.909 ± 1.446
group A 12: 2.077 ± 2.019
Ttest_indResult(statistic=-0.2299435227030463, pvalue=0.8202602091877658)
group B 13: 2.091 ± 1.221
group A 13: 3.000 ± 1.958
Ttest_indResult(statistic=-1.3336442423578072, pvalue=0.195964670719945)
group B 14: 2.636 ± 1.502
group A 14: 3.077 ± 2.060
Ttest_indResult(statistic=-0.5884743945816516, pvalue=0.5622074955285298)
group B 15: 3.636 ± 1.859
group A 15: 4.308 ± 1.437
Ttest_indResult(statistic=-0.9979937431847866, pvalue=0.32913347121473324)
group B 16: 2.818 ± 1.940
group A 16: 3.154 ± 1.772
Ttest_indResult(statistic=-0.44279099955933326, pvalue=0.6622400722