# Setup and Data

In [1]:
# imports
# -- standard library
import json
import os
from datetime import date, datetime, timedelta

# -- third-party
import numpy as np
import pandas as pd

# display configuration: render floats with thousands separators and two
# decimals, padded to a 20-character field
pd.set_option('display.float_format', '{:20,.2f}'.format)

In [2]:
# Mount the appropriate Google Drive and authenticate by following the
# instructions Colab prints when this cell runs.
# Once mounted, the file system is shown on the left under the folder icon.
from google.colab import drive
drive.mount('/content/drive')

# Add the analysis folder to the module search path so that the `functions`
# module imported below can be found (presumably functions.py lives in this
# folder -- adjust the path if your Drive layout differs).
import sys
path_to_my_drive_folder = "/content/drive/My Drive/ASIST/Analysis" 
sys.path.append(path_to_my_drive_folder)

import functions as f  # import functions for processing metadata

Mounted at /content/drive


In [3]:
# Identifier strings for the different message and topic types.
# Details are documented in message_summary.md:
# https://drive.google.com/drive/u/1/folders/1QqY861SUTdQkCP2WelJbm4WNTOnlk97A

# message-type identifier
trial_identifier = '"message_type":"trial"'

# mission-level event topics
mission_identifier = '"topic":"observations/events/mission"'
score_identifier = '"topic":"observations/events/scoreboard"'
competencyTask_identifier = '"topic":"observations/events/competency/task"'

# player event topics
location_identifier = '"topic":"observations/events/player/location"'
triage_identifier = '"topic":"observations/events/player/triage"'
jumped_identifier = '"topic":"observations/events/player/jumped"'

In [4]:
# Directory that stores all .metadata files from the participant competency tests.
# Use the file system browser to copy and paste the exact path of the folder
# you wish to analyse; every file found under it is read by the analysis below.
competency_dir = '/content/drive/My Drive/ASIST/Analysis/CompetencyData' # REPLACE with your directory path

# Competency Test Data Analysis

In [36]:
# Create dataframe with task completion times: rows = task_id, columns = participant.
# Every file found under competency_dir contributes one column, named after
# the part of the file name before the first '.'.
time_columns = []
for subdir, dirs, files in os.walk(competency_dir):
  for filename in files:
    filepath = os.path.join(subdir, filename)
    participant_id = filename.split('.')[0]  # not `id`: that would shadow the builtin
    messages = f.loadMetadata(filepath)
    timing = f.competencyTestState(messages, competencyTask_identifier)
    # name the series up front instead of renaming a 'timeSpent' column afterwards
    time_columns.append(timing['timeSpent'].rename(participant_id))

# Concatenate once rather than growing the frame inside the loop.
# The leading empty frame seeds the row index with 0..15.
df = pd.concat([pd.DataFrame(index=range(0,16))] + time_columns, axis=1)

df.index.name = 'task_id'
# keep tasks 1..14 only; .copy() so the rows added below modify an independent
# frame instead of a slice of the concatenated one
df = df.loc[1:14].copy()
# NOTE: 'total' is computed here, before the rescue rows are added, so it
# covers tasks 1..14 only and does not include the two fixed rescue times.
df.loc['total'] = df.sum()

# manually add 2 tasks times for rescue skills 
# saving victims is homogeneous and predetermined in this version 
df.loc[15] = 7.5
df.loc[16] = 15

# order participant columns alphabetically
df = df.reindex(sorted(df.columns), axis=1)

In [38]:
# read skill requirements for each subtask (external file; manually created)
skill_req = f.readDfFromGSheetTab('competency_data_analysis','competency_skills')
# drop the last row of the sheet; .copy() so the column assignment below
# writes to an independent frame rather than to a slice
skill_req = skill_req.iloc[:-1].copy()
skill_req['task_id'] = skill_req['task_id'].astype(float)

# prep participant completion times
dt = df.drop('total').reset_index()
dt['task_id'] = dt['task_id'].astype(float)

# create system of equations
# 4 eqns are generated by collapsing competency tasks times by unique search skill combos assessed in each task 
x = pd.merge(skill_req, dt, on='task_id').set_index('task_id')
x = x.drop(columns=['task_name']).apply(pd.to_numeric)
eq = x.groupby('unique_skills_assessed')[x.columns[1:]].sum()  
# label-based row slice: keeps only the groups whose unique_skills_assessed
# label is >= 1 (the rows for lower labels are treated as redundant)
eq = eq.loc[1:]

# Coefficient columns: the leading columns of eq hold the per-task skill
# counts, one column per skill being estimated.
n_skills = 6
skill_cols = eq.columns[:n_skills]
# Right-hand sides: one column of summed completion times per participant.
# Taken from df so that any number of participant files is handled.
participant_cols = list(df.columns)

# Solve A @ s = t for all participants at once (one right-hand-side column
# each). The builtin float replaces the np.float alias, which NumPy removed.
coefficients = eq[skill_cols].to_numpy(dtype=float)
times = eq[participant_cols].to_numpy(dtype=float)
solution = np.linalg.solve(coefficients, times)

# rows = skills, columns = participants
score = pd.DataFrame(solution, index=skill_cols, columns=participant_cols)
score.loc['aggregate_time'] = df.loc['total']

f.writeDfToGSheetTab('competency_data_analysis', 'skill_estimates', score)
score

   Reading GSheet: competency_data_analysis > competency_skills ...
   Writing GSheet: competency_data_analysis > skill_estimates ...


Unnamed: 0,c17,c18,c19,c21,c22,c25
physical_search_walk,0.21,0.25,0.23,0.36,0.25,0.52
physical_search_obstacle,0.36,-0.39,0.17,0.11,0.87,-1.02
cognitive_search_pathing_decision,-5.55,-8.0,-2.61,-0.35,0.48,-4.78
cognitive_search_knowledge_based,2.6,1.74,3.58,7.7,2.59,16.1
rescue_green,7.5,7.5,7.5,7.5,7.5,7.5
rescue_yellow,15.0,15.0,15.0,15.0,15.0,15.0
aggregate_time,189.49,170.44,182.25,388.75,229.6,393.24


# Interpretation of Skill Estimates
- Each cell is a skill index: the estimated time impact (in seconds) on performance for each instance in which that skill is used during a task.
- Example 1: walk = 0.36 means this player requires an additional 0.36 s for each step they need to take.
- Example 2: obstacle = -0.39 means this player saves 0.39 s for each obstacle they jump over (this may indicate they are faster at jumping over 3-4 blocks than at walking 3-4 blocks).
- Note that rescue skills are fixed by task design. In the future they may be varied across participants.