In [1]:
import pandas as pd
import numpy as np
from pandas import *
from scipy import stats
import os
import re
import math

In [2]:
# Load the raw p10 CSV export into a DataFrame.
# The path is relative, so it resolves against the notebook's working directory.
file = '../../../../Google Drive File Stream/My Drive/USC Expeditions Year 5/Analysis/Help-Seeking/Data/p10_data1-2.csv'
data = pd.read_csv(file)

  interactivity=interactivity, compiler=compiler, result=result)


In [3]:
# (rows, columns) of the frame as loaded from the CSV
data.shape

(477281, 1553)

In [4]:
# keep only the rows whose engagement label is exactly 'engaged'
engaged = data[data['engagement'].eq('engaged')]

In [5]:
# (rows, columns) of the engaged-only subset
engaged.shape

(244603, 1553)

In [6]:
# List the non open face/open pose columns, i.e. every column whose name
# contains neither the substring 'op' nor the substring 'of'.
non_pose_columns = [name for name in data.columns if not ('op' in name or 'of' in name)]

for name in non_pose_columns:
    print(name)

activity
difficulty
engagement
game_correct
game_incorrect
game_start
mistake_made
participant
ros_PARTICIPANT_STATE
ros_ROBOT_STATE
session_date
session_num
timestamp
transcript_spk_0
transcript_spk_1
transcript_spk_2


In [7]:
# Add a `skill` column derived from the activity id.
# Activities 1-6 map to 'NC', 7-10 to 'OS' and 16 to 'EM'; any activity id
# without an entry here becomes NaN in the new column.
act_to_skill = {
    **dict.fromkeys(range(1, 7), 'NC'),
    **dict.fromkeys(range(7, 11), 'OS'),
    16: 'EM',
}

data['skill'] = data['activity'].map(act_to_skill)

In [8]:
# Put rows in chronological order (session number first, then timestamp);
# the feature extraction below walks the rows in this order.
sort_keys = ['session_num', 'timestamp']
data = data.sort_values(by=sort_keys, ascending=True)

In [9]:
# Create the engineered-feature columns with their starting values:
# 'aptitude' starts at 0.5, every other feature starts at 0.0.
fe_columns = [
    'ts_robot_talked',
    'ts_game_start',
    'ts_attempt',
    'games_total',
    'games_session',
    'mistakes_total',
    'mistakes_session',
    'mistakes_game',
    'aptitude',
]

for column in fe_columns:
    data[column] = 0.5 if column == 'aptitude' else 0.0

In [10]:
# Feature extraction.
#
# Walks the rows in their current order (sorted by session_num, then timestamp
# in an earlier cell) while keeping running state, and writes these columns
# back into `data` row by row:
#   ts_robot_talked   - time since the last row carrying a robot state string
#   ts_game_start     - time since the current (unfinished) game started
#   ts_attempt        - time since the last game start or mistake
#   games_total / games_session          - running game counts
#   mistakes_total / mistakes_session / mistakes_game - running mistake counts
#   aptitude          - 0.5 * ((1 - mistakes_game / 5) + difficulty / 5)

# session the running state currently belongs to
last_session = 0.0

# time since robot talked
last_robot = 0.0

# time since game started (elapsed time on current attempt)
game_started = False
last_game_start = 0.0
last_game_finished = 0.0

# games played
# NOTE(review): the cumulative counters start at 128 / 182 instead of 0 --
# presumably totals carried over from data processed elsewhere; confirm.
games_total = 128
games_session = 0

# mistakes
mistakes_total = 182
mistakes_session = 0
mistakes_game = 0

# time since last record attempt
last_record = 0.0

# Only these columns are read inside the loop. Iterating over this narrow
# copy avoids building a Series of every column of `data` for each row; all
# writes still go to `data` itself through `data.at`.
loop_columns = [
    'session_num',
    'timestamp',
    'ros_ROBOT_STATE',
    'game_start',
    'mistake_made',
    'game_correct',
    'game_incorrect',
    'difficulty',
]

# iterate through all rows, tracking new features
for i, r in data[loop_columns].iterrows():
    # a change in session number marks a new session: reset all per-session
    # state (the *_total counters deliberately keep running)
    if last_session != r['session_num']:
        last_session = r['session_num']

        last_robot = 0.0

        game_started = False
        last_game_start = 0.0
        last_game_finished = 0.0

        games_session = 0
        mistakes_session = 0
        mistakes_game = 0

        last_record = 0.0

    # time since robot talked: a row with a string robot state restarts the
    # clock and keeps the column's initial 0.0; every other row gets the
    # elapsed time (measured from 0.0 until the robot first talks in a session)
    if isinstance(r['ros_ROBOT_STATE'], str):
        last_robot = r['timestamp']
    else:
        data.at[i, 'ts_robot_talked'] = r['timestamp'] - last_robot

    # a game starts on this row
    if r['game_start'] == 1:
        last_game_start = r['timestamp']
        game_started = True

    # we have a recorded attempt (a game start or a mistake)
    if (r['game_start'] == 1) or (r['mistake_made'] == 1):
        last_record = r['timestamp']

    # only rows inside an unfinished game get elapsed times; between games
    # both columns keep their initial 0.0
    if game_started and (last_game_finished <= last_game_start):
        data.at[i, 'ts_game_start'] = r['timestamp'] - last_game_start
        data.at[i, 'ts_attempt'] = r['timestamp'] - last_record

    # the game ends on this row; recorded after the elapsed times above so the
    # finishing row itself still counts as part of the game
    game_over = (r['game_correct'] == 1) or (r['game_incorrect'] == 1)
    if game_over:
        last_game_finished = r['timestamp']

    # games total calculations
    if r['game_start'] == 1:
        games_total += 1
        games_session += 1
    data.at[i, 'games_total'] = games_total
    data.at[i, 'games_session'] = games_session

    # mistakes calculations
    if r['mistake_made'] == 1:
        mistakes_total += 1
        mistakes_session += 1
        mistakes_game += 1

    data.at[i, 'mistakes_total'] = mistakes_total
    data.at[i, 'mistakes_session'] = mistakes_session
    data.at[i, 'mistakes_game'] = mistakes_game

    # aptitude calculation; rows without a difficulty keep the initial value
    difficulty = r['difficulty']
    if not math.isnan(difficulty):
        d = difficulty / 5.0
        m = mistakes_game / 5.0
        apt = 0.5 * ((1.0 - m) + d)
        data.at[i, 'aptitude'] = apt

    # The finished game's mistakes have been written to this row, so start the
    # next game from zero. (Previously this assigned to an unused name,
    # `game_mistakes`, so the per-game counter was never cleared.)
    if game_over:
        mistakes_game = 0

In [11]:
# show the cumulative game / mistake counters stored on the final row
print(data['games_total'].iloc[-1:])
print(data['mistakes_total'].iloc[-1:])

58447    350.0
Name: games_total, dtype: float64
58447    561.0
Name: mistakes_total, dtype: float64


In [12]:
# Final column layout: session/label columns and the engineered features
# first, then the raw event, ROS-state and transcript columns ...
col_order = [
    'participant', 'session_num', 'session_date', 'timestamp',
    'engagement', 'activity', 'skill', 'difficulty', 'aptitude',
    'games_total', 'games_session',
    'mistakes_total', 'mistakes_session', 'mistakes_game',
    'ts_robot_talked', 'ts_game_start', 'ts_attempt',
    'game_start', 'game_correct', 'game_incorrect', 'mistake_made',
    'ros_PARTICIPANT_STATE', 'ros_ROBOT_STATE',
    'transcript_spk_0', 'transcript_spk_1', 'transcript_spk_2',
]

# ... followed by every column whose name contains 'of' or 'op', kept in
# their existing relative order.
other_cols = [name for name in data.columns if 'of' in name or 'op' in name]

col_order = [*col_order, *other_cols]

data = data[col_order]

In [13]:
# (rows, columns) after adding `skill` and the feature columns and reordering
data.shape

(477281, 1563)

In [None]:
# Write the feature-augmented frame to a new CSV next to the input file.
# index=False leaves the DataFrame index out of the output.
path_data = '../../../../Google Drive File Stream/My Drive/USC Expeditions Year 5/Analysis/Help-Seeking/Data/p10_data2-2.csv'

data.to_csv(path_data, index=False)