### Calculate accuracy indicators for each user

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Load the score table from the interim processing directory and preview it.
# The preview shows confusion-matrix counts (true_pos, false_pos, true_neg,
# false_neg) keyed by condition, worker, worker type, granularity and label type.
scores = pd.read_csv('../../data/interim/processing/scores.csv')
scores.head()

Unnamed: 0,condition_id,worker_id,type,granularity,label.type,true_pos,false_pos,true_neg,false_neg
0,70,01232fef-5a19-4435-8be6-c0da3b38cabd,volunteer,5_meter,Problem,27,26,186,17
1,70,01232fef-5a19-4435-8be6-c0da3b38cabd,volunteer,10_meter,Problem,27,15,76,12
2,70,01232fef-5a19-4435-8be6-c0da3b38cabd,volunteer,street,Problem,17,0,0,0
3,72,9501513f-3822-4921-861e-8f1440dee102,volunteer,5_meter,Problem,22,58,161,7
4,72,9501513f-3822-4921-861e-8f1440dee102,volunteer,10_meter,Problem,21,44,59,3


In [4]:
# combine scores for each type of label
def combine_label_types(df):
    """Sum the numeric score columns across label types.

    Rows are grouped by condition, worker, worker type and granularity, and
    every numeric column is summed within each group.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'condition_id', 'worker_id', 'type' and
        'granularity'.

    Returns
    -------
    pandas.DataFrame
        One row per group, indexed by the four grouping keys.
    """
    keys = ['condition_id', 'worker_id', 'type', 'granularity']
    # numeric_only=True keeps text columns such as 'label.type' out of the
    # result; pandas >= 2.0 would otherwise concatenate their strings.
    return df.groupby(by=keys).sum(numeric_only=True)
    
# Collapse the label types, then turn the group keys back into ordinary columns.
scores_comb = combine_label_types(scores).reset_index()
scores_comb.head(10)

Unnamed: 0,condition_id,worker_id,type,granularity,true_pos,false_pos,true_neg,false_neg
0,70,01232fef-5a19-4435-8be6-c0da3b38cabd,volunteer,10_meter,116,86,1564,54
1,70,01232fef-5a19-4435-8be6-c0da3b38cabd,volunteer,5_meter,112,136,3262,74
2,70,01232fef-5a19-4435-8be6-c0da3b38cabd,volunteer,street,76,18,140,4
3,70,A1DATRS7IUV9B3,turker,10_meter,120,110,1564,99
4,70,A1DATRS7IUV9B3,turker,5_meter,95,151,3264,132
5,70,A1DATRS7IUV9B3,turker,street,126,55,136,48
6,70,A2U9OI2A0C2DG7,turker,10_meter,172,301,1414,47
7,70,A2U9OI2A0C2DG7,turker,5_meter,153,348,3100,74
8,70,A2U9OI2A0C2DG7,turker,street,161,172,136,13
9,70,A2X3QCJK0H18T8,turker,10_meter,102,407,1370,117


In [5]:
# ratio of correct to all
def calc_accuracy(r):
    """Return the share of correct outcomes among all outcomes.

    `r` is a row-like mapping holding 'true_pos', 'true_neg', 'false_pos'
    and 'false_neg'. NaN is returned when the four counts sum to zero.
    """
    correct = r['true_pos'] + r['true_neg']
    total = correct + r['false_pos'] + r['false_neg']
    return np.nan if total == 0 else correct / total

# positive predictive value, or precision
def calc_ppv(r):
    """Return true positives divided by all positive predictions.

    Reads 'true_pos' and 'false_pos' from the row-like mapping `r`; yields
    NaN when their sum is zero.
    """
    hits = r['true_pos']
    predicted_pos = hits + r['false_pos']
    if predicted_pos != 0:
        return hits / predicted_pos
    return np.nan

# negative predictive value
def calc_npv(r):
    """Return true negatives divided by all negative predictions.

    Reads 'true_neg' and 'false_neg' from the row-like mapping `r`; yields
    NaN when their sum is zero.
    """
    rejected_ok = r['true_neg']
    predicted_neg = rejected_ok + r['false_neg']
    return rejected_ok / predicted_neg if predicted_neg != 0 else np.nan

# recall, or true positive rate
def calc_recall(r):
    """Return true positives divided by all actual positives.

    Reads 'true_pos' and 'false_neg' from the row-like mapping `r`; yields
    NaN when their sum is zero.
    """
    found = r['true_pos']
    actual_pos = found + r['false_neg']
    if actual_pos == 0:
        ratio = np.nan
    else:
        ratio = found / actual_pos
    return ratio

# specificity or true negative rate
def calc_specificity(r):
    """Return true negatives divided by all actual negatives.

    Reads 'true_neg' and 'false_pos' from the row-like mapping `r`; yields
    NaN when their sum is zero.
    """
    cleared = r['true_neg']
    actual_neg = cleared + r['false_pos']
    if actual_neg != 0:
        return cleared / actual_neg
    return np.nan

In [6]:
# Metric functions applied by calc_all; each function's name without its
# leading 'calc_' becomes the name of the column it fills.
funcs = [calc_accuracy, calc_ppv, calc_npv, calc_recall, calc_specificity]

def calc_all(df):
    """Add one column per metric in `funcs` to `df`, in place.

    Each function in the module-level `funcs` list is applied to every row
    of `df` (``axis=1``). The result is stored in a column named after the
    function, with a leading ``calc_`` removed when present.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to extend. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same `df` object, so calls can be chained. Callers that ignore
        the return value see the same in-place effect as before.
    """
    prefix = 'calc_'
    for func in funcs:
        name = func.__name__
        # Strip the prefix only when it is really there; a blind [5:] slice
        # would mangle the column name of any function not named calc_*.
        column = name[len(prefix):] if name.startswith(prefix) else name
        df[column] = df.apply(func, axis=1)
    return df
        
# calc_all assigns the metric columns directly onto scores_comb (in place),
# so no reassignment is needed before previewing the result.
calc_all(scores_comb)
scores_comb.head()

Unnamed: 0,condition_id,worker_id,type,granularity,true_pos,false_pos,true_neg,false_neg,accuracy,ppv,npv,recall,specificity
0,70,01232fef-5a19-4435-8be6-c0da3b38cabd,volunteer,10_meter,116,86,1564,54,0.923077,0.574257,0.966625,0.682353,0.947879
1,70,01232fef-5a19-4435-8be6-c0da3b38cabd,volunteer,5_meter,112,136,3262,74,0.941406,0.451613,0.977818,0.602151,0.959976
2,70,01232fef-5a19-4435-8be6-c0da3b38cabd,volunteer,street,76,18,140,4,0.907563,0.808511,0.972222,0.95,0.886076
3,70,A1DATRS7IUV9B3,turker,10_meter,120,110,1564,99,0.889593,0.521739,0.940469,0.547945,0.934289
4,70,A1DATRS7IUV9B3,turker,5_meter,95,151,3264,132,0.922295,0.386179,0.961131,0.418502,0.955783


In [7]:
# Same metrics on the per-label-type table. This mutates `scores` in place, so
# the earlier preview of `scores` no longer reflects its current columns.
# Rows whose denominator is zero get NaN for that metric.
calc_all(scores)
scores.head()

Unnamed: 0,condition_id,worker_id,type,granularity,label.type,true_pos,false_pos,true_neg,false_neg,accuracy,ppv,npv,recall,specificity
0,70,01232fef-5a19-4435-8be6-c0da3b38cabd,volunteer,5_meter,Problem,27,26,186,17,0.832031,0.509434,0.916256,0.613636,0.877358
1,70,01232fef-5a19-4435-8be6-c0da3b38cabd,volunteer,10_meter,Problem,27,15,76,12,0.792308,0.642857,0.863636,0.692308,0.835165
2,70,01232fef-5a19-4435-8be6-c0da3b38cabd,volunteer,street,Problem,17,0,0,0,1.0,1.0,,1.0,
3,72,9501513f-3822-4921-861e-8f1440dee102,volunteer,5_meter,Problem,22,58,161,7,0.737903,0.275,0.958333,0.758621,0.73516
4,72,9501513f-3822-4921-861e-8f1440dee102,volunteer,10_meter,Problem,21,44,59,3,0.629921,0.323077,0.951613,0.875,0.572816


In [9]:
# Persist both score tables, now including the metric columns.
# NOTE(review): unlike the final exports at the end of this notebook, these
# calls do not pass index=False, so the row index is written as an extra
# unnamed first column. Confirm downstream readers expect that before changing.
scores.to_csv('../../data/interim/processing/scores-acc.csv')
scores_comb.to_csv('../../data/interim/processing/scores-comb-acc.csv')

### Combine features with labels

In [10]:
# Load the interaction-event feature table and preview it. The preview shows
# condition_id, worker_id and type alongside the per-event feature columns.
events = pd.read_csv('../../data/interim/processing/events.csv')
events.head()

Unnamed: 0,condition_id,worker_id,type,Click_LabelDelete_per_pan_mean,Click_LabelDelete_per_pan_std,Click_LabelDelete_total,Click_ModeSwitch_CurbRamp_per_pan_mean,Click_ModeSwitch_CurbRamp_per_pan_std,Click_ModeSwitch_CurbRamp_total,Click_ModeSwitch_NoCurbRamp_per_pan_mean,...,ViewControl_DoubleClick_total,ViewControl_MouseDown_per_pan_mean,ViewControl_MouseDown_per_pan_std,ViewControl_MouseDown_total,ViewControl_MouseUp_per_pan_mean,ViewControl_MouseUp_per_pan_std,ViewControl_MouseUp_total,WalkTowards_per_pan_mean,WalkTowards_per_pan_std,WalkTowards_total
0,70,01232fef-5a19-4435-8be6-c0da3b38cabd,volunteer,1.0,0.0,4,2.204545,1.249736,97,1.588235,...,137,2.3952,2.899322,1497,2.3584,2.775153,1474,0,0,0
1,72,9501513f-3822-4921-861e-8f1440dee102,volunteer,1.0,0.0,1,1.943925,1.294632,208,1.425532,...,63,2.187234,1.966415,1028,2.170213,1.926354,1020,0,0,0
2,73,32f21407-253f-46ea-a01b-55bcf4ac2113,volunteer,1.166667,0.408248,7,3.247059,2.092537,276,1.4,...,33,3.468619,4.165605,1658,3.351464,4.002346,1602,0,0,0
3,74,b65c0864-7c3a-4ba7-953b-50743a2634f6,volunteer,1.333333,0.57735,4,1.874172,1.318103,283,1.323529,...,4,2.286598,2.967163,1109,2.25567,2.831456,1094,0,0,0
4,75,0bfed786-ce24-43f9-9c58-084ae82ad175,volunteer,1.0,0.0,4,3.2,2.388004,128,1.666667,...,8,4.848684,6.557701,737,4.651316,6.087943,707,0,0,0


In [26]:
# Attach the event features to both score tables. merge defaults to an inner
# join, so score rows without a matching events row are dropped.
merge_keys = ['type', 'condition_id', 'worker_id']
interaction = pd.merge(scores, events, on=merge_keys)
interaction_comb = pd.merge(scores_comb, events, on=merge_keys)
interaction_comb.head()

Unnamed: 0,condition_id,worker_id,type,granularity,true_pos,false_pos,true_neg,false_neg,accuracy,ppv,...,ViewControl_DoubleClick_total,ViewControl_MouseDown_per_pan_mean,ViewControl_MouseDown_per_pan_std,ViewControl_MouseDown_total,ViewControl_MouseUp_per_pan_mean,ViewControl_MouseUp_per_pan_std,ViewControl_MouseUp_total,WalkTowards_per_pan_mean,WalkTowards_per_pan_std,WalkTowards_total
0,70,01232fef-5a19-4435-8be6-c0da3b38cabd,volunteer,10_meter,116,86,1564,54,0.923077,0.574257,...,137,2.3952,2.899322,1497,2.3584,2.775153,1474,0,0,0
1,70,01232fef-5a19-4435-8be6-c0da3b38cabd,volunteer,5_meter,112,136,3262,74,0.941406,0.451613,...,137,2.3952,2.899322,1497,2.3584,2.775153,1474,0,0,0
2,70,01232fef-5a19-4435-8be6-c0da3b38cabd,volunteer,street,76,18,140,4,0.907563,0.808511,...,137,2.3952,2.899322,1497,2.3584,2.775153,1474,0,0,0
3,70,A1DATRS7IUV9B3,turker,10_meter,120,110,1564,99,0.889593,0.521739,...,90,6.857143,7.531887,528,6.415584,6.701042,494,0,0,0
4,70,A1DATRS7IUV9B3,turker,5_meter,95,151,3264,132,0.922295,0.386179,...,90,6.857143,7.531887,528,6.415584,6.701042,494,0,0,0


In [25]:
# Write the merged tables to the final data directory without the row index.
# NOTE(review): the recorded execution counts show this cell (25) was last run
# before the merge cell above (26); restart and run all cells in order so the
# files on disk match the merge as currently written.
interaction.to_csv('../../data/final/interaction.csv', index=False)
interaction_comb.to_csv('../../data/final/interaction-comb.csv', index=False)