### Collect events for each volunteer user

In [1]:
import psycopg2 as pg
import numpy as np
import pandas as pd
import datetime
import logging
import math

In [2]:
# Settings for the Sidewalk PostgreSQL database; every query helper in this
# notebook reads through the resulting module-level `connection`.
DB_PARAMS = {
    "database": "sidewalk",
    "user": "sidewalk",
    "password": "sidewalk",
    "host": "localhost",
    "port": "5432",
}
connection = pg.connect(**DB_PARAMS)

In [3]:
# Volunteer identifiers: one row per worker with its condition_id, worker_id
# and worker_id_type (the type is used later to keep only 'user_id' rows).
vol_ids = pd.read_csv('../../data/interim/vol-ids.csv')
# Preview the first rows.
vol_ids.head()

Unnamed: 0,condition_id,worker_id,worker_id_type
0,70,01232fef-5a19-4435-8be6-c0da3b38cabd,user_id
1,72,9501513f-3822-4921-861e-8f1440dee102,user_id
2,73,32f21407-253f-46ea-a01b-55bcf4ac2113,user_id
3,74,b65c0864-7c3a-4ba7-953b-50743a2634f6,user_id
4,75,0bfed786-ce24-43f9-9c58-084ae82ad175,user_id


In [4]:
# Names of the interaction event types, taken from the 'event_type' column
# of the interim CSV and kept as a plain Python list.
event_types = (
    pd.read_csv('../../data/interim/event-types.csv')['event_type']
    .tolist()
)
event_types[:5]

['Click_LabelDelete',
 'Click_ModeSwitch_CurbRamp',
 'Click_ModeSwitch_NoCurbRamp',
 'Click_ModeSwitch_NoSidewalk',
 'Click_ModeSwitch_Obstacle']

In [5]:
# Ordered list of feature column names, taken from the 'feature' column of
# the interim CSV; this order defines the column order of the output table.
feature_names = (
    pd.read_csv('../../data/interim/feature-names.csv')['feature']
    .tolist()
)
feature_names[:5]

['Click_LabelDelete_per_pan_mean',
 'Click_LabelDelete_per_pan_std',
 'Click_LabelDelete_total',
 'Click_ModeSwitch_CurbRamp_per_pan_mean',
 'Click_ModeSwitch_CurbRamp_per_pan_std']

In [6]:
'''
Queries to retrieve volunteer interactions
'''

def get_audit_tasks(user_id):
    """Return the audit task ids recorded for one user.

    Args:
        user_id: Value matched against ``audit_task.user_id``.

    Returns:
        A DataFrame with a single ``audit_task_id`` column, one row per
        matching audit task (empty when the user has none).
    """
    query = '''
        SELECT audit_task_id
        FROM audit_task
        WHERE user_id=%(user_id)s
    '''
    # Let the database driver quote the value instead of pasting it into the
    # SQL text; str() keeps the same textual value the old interpolation
    # produced, so the server-side comparison is unchanged.
    return pd.read_sql(query, connection, params={'user_id': str(user_id)})

def get_audit_interactions(audit_task_id):
    """Return every interaction row logged for one audit task.

    Args:
        audit_task_id: Value matched against
            ``audit_task_interaction.audit_task_id``.

    Returns:
        A DataFrame holding all columns of the matching
        ``audit_task_interaction`` rows.
    """
    query = '''
        SELECT *
        FROM audit_task_interaction
        WHERE audit_task_id=%(audit_task_id)s
    '''
    # Bind the id as a driver-quoted parameter rather than formatting it into
    # the SQL text. It is passed as str() because callers hand over numpy
    # scalars, which the driver cannot adapt directly; the quoted literal is
    # the same one the old interpolation produced.
    return pd.read_sql(
        query, connection, params={'audit_task_id': str(audit_task_id)})

In [7]:
'''
Find the total, mean, and std of each event type
'''

def get_action_counts(audit_inter):
    """Summarise how often each event type occurs in a set of interactions.

    For every name in the module-level ``event_types`` list, the rows whose
    ``action`` equals that name are selected and counted per
    ``gsv_panorama_id``.

    Args:
        audit_inter: DataFrame of interactions with at least the columns
            ``action`` and ``gsv_panorama_id``.

    Returns:
        A dict with three entries per event type:
        ``<action>_total`` (number of matching rows),
        ``<action>_per_pan_mean`` and ``<action>_per_pan_std`` (mean and
        standard deviation of the per-panorama counts, 0 when undefined).
    """
    def _zero_if_nan(value):
        # Too few per-panorama counts yield NaN; report 0 instead.
        return 0 if math.isnan(value) else value

    features = {}

    for action in event_types:
        matching_rows = audit_inter[audit_inter['action'] == action]
        per_pan_counts = matching_rows.groupby('gsv_panorama_id').size()

        features[action + '_total'] = len(matching_rows)
        features[action + '_per_pan_mean'] = _zero_if_nan(per_pan_counts.mean())
        features[action + '_per_pan_std'] = _zero_if_nan(per_pan_counts.std())

    return features

In [8]:
'''
Get features for a volunteer user
'''

def get_features(condition_id, user_id):
    """Build the one-row feature table for a single volunteer.

    All interactions of all audit tasks of the user are gathered, summarised
    with ``get_action_counts`` and laid out in the order given by the
    module-level ``feature_names`` list.

    Args:
        condition_id: Stored in the ``condition_id`` column of the result.
        user_id: Used to look up the audit tasks and stored in the
            ``worker_id`` column of the result.

    Returns:
        A DataFrame with one row and the columns ``condition_id``,
        ``worker_id`` followed by ``feature_names``; ``None`` (after logging
        a warning) when the user has no audit tasks.
    """
    audit_task_ids = get_audit_tasks(user_id)

    # Each entry of .values is a one-element row holding the task id.
    interaction_frames = [
        get_audit_interactions(task_row[0])
        for task_row in audit_task_ids.values
    ]

    if not interaction_frames:
        logging.warning('SKIPPING worker_id because no iteraction: %s', user_id)
        return None

    # One concat replaces the repeated DataFrame.append calls: append was
    # removed in pandas 2.0 and copied the accumulated frame on every call.
    all_audit_interactions = pd.concat(interaction_frames)
    action_counts = get_action_counts(all_audit_interactions)

    feature_list = [action_counts[feature] for feature in feature_names]

    # Assigning the row through .loc on an empty frame keeps the values as
    # they are instead of letting the constructor infer column dtypes.
    feature_df = pd.DataFrame(columns=(['condition_id', 'worker_id'] + feature_names))
    feature_df.loc[len(feature_df)] = [condition_id, user_id] + feature_list
    return feature_df

In [9]:
'''
Get features for each volunteer
'''

# exclude ip_address ids
selected_vols = vol_ids[vol_ids['worker_id_type'] == 'user_id']

# get_features returns None for a user without audit tasks; such users are
# left out here rather than being handed to the concatenation below.
per_user_features = []
for i, user in selected_vols.iterrows():
    f = get_features(user['condition_id'], user['worker_id'])
    if f is not None:
        per_user_features.append(f)

if per_user_features:
    # Single concat instead of repeated DataFrame.append (removed in pandas 2.0).
    features = pd.concat(per_user_features)
else:
    # No user produced features: keep an empty table with the expected columns
    # so the following cells still work.
    features = pd.DataFrame(columns=(['condition_id', 'worker_id'] + feature_names))

# Every per-user frame carries row label 0; reset_index turns those labels
# into an 'index' column (removed again before export) and renumbers the rows.
features.reset_index(inplace=True)

In [10]:
# Preview the first rows of the collected feature table.
features.head()

Unnamed: 0,index,condition_id,worker_id,Click_LabelDelete_per_pan_mean,Click_LabelDelete_per_pan_std,Click_LabelDelete_total,Click_ModeSwitch_CurbRamp_per_pan_mean,Click_ModeSwitch_CurbRamp_per_pan_std,Click_ModeSwitch_CurbRamp_total,Click_ModeSwitch_NoCurbRamp_per_pan_mean,...,ViewControl_DoubleClick_total,ViewControl_MouseDown_per_pan_mean,ViewControl_MouseDown_per_pan_std,ViewControl_MouseDown_total,ViewControl_MouseUp_per_pan_mean,ViewControl_MouseUp_per_pan_std,ViewControl_MouseUp_total,WalkTowards_per_pan_mean,WalkTowards_per_pan_std,WalkTowards_total
0,0,70,01232fef-5a19-4435-8be6-c0da3b38cabd,1.0,0.0,4,2.204545,1.24974,97,1.58824,...,137,2.3952,2.899322,1497,2.3584,2.775153,1474,0,0,0
1,0,72,9501513f-3822-4921-861e-8f1440dee102,1.0,0.0,1,1.943925,1.29463,208,1.42553,...,63,2.187234,1.966415,1028,2.170213,1.926354,1020,0,0,0
2,0,73,32f21407-253f-46ea-a01b-55bcf4ac2113,1.16667,0.408248,7,3.247059,2.09254,276,1.4,...,33,3.468619,4.165605,1658,3.351464,4.002346,1602,0,0,0
3,0,74,b65c0864-7c3a-4ba7-953b-50743a2634f6,1.33333,0.57735,4,1.874172,1.3181,283,1.32353,...,4,2.286598,2.967163,1109,2.25567,2.831456,1094,0,0,0
4,0,75,0bfed786-ce24-43f9-9c58-084ae82ad175,1.0,0.0,4,3.2,2.388,128,1.66667,...,8,4.848684,6.557701,737,4.651316,6.087943,707,0,0,0


In [11]:
# Number of feature rows collected; compare with len(selected_vols) to see
# whether any volunteer was skipped.
len(features)

44

In [12]:
# Number of volunteers that were selected for feature extraction.
len(selected_vols)

44

In [13]:
# Drop the helper 'index' column created by reset_index; it only holds the
# old per-frame row labels and is not a feature.
del features['index']
# Write the feature table without the DataFrame row index.
features.to_csv('../../data/interim/collected/vol-features.csv', index=False)