In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

from trace_utils import *

In [2]:
# Load both survey frames returned by the trace_utils helper and report
# how many rows each one holds.
d_survey, d_survey_in_el = load_survey_dfs()
for label, frame in (
    ('Num Survey Responses:', d_survey),
    ('Num Survey Responses in EL:', d_survey_in_el),
):
    print(label, len(frame))

Num Survey Responses: 35948
Num Survey Responses in EL: 32792


In [3]:
# Load the click traces; 'rs3v3' is handed to the loader unchanged (its
# meaning is defined in trace_utils).
d_click_traces = load_click_trace_data('rs3v3')
n_click_traces = len(d_click_traces)
print('Num Click Traces:', n_click_traces)

Num Click Traces: 49591


In [4]:
# Inner-join the survey responses (keyed by 'token') with the click traces
# (keyed by 'survey_token'); rows without a match on both sides are dropped.
df = d_survey_in_el.merge(
    d_click_traces,
    left_on='token',
    right_on='survey_token',
    how='inner',
)
print('Num Responses with a trace', len(df))

Num Responses with a trace 29816


In [5]:
# Flatten the per-row click record into two plain columns and remove the
# record column itself.
click_payload = df.pop('click_data')
df['click_dt_utc'] = click_payload.map(lambda rec: rec['timestamp'])
df['click_title'] = click_payload.map(lambda rec: rec['title'])

# 'token' equals 'survey_token' on every row after the inner merge, so it is
# dropped as redundant.
df.drop(columns=['token'], inplace=True)

# Give the remaining raw columns more descriptive names.
column_renames = {
    'submit_timestamp': 'survey_submit_dt',
    'key': 'client_token',
    'requests': 'trace_data',
}
df.rename(columns=column_renames, inplace=True)

In [6]:
# parse all dates
# Both timestamp columns are parsed with the same 'YYYY-MM-DD HH:MM:SS'
# format; strptime raises ValueError on any value that does not match it.
# NOTE(review): in the saved output at the end of this notebook,
# survey_submit_dt (e.g. 2016-02-29 16:17:07) is roughly 8 hours behind the
# matching click_dt_utc (2016-03-01 00:15:45), so the two columns look like
# they are in different timezones -- confirm before comparing them.
dt_format = '%Y-%m-%d %H:%M:%S'
for dt_col in ('click_dt_utc', 'survey_submit_dt'):
    df[dt_col] = df[dt_col].apply(lambda s: datetime.datetime.strptime(s, dt_format))

# Parse the timestamps inside each trace with the trace_utils helper.
df['trace_data'] = df['trace_data'].apply(parse_trace_ts)

# Keep only rows whose parsed trace is non-empty. The explicit .copy() makes
# the result an independent frame, so the column assignments in the following
# cells do not raise pandas' SettingWithCopyWarning on a filtered slice.
has_trace = df['trace_data'].apply(len) > 0
df = df[has_trace].copy()

In [7]:
# Run the sessionize helper over every row's trace and store the result in a
# new 'sessions' column.
trace_column = df['trace_data']
df['sessions'] = trace_column.apply(sessionize)

In [8]:
# Apply get_click_session to each row (axis='columns' is the row-wise mode,
# same as axis=1) and rebuild the frame from the returned rows.
# The saved output shows a 'click_session' column afterwards, presumably
# added by the helper -- confirm in trace_utils.
df = df.apply(get_click_session, axis='columns')

In [9]:
# Run trace_ts_to_str over every trace before the frame is written to disk;
# presumably this turns the parsed 'ts' values back into strings -- confirm
# in trace_utils.
parsed_traces = df['trace_data']
df['trace_data'] = parsed_traces.apply(trace_ts_to_str)

In [10]:
# Write the merged frame as a tab-separated file without the index; datetime
# columns are serialized as 'YYYY-MM-DD HH:MM:SS'.
df.to_csv(
    '../../data/responses_with_traces.tsv',
    sep='\t',
    index=False,
    date_format='%Y-%m-%d %H:%M:%S',
)

In [11]:
# Load the responses-with-traces data through the trace_utils helper into a
# separate frame (presumably the TSV written in the previous cell -- confirm).
df2 = load_responses_with_traces()

In [12]:
# Preview the first five rows of the reloaded frame.
df2.head(5)

Unnamed: 0,survey_submit_dt,raw_information_depth,raw_prior_knowledge,raw_motivation,information depth,prior knowledge,motivation,host,geo_data,client_token,trace_data,ua_data,survey_token,click_dt_utc,click_title,sessions,click_session
0,2016-02-29 16:17:07,look up a specific fact or to get a quick answer.,I was already familiar with the topic.,the topic came up in a conversation.,fact,familiar,conversation,mobile,"{'city': 'São Paulo', 'latitude': '-23.5475', ...",102c3643440ee9685fd7c80c530f263ab2437550Mozill...,"[{'ts': 2016-03-01 00:14:59, 'lang': 'en', 'pr...","{'os_major': '5', 'os_family': 'Android', 'bro...",0707bbe2a7e3ecbb,2016-03-01 00:15:45,Leonardo_DiCaprio,"[[{'ts': '2016-03-01 00:14:59', 'referer_class...","[{'ts': '2016-03-01 00:14:59', 'referer_class'..."
1,2016-02-29 16:17:46,look up a specific fact or to get a quick answer.,I was not familiar with the topic and I am lea...,Writing story and need a specific fact,fact,unfamiliar,other,mobile,"{'city': 'Newport', 'latitude': '34.7766', 'co...",b7a409892837396f45d85fe51f53ab1a5a150bf3Mozill...,"[{'ts': 2016-03-01 00:15:13, 'lang': 'en', 'pr...","{'os_major': '9', 'os_family': 'iOS', 'browser...",c662db883c7d389c,2016-03-01 00:15:21,"Methoni,_Messenia","[[{'ts': '2016-03-01 00:15:13', 'referer_class...","[{'ts': '2016-03-01 00:15:13', 'referer_class'..."
2,2016-02-29 16:17:48,get an overview of the topic.,I was already familiar with the topic.,I am bored or randomly exploring Wikipedia for...,overview,familiar,bored/random,mobile,"{'city': 'Point Pleasant Beach', 'latitude': '...",8f12b97665f452ba83dc14b774d17dfd2f35207cMozill...,"[{'ts': 2016-03-01 00:00:47, 'lang': 'en', 'pr...","{'os_major': '9', 'os_family': 'iOS', 'browser...",0e7f539e27dcf10f,2016-03-01 00:16:55,Micropenis,"[[{'ts': '2016-03-01 00:00:47', 'referer_class...","[{'ts': '2016-03-01 00:00:47', 'referer_class'..."
3,2016-02-29 16:17:58,look up a specific fact or to get a quick answer.,I was not familiar with the topic and I am lea...,I am bored or randomly exploring Wikipedia for...,fact,unfamiliar,bored/random,desktop,"{'city': 'Woburn', 'latitude': '42.4897', 'cou...",c83237a436c2c62a8fab26d7b0bac7a5b48817f3Mozill...,"[{'ts': 2016-03-01 00:17:23, 'lang': 'en', 'pr...","{'os_major': '-', 'os_family': 'Windows 8.1', ...",8cb0d465ee08e2c6,2016-03-01 00:17:30,Moons_of_Uranus,"[[{'ts': '2016-03-01 00:17:23', 'referer_class...","[{'ts': '2016-03-01 00:17:23', 'referer_class'..."
4,2016-02-29 16:18:04,get an in-depth understanding of the topic.,I was already familiar with the topic.,I want to know more about a current event (e.g...,in-depth,familiar,current event|intrinsic learning,mobile,"{'city': 'Unknown', 'latitude': '53.55', 'coun...",733934449598ce695e955fcfd050f5d5c610ed12Mozill...,"[{'ts': 2016-03-01 00:16:18, 'lang': 'en', 'pr...","{'os_major': '8', 'os_family': 'iOS', 'browser...",42be8c727b522606,2016-03-01 00:16:33,Patrick_Maroon,"[[{'ts': '2016-03-01 00:16:18', 'referer_class...","[{'ts': '2016-03-01 00:16:18', 'referer_class'..."
