In [106]:
import pandas as pd
import numpy as np
import json
import ast

%matplotlib inline

In [107]:
# Lookup tables from the short question ids used in the flattened responses
# ('fu_ques_N' / 'pro_ques_N') to the full question text shown to respondents.
follow_up_ques = dict(
    fu_ques_0='Do you use ad blocking software on your computer?',
    fu_ques_1='How relevant was the ad to you?',
    fu_ques_2='Do you generally find that ads are relevant to you?',
    fu_ques_3='How important is it to you that ads help you find products you are interested in?',
    fu_ques_4='Did the ad you saw help you find a product you are interested in?',
    fu_ques_5='Do you think this ad technology can reliably help you find products you are interested in?',
    fu_ques_6='Do you have a smart phone?',
    fu_ques_7='How often do you make purchases using your mobile phone?',
    fu_ques_8='How often do you make purchases using your computer?',
)

profile_ques = dict(
    pro_ques_0='What is your age?',
    pro_ques_1='What is your gender?',
    pro_ques_2='What type of environment do you most identify with?',
)

In [108]:
# Load the survey export: one JSON document per line.
# The file is read inside a `with` block so the handle is always closed, and
# blank lines are skipped instead of blindly dropping the last split element,
# so a file without a trailing newline no longer loses its final record.
with open('surveyResponses.json') as responseFile:
    rawData = [line for line in responseFile.read().split('\n') if line.strip()]
surveyResponses = [json.loads(res) for res in rawData]

### Process Mouse Movement

In [109]:
def calcDist(x, y):
    """Return the total path length through the points (x[i], y[i]).

    The length is the sum of Euclidean distances between every pair of
    consecutive points, starting with the segment from point 0 to point 1.

    Parameters
    ----------
    x, y : sequence of numbers
        Coordinates of the recorded positions, in order.

    Returns
    -------
    float
        The path length; 0.0 when fewer than two points are given, and
        ``np.nan`` when ``x`` and ``y`` differ in length.
    """
    # Mismatched coordinate lists cannot be paired up; flag as missing.
    if len(x) != len(y):
        return np.nan

    # np.diff yields one step per consecutive pair (empty for < 2 points),
    # so every segment is counted, including the first one.
    stepX = np.diff(np.asarray(x, dtype=float))
    stepY = np.diff(np.asarray(y, dtype=float))
    return float(np.hypot(stepX, stepY).sum())

In [110]:
def _parsePositions(raw):
    """Turn a comma-separated coordinate string into a list of floats.

    Values that are not strings (for example a list produced by an earlier
    run of this cell) are returned unchanged, so re-running the cell does not
    fail on already-converted data.
    """
    if isinstance(raw, str):
        return [float(num) for num in raw.split(',')]
    return raw


# Convert the recorded mouse positions in place and store the travelled
# distance under 'dist'; responses without mouse data get NaN.
for response in surveyResponses:
    if 'mouse_x_pos' in response:
        response['mouse_x_pos'] = _parsePositions(response['mouse_x_pos'])
        response['mouse_y_pos'] = _parsePositions(response['mouse_y_pos'])
        response['dist'] = calcDist(response['mouse_x_pos'], response['mouse_y_pos'])
    else:
        response['dist'] = np.nan

### Process Responses

In [112]:
def _parseAnswers(raw, prefix):
    """Parse a '{key:value,key:value,}' string into a dict with prefixed keys.

    The element after the final comma is discarded, as in the original parsing.
    Each item is split on its first colon only, so a value that itself
    contains ':' is kept intact instead of raising ValueError.
    NOTE(review): values containing ',' would still be split apart; confirm
    the export never produces them.
    """
    items = raw.strip('{}').split(',')[:-1]
    return dict((prefix + item).split(':', 1) for item in items)


def processQuestions(response):
    """Flatten the question fields of one survey response into a single dict.

    Parameters
    ----------
    response : dict
        Must provide 'ad_cats' and 'products' (strings holding Python dict
        literals) and 'follow_up' and 'profile' ('{key:value,...,}' strings).

    Returns
    -------
    dict
        The ad-category and product entries, the follow-up answers with keys
        prefixed by 'fu_', and the profile answers with keys prefixed by
        'pro_', merged in that order (later entries win on key collisions).
    """
    ad_cats = ast.literal_eval(response['ad_cats'])
    products = ast.literal_eval(response['products'])

    follow_ups = _parseAnswers(response['follow_up'], 'fu_')
    # Answers of the form '<code> - <label>' are reduced to the part before
    # the first ' - '.
    for key in follow_ups:
        if ' - ' in follow_ups[key]:
            follow_ups[key] = follow_ups[key].split(' - ')[0]

    profile = _parseAnswers(response['profile'], 'pro_')

    return {**ad_cats, **products, **follow_ups, **profile}

In [113]:
# Top-level response fields copied verbatim into the flattened table.
keysToKeep = ['date', 'ad', 'ad_type', 'primed', 'browser_size', 'dist', 'result']

# Question columns: the sorted union of the keys produced for every response
# that has a profile (the same filter used when flattening). This avoids
# depending on one hard-coded response index being present and complete.
otherKeys = sorted({
    key
    for res in surveyResponses
    if 'profile' in res
    for key in processQuestions(res)
})

In [114]:
# One flat record per response id, combining the kept top-level fields with
# the parsed question answers. Responses without a 'profile' are skipped.
flattened = {}

for res in surveyResponses:
    if 'profile' not in res:
        continue

    resID = res['__key__']['id']
    # Missing top-level fields are recorded as the string 'NA'.
    resData = {key: res.get(key, 'NA') for key in keysToKeep}
    ques = processQuestions(res)

    flattened[resID] = {**resData, **ques}
    

In [115]:
# Build the response table (one row per response id) with a fixed column order.
data = pd.DataFrame.from_dict(flattened, orient='index')[keysToKeep + otherKeys]

# Replace the short question ids with the full question text; columns that are
# not question ids keep their name. Follow-up ids take precedence over profile
# ids if a key were ever present in both tables.
questionText = {**profile_ques, **follow_up_ques}
origCols = [questionText.get(col, col) for col in data.columns]

data.columns = origCols

In [116]:
# Persist the flattened response table next to the notebook.
data.to_csv('surveyResponses.csv')

In [27]:
# Inspect the final column names.
# NOTE(review): the saved output below does not list 'result' although it is in
# keysToKeep; it appears to come from an earlier execution - re-run to refresh.
data.columns

Index(['date', 'ad', 'ad_type', 'primed', 'browser_size', 'dist', 'Air Travel',
       'Eyewear', 'Food and Drinks', 'Headphones', 'Men's Fashion',
       'None of the above', 'None of these products', 'Soda', 'TV Shows',
       'Women's Fashion', 'Do you use ad blocking software on your computer?',
       'How relevant was the ad to you?',
       'Do you generally find that ads are relevant to you?',
       'How important is it to you that ads help you find products you are interested in?',
       'Did the ad you saw help you find a product you are interested in?',
       'Do you think this ad technology can reliably help you find products you are interested in?',
       'Do you have a smart phone?',
       'How often do you make purchases using your mobile phone?',
       'How often do you make purchases using your computer?',
       'What is your age?', 'What is your gender?',
       'What type of environment do you most identify with?'],
      dtype='object')

## Summarize Data

In [120]:
# Constant helper column: counting it after a groupby gives the rows per group.
data['val'] = 1

#### CTR

In [146]:
# A response counts as a click whenever its result is anything other than 'skip'.
click = (data['result'] != 'skip').tolist()
data['click'] = click

# Rows per ad_type (denominator) and rows per (ad_type, result) (numerator);
# their ratio is the share of each result within an ad_type.
dataCounts = data[['ad_type', 'val']].groupby(['ad_type']).count()
resultCounts = data[['ad_type', 'result', 'val']].groupby(['ad_type', 'result']).count()
ctrByAdType = resultCounts / dataCounts

In [147]:
# Share of each result within every ad_type.
ctrByAdType

Unnamed: 0_level_0,Unnamed: 1_level_0,val
ad_type,result,Unnamed: 2_level_1
new,fashion,0.018868
new,main_ad,0.232704
new,skip,0.716981
new,soda,0.031447
old,main_ad,0.243243
old,skip,0.756757


In [141]:
# NOTE(review): the saved output below is indexed by (ad, ad_type), which no
# visible definition of dataCounts produces; it appears to come from an earlier
# version of the cell - re-run to refresh.
dataCounts

Unnamed: 0_level_0,Unnamed: 1_level_0,val
ad,ad_type,Unnamed: 2_level_1
Air Travel,new,47
Air Travel,old,25
Food and Drinks,new,46
Food and Drinks,old,26
TV Shows,new,66
TV Shows,old,23


In [130]:
# NOTE(review): the saved output below holds raw counts indexed by
# (ad_type, click), which does not match the visible definition of ctrByAdType
# (shares per result); it appears to come from an earlier execution.
ctrByAdType

Unnamed: 0_level_0,Unnamed: 1_level_0,val
ad_type,click,Unnamed: 2_level_1
new,False,114
new,True,45
old,False,56
old,True,18


#### Quadrant Summaries

In [45]:
# Question pair for the importance / effectiveness grouping.
imp_eff_cols = [
    'How important is it to you that ads help you find products you are interested in?',
    'Did the ad you saw help you find a product you are interested in?',
]
# Group label per row: both answers, as strings, concatenated in column order.
impAnswers, effAnswers = (data[col].astype(str) for col in imp_eff_cols)
data['Imp-Eff Grp'] = impAnswers + effAnswers

In [44]:
# Question pair for the importance / belief grouping.
imp_bel_cols = [
    'How important is it to you that ads help you find products you are interested in?',
    'Do you think this ad technology can reliably help you find products you are interested in?',
]
# Group label per row: both answers, as strings, concatenated in column order.
impAnswers, belAnswers = (data[col].astype(str) for col in imp_bel_cols)
data['Imp-Bel Grp'] = impAnswers + belAnswers

In [99]:
# Flag (1) the rows that are either 'new' ads with a truthy 'primed' value or
# 'old' ads with a falsy one; every other row gets 0.
distGrp = [
    1 if (adType == 'new' and primed) or (adType == 'old' and not primed) else 0
    for adType, primed in zip(data['ad_type'], data['primed'])
]

data['Dist Grp'] = distGrp

In [102]:
# Restrict to the rows flagged with Dist Grp == 1 and use that same subset for
# both the per-answer counts and the per-ad_type totals, so each fraction is a
# share of the subset rather than of the whole table.
dataForScatter = data.loc[data['Dist Grp'] == 1]
dataCounts = dataForScatter[['ad_type', 'val']].groupby('ad_type').count()
grouping = (
    dataForScatter[imp_eff_cols + ['ad_type', 'val']]
    .groupby(imp_eff_cols + ['ad_type'])
    .count()
)
# The division matches rows on the shared 'ad_type' index level; reset_index
# then turns the group keys back into ordinary columns (no in-place mutation).
fracGrouping = (grouping / dataCounts).reset_index()
fracGrouping['Imp-Eff Grp'] = (
    fracGrouping[imp_eff_cols[0]].astype(str) + fracGrouping[imp_eff_cols[1]].astype(str)
)

In [103]:
# Save the fractions currently held in fracGrouping.
fracGrouping.to_csv('impEffResults.csv')

In [104]:
# Same computation for the importance / belief question pair. The per-answer
# counts and the per-ad_type totals both come from the Dist Grp == 1 subset, so
# each fraction is a share of that subset rather than of the whole table.
dataForScatter = data.loc[data['Dist Grp'] == 1]
dataCounts = dataForScatter[['ad_type', 'val']].groupby('ad_type').count()
grouping = (
    dataForScatter[imp_bel_cols + ['ad_type', 'val']]
    .groupby(imp_bel_cols + ['ad_type'])
    .count()
)
# The division matches rows on the shared 'ad_type' index level; reset_index
# then turns the group keys back into ordinary columns (no in-place mutation).
fracGrouping = (grouping / dataCounts).reset_index()
fracGrouping['Imp-Bel Grp'] = (
    fracGrouping[imp_bel_cols[0]].astype(str) + fracGrouping[imp_bel_cols[1]].astype(str)
)
fracGrouping.to_csv('impBelResults.csv')

In [98]:
# Row counts per ad_type that serve as denominators for the fractions above.
dataCounts

Unnamed: 0_level_0,val
ad_type,Unnamed: 1_level_1
new,77
old,74
