# Imports and Constants

In [1]:
import glob
import json
import os

import pandas as pd

# Load Data

In [2]:
def load_condition_logs(condition):
    """Load every participant log for one study condition.

    Looks for files matching ``formative 2/user data/<condition>/*/logs*``,
    reads each one as a tab-separated table, and flattens its ``input_data``
    column: each evaluated record is expanded into columns, and the nested
    ``data`` field of each record is expanded into further columns that are
    placed alongside them.

    Prints a running 1-based counter, one line per file, as progress output.

    Args:
        condition: Name of the condition sub-directory, e.g. "Control".

    Returns:
        A list with one flattened DataFrame per log file, in sorted path order.

    Raises:
        FileNotFoundError: If no log file matches the pattern.
    """
    pattern = os.path.join("formative 2", "user data", condition, "*", "logs*")
    # glob order is filesystem-dependent; sorting keeps the row order of the
    # combined dataset (and of the exported CSV) reproducible across machines.
    paths = sorted(glob.glob(pattern))
    if not paths:
        # Fail here with the offending pattern rather than later inside
        # pd.concat with an opaque "No objects to concatenate".
        raise FileNotFoundError("no log files match {!r}".format(pattern))

    frames = []
    for i, path in enumerate(paths, start=1):
        print(i)
        raw = pd.read_csv(path, sep='\t')
        # SECURITY: `eval` executes whatever expression is stored in the log
        # file. Only run this on logs you produced yourself. If the records are
        # plain Python literals, `ast.literal_eval` would be a safe drop-in
        # replacement -- TODO confirm against the log format before switching.
        input_data = raw['input_data'].map(eval).apply(pd.Series)
        data = input_data['data'].apply(pd.Series)
        frames.append(pd.concat([input_data.drop(columns=['data']), data], axis=1))
    return frames


print("loading control logs ...")
control_dfs = load_condition_logs("Control")

print()

print("loading mitigation logs ...")
mitigation_dfs = load_condition_logs("Mitigation")

# One frame per condition, then both conditions stacked into a single dataset.
# Row indexes are kept as-is, so they restart at 0 for every log file.
control_df = pd.concat(control_dfs)
mitigation_df = pd.concat(mitigation_dfs)
dataset_df = pd.concat([control_df, mitigation_df])
politicians_df = pd.read_csv(os.path.join("datasets", "dataset_formative2.csv"))

dataset_df.to_csv(os.path.join("formative 2", "all_interactions.csv"), index=False)
dataset_df

loading control logs ...
1
2
3
4
5
6
7
8
9
10
11
12

loading mitigation logs ...
1
2
3
4
5
6
7
8
9
10
11
12


Unnamed: 0,appMode,appType,appPhase,interactionType,interactionDuration,interactionAt,participantId,axisChanged,x,y,size,eventX,eventY,id,attribute,value
0,live,CONTROL,phase_1,axes_attribute_changed,0,1566828722650,0YhSRQx5Cdp5,y_axis,age,policy_strength_legalize_medical_marijuana,,333.5,278.0,,,
1,live,CONTROL,phase_1,axes_attribute_changed,0,1566828736150,0YhSRQx5Cdp5,y_axis,age,policy_strength_ban_abortion_after_6_weeks,,333.5,278.0,,,
2,live,CONTROL,phase_1,axes_attribute_changed,0,1566828746056,0YhSRQx5Cdp5,x_axis,policy_strength_ban_abortion_after_6_weeks,policy_strength_ban_abortion_after_6_weeks,,805.0,508.5,,,
3,live,CONTROL,phase_1,axes_attribute_changed,0,1566828748639,0YhSRQx5Cdp5,y_axis,policy_strength_ban_abortion_after_6_weeks,age,,333.5,278.0,,,
4,live,CONTROL,phase_1,axes_attribute_changed,0,1566828759811,0YhSRQx5Cdp5,y_axis,policy_strength_ban_abortion_after_6_weeks,policy_strength_ban_abortion_after_6_weeks,,333.5,278.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
353,live,MITIGATION,phase_2,mouseover,254,1566240537762,xGba4x11AgqF,,{'name': 'policy_strength_budget_for_free_scho...,{'name': 'policy_strength_ban_abortion_after_6...,,798.0,148.0,p100,,
354,live,MITIGATION,phase_2,add_to_list_via_scatterplot_click,2981,1566240540489,xGba4x11AgqF,,{'name': 'policy_strength_budget_for_free_scho...,{'name': 'policy_strength_ban_abortion_after_6...,,804.0,148.0,p100,,
355,live,MITIGATION,phase_2,mouseout,3200,1566240540708,xGba4x11AgqF,,{'name': 'policy_strength_budget_for_free_scho...,{'name': 'policy_strength_ban_abortion_after_6...,,823.0,162.0,p100,,
356,live,MITIGATION,phase_2,mouseover_from_list,252,1566240541928,xGba4x11AgqF,,{'name': 'policy_strength_budget_for_free_scho...,{'name': 'policy_strength_ban_abortion_after_6...,,1472.0,640.5,p60,,


# Process Data

In [3]:
# Interaction types listed for the logs (full set):
#   add_to_list_via_card_click
#   add_to_list_via_scatterplot_click
#   axes_attribute_changed
#   change_attribute_distribution
#   change_attribute_distribution_results
#   filter_changed
#   mouseout
#   mouseout_from_list
#   mouseover
#   mouseover_from_list
#   remove_from_list_via_card_click
#   remove_from_list_via_list_item_click
#
# Subset used for the analysis below. Compared with the full set it leaves out
# the axis / filter / attribute-distribution changes and both mouseout events.
interactions = [
    # additions to the list
    "add_to_list_via_card_click",
    "add_to_list_via_scatterplot_click",
    # hovers
    "mouseover",
    "mouseover_from_list",
    # removals from the list
    "remove_from_list_via_card_click",
    "remove_from_list_via_list_item_click",
]


# Bin each row of `politicians_df` by age.
# Twelve consecutive 5-year intervals from 30 to 90, each closed on the right:
# (30, 35], (35, 40], ..., (85, 90].
bins = pd.IntervalIndex.from_breaks(list(range(30, 91, 5)))

ages = politicians_df.loc[:, ['id', 'age']]
# NOTE(review): pandas documents `right` as ignored when `bins` is an
# IntervalIndex; the closure comes from `bins` itself. `include_lowest` looks
# like a no-op here as well -- TODO confirm. If so, an age of exactly 30 falls
# outside every bin and is left unbinned (NaN).
binned = pd.cut(ages['age'].tolist(), bins, right=True, include_lowest=True)
# `id`, `age`, plus the matching interval in `age_binned`.
ages_binned = pd.concat([ages, pd.Series(binned, name='age_binned')], axis=1)


# 1. keep only the rows whose `interactionType` is one of `interactions`
# 2. left-join those rows with `politicians_df` on `id`, keeping a fixed set of columns
# 3. left-join the result with the binned ages on `id`
columns_to_keep = [
    'appType',
    'participantId',
    'interactionType',
    'id',
    'age',
    'gender',
    'party',
]
is_kept_interaction = dataset_df['interactionType'].isin(interactions)
filtered_df = dataset_df[is_kept_interaction].reset_index(drop=True)
merged_df = pd.merge(filtered_df, politicians_df, how='left', on='id')[columns_to_keep]
final_df = pd.merge(merged_df, ages_binned[['id', 'age_binned']], how='left', on='id')

# Persist the joined table for use outside the notebook.
final_df.to_csv(os.path.join("formative 2", "interactions_with_politicians.csv"), index=False)


# get aggregated counts from each bin (distribution, control, mitigation)
def count_by_age_bin(binned_ages, label):
    """Count how many values fall into each age bin.

    The result is named explicitly instead of relying on the column names
    produced by ``value_counts().reset_index()``: those changed in pandas 2.0
    (``index`` / ``<series name>`` became ``<series name>`` / ``count``), which
    left no ``age`` column to index on afterwards.

    Args:
        binned_ages: Series of age bins, e.g. the ``age_binned`` column.
        label: Name to give the resulting count column.

    Returns:
        A DataFrame with columns ``age`` (the bin) and ``label`` (the count),
        ordered by bin. Missing bins (NaN) are not counted.
    """
    return (
        binned_ages.value_counts()
        # Order by bin rather than by count so that every table shares the
        # same index order and the merged result does not depend on how the
        # installed pandas version orders a union of indexes.
        .sort_index()
        .rename(label)
        .rename_axis('age')
        .reset_index()
    )


is_control = final_df['appType'] == 'CONTROL'
is_mitigation = final_df['appType'] == 'MITIGATION'

# `distribution` counts rows of `ages_binned` per bin; the other three count
# rows of `final_df` (per condition, and overall).
ages_binned_distribution = count_by_age_bin(ages_binned['age_binned'], 'distribution')
control_interaction_distribution = count_by_age_bin(
    final_df.loc[is_control, 'age_binned'], 'control'
)
mitigation_interaction_distribution = count_by_age_bin(
    final_df.loc[is_mitigation, 'age_binned'], 'mitigation'
)
total_interaction_distribution = count_by_age_bin(final_df['age_binned'], 'total')


# merge together into final counts
dfs = [
    ages_binned_distribution.set_index('age'),
    control_interaction_distribution.set_index('age'),
    mitigation_interaction_distribution.set_index('age'),
    total_interaction_distribution.set_index('age'),
]
# Transpose so that each count series is a row and each age bin is a column.
df = pd.concat(dfs, axis=1).T
df.to_csv(os.path.join("formative 2", "interaction_distribution_by_age.csv"), index_label='age')

df

age,"(30, 35]","(35, 40]","(40, 45]","(45, 50]","(50, 55]","(55, 60]","(60, 65]","(65, 70]","(70, 75]","(75, 80]","(80, 85]","(85, 90]"
distribution,0,4,7,10,19,24,22,4,5,5,0,0
control,0,182,250,264,288,260,605,88,62,58,0,0
mitigation,0,148,160,210,300,443,652,46,52,59,0,0
total,0,330,410,474,588,703,1257,134,114,117,0,0
