In [None]:
import analysis
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import plotly.figure_factory as ff
import scipy.stats as ss

# Every worker record is unpacked below as a 6-tuple:
# (id, sessionConfig, sessionResp, finalQuestions, summary, survey)
batch_1 = list(analysis.readWorkers('search/Batch_full_between_0.csv', 'search'))
batch_2 = list(analysis.readWorkers('search/Batch_full_between_1.csv', 'search'))
batch_3 = list(analysis.readWorkers('search/Batch_full_between_2.csv', 'search'))
batch_4 = list(analysis.readWorkers('search/Batch_full_between_3.csv', 'search'))
batch_5 = list(analysis.readWorkers('search/Batch_full_between_4.csv', 'search'))

# Pool all batches, then keep only the workers that analysis.filter lets through.
raw_workers = [*batch_1, *batch_2, *batch_3, *batch_4, *batch_5]
workers = analysis.filter(raw_workers, 'search')
print(f'There are {len(workers)} workers who passed quality check out of {len(raw_workers)}')

# Print the number of distinct worker ids so it can be compared with the
# count above (this only reports the number; nothing is asserted).
ids = [worker_id for worker_id, _, _, _, _, _ in workers]
print(f'{len(set(ids))} unique workers')

# Number of retained workers whose perf-pattern is 'alt,alt,full,full'.
group = [
    summary
    for _, _, _, _, summary, _ in workers
    if summary['']['perf-pattern'] == 'alt,alt,full,full'
]
print(f'{len(group)}')

In [None]:
def pivotAndAggregate(df, values, index):
    """Aggregate ``values`` per ``index`` group into means and standard errors.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format data, one row per observation.
    values : str or list of str
        Name(s) of the column(s) to aggregate.
    index : str or list of str
        Name(s) of the column(s) that define the groups.

    Returns
    -------
    pandas.DataFrame
        One row per group. The grouping columns are restored as ordinary
        columns, every value column holds the group mean, and a matching
        ``<name>_err`` column holds the standard error of the mean (ddof=1).
    """
    names = values if isinstance(values, list) else [values]
    renameMap = {name: name + '_err' for name in names}
    df_p = df.pivot_table(values=values, aggfunc='mean', index=index)
    # Use pandas' own 'sem' aggregation rather than scipy.stats.sem: the
    # pandas mean skips NaN, while scipy's sem propagates NaN by default, so
    # a single missing observation used to give a group a valid mean but a
    # NaN error. With 'sem' both statistics cover the same observations.
    df_pe = df.pivot_table(values=values, aggfunc='sem', index=index).rename(columns=renameMap)
    # Left join on the group index: groups without a defined error keep NaN.
    return df_p.join(df_pe).reset_index()

# Freeform Answers

In [None]:
# Pair each worker's perf-pattern with their free-text explanation, then
# order the pairs so that answers from the same condition print together.
explanations = [
    (summary['']['perf-pattern'], final['explanation'])
    for _, _, _, final, summary, _ in workers
]
explanations.sort()
for cond, text in explanations:
    print(f'{cond}: {text}')

# Performance over sessions

In [None]:
# One record per (experiment, worker): the value of analysis.f1 for that
# experiment's summary and the ratio of its 'read' count to its 'total'.
f1_scores = [
    (
        worker_id,
        exp,
        summary['']['perf-pattern'],
        analysis.f1(summary[exp]),
        summary[exp]['read'] / summary[exp]['total'],
    )
    for exp in analysis.EXPERIMENTS
    for worker_id, _, _, _, summary, _ in workers
]
df_f1 = pd.DataFrame(f1_scores, columns=('id', 'exp', 'cond', 'f1', 'read'))
df_f1_agg = pivotAndAggregate(df_f1, values=['f1', 'read'], index=['exp', 'cond'])

# Individual worker lines, one facet per condition.
fig = px.line(df_f1, x='exp', y='f1', color='id', facet_col='cond', range_y=[0, 1.1])
fig.show()

# Per-condition aggregate with error bars taken from the 'f1_err' column.
fig = px.line(df_f1_agg, x='exp', y='f1', color='cond', error_y='f1_err', range_y=[0, 1.1])
fig.update_traces(mode='markers+lines')
fig.show()
df_f1_agg

In [None]:
# Individual worker lines for the 'read' ratio, one facet per condition.
fig = px.line(df_f1, x='exp', y='read', color='id', facet_col='cond', range_y=[0, 1.1])
fig.show()

# Per-condition aggregate; the y-axis is limited to [0, 0.2] and the error
# bars come from the 'read_err' column.
fig = px.line(df_f1_agg, x='exp', y='read', color='cond', error_y='read_err', range_y=[0, 0.2])
fig.update_traces(mode='markers+lines')
fig.show()

# Overall Responses

In [None]:
# Final-questionnaire items that are read as integers for every worker.
overall_questions = ['preference', 'understand']
overall = [
    (worker_id, question, summary['']['perf-pattern'], int(final[question]))
    for question in overall_questions
    for worker_id, _, _, final, summary, _ in workers
]
df_overall = pd.DataFrame(overall, columns=('id', 'question', 'cond', 'answer'))

# Horizontal box plots per condition with every individual answer drawn,
# one facet per question.
fig = px.box(
    df_overall,
    x='answer',
    y='cond',
    facet_col='question',
    points="all",
    range_x=[0, 8],
)
fig.show()

# Survey Responses

In [None]:
questions = ["confidence", "effort", "utility", "mentalmodel", "trust", "stickiness"]

# One record per (question, experiment, worker). A question that is absent
# from that experiment's survey is recorded as None instead of an integer.
surveys = [
    (
        worker_id,
        exp,
        question,
        summary['']['perf-pattern'],
        None if question not in survey[exp] else int(survey[exp][question]),
    )
    for question in questions
    for exp in analysis.EXPERIMENTS[:]
    for worker_id, _, _, _, summary, survey in workers
]
df_survey = pd.DataFrame(surveys, columns=('id', 'exp', 'question', 'cond', 'answer'))
df_survey_agg = pivotAndAggregate(df_survey, 'answer', ['exp', 'question', 'cond'])

# Per-condition aggregate answer per experiment, one facet per question, with
# error bars from the 'answer_err' column.
# Alternative views that were tried on df_survey with the same x/y/facet/color:
#   px.violin(...) and px.box(..., points='all')
fig = px.line(
    df_survey_agg,
    x='exp',
    y='answer',
    color='cond',
    facet_col='question',
    error_y='answer_err',
    range_y=[1, 7],
)
fig.update_traces(mode='markers+lines')
fig.show()
df_survey_agg.loc[df_survey_agg['cond'] == 'alt,alt,full,full']

# Time and effort on task

In [None]:
# One record per (experiment, worker) with the logged 'taskTime' and 'view'
# values from that experiment's summary.
time = [
    (
        worker_id,
        exp,
        summary['']['perf-pattern'],
        summary[exp]['log']['taskTime'],
        summary[exp]['log']['view'],
    )
    for exp in analysis.EXPERIMENTS
    for worker_id, _, _, _, summary, _ in workers
]
df_time = pd.DataFrame(time, columns=('id', 'exp', 'cond', 'time', 'view'))
df_time_agg = pivotAndAggregate(df_time, values=['time', 'view'], index=['exp', 'cond'])

# Individual worker lines, one facet per condition.
fig = px.line(df_time, x='exp', y='time', facet_col='cond', color='id')
fig.show()

# Per-condition aggregate with error bars from the 'time_err' column.
fig = px.line(df_time_agg, x='exp', y='time', error_y='time_err', color='cond')
fig.update_traces(mode='markers+lines')
fig.show()

In [None]:
# Individual worker lines for the logged 'view' value, one facet per condition.
fig = px.line(df_time, x='exp', y='view', facet_col='cond', color='id')
fig.show()

# Per-condition aggregate with error bars from the 'view_err' column.
fig = px.line(df_time_agg, x='exp', y='view', error_y='view_err', color='cond')
fig.update_traces(mode='markers+lines')
fig.show()

In [None]:
# One record per (experiment, worker) with that experiment's 'uptake' and
# 'corrections' values.
# NOTE: this rebinds the name `time`, which the task-time cell uses for its
# own records; from here on it holds uptake/corrections tuples instead.
time = [
    (
        worker_id,
        exp,
        summary['']['perf-pattern'],
        summary[exp]['uptake'],
        summary[exp]['corrections'],
    )
    for exp in analysis.EXPERIMENTS
    for worker_id, _, _, _, summary, _ in workers
]
df_uptake = pd.DataFrame(time, columns=('id', 'exp', 'cond', 'uptake', 'corrections'))

# Distribution of uptake per experiment, one facet per condition.
fig = px.box(df_uptake, y='uptake', x='exp', facet_col='cond')
fig.show()

In [None]:
# Aggregate uptake and corrections per (experiment, condition).
df_uptake_agg = pivotAndAggregate(
    df_uptake,
    values=['uptake', 'corrections'],
    index=['exp', 'cond'],
)

# Per-condition uptake with error bars from the 'uptake_err' column.
fig = px.line(df_uptake_agg, x='exp', y='uptake', error_y='uptake_err', color='cond')
fig.update_traces(mode='markers+lines')
fig.show()

In [None]:
# Corrections per experiment as bars coloured by worker id, one facet per
# condition.
fig = px.bar(
    df_uptake,
    x='exp',
    y='corrections',
    facet_col='cond',
    color='id',
)
fig.show()

In [None]:
# Per-condition corrections with error bars from the 'corrections_err' column.
fig = px.line(
    df_uptake_agg,
    x='exp',
    y='corrections',
    error_y='corrections_err',
    color='cond',
)
fig.update_traces(mode='markers+lines')
fig.show()