In [None]:
from ast import literal_eval

import altair as alt
import numpy as np
import pandas as pd
from resample import bootstrap as res_bootstrap
from scipy import stats
# Use the JupyterLab renderer and ask it to embed charts as SVG.
alt.renderers.enable('jupyterlab', embed_options={'renderer': 'svg'})
# Avoids writing all the data to the notebook or disk. 
# See https://altair-viz.github.io/user_guide/faq.html#local-data-server
# Note that this may not work on some cloud-based Jupyter notebook services.
alt.data_transformers.enable('data_server')

# Set Jupyter and Pandas to show 3 decimal places
# (`%precision` is an IPython magic, so this cell only runs inside IPython/Jupyter.)
%precision 3
pd.options.display.float_format = '{:,.3f}'.format

In [None]:
# Specify a consistent, useful theme for Altair
def theme_1(*args, **kwargs):
    """
    Build the Altair theme configuration shared by the charts in this notebook.

    Any positional or keyword arguments are accepted and ignored. Every call
    returns a freshly built ``{'config': {...}}`` dictionary.
    """
    serif = 'CMU Serif'
    accent = '#82c6df'

    def label_style():
        # Font settings shared by axis and facet-header labels.
        return {
            'labelFont': serif,
            'labelFontSize': 12,
            'labelFontWeight': 'normal',
        }

    def title_style(size):
        # Bold title settings; only the font size varies between components.
        return {
            'titleFont': serif,
            'titleFontSize': size,
            'titleFontWeight': 'bold',
        }

    def scheme(name):
        # Any colour scheme from
        # https://vega.github.io/vega/docs/schemes/#categorical can be used here.
        # An array of hex colours, e.g.
        # ['#ec8431', '#829eb1', '#c89d29', '#3580b1', '#adc839', '#ab7fb4'],
        # is an alternative to the scheme object.
        return {'scheme': name}

    config = {
        'background': '#ffffff',
        'view': {'height': 450, 'width': 700},
        'title': {
            'anchor': 'center',
            'color': '#000000',
            'font': serif,
            'fontSize': 14,
            'fontWeight': 'bold',
            'fontStyle': 'italic',
        },

        # Default mark colours.
        'arc': {'fill': accent},
        'area': {'fill': accent},
        'line': {'stroke': accent, 'strokeWidth': 2},
        'path': {'stroke': accent},
        'rect': {'fill': accent},
        'shape': {'stroke': accent},
        'symbol': {'fill': accent, 'size': 30},

        # Axes: the generic axis uses the larger title size, while the
        # x/y-specific entries use the smaller header size.
        'axis': {**label_style(), **title_style(14)},
        'axisX': {'tickSize': 3, **title_style(12)},
        'axisY': {'tickSize': 2, **title_style(12)},

        'header': {**label_style(), **title_style(12)},
        'legend': {
            'labelFont': serif,
            'labelFontSize': 12,
            **title_style(14),
        },

        'range': {
            'category': scheme('tableau10'),
            'diverging': scheme('purpleorange'),
            'heatmap': scheme('blues'),
            'ordinal': scheme('blues'),
            'ramp': scheme('blues'),
        },
    }
    return {'config': config}
# Register the theme function under the name 'theme_1', then enable it.
alt.themes.register('theme_1', theme_1)
# The trailing semicolon suppresses the cell's output repr.
alt.themes.enable('theme_1');

## One of these themes would work too
# alt.themes.enable('latimes')
# alt.themes.enable('default')


# Styling function for pandas dataframes that highlights values less than 0.05.
# To be used with stat_sign_df.style.applymap()
def color_df(val):
    """
    Return the CSS declaration for one dataframe cell.

    Values strictly below 0.05 get a bold, yellow-background style; every
    other value (including exactly 0.05) gets the empty string.
    """
    if val < 0.05:
        return 'background-color: yellow; font-weight: bold'
    return ""
    
# Bootstrap method, adapted from: http://www.jtrive.com/the-empirical-bootstrap-for-confidence-intervals-in-python.html
# This is a python closure function
def bootstrap(data, n=3000, func=np.mean):
    """
    Generate `n` bootstrap samples, evaluating `func`
    at each resampling. `bootstrap` returns a function,
    which can be called to obtain confidence intervals
    of interest.

    Parameters
    ----------
    data : one-dimensional sequence accepted by `np.random.choice`.
    n : number of bootstrap resamples to draw.
    func : statistic evaluated on each resample (defaults to `np.mean`).

    Returns
    -------
    ci : callable taking a confidence level `p` and returning
        `[lower, upper]` taken from the sorted bootstrap statistics.
    """
    sample_size = len(data)
    # Each resample has the same size as the data and is drawn with replacement.
    simulations = sorted(
        func(np.random.choice(data, size=sample_size, replace=True))
        for _ in range(n)
    )

    def ci(p):
        """
        Return the 2-sided symmetric confidence interval specified by p.

        p is the confidence level expressed as a fraction in [0, 1]
        (e.g. 0.95 for a 95% interval, 0.90 for 90%).

        Raises ValueError when p lies outside [0, 1].
        """
        if not 0 <= p <= 1:
            raise ValueError(
                "p must be a fraction between 0 and 1 (e.g. 0.95), got %r" % (p,)
            )
        u_pval = (1 + p) / 2.
        l_pval = 1 - u_pval
        l_indx = int(np.floor(n * l_pval))
        # For p == 1 the raw upper index equals n, one past the last
        # simulation, so clamp it to the largest valid position.
        u_indx = min(int(np.floor(n * u_pval)), n - 1)
        return [simulations[l_indx], simulations[u_indx]]

    return ci

## Global Variables Setup

In [None]:
# A set of constant global variables used throughout the notebook
# Number of question columns (q1 ... q<num_questions>, each with a matching
# q<i>_time column) that compute_per_user reads for every participant.
num_questions = 32
# Names of the two presentation modes compared in the study.
modes = ['SQL', 'RD']
# Numeric code -> mode name.
# NOTE(review): presumably keyed like `sequence_num` (0 = SQL first, 1 = RD first) - confirm.
mode_to_name = {0: 'SQL', 1: 'RD'}

## Analysis

In [None]:
# This should point to our full study data when it is available
# Path of the study export; switch this to the full study data once it exists.
filename = 'data/users-table-pilot.csv'

# Read the CSV, parse the stringified `pattern_order` lists into real Python
# lists, and expose the old index as an ordinary `index` column.
df = (
    pd.read_csv(filename)
    .assign(pattern_order=lambda raw: raw['pattern_order'].apply(literal_eval))
    .reset_index()
)
df

      elif (sequence_num + question_num) % 2 == 1:
            image_key_base = 'sqlpattern'
        elif (sequence_num + question_num) % 2 == 0:
            image_key_base = 'diagrampattern'
            
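`sequence_num = 0` means the participant starts with SQL; `sequence_num = 1` means they start with RD. After the first question, the two modes alternate with every question.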



As a within-subjects study, we first determined the per-participant difference between \diagrams and SQL.
We computed the (individual) difference between the median time spent on \diagrams and the median time on SQL.
We also computed the median time spent on each for the first and second halves of the study and the difference of the two for each mode.
The rate of improvement was calculated as $(1st - 2nd)/1st$.
We then found the difference for each participant between the rate for \diagrams and SQL.
We similarly computed the proportion of correct responses for each mode and their difference, as well as the proportion of correct responses for each mode on the first and second halves and the difference of the two for each mode.

Then, across all participants, we calculated the median time difference and mean difference in proportion of correct responses.
We also computed the overall mean rate of improvement and mean difference in correct responses between the first and second halves.

In [None]:
def compute_per_user(user):
    """
    Summarise one participant's error rates and response times per mode.

    `user` is one participant row. It is indexed by `q<i>` (given answer),
    `q<i>_time` (time in ms), `pattern_order` (expected answers, 0-indexed)
    and `sequence_num`. A question is counted as SQL when
    `(sequence_num + question number) % 2 == 1`, otherwise as RD.

    Returns a dict with, for SQL, RD and their difference (RD - SQL):
    the proportion of incorrect answers overall and per half, the median time
    in seconds overall and per half, the relative time improvement
    `(first - second) / first`, and the drop in incorrect proportion from the
    first half to the second.
    """
    per_mode_total = int(num_questions / 2)
    split_at = int(per_mode_total / 2)
    half_total = per_mode_total / 2

    # Sort every question into the bucket of the mode it was shown in.
    wrong = {'sql': [], 'rd': []}
    secs = {'sql': [], 'rd': []}
    for q_num in range(1, num_questions + 1):
        answer_col = "q" + str(q_num)
        given = user[answer_col]
        elapsed = user[answer_col + "_time"] / 1000  # ms to s
        expected = user['pattern_order'][q_num - 1]

        flag = 1 if given != expected else 0
        shown_as = 'sql' if (user['sequence_num'] + q_num) % 2 == 1 else 'rd'
        wrong[shown_as].append(flag)
        secs[shown_as].append(elapsed)

    # Proportion of incorrect answers: overall, first half, second half.
    sql_err = np.sum(wrong['sql']) / per_mode_total
    rd_err = np.sum(wrong['rd']) / per_mode_total
    sql_err_first = np.sum(wrong['sql'][:split_at]) / half_total
    rd_err_first = np.sum(wrong['rd'][:split_at]) / half_total
    sql_err_second = np.sum(wrong['sql'][split_at:]) / half_total
    rd_err_second = np.sum(wrong['rd'][split_at:]) / half_total

    # Median response time in seconds: overall, first half, second half.
    sql_time = np.median(secs['sql'])
    rd_time = np.median(secs['rd'])
    sql_time_first = np.median(secs['sql'][:split_at])
    rd_time_first = np.median(secs['rd'][:split_at])
    sql_time_second = np.median(secs['sql'][split_at:])
    rd_time_second = np.median(secs['rd'][split_at:])

    # Improvement from the first half to the second half.
    sql_speedup = (sql_time_first - sql_time_second) / sql_time_first
    rd_speedup = (rd_time_first - rd_time_second) / rd_time_first
    sql_err_drop = sql_err_first - sql_err_second
    rd_err_drop = rd_err_first - rd_err_second

    # Every "diff" entry is RD minus SQL.
    return {
        'incorrect_sql_proportion': sql_err,
        'incorrect_rd_proportion': rd_err,
        'diff_incorrect_proportion': rd_err - sql_err,

        'first_incorrect_sql_proportion': sql_err_first,
        'first_incorrect_rd_proportion': rd_err_first,
        'first_diff_incorrect_proportion': rd_err_first - sql_err_first,
        'second_incorrect_sql_proportion': sql_err_second,
        'second_incorrect_rd_proportion': rd_err_second,
        'second_diff_incorrect_proportion': rd_err_second - sql_err_second,

        'median_sql_time': sql_time,
        'median_rd_time': rd_time,
        'diff_time': rd_time - sql_time,

        'first_median_sql_time': sql_time_first,
        'first_median_rd_time': rd_time_first,
        'first_diff_time': rd_time_first - sql_time_first,
        'second_median_sql_time': sql_time_second,
        'second_median_rd_time': rd_time_second,
        'second_diff_time': rd_time_second - sql_time_second,

        'time_improvement_sql': sql_speedup,
        'time_improvement_rd': rd_speedup,
        'diff_time_improvement': rd_speedup - sql_speedup,
        'incorrectness_improvement_sql': sql_err_drop,
        'incorrectness_improvement_rd': rd_err_drop,
        'diff_incorrectness_improvement': rd_err_drop - sql_err_drop,
    }

In [None]:
# Compute the per-participant metrics row by row and append them, as new
# columns, to the right of the raw study data.
per_participant_df = pd.concat(
    [df, df.apply(compute_per_user, axis='columns', result_type='expand')],
    axis='columns',
)
per_participant_df

In [None]:
# Median, across participants, of the per-participant time difference
# (median RD time minus median SQL time, in seconds).
np.median(per_participant_df['diff_time'])

# alt.Chart(per_participant_df['diff_time']).mark_point().encode(
#     x = 'median(diff_time:Q)'
# )

In [None]:
# Median, across participants, of the difference in incorrect-answer proportion (RD minus SQL).
# NOTE(review): the write-up above describes a *mean* difference for correctness; this cell takes the median - confirm which is intended.
np.median(per_participant_df['diff_incorrect_proportion'])

In [None]:
# Keep only the two per-participant difference columns that the charts below plot.
alt_data = per_participant_df[['diff_time', 'diff_incorrect_proportion']]

In [None]:
# Strip plot: one tick per participant at their time difference (RD minus SQL, seconds).
alt.Chart(alt_data).mark_tick().encode(
    x='diff_time:Q'
)

In [None]:
# Strip plot: one tick per participant at their incorrect-proportion difference (RD minus SQL).
alt.Chart(alt_data).mark_tick().encode(
    x='diff_incorrect_proportion:Q'
)

In [None]:
# Density estimate of the per-participant time difference (RD minus SQL),
# evaluated over the observed min..max range and drawn as an area that is
# stacked around the centre line (stack='center').
alt.Chart(alt_data).transform_density(
    'diff_time',
    as_=['diff_time', 'density'],
    extent=[np.min(alt_data['diff_time']), np.max(alt_data['diff_time'])]
).mark_area(orient='vertical').encode(
    x=alt.X(
        'diff_time:Q',
        title=None
    ),
    y=alt.Y(
        'density:Q',
        stack='center',
        impute=None,
        title=None,
        # The density axis keeps a single tick at 0 and hides labels and grid.
        axis=alt.Axis(labels=False, values=[0],grid=False, ticks=True),
    )
).properties(
    width=400,
    height=100,
    title='Δ median time per question (Diagrams − SQL)'
# NOTE(review): this chart is not faceted, so configure_facet presumably has no visible effect - confirm.
).configure_facet(
    spacing=0
).configure_view(
    stroke=None
)

In [None]:
# Density estimate of the per-participant difference in incorrect-answer
# proportion (RD minus SQL), evaluated over the observed min..max range and
# drawn as an area stacked around the centre line (stack='center').
# NOTE(review): the plotted value is a difference of proportions (see
# compute_per_user), while the title says "percent" - confirm the intended unit.
alt.Chart(alt_data).transform_density(
    'diff_incorrect_proportion',
    as_=['diff_incorrect_proportion', 'density'],
    extent=[np.min(alt_data['diff_incorrect_proportion']), np.max(alt_data['diff_incorrect_proportion'])]
).mark_area(orient='vertical').encode(
    x=alt.X(
        'diff_incorrect_proportion:Q',
        title=None,
    ),
    y=alt.Y(
        'density:Q',
        stack='center',
        impute=None,
        title=None,
        # The density axis keeps a single tick at 0 and hides labels and grid.
        axis=alt.Axis(labels=False, values=[0],grid=False, ticks=True),
    )
).properties(
    width=400,
    height=60,
    title='Δ percent errors (Diagrams − SQL)'
# NOTE(review): this chart is not faceted, so configure_facet presumably has no visible effect - confirm.
).configure_facet(
    spacing=0
).configure_view(
    stroke=None
)


In [None]:
# Functions used to calculate the confidence intervals and construct the graphs

# Returns as a dataframe the difference of QV and Both modes from SQL together with their BCa confidence intervals for a given metric.
def get_conf_interval_bca(values_per_mode, mode, column_list):
    """
    Summarise each requested column and attach a bootstrap BCa confidence interval.

    Parameters
    ----------
    values_per_mode : mapping from column name to an object exposing `.values`
        (e.g. a DataFrame); one entry is read per name in `column_list`.
    mode : name of the summary statistic: 'median', 'gmean' or 'mean'.
    column_list : names of the columns of `values_per_mode` to summarise.

    Returns
    -------
    pandas.DataFrame with one row per entry of `column_list` and the columns
    `value` (the summary statistic), `mode` (the column name) and
    `conf_interval_delta_from_value` (a `(left, right)` tuple holding the
    absolute distances from `value` to the lower and upper interval bounds).

    Raises
    ------
    ValueError
        If `mode` is not one of the supported statistic names.
    """
    if mode == 'median':
        function = np.median
    elif mode == 'gmean':
        function = stats.gmean
    elif mode == 'mean':
        function = np.mean
    else:
        # Fail up front instead of hitting an unbound `function` inside the loop.
        raise ValueError(
            "mode must be one of 'median', 'gmean' or 'mean', got %r" % (mode,)
        )

    values_per_with_bca_conf_intervals = {'value': [], 'mode': column_list, 'conf_interval_delta_from_value': []}

    # The loop variable is deliberately not called `mode`, so the statistic
    # name passed by the caller is no longer overwritten.
    for column in column_list:
        values_per_worker = values_per_mode[column].values
        value = function(values_per_worker)
        values_per_with_bca_conf_intervals['value'].append(value)
        value_conf_interval = res_bootstrap.bootstrap_ci(values_per_worker, f=function, b=10000, ci_method="bca")

        # calculate the distance between the summary statistic and the lower and upper bounds of the confidence intervals
        left_delta = abs(value - value_conf_interval[0])
        right_delta = abs(value_conf_interval[1] - value)
        values_per_with_bca_conf_intervals['conf_interval_delta_from_value'].append((left_delta, right_delta))

    return pd.DataFrame(values_per_with_bca_conf_intervals)

# Returns an altair graph for the percentage differences + confidence interval graph
def get_conf_interval_graph(time_per_mode_with_bca_intervals, axis_domain, percent=False):
    """
    Build a faceted Altair chart with a point, an error bar and a text label per mode.

    Parameters
    ----------
    time_per_mode_with_bca_intervals : data handed to the facet. The encodings
        and calculate expressions below read its `value`, `mode` and
        `conf_interval_delta_from_value` fields.
    axis_domain : domain passed to the x-axis scale.
    percent : when True, the text label uses the ".2%" format and the x axis
        uses the '%' format; otherwise the text label uses ".2f".

    Returns
    -------
    The layered chart (points, error bars, text), faceted into one row per `mode`.
    """
    axis_limits = alt.Scale(domain=axis_domain,zero=False)
        
    if percent:
        text_encoding = alt.Text('value:Q', format=".2%")
        x_encoding = alt.X('value:Q', title=None, scale=axis_limits, axis=alt.Axis(format='%'))
    else:
        text_encoding = alt.Text('value:Q', format=".2f")
        x_encoding = alt.X('value:Q', title=None, scale=axis_limits)
        
    # NOTE(review): this calculate assigns `percent_diff` from itself and no
    # encoding in this function reads `percent_diff` - looks vestigial, confirm.
    # NOTE(review): the mark colour 'black' is presumably overridden by the
    # `mode:N` colour encoding - confirm.
    points = alt.Chart().transform_calculate(
        percent_diff=alt.datum.percent_diff
    ).mark_point(
        filled=True,
        color='black'
    ).encode(
        x=x_encoding,
        color=alt.Color('mode:N', legend=None),
    )

    # generate the error bars
    # Derived from `points`; spans value - left delta to value + right delta.
    errorbars = points.mark_rule(size=1).encode(
        x='xmin:Q',
        x2='xmax:Q',
        color=alt.Color('mode:N', legend=None)
    ).transform_calculate(
        xmin='datum.value-datum.conf_interval_delta_from_value[0]',
        xmax='datum.value+datum.conf_interval_delta_from_value[1]'
    )

    # Text label showing the value, shifted 7px up from the point.
    text = points.mark_text(
        align='center',
        baseline='middle',
        dy = -7,
        dx = 0,
        fontSize=8
    ).encode(
        text=text_encoding
    )

    # NOTE(review): the row sort order lists 'SQL', 'QV', 'Both', whereas the
    # notebook's `modes` constant is ['SQL', 'RD'] - confirm the order is current.
    graph = alt.layer(points, errorbars, text).facet(
        data=time_per_mode_with_bca_intervals,
        row=alt.Row(
            'mode:N',
            header=alt.Header(
    #             titleOrient='bottom',
    #             labelOrient='bottom',
    #             labelPadding=0,
                labelAngle=0,
                titlePadding=0,
                title=None
            ),
         sort=['SQL', 'QV', 'Both']
        ),
    ).configure_facet(
        spacing=0,
    ).configure_view(
        stroke=None,
        width=200,
    ).properties(
    #     title='Mean time per mode',
    ).configure_axis(
        gridOpacity=0.7
    #     orient = "top",
    )

    return graph

In [None]:
# Median plus BCa confidence interval for each mode column.
# NOTE(review): `mean_time_per_mode` is not defined in any cell visible in this
# part of the notebook; unless it is created elsewhere, this cell raises
# NameError on a fresh Restart & Run All - confirm and define it before this cell.
median_time_per_mode_with_bca_conf_intervals = get_conf_interval_bca(mean_time_per_mode, 'median', modes)

In [None]:
# Display the list of mode names.
modes