In [142]:
import csv
import random
from collections import defaultdict
import matplotlib.pyplot as plt

import ipywidgets as widgets
from ipywidgets import interact

In [120]:
# Plot colour (hex RGB) for each condition. The insertion order of this dict
# is also the order in which conditions are reported and plotted.
condition_colours = {
    'equal': '#990000',
    'proportional': '#3333BB',
    'shapley': '#009900',
    'unfair': '#999933',
}
condition_names = list(condition_colours)

# Names of the per-row error columns that are read from the CSV as floats.
errs = ['err_sh', 'err_sym', 'err_eff', 'err_mrg', 'err_null', 'err_add']

def avg(lst):
    """Return the arithmetic mean of the numbers in ``lst``.

    ``lst`` must be a sized collection (``len()`` is called on it). An empty
    collection raises ``ZeroDivisionError``.
    """
    total = sum(lst)
    count = len(lst)
    return total / count

def jitter(lst, magnitude):
    """Return a new list with independent uniform noise added to each value.

    Each element of ``lst`` is shifted by a draw from
    ``random.uniform(-magnitude/2, magnitude/2)``, so ``magnitude`` is the
    total width of the noise band. The input is not modified.
    """
    noisy = []
    for value in lst:
        offset = random.uniform(-magnitude/2, magnitude/2)
        noisy.append(value + offset)
    return noisy

# Load Data

In [121]:
# Read every CSV row as a dict keyed by the header names.
# newline='' lets the csv module do its own newline handling, as its
# documentation requires for file objects passed to a reader (otherwise
# newlines embedded in quoted fields can be misinterpreted).
with open('error_data.csv', newline='') as csv_file:
    csv_reader = csv.DictReader(csv_file)
    error_data = list(csv_reader)

print("Loaded %d rows" % len(error_data))

# Group the rows by their 'condition' column. Each stored record holds the
# error columns listed in `errs` as floats plus the integer 'rating'.
data = defaultdict(list)
for row in error_data:
    obj = {err: float(row[err]) for err in errs}
    obj['rating'] = int(row['rating'])
    data[row['condition']].append(obj)

# Sanity check: show the first parsed record of one condition.
print(data['shapley'][0])

Loaded 1848 rows
{'err_sh': 0.5, 'err_sym': 0.0, 'err_eff': 0.33333333333333365, 'err_mrg': 0.16666666666666624, 'err_null': 0.0, 'err_add': 0.16666666666666624, 'rating': 3}


# Summary

In [124]:
# For each condition, print the mean of every error column and, for all
# columns other than err_sh, that mean as a percentage of the mean err_sh.
for condition_name in condition_names:
    # .get() avoids inserting an empty list into the defaultdict when a
    # condition has no rows.
    rows = data.get(condition_name, [])

    print(condition_name)
    if not rows:
        # avg() would divide by zero on an empty list; report it instead.
        print("(no rows)")
        print()
        continue

    avgs = {err: avg([d[err] for d in rows]) for err in errs}
    total = avgs['err_sh']

    print("err_sh^2  : %5.2f" % total)
    for err in errs:
        if err == 'err_sh':
            continue
        # A zero mean err_sh makes the percentage undefined.
        share = ("%5.2f%%" % (avgs[err] / total * 100)) if total else "  n/a"
        # Labels are left-justified to 10 characters so the colons line up.
        print("%-10s: %5.2f (%s)" % (err + "^2", avgs[err], share))
    print()

equal
err_sh^2  : 26.57
err_sym^2 :  0.00 ( 0.00%)
err_eff^2 :  0.67 ( 2.51%)
err_mrg^2 : 25.90 (97.49%)
err_null^2:  1.29 ( 4.87%)
err_add^2 : 24.61 (92.62%)

proportional
err_sh^2  :  6.25
err_sym^2 :  0.34 ( 5.41%)
err_eff^2 :  0.81 (12.92%)
err_mrg^2 :  5.11 (81.67%)
err_null^2:  0.38 ( 6.02%)
err_add^2 :  4.73 (75.64%)

shapley
err_sh^2  :  0.65
err_sym^2 :  0.00 ( 0.00%)
err_eff^2 :  0.53 (82.28%)
err_mrg^2 :  0.11 (17.72%)
err_null^2:  0.00 ( 0.17%)
err_add^2 :  0.11 (17.55%)

unfair
err_sh^2  : 76.21
err_sym^2 :  3.55 ( 4.65%)
err_eff^2 :  0.41 ( 0.54%)
err_mrg^2 : 72.25 (94.81%)
err_null^2:  4.63 ( 6.07%)
err_add^2 : 67.62 (88.74%)



# Interactive Plots

## Histogram

In [132]:
def makeHistograms(var, autoscale_x=True, x_max=100, autoscale_y=True, y_max=100):
    """Draw one histogram of ``var`` per condition, side by side in one figure.

    Parameters
    ----------
    var : key read from every record in ``data[condition_name]``.
    autoscale_x : if False, the x-axis is fixed to ``[-x_max/10, x_max]``.
    x_max : upper x limit, used only when ``autoscale_x`` is False.
    autoscale_y : if False, the y-axis is fixed to ``[0, y_max]``.
    y_max : upper y limit, used only when ``autoscale_y`` is False.
    """
    # Size the grid from the condition list instead of a hard-coded 4, so a
    # changed list cannot push a subplot index outside the grid.
    n_panels = len(condition_names)

    plt.figure(figsize=(10, 4))
    for (i, condition_name) in enumerate(condition_names):
        plt.subplot(1, n_panels, i+1)
        plt_data = [d[var] for d in data[condition_name]]
        plt.hist(plt_data, color=condition_colours[condition_name], rwidth=0.8)

        if not autoscale_x:
            # Small negative lower bound leaves a margin left of zero.
            plt.xlim(-x_max/10, x_max)
        if not autoscale_y:
            plt.ylim(0, y_max)
        plt.title(condition_name, size=10)
    plt.tight_layout()
    plt.show()

In [141]:
# Fields that can be histogrammed: the six error columns plus the rating.
var_options = ['err_sh', 'err_sym', 'err_eff', 'err_mrg', 'err_null', 'err_add', 'rating']

# Build the controls for makeHistograms. Keyword order here does not matter;
# each keyword is matched to the parameter of the same name.
# The trailing ';' suppresses the cell's return-value repr.
interact(
    makeHistograms,
    var=var_options,
    autoscale_x=True,
    autoscale_y=True,
    x_max=widgets.FloatLogSlider(min=0, max=2.6, step=0.1, base=10, value=10),
    y_max=widgets.FloatLogSlider(min=0, max=2.9, step=0.1, base=10, value=10),
);

interactive(children=(Dropdown(description='var', options=('err_sh', 'err_sym', 'err_eff', 'err_mrg', 'err_nul…

## Scatter Plot

In [127]:
def makeScatterPlots(x_var, y_var, autoscale_x=True, x_max=100, autoscale_y=True, y_max=100, jitter_y=False):
    """Draw one scatter plot of ``y_var`` against ``x_var`` per condition.

    Parameters
    ----------
    x_var, y_var : keys read from every record in ``data[condition_name]``.
    autoscale_x : if False, the x-axis is fixed to ``[-x_max/10, x_max]``.
    x_max : upper x limit, used only when ``autoscale_x`` is False.
    autoscale_y : if False, the y-axis is fixed to ``[-y_max/10, y_max]``.
    y_max : upper y limit, used only when ``autoscale_y`` is False.
    jitter_y : if True, uniform noise of total width 0.2 is added to the
        plotted y values (the stored data is not modified).
    """
    # Size the grid from the condition list instead of a hard-coded 4, so a
    # changed list cannot push a subplot index outside the grid.
    n_panels = len(condition_names)

    plt.figure(figsize=(10, 4))
    for (i, condition_name) in enumerate(condition_names):
        plt.subplot(1, n_panels, i+1)
        plt_x = [d[x_var] for d in data[condition_name]]
        plt_y = [d[y_var] for d in data[condition_name]]
        if jitter_y:
            # Spread out points that share the same y value.
            plt_y = jitter(plt_y, 0.2)
        plt.scatter(plt_x, plt_y, color=condition_colours[condition_name], s=8)

        if not autoscale_x:
            plt.xlim(-x_max / 10, x_max)
        if not autoscale_y:
            plt.ylim(-y_max / 10, y_max)
        plt.title(condition_name, size=10)

    plt.tight_layout()
    plt.show()

In [138]:
# Fields that can be placed on either axis: the six error columns plus the rating.
var_options = ['err_sh', 'err_sym', 'err_eff', 'err_mrg', 'err_null', 'err_add', 'rating']

# Build the controls for makeScatterPlots. Every keyword must match a
# parameter name of the function; the jitter flag is `jitter_y` (it was
# previously passed as `y_jitter`, which matches no parameter).
# The trailing ';' suppresses the cell's return-value repr.
interact(
    makeScatterPlots, 
    x_var=var_options,
    y_var=widgets.Dropdown(options=var_options, value='rating'),
    autoscale_x=True,
    x_max=widgets.FloatLogSlider(value=10, base=10, min=0, max=2.6, step=0.1),
    autoscale_y=True,
    y_max=widgets.FloatLogSlider(value=10, base=10, min=0, max=2.6, step=0.1),
    jitter_y=False
);

interactive(children=(Dropdown(description='x_var', options=('err_sh', 'err_sym', 'err_eff', 'err_mrg', 'err_n…