In [None]:
# Imports
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as sts
import seaborn as sns

from lib.utility_functions import *
from lib.exp4 import *

# Config
sns.set_style('white')
sns.set_context('poster')

pd.set_option('display.max_columns', 40)

% matplotlib inline

In [None]:
# Load the tidy dataframe written out by "4 Data Analysis and Exploration";
# the first CSV column is used as the row index.
tidy = pd.read_csv(
    './tidy_data.csv',
    index_col=0,
)
tidy.head()

In [None]:
# Contingency table of total error counts: one row per condition, one column
# per error type.
freq_table = tidy.pivot_table(
    index='Condition',
    values=['Type I Errors', 'Type II Errors', 'Type III Errors'],
    # String alias instead of np.sum: same result, but passing the NumPy
    # callable triggers a FutureWarning in recent pandas releases.
    aggfunc='sum'
)

freq_table

In [None]:
# Turn counts into conditional probabilities p(error type | condition) by
# dividing every row by its own total. Normalising all rows in one step avoids
# hard-coding the condition labels (any other condition would have been left
# as raw counts) and avoids writing floats into an integer-typed copy.
prob_table = freq_table.div(freq_table.sum(axis=1), axis=0)

In [None]:
# Display the per-condition error-type proportions computed in the previous cell
prob_table

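Null hypothesis for the chi-square test: the distribution over error types $e$ does not depend on the condition $c$, i.e. $p(e \mid c=\text{naive}) = p(e \mid c=\text{trained}) = p(e)$.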


In [None]:
# Chi-square test of independence on the condition x error-type count table
test_result = sts.chi2_contingency(freq_table)
chisq, p, dof, ex = test_result

report = 'Chi-square test result: {:.2f}, p={:.4f}'
print(report.format(chisq, p))

In [None]:
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(16, 9), dpi=300)

# One group of bars per error type; the two conditions sit side by side,
# shifted left/right of the integer tick position.
positions = np.arange(3)
bar_specs = [
    # (row in prob_table, horizontal offset, legend label)
    ('Naive', -.175, 'Untrained'),
    ('Trained', .175, 'Trained'),
]
for condition, offset, label in bar_specs:
    axes.bar(
        positions + offset, prob_table.loc[condition],
        width=.3, edgecolor='white', label=label
    )
axes.legend(loc=0)

# Axis labels and one tick per error type
plt.setp(
    axes,
    ylabel='Error Probability',
    xlabel='Error Type',
    xticks=[0, 1, 2],
    xticklabels=['Type I', 'Type II', 'Type III']
)

sns.despine()

In [None]:
# Reshape to long form for plotting (for own edification): one row per
# (observation, error type), keeping the condition alongside.
error_columns = ['Type I Errors', 'Type II Errors', 'Type III Errors']
melted = pd.melt(
    tidy,
    id_vars=['Condition'],
    value_vars=error_columns,
    var_name='Error Type',
    value_name='Num Errors'
)

# Duplicate of 'Num Errors', used further down as the column to count
melted = melted.assign(**{'Num Errors Dummy': melted['Num Errors']})
melted.head()

In [None]:
# NOTE(review): 'Nume' looks like a truncated column name, and `x` is not used
# by any other visible cell. Plain indexing raises KeyError (halting
# Restart & Run All) if the column is absent; `.get` yields None in that case
# and the same Series otherwise. Confirm the intended column or drop this cell.
x = tidy.get('Nume')

In [None]:


# Bar plot of the number of errors per error type, split by condition (bar
# heights are seaborn's default estimate, the mean).
# `catplot` is the current name of `factorplot` (renamed in seaborn 0.9 and
# since removed); its `size` argument is likewise now called `height`.
g = sns.catplot(
    x='Error Type', y='Num Errors', hue='Condition',
    data=melted,
    kind='bar',
    legend_out=True, height=8, aspect=16/9
)

sns.despine()

In [None]:
# Count how many observations fall at each error count, per error type
# (rows: number of errors, columns: error type).
pivoted = melted.pivot_table(
    index='Num Errors', columns='Error Type',
    values='Num Errors Dummy',
    aggfunc=len
)

# Back to long form for plotting. 'Num Errors' is carried through as an id
# column so every row keeps its true error count; recovering it afterwards as
# `row position % 16` was only correct when exactly 16 distinct counts occurred.
remelted = pd.melt(
    pivoted.reset_index(),
    id_vars=['Num Errors'],
    value_vars=['Type I Errors', 'Type II Errors', 'Type III Errors'],
    var_name='Error Type', value_name='Count'
)

# `catplot`/`height` are the current names of `factorplot`/`size`
# (renamed in seaborn 0.9, old names since removed).
g = sns.catplot(
    x='Num Errors', y='Count', hue='Error Type',
    data=remelted,
    kind='bar', legend_out=True,
    height=8, aspect=16/9
)

# Despine the grid that was just drawn; `axes` refers to the earlier bar chart.
g.despine()

## Old

The analysis below came out of a miscommunication: it runs the chi-square test over error type × number of occurrences of each error, rather than over condition × error type as above.

In [None]:
# Duplicate each error column so pivot_table has a separate column to count
# while grouping on the original one.
tidy['Type I Error Dummies'] = tidy['Type I Errors']
tidy['Type II Error Dummies'] = tidy['Type II Errors']
tidy['Type III Error Dummies'] = tidy['Type III Errors']

# Get number of observations at each variable level (number of errors)
e1 = tidy.pivot_table(index='Type I Errors', values='Type I Error Dummies', aggfunc=len)
e2 = tidy.pivot_table(index='Type II Errors', values='Type II Error Dummies', aggfunc=len)
e3 = tidy.pivot_table(index='Type III Errors', values='Type III Error Dummies', aggfunc=len)

# Combine the three count columns into one table indexed by number of errors.
# Concatenating aligns on the union of levels seen for any error type (the
# previous placeholder frame only had Type II's levels, so a level unique to
# Type I or III could not be stored). Levels an error type never reached
# become 0, and the cast keeps the table integer-typed rather than object.
e = pd.concat(
    [
        e1['Type I Error Dummies'],
        e2['Type II Error Dummies'],
        e3['Type III Error Dummies'],
    ],
    axis=1, keys=['I', 'II', 'III']
).sort_index().fillna(0).astype(int)

Some of the data is missing. To work with the missing data, I treat errors >= 12 as a single bin. The chi-square test is sensitive to binning choices, but here the difference is so stark that it hardly matters. Is there a principled way to deal with this circumstance?

In [None]:
# Display the observation-count table (rows: number of errors; columns: error type I / II / III)
e

In [None]:
# Bin observations of greater than or equal to 12 errors together.
# Rows are selected by label, so this no longer relies on row position 13
# lining up with label 13. All three error types are pooled (III was skipped
# before), and the pooled rows are dropped afterwards so that re-running the
# cell does not add them in a second time.
bin_start = 12
overflow = e.index > bin_start
e.loc[bin_start] += e.loc[overflow].sum()
e = e.loc[~overflow]

In [None]:
# SciPy ChiSq test on error counts 1 through 12, where row 12 is the pooled
# ">= 12" bin. Label-based slicing includes 12; the previous positional
# `iloc[1:12]` stopped at row 11 and left the pooled bin out of the test.
# NOTE(review): the zero-error row is still excluded, as before; confirm
# that this is intended.
chisq, p, dof, ex = sts.chi2_contingency(e.loc[1:12])
print('Chi-square test result: {:.2f}, p={:.4f}'.format(chisq, p))

In [None]:
# Make tidier data on error types and check out figures
#    (for own edification)
melted = pd.melt(
    tidy,
    value_vars=['Type I Errors', 'Type II Errors', 'Type III Errors'],
    var_name='Error Type', value_name='Num Errors'
)

# Duplicate of 'Num Errors', used below as the column to count
melted['Num Errors Dummy'] = melted['Num Errors']

# Number of observations at each error count, per error type
pivoted = melted.pivot_table(
    index='Num Errors', columns='Error Type',
    values='Num Errors Dummy',
    aggfunc=len
)

# Back to long form for plotting. 'Num Errors' is carried through as an id
# column so every row keeps its true error count; recovering it afterwards as
# `row position % 16` was only correct when exactly 16 distinct counts occurred.
remelted = pd.melt(
    pivoted.reset_index(),
    id_vars=['Num Errors'],
    value_vars=['Type I Errors', 'Type II Errors', 'Type III Errors'],
    var_name='Error Type', value_name='Count'
)

# `catplot`/`height` are the current names of `factorplot`/`size`
# (renamed in seaborn 0.9, old names since removed).
g = sns.catplot(
    x='Num Errors', y='Count', hue='Error Type',
    data=remelted,
    kind='bar', legend_out=True,
    height=8, aspect=16/9
)

# Despine the grid that was just drawn; `axes` refers to the earlier bar chart.
g.despine()

### Comments

One alternative to the chi-square test could be the Kolmogorov-Smirnov test. However, I think the raw data violate the KS test's continuity assumption, since they are relatively low count values in [0, 15]; the chi-square test is probably the more natural choice.

In [None]:
# Two-sample Kolmogorov-Smirnov test comparing the Type II and Type III error-count columns
sts.ks_2samp(tidy['Type II Errors'], tidy['Type III Errors'])