## Notebook setup

In [None]:
# Standard libraries
import os
import sys

# Plotting libraries
import matplotlib.pyplot as plt
import seaborn as sns

# Scientific libraries
import numpy as np
import pandas as pd

import pymc3 as pm

# Internal libraries
sys.path.append('../../src')

# import lib.reconstruction.errors as errs
from lib.reconstruction.errors import get_errors_per_location
from lib.reconstruction.neighbors import get_adjacency, get_adjacency_per_location

from lib.reconstruction.bayes.data import BayesDFCompute
from lib.reconstruction.bayes.binomial import build_binomial_model

In [None]:
# Notebook configuration
pd.set_option('display.max_columns', 40)

sns.set_style('white')

colors = sns.cubehelix_palette(n_colors=2, start=0.5, hue=1, rot=.1, light=.65) 
colors += sns.cubehelix_palette(n_colors=2, start=2.5, hue=1, rot=.1, light=.65)

%matplotlib inline

## Load data

In [None]:
# Read the tidy data; the first CSV column becomes the index
tidy = pd.read_csv('../../etc/reconstruction/tidy_data.csv', index_col=0)

# Relabel the 'Naive' condition as 'Untrained' and coerce position IDs to int
tidy['Condition'] = tidy['Condition'].map(
    lambda label: 'Untrained' if label == 'Naive' else label
)
tidy['Position ID'] = tidy['Position ID'].map(int)

# Per-position attributes to keep
vals = ['Black Position', 'White Position', 'Is Real', 'Num Pieces']

# One row per position; each attribute collapses to its first unique value
board_set = tidy.pivot_table(
    index='Position ID',
    values=vals,
    aggfunc=lambda column: column.unique()[0],
)[vals]

### Data preprocessing

In [None]:
# Compute the adjacency of each location, one board_set row at a time
adjacencies = board_set.apply(get_adjacency_per_location, axis=1)

# One output column per adjacency measure
adjacency_column_names = ['adjacency_all', 'adjacency_same', 'adjacency_opposite']

# Expand the per-position results into a frame aligned with board_set
adjacency_df = pd.DataFrame(
    adjacencies.tolist(),
    columns=adjacency_column_names,
    index=board_set.index,
)

In [None]:
def get_occupied_mask(row):
    """Return the element-wise sum of the black and white position flags.

    `row` is indexed by 'Black Position' and 'White Position'. Each entry is
    iterated element by element, every element is converted with `int`, and
    the two resulting arrays are added. The sum is returned as a plain list.
    """
    black = np.stack(list(map(int, row['Black Position'])))
    white = np.stack(list(map(int, row['White Position'])))
    return (black + white).tolist()

def get_condition_mask(condition, n_locations=36):
    """Repeat a condition label once per board location.

    Parameters
    ----------
    condition : object
        Label to repeat.
    n_locations : int, optional
        Number of repetitions. Defaults to 36, the value that was previously
        hard-coded, so single-argument callers are unaffected.

    Returns
    -------
    list
        A list holding `condition` `n_locations` times.
    """
    return [condition] * n_locations

# Row-wise: attach the summed position flags to every row of tidy
tidy['occupied'] = tidy.apply(get_occupied_mask, axis='columns')
# Element-wise: attach the repeated condition label to every row of tidy
tidy['condition_mask'] = tidy['Condition'].map(get_condition_mask)

In [None]:
# Look up each row's adjacency entries through its Position ID
for adjacency_col in ('adjacency_same', 'adjacency_opposite'):
    tidy[adjacency_col] = tidy['Position ID'].map(adjacency_df[adjacency_col])

In [None]:
# Auxiliary data structures

# Give every board the same list of location indices 0..35 for convenience
board_set['location_idx'] = np.tile(
    np.arange(36, dtype=np.uint8), (len(board_set), 1)
).tolist()

# Distance of each cell of a 4 x 9 grid from the grid's geometric center,
# repeated for every board as a dummy field
blank_board = np.zeros((4, 9))
center = tuple(dim / 2 - .5 for dim in blank_board.shape)

distances = np.sqrt(
    np.square(np.argwhere(blank_board == 0) - center).sum(axis=1)
)
board_set['distance_to_center'] = np.tile(distances, (len(board_set), 1)).tolist()

In [None]:
# Assign each distinct subject ID a consecutive integer index
unique_ids = np.arange(tidy['Subject ID'].unique().size)
subject_idx_map = {
    subject: idx
    for subject, idx in zip(tidy['Subject ID'].unique(), unique_ids)
}

tidy['subject_idx'] = tidy['Subject ID'].map(subject_idx_map)

In [None]:
get_model_df = BayesDFCompute()

# Build the model frame, then keep only rows whose 'occupied' value is '0'
# (compare against '1' instead to select the complementary rows).
# NOTE(review): '0' presumably marks locations holding no piece - confirm
# against BayesDFCompute.
model_df = get_model_df(tidy, board_set)
model_df = model_df[model_df['occupied'] == '0']

In [None]:
# Boolean row selectors over model_df: training condition ...
trained_sel = model_df['condition_mask'].eq('Trained')
untrained_sel = model_df['condition_mask'].eq('Untrained')
# ... and position type ('1' selects natural, '0' selects synthetic)
natural_sel = model_df['position_type'].eq('1')
synthetic_sel = model_df['position_type'].eq('0')

### Difference between stimulus types, per condition

In [None]:
# Choose error type
error_type = 'errors_1'

# Row selectors for each (position type, condition) combination
nt_sel = natural_sel & trained_sel
st_sel = synthetic_sel & trained_sel

nu_sel = natural_sel & untrained_sel
su_sel = synthetic_sel & untrained_sel

# Outer key: the factor level held fixed. Inner keys: the two groups compared
# within it. The inner keys are spelled the same way in every entry
# ('natural'/'synthetic' or 'trained'/'untrained').
data_dict_schema = {'trained': {'natural': nt_sel, 'synthetic': st_sel},
                    'untrained': {'natural': nu_sel, 'synthetic': su_sel},
                    'natural': {'trained': nt_sel, 'untrained': nu_sel},
                    'synthetic': {'trained': st_sel, 'untrained': su_sel}}

# For every group, collect the position ids ('x') and the chosen error
# values ('y') of the selected rows as integer arrays
data_dict = {
    k_static: {
        k_compare: {'x': model_df.loc[v_compare, 'position_id'].values.astype(int),
                    'y': model_df.loc[v_compare, error_type].values.astype(int)}
        for k_compare, v_compare in v_static.items()}
    for k_static, v_static in data_dict_schema.items()
}

In [None]:
def run_inference(data_dict_item, random_seed=None):
    """Build the binomial model, sample it, print a ROPE summary, and plot.

    Parameters
    ----------
    data_dict_item : dict
        Passed unchanged to `build_binomial_model`.
    random_seed : optional
        Forwarded to `pm.sample`. The default (None) leaves sampling
        unseeded; pass a value to make the run reproducible.

    Returns
    -------
    tuple
        `(model, trace, ax)`: the built model, the sampled trace, and the
        return value of `pm.plot_posterior`.
    """
    model = build_binomial_model(data_dict_item)

    nuts_kwargs = {'target_accept': .98}

    with model:
        # The result must be bound to `trace`: every statement below reads it
        trace = pm.sample(16000,
                          cores=4, tune=4000,
                          nuts_kwargs=nuts_kwargs,
                          random_seed=random_seed)

    sample = trace.get_values('difference in means')

    # Share of posterior samples inside, below, and above the (-.01, .01) ROPE
    n_samples = len(sample)
    in_rope = (-.01 < sample) & (sample < .01)
    print('p ROPE', np.count_nonzero(in_rope) / n_samples)
    print('p < ROPE', np.count_nonzero(sample < -.01) / n_samples)
    print('p > ROPE', np.count_nonzero(sample > .01) / n_samples)

    ax = pm.plot_posterior(trace, var_names=['difference in means'],
                           ref_val=0, credible_interval=.95,
                           kind='hist', figsize=(4, 3))

    return model, trace, ax

#### Trained

In [None]:
# Build, sample, and plot the comparison stored under data_dict['trained']
trained_model, trained_trace, trained_ax = run_inference(data_dict['trained'])

# Label the posterior plot; the trailing ';' suppresses the cell's text output
plt.setp(trained_ax, 
         title='Natural - Synthetic; Trained subjects', 
         xlabel=r'$\Delta$ Type I error rate');

#### Untrained

In [None]:
# Build the model for the two groups stored under data_dict['untrained']
untrained_model = build_binomial_model(data_dict['untrained'])

In [None]:
# Sample the untrained-subjects model: 16000 draws after 4000 tuning steps,
# on 4 cores, with a target acceptance of .98
nuts_kwargs = dict(target_accept=.98)
with untrained_model:
    untrained_trace = pm.sample(draws=16000, tune=4000, cores=4,
                                nuts_kwargs=nuts_kwargs)

In [None]:
sns.set_style('white')

# Posterior histogram of the difference in means, with a reference line at 0
ax = pm.plot_posterior(
    untrained_trace,
    var_names=['difference in means'],
    kind='hist',
    ref_val=0,
    credible_interval=.95,
    figsize=(4, 3),
)
plt.setp(
    ax,
    title='Natural - Synthetic; Untrained subjects',
    xlabel=r'$\Delta$ error rate',
);

### Difference between conditions, per stimulus type

####  Natural

In [None]:
# Build the model for the two groups stored under data_dict['natural']
natural_model = build_binomial_model(data_dict['natural'])

In [None]:
# Sample the natural-positions model: 16000 draws after 4000 tuning steps,
# on 4 cores, with a target acceptance of .98
nuts_kwargs = dict(target_accept=.98)
with natural_model:
    natural_trace = pm.sample(draws=16000, tune=4000, cores=4,
                              nuts_kwargs=nuts_kwargs)

In [None]:
# Posterior histogram of the difference in means, with a reference line at 0
ax = pm.plot_posterior(
    natural_trace,
    var_names=['difference in means'],
    kind='hist',
    ref_val=0,
    credible_interval=.95,
    figsize=(4, 3),
)
plt.setp(
    ax,
    title='Trained - Untrained; Natural positions',
    xlabel=r'$\Delta$ error rate',
);

In [None]:
# Share of posterior samples inside, below, and above the (-.01, .01) ROPE
sample = natural_trace.get_values('difference in means')

print('p ROPE', np.count_nonzero((-.01 < sample) & (sample < .01)) / len(sample))
print('p < ROPE', np.count_nonzero(sample < -.01) / len(sample))
print('p > ROPE', np.count_nonzero(sample > .01) / len(sample))

#### Synthetic

In [None]:
# Build the model for the two groups stored under data_dict['synthetic']
synthetic_model = build_binomial_model(data_dict['synthetic'])

In [None]:
# Sample the synthetic-positions model: 16000 draws after 4000 tuning steps,
# on 4 cores, with a target acceptance of .98
nuts_kwargs = dict(target_accept=.98)
with synthetic_model:
    synthetic_trace = pm.sample(draws=16000, tune=4000, cores=4,
                                nuts_kwargs=nuts_kwargs)

In [None]:
sns.set_style('white')

# Posterior histogram of the difference in means, with a reference line at 0
# and values rounded to 3 places
ax = pm.plot_posterior(
    synthetic_trace,
    var_names=['difference in means'],
    kind='hist',
    round_to=3,
    ref_val=0,
    credible_interval=.95,
    figsize=(4, 3),
)
plt.setp(
    ax,
    title='Trained - Untrained; Synthetic positions',
    xlabel=r'$\Delta$ error rate',
);

In [None]:
# Share of posterior samples inside, below, and above the (-.01, .01) ROPE
sample = synthetic_trace.get_values('difference in means')

print('p ROPE', np.count_nonzero((-.01 < sample) & (sample < .01)) / len(sample))
print('p < ROPE', np.count_nonzero(sample < -.01) / len(sample))
print('p > ROPE', np.count_nonzero(sample > .01) / len(sample))