In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats

%matplotlib inline

In [None]:
# Directory holding the game CSVs (the location is specific to the author's machine).
data_dir = os.path.expanduser('~/Google Drive/Bas Zahy Gianni - Games/Data/4_rcn/Clean/Game')
# Match the real '.csv' extension (a bare f[-3:] == 'csv' test also accepts names that merely
# end in the letters "csv"), and sort because os.listdir returns entries in arbitrary order:
# sorting keeps the row order of df reproducible between runs and machines.
files = sorted(f for f in os.listdir(data_dir) if f.endswith('.csv'))
filepaths = [os.path.join(data_dir, f) for f in files]
if not filepaths:
    # Fail with a message that names the directory instead of pandas' generic concat error.
    raise FileNotFoundError('No .csv files found in {}'.format(data_dir))
# Stack every file into one frame and replace the per-file indices with a fresh 0..n-1 index.
df = pd.concat([pd.read_csv(f) for f in filepaths]).reset_index(drop=True)

In [None]:
# Row selectors reused by the cells below.
trained = df['trained']                      # used as a boolean mask later (assumes bool dtype -- TODO confirm)
reconi = df['status'].eq('reconi')           # rows whose status is 'reconi'
reconf = df['status'].eq('reconf')           # rows whose status is 'reconf'
terminals = reconi | reconf                  # rows with either of the two statuses

In [None]:
# Prefix every subject id with its group flag: '1' for trained rows, '0' for the rest.
# NOTE(review): this rewrites df['subject'] in place, so running the cell a second time
# stacks another prefix on top -- re-run the loading cell first.
for group_prefix, group_mask in (('1', trained), ('0', ~trained)):
    df.loc[group_mask, 'subject'] = group_prefix + df.loc[group_mask, 'subject']

# 'reconi' rows supply the reference ("true") strings; the original row label is kept in
# 'indexi' so each pair can be traced back to df.
ipos = df.loc[reconi, ['subject', 'bp', 'wp']].copy().reset_index()
ipos.columns = ['indexi', 'subject', 'bp true', 'wp true']
# 'reconf' rows supply the strings that are compared against the reference.
fpos = df.loc[reconf, ['bp', 'wp']].copy().reset_index()
fpos.columns = ['indexf', 'bp', 'wp']

# The two tables are paired purely by position (k-th 'reconi' row with k-th 'reconf' row).
# With unequal counts concat would pad the shorter table with NaN, which only surfaces
# later as an obscure error, so fail here with a clear message instead.
if len(ipos) != len(fpos):
    raise ValueError(
        "Cannot pair reconstruction rows: {} 'reconi' rows vs {} 'reconf' rows".format(
            len(ipos), len(fpos)))

comp = pd.concat([ipos, fpos], axis=1)

In [None]:
def string_to_array(board_string):
    """Convert a string of digit characters into a 1-D integer array.

    Each character becomes one array element, so '0110' -> array([0, 1, 1, 0]).
    """
    return np.array([int(ch) for ch in board_string], dtype=int)

def expand_row(row):
    """Decode the four board strings of a row into integer arrays.

    Reads the 'bp true', 'wp true', 'bp' and 'wp' entries of `row` and returns
    them, in that order, as a 4-tuple of arrays built by `string_to_array`.
    """
    columns = ('bp true', 'wp true', 'bp', 'wp')
    bpt, wpt, bp, wp = (string_to_array(row[name]) for name in columns)
    return bpt, wpt, bp, wp

def score(row):
    """Count positions where the 'bp' or the 'wp' array differs from its true counterpart.

    A position that differs in both arrays is counted once.
    """
    bpt, wpt, bp, wp = expand_row(row)
    bp_mismatch = bpt != bp
    wp_mismatch = wpt != wp
    # Size of the union, i.e. |A| + |B| - |A and B|.
    return (bp_mismatch | wp_mismatch).sum()

def extra_pieces(row):
    """Count positions where the compared board holds more than the true board.

    Occupancy at a position is the sum of the 'bp' and 'wp' entries there.
    """
    bpt, wpt, bp, wp = expand_row(row)
    true_occupancy = bpt + wpt
    placed_occupancy = bp + wp
    return (placed_occupancy > true_occupancy).sum()

def missing_pieces(row):
    """Count positions where the compared board holds less than the true board.

    Occupancy at a position is the sum of the 'bp' and 'wp' entries there.
    """
    bpt, wpt, bp, wp = expand_row(row)
    true_occupancy = bpt + wpt
    placed_occupancy = bp + wp
    return (true_occupancy > placed_occupancy).sum()

def wrong_color(row):
    """Count positions filled in the opposite array from the true one.

    Adds the positions set in 'bp true' but also set in 'wp' to the positions
    set in 'wp true' but also set in 'bp' (presumably black/white swaps).
    """
    bpt, wpt, bp, wp = expand_row(row)
    black_as_white = (bpt == 1) & (wp == 1)
    white_as_black = (wpt == 1) & (bp == 1)
    return black_as_white.sum() + white_as_black.sum()
    
def n_pieces(row):
    """Return the combined digit sum of the row's 'bp true' and 'wp true' strings."""
    per_color = [string_to_array(row[name]).sum() for name in ('bp true', 'wp true')]
    return per_color[0] + per_color[1]

# Row-wise tallies over the paired table; every result has one entry per row of comp.
npieces, missing, extra, wrong = (
    comp.apply(tally, axis=1)
    for tally in (n_pieces, missing_pieces, extra_pieces, wrong_color)
)

In [None]:
# Write the per-row tallies back onto the 'reconf' rows of df. Using .values drops comp's
# own index, so the values are assigned in order rather than aligned by label.
for column, tallies in (
    ('total errors', missing.values + extra.values + wrong.values),
    ('n initial', npieces.values),
    ('n missing', missing.values),
    ('n extra', extra.values),
    ('n wrong_color', wrong.values),
):
    df.loc[reconf, column] = tallies

In [None]:
# Overlaid histograms of 'total errors' on the 'reconf' rows, trained vs. untrained.
fig, axes = plt.subplots(1, 1, figsize=(8, 5))
trained_errors = df.loc[trained & reconf, 'total errors'].values
untrained_errors = df.loc[(~trained) & reconf, 'total errors'].values

# One unit-wide bin per error count 0..36 (36 = number of positions used in the z-test cell).
# The earlier edges np.arange(0, 36) stopped at 35, which merged counts 34 and 35 into one
# bar and silently dropped a count of 36.
error_bins = np.arange(0, 38, 1)

# Axes.hist replaces sns.distplot(..., kde=False): distplot has been deprecated since
# seaborn 0.11. alpha=0.4 reproduces distplot's default bar transparency.
for group_name, group_errors in (('Trained', trained_errors), ('Untrained', untrained_errors)):
    label = '{}: {:.2f}'.format(group_name, group_errors.mean())
    axes.hist(group_errors, bins=error_bins, alpha=0.4, label=label)

axes.set_xlabel('Total errors')
axes.set_ylabel('Count')
axes.legend()
sns.despine()

In [None]:
# Ref here for method: https://onlinecourses.science.psu.edu/stat414/node/268
# Pooled two-proportion z-test comparing the per-position error rate of the two groups.

n_positions = 36
n0 = trained_errors.size * n_positions     # Num possible error events = num trials * num positions
n1 = untrained_errors.size * n_positions
if n0 == 0 or n1 == 0:
    # An empty group would otherwise end in a division by zero / NaN statistic.
    raise ValueError('Both groups need at least one trial for the z-test')
p0 = trained_errors.sum() / n0
p1 = untrained_errors.sum() / n1

# Pooled error rate under the null hypothesis that both groups share one rate.
phat = ((n0 * p0) + (n1 * p1)) / (n0 + n1)

# 1.0/n keeps this a float division even where `/` on ints truncates.
z = (p0 - p1) / np.sqrt(phat * (1 - phat) * (1.0/n0 + 1.0/n1))
p_value = 2 * stats.norm.sf(np.abs(z))     # two-sided tail probability of |z|
# Report each group's own trial count; printing n0/36 alone showed only the trained group.
print('Trials per group: trained={}, untrained={}\nZ-Stat: {:.2f}\nTwo-sided p: {:.3g}'.format(
    trained_errors.size, untrained_errors.size, np.abs(z), p_value))