In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as sts
import seaborn as sns
import statsmodels.formula.api as smf
import statsmodels.api as sm

from lib.utility_functions import *
from lib.exp4 import *

sns.set_style('white')
sns.set_context('paper')

colors = sns.color_palette().as_hex()

pd.set_option('display.max_columns', 40)

% matplotlib inline

In [None]:
# Load the trial table; the CSV's first column becomes the row index.
tidy = pd.read_csv('./tidy_data.csv', index_col=0)

# Relabel the 'Naive' condition as 'Untrained'; every other value is kept.
condition = tidy['Condition']
tidy['Condition'] = condition.mask(condition == 'Naive', 'Untrained')

# Signed difference between the 'Num Pieces (final)' and 'Num Pieces' columns.
tidy['Num Dif'] = tidy['Num Pieces (final)'].sub(tidy['Num Pieces'])
tidy.head()

In [None]:
# First 'Condition' value recorded for each 'Subject ID' (one row per subject).
condition_by_subject = (
    tidy.drop_duplicates('Subject ID').set_index('Subject ID')['Condition']
)

# Per-subject mean of 'Num Dif' for every ('Is Real', 'Num Pieces') cell.
mean_piv = tidy.pivot_table(
    index='Subject ID', values='Num Dif', columns=['Is Real', 'Num Pieces'],
    aggfunc='mean'
)
mean_piv['Condition'] = mean_piv.index.map(condition_by_subject)

# Same layout holding the per-cell sample standard deviation. The string 'std'
# is used instead of np.std: pandas currently maps the callable to its own
# groupby std and has deprecated that implicit mapping.
std_piv = tidy.pivot_table(
    index='Subject ID', values='Num Dif', columns=['Is Real', 'Num Pieces'],
    aggfunc='std'
)
# Label std_piv from its OWN index. It was previously labelled from
# mean_piv.index, which is only correct while both pivots keep the exact same
# subjects in the same order.
std_piv['Condition'] = std_piv.index.map(condition_by_subject)

In [None]:
# Row mask over mean_piv selecting subjects in the 'Trained' condition.
c_filter = mean_piv['Condition'].eq('Trained')

In [None]:
# Two side-by-side panels; the plotting code below titles them
# 'Real Positions' and 'Fake Positions'.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(8, 3), dpi=300)

# Row masks over `tidy`: trained-condition trials and real-position trials.
tc_filter = tidy['Condition'].eq('Trained')
tp_filter = tidy['Is Real'].eq(True)


def errbar_plot(df, ax, **kwargs):
    """Plot the across-subject mean recalled piece count with SEM error bars.

    Trials are first averaged within each subject for every 'Num Pieces'
    value. The plotted point is the mean of those subject averages and the
    error bar is their standard error.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Subject ID', 'Num Pieces' and 'Num Pieces (final)'.
    ax : object with an ``errorbar(x, y, yerr=..., **kwargs)`` method
        The axes to draw on.
    **kwargs
        Forwarded unchanged to ``ax.errorbar``.

    Returns
    -------
    None
    """
    piv = df.pivot_table(
        index='Subject ID', values='Num Pieces (final)', columns='Num Pieces'
    )

    x = piv.columns.values
    y = piv.mean(axis=0)
    # DataFrame.sem divides by the number of non-missing subjects in each
    # column. Dividing by len(piv) understated the error whenever a subject
    # had no trials at a given piece count.
    err = piv.sem(axis=0)

    ax.errorbar(x, y, yerr=err, **kwargs)

    return None


# One panel per position type: (axes index, row mask over `tidy`, title).
panels = [
    (0, tp_filter, 'Real Positions'),
    (1, ~tp_filter, 'Fake Positions'),
]
# One error-bar series per condition: (row mask, colour, legend label).
groups = [
    (tc_filter, colors[0], 'Trained'),
    (~tc_filter, colors[1], 'Untrained'),
]

for panel_idx, position_mask, _ in panels:
    for condition_mask, color, label in groups:
        errbar_plot(
            tidy.loc[condition_mask & position_mask],
            ax=axes[panel_idx], color=color, alpha=.9, label=label
        )

## Make it fancy

# Grey line through the points (i, i) for i in 0..19 on both panels.
for ax in axes:
    ax.plot(np.arange(20), color='grey')

axes[0].legend(loc='best')

x_ticks = np.arange(11, 19, 1)
y_ticks = np.arange(9, 20, 1)
plt.setp(
    axes,
    ylabel='Recalled # Pieces', xlabel='Original # Pieces',
    xticks=x_ticks, xticklabels=x_ticks,
    yticks=y_ticks, yticklabels=y_ticks
)

for panel_idx, _, title in panels:
    axes[panel_idx].set_title(title)

sns.despine()

In [None]:
# Short aliases used as the variable names in the 'y ~ x' regression formulas.
tidy['x'] = tidy['Num Pieces']
tidy['y'] = tidy['Num Pieces (final)']

# All trials of the subject that appears in the first row of `tidy`.
first_subject_id = tidy['Subject ID'].values[0]
probe = tidy.loc[tidy['Subject ID'] == first_subject_id]

In [None]:
probe.head()

In [None]:
# Trial run of the negative-binomial GLM on one hard-coded subject.
NBFam = sm.families.NegativeBinomial()
# NBFam = sm.families.Gaussian()


# NOTE(review): S is empty if this subject ID is absent from the data or is not
# in the 'Trained' condition - confirm the ID against tidy_data.csv.
p = tidy['Subject ID'].eq(1464109333721)
c = tidy['Condition'].eq('Trained')
r = tidy['Is Real'].eq(True)
S = tidy.loc[p & r & c]

glm_spec = smf.glm(formula='y ~ x', data=S, family=NBFam)
model = glm_spec.fit(method='nm', start_params=[0, 0], maxiter=10000)

In [None]:
from statsmodels.discrete.discrete_model import NegativeBinomial

In [None]:
# Same 'y ~ x' formula on the same single-subject frame S, this time through
# the discrete-model NegativeBinomial class.
nbm = NegativeBinomial.from_formula(formula='y ~ x', data=S)

m = nbm.fit(method='bfgs', maxiter=10000)

In [None]:
m.params

In [None]:
m.predict(pd.DataFrame({'x': np.arange(20)}))

In [None]:
# 2 x 20 design matrix: a row of ones (intercept) over a row of 0..19.
x_test = np.vstack((np.ones(20), np.arange(20)))

In [None]:
# Manual prediction: exp(coefficients @ design matrix). The slice [:-1] drops
# the last fitted parameter - presumably the dispersion term, which is not a
# regression coefficient; confirm against m.params.
coefficients = m.params.values[np.newaxis, :-1]
np.exp(coefficients @ x_test)

In [None]:
len(tidy['Subject ID'].unique())

In [None]:
# GLM family instance that get_nbr_params (defined below in this cell) reads as
# a global. Same construction as the NBFam assigned in the earlier
# single-subject cell.
NBFam = sm.families.NegativeBinomial()

def get_nbr_params(condition='Trained', is_real=True):
    """Fit one 'y ~ x' GLM per subject and collect the fitted parameters.

    Only rows of the global `tidy` whose 'Condition' equals `condition` and
    whose 'Is Real' equals `is_real` are used. Each subject's rows are fitted
    separately with the global `NBFam` family (BFGS, up to 10000 iterations).

    Parameters
    ----------
    condition : str
        Value matched against tidy['Condition'].
    is_real : bool
        Value matched against tidy['Is Real'].

    Returns
    -------
    pandas.DataFrame
        One row per subject ID (in order of first appearance in the subset)
        holding the fitted parameters, plus constant 'Condition' and
        'Is Real' columns echoing the arguments.
    """
    in_condition = tidy['Condition'] == condition
    in_position_type = tidy['Is Real'] == is_real
    subset = tidy.loc[in_condition & in_position_type]

    subject_ids = subset['Subject ID'].unique()

    fitted_params = []
    for subject_id in subject_ids:
        subject_trials = subset.loc[subset['Subject ID'] == subject_id]
        fit = smf.glm(formula='y ~ x', data=subject_trials, family=NBFam).fit(
            method='bfgs', maxiter=10000
        )
        fitted_params.append(fit.params)

    params = pd.DataFrame(fitted_params, index=subject_ids)
    params['Condition'] = condition
    params['Is Real'] = is_real

    return params
#     return pvals

In [None]:
# Fit every (condition, position type) combination, in the order
# Trained/real, Trained/fake, Untrained/real, Untrained/fake, then stack.
param_dfs = [
    get_nbr_params(condition=condition_label, is_real=real_flag)
    for condition_label in ('Trained', 'Untrained')
    for real_flag in (True, False)
]

params = pd.concat(param_dfs)

In [None]:
params

In [None]:
# Mean and standard error of the per-subject 'x' coefficient for each
# Condition x 'Is Real' cell. String aggregators are used because pandas has
# deprecated the implicit mapping of np.mean to its own groupby mean. 'sem'
# divides by the number of non-missing coefficients, whereas the previous
# lambda divided by len(x), which also counts NaN.
means = params.pivot_table(index='Condition', values='x', columns='Is Real', aggfunc='mean')
sems = params.pivot_table(index='Condition', values='x', columns='Is Real', aggfunc='sem')

In [None]:
means

In [None]:
sems

In [None]:
fig, axes = plt.subplots(1, 2, figsize=(8, 3), dpi=300)

# (axes, key into the 'Is Real' columns of means/sems, panel title)
panels = [
    (axes[0], True, 'Real Positions'),
    (axes[1], False, 'Fake Positions'),
]
# (bar x-position, row key into means/sems, bar colour)
conditions = [
    (0, 'Trained', colors[0]),
    (1, 'Untrained', colors[1]),
]

for ax, is_real, title in panels:
    # Only the first panel's bars carry labels, so the figure legend shows
    # each condition once.
    for xpos, condition, color in conditions:
        bar_kwargs = {'label': condition} if is_real else {}
        ax.bar([xpos], means.loc[condition, is_real], color=color, **bar_kwargs)

    for xpos, condition, _ in conditions:
        ax.errorbar(
            [xpos], means.loc[condition, is_real],
            yerr=sems.loc[condition, is_real], color='black'
        )

    ax.set_title(title)

# Figure-level legends do not support automatic 'best' placement (loc=0).
# 'upper right' is the position older matplotlib fell back to, and it avoids
# the error newer versions raise.
plt.figlegend(loc='upper right')

plt.setp(
    axes,
    ylim=[0, .05], ylabel='Regression Coefficient',
    xticks=[0, 1], xticklabels=['Trained', 'Untrained']
)

sns.despine()

In [None]:
# Row masks over `params`, reused by the t-test cells that follow.
c = params['Condition'].eq('Trained')
r = params['Is Real'].eq(True)

# One-sample t-test of the Trained subjects' 'x' coefficients against 1.
# NOTE(review): these coefficients come from a negative-binomial fit and the
# bar plot above uses ylim [0, .05] - confirm that 1 is the intended null.
# NOTE(review): mask `c` alone keeps both the real and fake rows of every
# Trained subject - confirm pooling them is intended.
sts.ttest_1samp(params.loc[c, 'x'].to_numpy(), popmean=1)

In [None]:
# One-sample t-test of the non-Trained rows' 'x' coefficients against 1.
sts.ttest_1samp(params.loc[~c, 'x'].to_numpy(), popmean=1)

In [None]:
# One-sample t-test of the real-position 'x' coefficients against 1.
sts.ttest_1samp(params.loc[r, 'x'].to_numpy(), popmean=1)

In [None]:
# One-sample t-test of the fake-position 'x' coefficients against 1.
sts.ttest_1samp(params.loc[~r, 'x'].to_numpy(), popmean=1)

In [None]:
# Trained vs. non-Trained subjects, real positions only (independent samples).
trained_real = params.loc[c & r, 'x'].values
untrained_real = params.loc[~c & r, 'x'].values
sts.ttest_ind(trained_real, untrained_real)

In [None]:
# Trained vs. non-Trained subjects, fake positions only (independent samples).
trained_fake = params.loc[c & ~r, 'x'].values
untrained_fake = params.loc[~c & ~r, 'x'].values
sts.ttest_ind(trained_fake, untrained_fake)

In [None]:
# Real vs. fake coefficients within the Trained condition. `params` is indexed
# by subject ID and holds one real and one fake row per subject, so the two
# samples are repeated measures: align them by subject and run a paired
# t-test rather than an independent-samples one.
trained_pairs = pd.concat(
    [params.loc[c & r, 'x'], params.loc[c & ~r, 'x']],
    axis=1, keys=['real', 'fake']
).dropna()  # keep only subjects that have both estimates

sts.ttest_rel(trained_pairs['real'], trained_pairs['fake'])

In [None]:
# Real vs. fake coefficients within the non-Trained condition. `params` is
# indexed by subject ID and holds one real and one fake row per subject, so
# the two samples are repeated measures: align them by subject and run a
# paired t-test rather than an independent-samples one.
untrained_pairs = pd.concat(
    [params.loc[~c & r, 'x'], params.loc[~c & ~r, 'x']],
    axis=1, keys=['real', 'fake']
).dropna()  # keep only subjects that have both estimates

sts.ttest_rel(untrained_pairs['real'], untrained_pairs['fake'])

In [None]:
# Per-subject negative-binomial GLMs of 'y ~ x', fitted separately on the
# real-position and fake-position trials. Both lists follow the order of
# tidy['Subject ID'].unique().
real_models = []
fake_models = []

for subject_id in tidy['Subject ID'].unique():
    probe = tidy.loc[tidy['Subject ID'] == subject_id]
    is_real = probe['Is Real']

    # Is it possible that x should be categorical?
    for trials, bucket in ((probe.loc[is_real], real_models),
                           (probe.loc[~is_real], fake_models)):
        model = smf.glm(
            formula='y ~ x', data=trials,
            family=sm.families.NegativeBinomial()
        ).fit()
        bucket.append(model)


In [None]:
m = real_models[0]

In [None]:
# Build the table before using it: the rename below previously sat in a cell
# ahead of this definition and raised NameError on a fresh kernel.
# One row of fitted parameters per subject, indexed by subject ID in the same
# order the real-position models were fitted.
real_params = pd.DataFrame(
    [m.params for m in real_models], 
    index=tidy['Subject ID'].unique()
)

# Attach each subject's condition (first 'Condition' value found in tidy).
real_params['Condition'] = real_params.index.map(lambda x: tidy.loc[tidy['Subject ID'] == x, 'Condition'].values[0])

# Display-only view with renamed columns. rename returns a new frame, so
# real_params itself keeps its 'Intercept' / 'x' column names.
real_params.rename(columns={'Intercept': 'I_p', 'x': 'x_p'})


In [None]:
# Mean and standard error of the 'x' coefficient across Trained subjects
# (real-position fits).
is_trained = real_params['Condition'].eq('Trained')
real_trained_params = real_params.loc[is_trained]

trained_slopes = real_trained_params['x']
real_trained_coef_mean = trained_slopes.mean()
real_trained_coef_sem = trained_slopes.std() / np.sqrt(len(real_trained_params))
print(real_trained_coef_mean, real_trained_coef_sem)

In [None]:
# Mean and standard error of the 'x' coefficient across the subjects that are
# NOT in the 'Trained' condition (real-position fits). The results used to be
# stored under real_trained_* names, silently overwriting the Trained values.
real_untrained_params = real_params.loc[~(real_params['Condition'] == 'Trained')]

real_untrained_coef_mean = real_untrained_params['x'].mean()
real_untrained_coef_sem = real_untrained_params['x'].std() / np.sqrt(len(real_untrained_params))
print(real_untrained_coef_mean, real_untrained_coef_sem)

In [None]:
# `summary` was never assigned anywhere in this notebook, so this cell raised
# NameError on a fresh kernel.
# NOTE(review): assumed to be the summary of `m` (set to real_models[0] in an
# earlier cell) - confirm which model was intended.
summary = m.summary()
table = summary.tables[1]

In [None]:
# A dangling 'table.' is a syntax error; display the table instead.
table