In [15]:
import numpy as np
import pandas as pd
import pystan
from scipy.stats import norm, multivariate_normal, invwishart, invgamma
from statsmodels.tsa.stattools import acf
import datetime
import sys
import os

from codebase.plot import * 
from codebase.file_utils import save_obj, load_obj

%matplotlib inline

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [16]:
# Locations of the saved objects produced by earlier runs.
data_log_dir = './log/20190523_140753_fabian_data/'
ps_log_dir = './log/simulation_fabian/'

# `data` is indexed below by keys such as 'y', 'J', 'K', 'N', 'beta', 'V_corr';
# `ps` is indexed by parameter name ('beta', 'V_corr', 'uu').
data = load_obj('data', data_log_dir)
ps = load_obj('ps', ps_log_dir)

In [17]:
# Display the 'y' array stored in the loaded `data` object (numpy elides the
# middle rows of a large array in its repr).
data['y']

array([[ 3.53682028e+00,  2.68123990e+00,  4.90685338e-02,
        -6.84641018e-02, -1.59598864e-02,  5.83294431e-01],
       [ 1.44849618e+00,  2.00391218e+00,  1.00848343e+00,
        -3.47193392e-01,  1.35661168e+00, -6.04239366e-01],
       [ 7.20268880e-01,  2.12797685e+00, -3.38242699e-01,
        -2.34728934e+00, -3.37984981e-03, -1.66006165e+00],
       ...,
       [ 3.00000000e+00,  3.00000000e+00,  3.00000000e+00,
         3.00000000e+00,  3.00000000e+00,  3.00000000e+00],
       [ 3.00000000e+00,  3.00000000e+00,  3.00000000e+00,
         3.00000000e+00,  3.00000000e+00,  3.00000000e+00],
       [ 3.00000000e+00,  3.00000000e+00,  3.00000000e+00,
         3.00000000e+00,  3.00000000e+00,  3.00000000e+00]])

In [18]:
%%opts Layout [fig_size=200]
# One trace plot per beta(j, k), compared against the value stored in data['beta'].
# Iteration order is j (outer) then k (inner), so the 2-column layout below
# fills row by row in that order.
plots = [
    plot_trace(
        ps['beta'][:, j, k],
        true_value=data['beta'][j, k],
        title='Posterior distribution for beta(%s,%s)' % (j, k),
    ).options(fig_inches=8, aspect=3)
    for j in range(data['J'])
    for k in range(data['K'])
]

# Open question kept from the original: use the same y-range for all plots?
layout = hv.Layout(plots).options(
    show_title=True,
    vspace=.3,
    absolute_scaling=False,
    normalize=False,
)

layout.cols(2)


In [31]:
%%opts Layout [fig_size=200]
# One trace plot per strictly-upper-triangular entry (j < k) of V_corr,
# compared against the value stored in data['V_corr'].
n_dims = data['K']
plots = [
    plot_trace(
        ps['V_corr'][:, j, k],
        true_value=data['V_corr'][j, k],
        title='Posterior distribution for V_corr(%s,%s)' % (j, k),
    ).options(fig_inches=10, aspect=3)
    for j in range(n_dims)
    for k in range(j + 1, n_dims)  # k > j only: skip the diagonal and mirrored entries
]

# Open question kept from the original: use the same y-range for all plots?
layout = hv.Layout(plots).options(
    show_title=True,
    vspace=.3,
    absolute_scaling=False,
    normalize=False,
)

layout.cols(2)


In [22]:
def get_residuals(ps_u, by_axis, absval = True, sort=False):
    """
    Summarise standardized posterior residuals.

    The posterior mean of every entry of u is divided by its posterior
    standard deviation, and the resulting matrix is summed along `by_axis`.

    params
    ps_u     posterior samples of u; axis 0 indexes the posterior draws
    by_axis  axis of the standardized matrix that is summed over:
             0=residuals per item, 1=residuals per subject
    absval   if True sum absolute standardized residuals, otherwise sum the
             signed values (positive and negative entries can cancel)
    sort     if True order rows by descending residual; the original row
             labels are kept in the index

    returns
    DataFrame with columns ['subj_id', 'residual']. The first column is the
    position along the axis that was NOT summed over; it is named 'subj_id'
    for every `by_axis` value, so for by_axis=0 it actually holds item
    positions.

    Entries whose posterior standard deviation is exactly zero produce
    inf/nan in the result (plain numpy division semantics).
    """
    mean_u = np.mean(ps_u, axis=0)  # posterior mean residual matrix
    # np.std works on deviations from the mean; the shortcut
    # sqrt(E[x^2] - E[x]^2) cancels catastrophically when the mean is large
    # relative to the spread and can yield 0 or NaN.
    std_u = np.std(ps_u, axis=0)  # posterior std of the residual matrix

    standardized = mean_u / std_u
    if absval:
        standardized = np.abs(standardized)

    res = pd.DataFrame(np.sum(standardized, axis=by_axis)).reset_index()
    res.columns = ['subj_id', 'residual']

    if sort:
        res = res.sort_values('residual', ascending=False)
    return res

# Largest summed absolute standardized residuals per subject (first five rows).
get_residuals(ps['uu'], by_axis=1, absval=True, sort=True).head()


Unnamed: 0,subj_id,residual
257,257,3.418244
412,412,3.296813
311,311,3.263149
298,298,3.241278
34,34,3.093044


In [26]:
# %%opts Bars {+axiswise} [width=1000, height=300, ] 
# Signed per-subject residuals, sorted in descending order.
res = get_residuals(ps['uu'], by_axis=1, absval=False, sort=True)

# Bar chart of the 20 largest (signed) residuals, keyed by subject id.
hv.Bars(
    res.iloc[:20],
    hv.Dimension('subj_id'),
    'residual',
    label='Top 20 Residuals',
).options(color='blue', xrotation=90).options(fig_inches=8, aspect=3)


In [24]:
# Signed per-subject residuals, still in subject order at this point.
res = get_residuals(ps['uu'], by_axis=1, absval=False)

# Subjects with an id above this cutoff are the last int(N * .1) subjects.
last_decile_cutoff = data['N'] - int(data['N']*.1) - 1
red_index = res[res.subj_id > last_decile_cutoff].index

# Everyone starts blue; the flagged subjects are recoloured red.
res['color'] = 'blue'
res.loc[red_index, 'color'] = 'red'

# Rank by residual (largest first); after the reset the index is the rank.
res = res.sort_values('residual', ascending=False).reset_index(drop=True)
res

Unnamed: 0,subj_id,residual,color
0,133,1.242984,blue
1,119,1.168006,blue
2,467,1.110284,red
3,483,1.085470,red
4,495,1.069424,red
5,482,1.038769,red
6,492,1.031807,red
7,32,1.031531,blue
8,465,1.017490,red
9,494,0.985490,red


In [25]:
%%opts Overlay [fig_size=300]
def _rank_scatter(color):
    """Scatter of residual against row index for the rows of `res` with this colour."""
    subset = res[res.color == color]
    return hv.Scatter((subset.index, subset.residual)).options(
        fig_inches=4, aspect=2.5, s=10, color=color)


# Blue points first, red drawn on top.
plots = [_rank_scatter(color) for color in ('blue', 'red')]

# Open question kept from the original: use the same y-range for all plots?
layout = hv.Overlay(plots).options(show_title=True, normalize=False)

layout