In [3]:
from scipy.stats import multivariate_normal, norm, bernoulli
import numpy as np
import pandas as pd
from codebase.smc2 import * 
from codebase.plot import * 
from scipy.special import logsumexp


%matplotlib inline

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Simulate Data

In [4]:
# Simulate observations from a linear-Gaussian factor model,
#     y_n = W z_n + e_n,   z_n ~ N(0, I_K),   e_n ~ N(0, diag(sigma)),
# whose implied covariance for y is W W^T + diag(sigma).
np.random.seed(121)  # seed the global NumPy stream before the draws below

nsim_data, J, K = 150, 6, 2  # observations, observed dimensions, latent factors

# Loading matrix W: one row per observed dimension, one column per factor (J x K).
ww = np.array(
    [
        [1, 0],
        [-2, 2],
        [-1, -1],
        [2, 1],
        [3, -1],
        [1, -1],
    ],
    dtype=float,
)

# `sigma` is placed on the diagonal of the noise covariance, i.e. it is used as variances.
sigma = np.array([1, 2, .3, .8, 1, 1.4])
Sigma = np.diag(sigma)

# Draw order is kept (noise first, then factors) so the seeded stream gives the same data.
ee = multivariate_normal.rvs(mean=np.zeros(J), cov=Sigma, size=nsim_data)
zz = multivariate_normal.rvs(mean=np.zeros(K), cov=np.eye(K), size=nsim_data)
yy = zz @ ww.T + ee

# Element-wise absolute gap between the model-implied and the sample covariance of y.
implied_cov = ww @ ww.T + Sigma
err = np.abs(implied_cov - np.cov(yy.T))

# errors = np.empty((100, err.shape[0],err.shape[1]))
# for kk in range(100):
#     zz = multivariate_normal.rvs(mean = np.zeros(K), cov=np.eye(K), size=nsim_data)
#     ee = multivariate_normal.rvs(mean = np.zeros(J), cov = Sigma, size=nsim_data)
#     yy = zz@ww.T + ee
#     errors[kk] = (ww@ww.T + Sigma) - np.cov(yy.T)

# np.mean(errors, axis=0)    

In [5]:
# Bundle the simulation settings, the true parameters and the simulated draws
# in one dictionary; later cells read observations and true values from it.
data = {
    'N': nsim_data,
    'K': K,
    'J': J,
    'sigma': sigma,
    'Sigma': Sigma,
    'y': yy,
    'z': zz,
    'w': ww,
    'e': ee,
}

In [8]:
# NOTE(review): these options target `Histogram`, while the element displayed below
# is a `Distribution` — confirm the intended element type.
%opts Histogram [width=200, height=200, tools=['hover']] 

# Distribution of the first column of the simulated observations, drawn with alpha=0.3.
hv.Distribution(data['y'][:,0]).options(alpha=0.3)


In [9]:
# Report the shape of each simulated array.
for label, arr in (
    ("dim(w) = ", ww),
    ("dim(z) = ", zz),
    ("dim(y) = ", yy),
    ("dim(e) = ", ee),
):
    print(label, arr.shape)


dim(w) =  (6, 2)
dim(z) =  (150, 2)
dim(y) =  (150, 6)
dim(e) =  (150, 6)


In [15]:
# Initialise the particle system: every particle is drawn from the prior and
# all particles start with equal (log) weight.
nsim_prtcls = 10  # number of particles
particles = dict()
particles['ww'] = prior_w(data['K'],data['J'], size=nsim_prtcls) # run with prior Cov = 100 * I
# NOTE(review): the simulated `sigma` has J entries, yet the prior is called with
# data['K'] — confirm which dimension `prior_sigma` expects.
particles['sigma'] = prior_sigma(data['K'], size=nsim_prtcls)
# particles['Sigma'] = construct_Sigma(particles['sigma'])
# NOTE(review): 'Sigma' is listed below although its construction above is
# commented out — confirm it is created elsewhere (e.g. inside `jitter`).
particles['parameter_names'] = ['ww',
                               'Sigma',
                               'sigma']
particles['N'] = nsim_prtcls
log_weights = np.zeros(nsim_prtcls)  ### log 1 = 0
# One log-likelihood increment per observation. Filled with NaN instead of
# uninitialised memory, so entries that are never written cannot leak arbitrary
# values into the np.nansum total computed later.
loglkhds = np.full(nsim_data, np.nan)
print("ESS: %.2f"%(ESS(log_weights)))

ESS: 10.00


In [6]:
# NOTE(review): this switches off every NumPy floating-point warning from here
# on — confirm that hiding them is intended.
np.seterr(all='ignore')

# SMC step for the first observation (i = 0).
i = 0
data_temp = dict(data)  # shallow copy: only the 'N' and 'y' entries are rebound
data_temp.update({"N": int(i + 1), 'y': data['y'][i].copy()})

# Incremental log-weights for this observation, then the log-likelihood
# increment as the log of the weight-averaged incremental weight.
log_incr_weights = get_weights(data_temp, particles)
log_weights_new = log_weights + log_incr_weights
loglkhds[i] = logsumexp(log_weights_new) - logsumexp(log_weights)
log_weights = log_weights_new

print("ESS: %.2f" % ESS(log_weights))

ESS: 4.93


In [7]:
# SMC step for the second observation (i = 1); reuses `data_temp` from the previous step.
i = 1
data_temp.update({"N": int(i + 1), 'y': data['y'][i].copy()})

# Incremental log-weights for this observation, then the log-likelihood
# increment as the log of the weight-averaged incremental weight.
log_incr_weights = get_weights(data_temp, particles)
log_weights_new = log_weights + log_incr_weights
loglkhds[i] = logsumexp(log_weights_new) - logsumexp(log_weights)
log_weights = log_weights_new

print("ESS: %.2f" % ESS(log_weights))

ESS: 4.75


In [8]:
# Sequential pass over the remaining observations (i = 2 .. nsim_data - 1).
# Each step reweights the particles with the new observation; when the effective
# sample size drops below `degeneracy_limit * nsim_prtcls` the particles are
# resampled, jittered on all data seen so far, and the weights are reset.
degeneracy_limit = .5
for i in range(2,nsim_data):
    # Expose only observation i to the incremental weight computation.
    data_temp["N"] = int(i+1)
    data_temp['y'] = data['y'][i].copy()

    log_incr_weights = get_weights(data_temp, particles)
    loglkhds[i] = logsumexp(log_weights + log_incr_weights) - logsumexp(log_weights)
    log_weights = log_incr_weights + log_weights

    ess = ESS(log_weights)  # evaluated once; used for the report and the degeneracy test
    print("ESS: %.2f"%(ess))

    # No resampling after the last observation: the final weights are used as they are.
    if (ess < degeneracy_limit * nsim_prtcls) and (i+1) < nsim_data:
        # The jitter step gets every observation up to and including i.
        data_temp['y'] = data['y'][:(i+1)].copy()
        print("Deg %d"%(i))
        # Normalise in log space before exponentiating: raw accumulated
        # log-weights can underflow to all zeros under a plain np.exp.
        resampling_weights = np.exp(log_weights - logsumexp(log_weights))
        particles = multinomial_sample_particles(particles, resampling_weights)

        particles = jitter(data_temp, particles)
        log_weights = np.zeros(nsim_prtcls)


ESS: 2.78
Deg 2
ESS: 130.40
Deg 3


  out = random_state.multivariate_normal(mean, cov, size)


ESS: 160.50
Deg 4
ESS: 461.73
Deg 5
ESS: 400.34
Deg 6
ESS: 616.87
ESS: 75.57
Deg 8
ESS: 292.01
Deg 9
ESS: 504.80
ESS: 223.43
Deg 11
ESS: 689.59
ESS: 461.92
Deg 13
ESS: 288.39
Deg 14
ESS: 340.96
Deg 15
ESS: 731.66
ESS: 585.09
ESS: 345.56
Deg 18
ESS: 846.16
ESS: 510.54
ESS: 443.27
Deg 21
ESS: 181.10
Deg 22
ESS: 795.21
ESS: 535.06
ESS: 387.87
Deg 25
ESS: 693.73
ESS: 456.50
Deg 27
ESS: 875.97
ESS: 690.78


In [9]:
# Normalised importance weights (they sum to one). Subtracting logsumexp before
# exponentiating avoids the underflow of np.exp on strongly negative accumulated
# log-weights, which would produce all-zero weights and make the weighted
# averages below fail.
weights = np.exp(log_weights - logsumexp(log_weights))

In [10]:
from codebase.file_utils import save_obj

# Hand the final particle set, the log-weights and the log-likelihood
# increments to save_obj, all with the same target directory.
log_dir = './log/log_implementation/'
for obj, obj_name in (
    (particles, 'particles'),
    (log_weights, 'log_weights'),
    (loglkhds, 'lkhds'),
):
    save_obj(obj, obj_name , log_dir)


In [11]:
# Plot options (size, rotated x labels, hover tool, fill colour) for Histogram elements.
%opts Histogram [width=600, height=200, xrotation=90, tools=['hover']] (fill_color='lightblue') 



In [12]:
# Plot options (size, hover tool) for Distribution elements.
%opts Distribution [width=500, height=200, tools=['hover']] 


In [13]:
# Plot options (size, hover tool) for Curve elements.
%opts Curve [width=800, height=300, tools=['hover']] 



In [14]:
%opts Bars [width=1000, height=400, xrotation=90, tools=['hover']] 

# Tabulate the weights with their positional index as an explicit column,
# then draw one bar per index.
weights_frame = pd.DataFrame(weights, columns=['ws'])
df = weights_frame.reset_index()
index_dim = hv.Dimension('index')
weights_plot = hv.Bars(df, kdims=index_dim, vdims='ws')

weights_plot

# Posterior Distributions

In [15]:
# Weighted averages over the particle axis for every parameter array the particle
# set actually carries. The membership test matters: this notebook's particles are
# initialised with 'ww' and 'sigma' only, so indexing 'mu' or 'Sigma'
# unconditionally raises KeyError when they are absent.
estimate = {
    name: np.average(particles[name], axis=0, weights=weights)
    for name in ('ww', 'sigma', 'Sigma', 'mu')
    if name in particles
}


In [16]:
%%opts Curve {+axiswise} [width=600, height=200, tools=['hover']] 

# Collect one plot_histograms panel per index i in range(J) for 'mu' and stack
# the panels in a single-column layout.
# NOTE(review): neither particles['mu'] nor data['mu'] is assigned in the cells
# shown in this notebook (`data` holds N, K, J, sigma, Sigma, y, z, w, e) —
# confirm these keys exist before relying on this cell.
plots = []
for i in range(J):
    plots.append(plot_histograms(particles['mu'][:,i],
                                 weights=weights,
                                 true_value=data['mu'][i],
             title = 'Posterior distribution for mu(%s)'%(i)))
layout = hv.Layout(plots)
layout.cols(1)

In [17]:
%%opts Curve {+axiswise} [width=600, height=200, tools=['hover']] 
# Collect one plot_histograms panel per strictly upper-triangular entry (i < j)
# of 'R' and stack the panels in a single-column layout.
# NOTE(review): neither particles['R'] nor data['R'] is assigned in the cells
# shown in this notebook — confirm these keys exist before relying on this cell.
plots = []
for i in range(J):
    for j in range(i,J):
        if i!=j:  # skip the diagonal
            plots.append(plot_histograms(particles['R'][:,i,j],
                                         weights=weights, 
                                         true_value=data['R'][i,j],
                     title = 'Posterior distribution for R(%s,%s)'%(i,j)))
layout = hv.Layout(plots)
layout.cols(1)

In [18]:
%%opts Curve {+axiswise} [width=600, height=200, tools=['hover']] 

# One plot_histograms panel per component of sigma, stacked in a single column.
# `data` as built in this notebook has no 'Kc' entry, so the number of
# components is read from the second axis of the particle array itself.
n_sigma = particles['sigma'].shape[1]
plots = [
    plot_histograms(particles['sigma'][:, i],
                    weights=weights,
                    true_value=data['sigma'][i],
                    title='Posterior distribution for sigma(%s)' % (i))
    for i in range(n_sigma)
]
layout = hv.Layout(plots)
layout.cols(1)

In [19]:
# Log marginal likelihood estimate: the sum of the per-observation increments.
# np.nansum skips NaN increments, so say how many were dropped — each one is a
# missing term, and the total then no longer covers the full sample.
n_dropped = int(np.isnan(loglkhds).sum())
if n_dropped:
    print("WARNING: %d of %d log-likelihood increments are NaN and were excluded from the sum"
          % (n_dropped, loglkhds.size))
estimate['marginal_loglklhd'] = np.nansum(loglkhds)
estimate['marginal_loglklhd']

-134.90921969923383

In [20]:
# Display the per-observation log-likelihood increments filled in by the SMC steps above.
loglkhds

array([        nan,         nan,         nan, -5.68756128, -6.05098267,
       -3.76409748, -4.43207493, -3.72705719, -7.45495276, -5.82025678,
       -4.68058348, -4.62427943, -4.18729184, -4.69085517, -6.24599181,
       -6.10303531, -4.13715761, -4.23861838, -4.18695274, -3.29769398,
       -5.47816303, -4.23784071, -8.78134017, -4.15835822, -5.60019408,
       -5.80445719, -5.31874207, -4.60305768, -3.64491668, -3.95270706])