In [1]:
import numpy as np
import pandas as pd
import sys
import os
from codebase.file_utils import save_obj, load_obj
from codebase.post_process import samples_to_df, get_post_df, remove_cn_dimension
import altair as alt
alt.data_transformers.disable_max_rows()

%load_ext autoreload
%autoreload 2

In [19]:
# Load and display the simulated data set saved with an earlier run.
# NOTE(review): the following cell rebinds both `log_dir` and `data` before
# anything else reads them, so on a top-to-bottom run this cell only produces
# the display below. Its execution count (19) is also out of sequence with
# the rest of the notebook - confirm whether the cell is still needed.
log_dir = "./log/20211013_195812___s0m2_seed3//"
data = load_obj('data', log_dir)
data

{'random_seed': 3,
 'N': 2000,
 'K': 2,
 'J': 6,
 'alpha': array([0., 0., 0., 0., 0., 0.]),
 'beta': array([[1. , 0. ],
        [0.8, 0. ],
        [0.8, 0. ],
        [0. , 1. ],
        [0. , 0.8],
        [0. , 0.8]]),
 'sigma_z': array([1., 1.]),
 'Phi_corr': array([[1., 0.],
        [0., 1.]]),
 'Phi_cov': array([[1., 0.],
        [0., 1.]]),
 'z': array([[ 1.78862847,  0.43650985],
        [ 0.09649747, -1.8634927 ],
        [-0.2773882 , -0.35475898],
        ...,
        [-0.13790705,  1.10679585],
        [-0.15818468, -0.41708256],
        [ 0.53466852, -0.94417554]]),
 'y': array([[ 1.78862847,  1.43090278,  1.43090278,  0.43650985,  0.34920788,
          0.34920788],
        [ 0.09649747,  0.07719797,  0.07719797, -1.8634927 , -1.49079416,
         -1.49079416],
        [-0.2773882 , -0.22191056, -0.22191056, -0.35475898, -0.28380718,
         -0.28380718],
        ...,
        [-0.13790705, -0.11032564, -0.11032564,  1.10679585,  0.88543668,
          0.88543668],
        

In [2]:
# Directory that every `load_obj` call in the cells below reads from.
log_dir = "./log/fabian-freq-estimators/cont/m1/"
# Simulation dictionary; later cells use data['beta'] and data['Phi_cov'] as
# the reference values the saved estimates are compared against.
data = load_obj('data0', log_dir)


In [3]:
# Display the loaded simulation dictionary (generating parameters and simulated observations).
data

{'random_seed': 0,
 'N': 1000,
 'K': 2,
 'J': 6,
 'alpha': array([0., 0., 0., 0., 0., 0.]),
 'beta': array([[1. , 0. ],
        [0.8, 0. ],
        [0.8, 0. ],
        [0. , 1. ],
        [0. , 0.8],
        [0. , 0.8]]),
 'sigma_z': array([1., 1.]),
 'Phi_corr': array([[1., 0.],
        [0., 1.]]),
 'Phi_cov': array([[1., 0.],
        [0., 1.]]),
 'Marg_cov': array([[2.  , 0.8 , 0.8 , 0.  , 0.  , 0.  ],
        [0.8 , 1.64, 0.64, 0.  , 0.  , 0.  ],
        [0.8 , 0.64, 1.64, 0.  , 0.  , 0.  ],
        [0.  , 0.  , 0.  , 2.  , 0.8 , 0.8 ],
        [0.  , 0.  , 0.  , 0.8 , 1.64, 0.64],
        [0.  , 0.  , 0.  , 0.8 , 0.64, 1.64]]),
 'Theta': array([[1., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0.],
        [0., 0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 0., 1.]]),
 'sigma': array([1., 1., 1., 1., 1., 1.]),
 'y': array([[-0.7165283 , -4.1517797 , -0.98267814, -1.21219826, -0.61838363,
          0.7657609 ],


In [4]:
def clean_samples(ps0):
    """Flip factor signs so that the anchor loadings are non-negative.

    For every draw ``(i, c)`` the loadings of the first factor
    (``beta[i, c, :3, 0]``) are multiplied by the sign of ``beta[i, c, 0, 0]``
    and the loadings of the second factor (``beta[i, c, 3:, 1]``) by the sign
    of ``beta[i, c, 3, 1]``. If ``'Phi_cov'`` is present, its off-diagonal
    entries ``[0, 1]`` and ``[1, 0]`` are multiplied by the product of the two
    signs so they stay consistent with the flipped factors.

    Parameters
    ----------
    ps0 : dict
        Posterior draws keyed by parameter name. ``ps0['beta']`` must be
        indexable as ``[sample, chain, row, col]`` with at least four rows and
        two columns; ``ps0['Phi_cov']`` (optional) as ``[sample, chain, 2, 2]``.

    Returns
    -------
    dict
        A shallow copy of ``ps0`` in which ``'beta'`` (and ``'Phi_cov'`` when
        present) are replaced by new, sign-corrected arrays. All other entries
        still refer to the same objects as in ``ps0``.

    Notes
    -----
    The input is never modified. The previous implementation relied on
    ``dict.copy()``, which is shallow, and therefore overwrote the caller's
    arrays in place.

    As before, an anchor loading that is exactly zero has sign ``0`` and
    zeroes the corresponding loadings for that draw.
    """
    ps = ps0.copy()

    # Copy the arrays we are about to change so the caller's data stays intact.
    beta = np.array(ps0['beta'], copy=True)

    # One sign per (sample, chain), taken from the original anchor loadings.
    sign1 = np.sign(beta[:, :, 0, 0])
    sign2 = np.sign(beta[:, :, 3, 1])

    beta[:, :, :3, 0] = beta[:, :, :3, 0] * sign1[:, :, np.newaxis]
    beta[:, :, 3:, 1] = beta[:, :, 3:, 1] * sign2[:, :, np.newaxis]
    ps['beta'] = beta

    if 'Phi_cov' in ps:
        phi_cov = np.array(ps0['Phi_cov'], copy=True)
        phi_cov[:, :, 0, 1] = sign1 * sign2 * phi_cov[:, :, 0, 1]
        # Mirror the corrected entry to keep the matrix symmetric.
        phi_cov[:, :, 1, 0] = phi_cov[:, :, 0, 1]
        ps['Phi_cov'] = phi_cov

    return ps

def get_point_estimates(ps0, param_name, estimate_name):
    """Return a point estimate of one parameter from sign-corrected draws.

    The draws are passed through ``clean_samples``, the entry ``param_name``
    is handed to ``remove_cn_dimension`` (presumably collapsing the chain
    axis - confirm against ``codebase.post_process``) and the result is
    reduced over axis 0.

    Parameters
    ----------
    ps0 : dict
        Posterior draws keyed by parameter name, as accepted by
        ``clean_samples``.
    param_name : str
        Key of the parameter to summarise, e.g. ``'beta'``.
    estimate_name : str
        ``'mean'`` or ``'median'``.

    Returns
    -------
    numpy.ndarray
        Mean or median over axis 0 of the processed draws.

    Raises
    ------
    ValueError
        If ``estimate_name`` is neither ``'mean'`` nor ``'median'``. The
        previous implementation silently returned ``None`` in that case.
    """
    # Validate first so a typo fails fast instead of after the cleaning step.
    if estimate_name not in ('mean', 'median'):
        raise ValueError(
            "estimate_name must be 'mean' or 'median', got {!r}".format(estimate_name)
        )

    ps = remove_cn_dimension(
        clean_samples(ps0)[param_name]
    )
    if estimate_name == 'mean':
        return np.mean(ps, axis=0)
    return np.median(ps, axis=0)

    
def get_credible_interval_beta(ps, true_beta=None):
    """Chart the central 95% range of every loading against its true value.

    ``ps`` is converted with ``get_post_df``; the result is used as a
    long-format frame with columns ``row``, ``col`` and ``value``. For each
    ``(row, col)`` cell the 2.5% and 97.5% quantiles of ``value`` are drawn as
    a bar and the matching entry of the true loading matrix as a red point,
    one facet per cell.

    Parameters
    ----------
    ps : array-like
        Values accepted by ``get_post_df`` (the notebook passes arrays of
        shape ``(n, 1, 6, 2)``).
    true_beta : array-like of shape (rows, cols), optional
        True loading matrix. Defaults to ``data['beta']`` from the notebook
        namespace, which is what the function always used before; pass it
        explicitly to avoid depending on whichever ``data`` is currently
        bound.

    Returns
    -------
    altair chart
        Faceted layered chart (two columns of facets).
    """
    if true_beta is None:
        # Backward-compatible default: fall back to the notebook-level `data`.
        true_beta = data['beta']
    true_beta = np.asarray(true_beta)

    draws = get_post_df(ps)
    grouped = draws.groupby(['row', 'col'])[['value']]
    lower = grouped.quantile(0.025).reset_index().rename(columns={'value': 'q1'})
    upper = grouped.quantile(0.975).reset_index().rename(columns={'value': 'q2'})
    bounds = lower.merge(upper, on=['row', 'col'])

    # Long-format copy of the true loadings, keyed like `bounds`. Column labels
    # are derived from the matrix width instead of being fixed at two factors.
    truth = pd.DataFrame(
        true_beta,
        columns=[str(k) for k in range(true_beta.shape[1])],
    )
    truth['row'] = np.arange(truth.shape[0])
    truth = truth.melt(id_vars='row', var_name='col', value_name='data')
    truth['col'] = truth['col'].astype(int)

    plot_data = bounds.merge(truth, on=['row', 'col'])
    plot_data['index'] = 'row ' + plot_data.row.astype(str)+' .col '+plot_data.col.astype(str)

    # Bar spanning the two quantiles.
    c1 = alt.Chart(plot_data).mark_bar(opacity=0.6).encode(
            alt.X('q1', title=None),
            alt.X2('q2', title=None))

    # Red marker at the true value.
    c2 = alt.Chart(plot_data).mark_point(opacity=1, color='red').encode(
            alt.X('data', title=None)
    )
    return (c1+c2
            ).facet(
                   'index',
                columns=2
                )


def get_credible_interval_Phi(ps, true_phi_cov=None):
    """Chart the central 95% range of the factor covariance against its true value.

    ``ps`` is converted with ``get_post_df``; the result is used as a
    long-format frame with columns ``row``, ``col`` and ``value``. Only the
    off-diagonal cell ``(row=0, col=1)`` is kept. Its 2.5% and 97.5%
    quantiles are drawn as a bar and the true ``[0, 1]`` entry as a red point.

    Parameters
    ----------
    ps : array-like
        Values accepted by ``get_post_df`` (the notebook passes arrays of
        shape ``(n, 1, 2, 2)``).
    true_phi_cov : array-like indexable as ``[0, 1]``, optional
        True factor covariance matrix. Defaults to ``data['Phi_cov']`` from
        the notebook namespace, which is what the function always used
        before; pass it explicitly to avoid depending on whichever ``data``
        is currently bound.

    Returns
    -------
    altair chart
        Faceted layered chart (two columns of facets).
    """
    if true_phi_cov is None:
        # Backward-compatible default: fall back to the notebook-level `data`.
        true_phi_cov = data['Phi_cov']
    true_value = np.asarray(true_phi_cov)[0, 1]

    draws = get_post_df(ps)
    # Keep only the off-diagonal element.
    draws = draws[(draws.row==0)&(draws.col==1)]
    grouped = draws.groupby(['row', 'col'])[['value']]
    lower = grouped.quantile(0.025).reset_index().rename(columns={'value': 'q1'})
    upper = grouped.quantile(0.975).reset_index().rename(columns={'value': 'q2'})

    plot_data = lower.merge(upper, on=['row', 'col'])
    plot_data['data'] = true_value
    plot_data['index'] = 'row ' + plot_data.row.astype(str)+' .col '+plot_data.col.astype(str)

    # Bar spanning the two quantiles.
    c1 = alt.Chart(plot_data).mark_bar(opacity=0.6).encode(
            alt.X('q1', title=None),
            alt.X2('q2', title=None))

    # Red marker at the true value.
    c2 = alt.Chart(plot_data).mark_point(opacity=1, color='red').encode(
            alt.X('data', title=None)
    )
    return (c1+c2
            ).facet(
                   'index',
                columns=2
                )

In [5]:
# Deviation of the saved posterior-mean loading estimates from the true
# loadings, summarised over axis 0 (presumably one entry per replication -
# confirm against the script that wrote 'beta_mean').
estimates = load_obj('beta_mean', log_dir)
deviance = estimates - data['beta']
mean_dev = np.mean(deviance, axis=0)
var_dev = np.var(deviance, axis=0)
print(
    'Loading beta - Posterior Mean',
    'deviance mean',
    mean_dev,
    'deviance var',
    var_dev,
    sep='\n',
)

Loading beta - Posterior Mean
deviance mean
[[0.         0.        ]
 [0.04339896 0.        ]
 [0.04005713 0.        ]
 [0.         0.        ]
 [0.         0.02665294]
 [0.         0.03078232]]
deviance var
[[0.         0.        ]
 [0.00440679 0.        ]
 [0.00210532 0.        ]
 [0.         0.        ]
 [0.         0.0033366 ]
 [0.         0.00197827]]


In [6]:
# Deviation of the saved posterior-median loading estimates from the true
# loadings, summarised over axis 0 (presumably one entry per replication -
# confirm against the script that wrote 'beta_median').
estimates = load_obj('beta_median', log_dir)
deviance = estimates - data['beta']
mean_dev = np.mean(deviance, axis=0)
var_dev = np.var(deviance, axis=0)
print(
    'Loading beta - Posterior Median',
    'deviance mean',
    mean_dev,
    'deviance var',
    var_dev,
    sep='\n',
)

Loading beta - Posterior Median
deviance mean
[[0.         0.        ]
 [0.04102374 0.        ]
 [0.03740102 0.        ]
 [0.         0.        ]
 [0.         0.02415551]
 [0.         0.02789583]]
deviance var
[[0.         0.        ]
 [0.00440568 0.        ]
 [0.0021009  0.        ]
 [0.         0.        ]
 [0.         0.00332991]
 [0.         0.00193287]]


In [7]:
# Deviation of the saved posterior-mean factor-covariance estimates from the
# true Phi_cov, summarised over axis 0 (presumably one entry per replication -
# confirm against the script that wrote 'Phi_mean').
# NOTE(review): the printed header says "beta" although this cell evaluates
# Phi_cov; the label looks copy-pasted - confirm before changing it.
estimates = load_obj('Phi_mean', log_dir)
deviance = estimates - data['Phi_cov']
mean_dev = np.mean(deviance, axis=0)
var_dev = np.var(deviance, axis=0)
print(
    'Factor Cov beta - Posterior Mean',
    'deviance mean',
    mean_dev,
    'deviance var',
    var_dev,
    sep='\n',
)

Factor Cov beta - Posterior Mean
deviance mean
[[-0.04585999 -0.0017504 ]
 [-0.0017504  -0.06278371]]
deviance var
[[0.00917602 0.00194525]
 [0.00194525 0.00848935]]


In [8]:
# Deviation of the saved posterior-median factor-covariance estimates from the
# true Phi_cov, summarised over axis 0 (presumably one entry per replication -
# confirm against the script that wrote 'Phi_median').
# NOTE(review): the printed header says "beta" although this cell evaluates
# Phi_cov; the label looks copy-pasted - confirm before changing it.
estimates = load_obj('Phi_median', log_dir)
deviance = estimates - data['Phi_cov']
mean_dev = np.mean(deviance, axis=0)
var_dev = np.var(deviance, axis=0)
print(
    'Factor Cov beta - Posterior Median',
    'deviance mean',
    mean_dev,
    'deviance var',
    var_dev,
    sep='\n',
)

Factor Cov beta - Posterior Median
deviance mean
[[-0.04919106 -0.00181927]
 [-0.00181927 -0.06607426]]
deviance var
[[0.00920912 0.00191984]
 [0.00191984 0.00850967]]


In [11]:

# Count, per loading, in how many of the 20 saved interval files the true
# value lies between the stored lower (qtl[0]) and upper (qtl[1]) bound.
p = np.empty((20, 6, 2))
for i in range(len(p)):
    qtl = load_obj('q_beta'+str(i), log_dir)
    results = np.logical_and(qtl[0] <= data['beta'], qtl[1] >= data['beta'])
    p[i] = results
np.sum(p, axis=0)

array([[20., 20.],
       [18., 20.],
       [18., 20.],
       [20., 20.],
       [20., 20.],
       [20., 20.]])

In [12]:

# Count, per entry of Phi_cov, in how many of the 20 saved interval files the
# true value lies between the stored lower (qtl[0]) and upper (qtl[1]) bound.
p = np.empty((20, 2, 2))
for i in range(len(p)):
    qtl = load_obj('q_Phi_cov'+str(i), log_dir)
    results = np.logical_and(qtl[0] <= data['Phi_cov'], qtl[1] >= data['Phi_cov'])
    p[i] = results
np.sum(p, axis=0)

array([[18., 19.],
       [19., 19.]])

In [13]:
# Reshape the saved posterior-mean loadings to (20, 1, 6, 2) before charting;
# the added length-1 axis presumably stands in for the chain dimension that
# get_post_df expects - confirm. Because point estimates (not posterior draws)
# are passed, the bars show the 2.5%-97.5% range of those estimates.
estimates = load_obj('beta_mean', log_dir).reshape(20, 1, 6, 2)
get_credible_interval_beta(estimates)

In [14]:
# Reshape the saved posterior-median loadings to (20, 1, 6, 2) before charting;
# the added length-1 axis presumably stands in for the chain dimension that
# get_post_df expects - confirm. Because point estimates (not posterior draws)
# are passed, the bars show the 2.5%-97.5% range of those estimates.
estimates = load_obj('beta_median', log_dir).reshape(20, 1, 6, 2)
get_credible_interval_beta(estimates)

In [15]:
# Reshape the saved posterior-mean factor covariances to (20, 1, 2, 2) before
# charting; the added length-1 axis presumably stands in for the chain
# dimension that get_post_df expects - confirm. Because point estimates (not
# posterior draws) are passed, the bar shows the 2.5%-97.5% range of those
# estimates for the off-diagonal element.
estimates = load_obj('Phi_mean', log_dir).reshape(20, 1, 2, 2)
get_credible_interval_Phi(estimates)

In [16]:
# Reshape the saved posterior-median factor covariances to (20, 1, 2, 2) before
# charting; the added length-1 axis presumably stands in for the chain
# dimension that get_post_df expects - confirm. Because point estimates (not
# posterior draws) are passed, the bar shows the 2.5%-97.5% range of those
# estimates for the off-diagonal element.
estimates = load_obj('Phi_median', log_dir).reshape(20, 1, 2, 2)
get_credible_interval_Phi(estimates)