# Table: geom

**Motivation**: KNN + Shatter dim. Device = `cuda:1` <br>

In [1]:
# HIDE CODE


import os, sys
from IPython.display import display

# tmp & extras dir
git_dir = os.path.join(os.environ['HOME'], 'Dropbox/git')
extras_dir = os.path.join(git_dir, 'jb-vae/_extras')
fig_base_dir = os.path.join(git_dir, 'jb-vae/figs')
tmp_dir = os.path.join(git_dir, 'jb-vae/tmp')

# GitHub
sys.path.insert(0, os.path.join(git_dir, '_PoissonVAE'))
from analysis.eval import sparse_score
from figures.fighelper import *
from vae.train_vae import *

# warnings, tqdm, & style
warnings.filterwarnings('ignore', category=DeprecationWarning)
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=UserWarning)
from rich.jupyter import print
%matplotlib inline
set_style()

## Fig save dir

In [2]:
from figures.imgs import plot_weights

# All figures from this notebook live in the 'nips_may22' folder under fig_base_dir.
fig_dir = pjoin(fig_base_dir, 'nips_may22')
os.makedirs(fig_dir, exist_ok=True)  # no error if the folder already exists
print(os.listdir(fig_dir))  # list whatever is currently in the folder

# Shared keyword arguments for figure export.
# NOTE(review): not used in the visible cells; presumably forwarded to savefig — confirm.
kws_fig = dict(
    transparent=True,
    bbox_inches='tight',
    dpi=600,
)

## Device

In [3]:
# Device string is built from the index so only the number needs editing.
# NOTE(review): `device` is not referenced by any of the visible cells.
device_idx = 1
device = f'cuda:{device_idx}'

from analysis.final import (
    sort_fits,
    analyze_fits,
    add_nelbo_diff,
)
from figures.table import (
    table_entry_knn,
    table_entry_shatter,
)
from analysis.stats import ttest

## Load dfs

In [4]:
# Load the two pickled result frames from tmp_dir.
# Pickle files can execute arbitrary code on load: only read files you produced.
df_knn, df_shatter = (
    pd.read_pickle(pjoin(tmp_dir, fname))
    for fname in ('df_knn.df', 'df_shatter.df')
)

## Shatter

### ttest

In [5]:
# Model types to keep for the shatter comparison.
selected_models = [
    'poisson',
    'categorical',
    'laplace',
    'gaussian',
    'gaussian-relu',
    'gaussian-exp',
]
# Keep rows whose n_dims is 10, 50 or 100 AND whose type is a selected model.
# A single combined mask selects the same rows (same order and index) as
# filtering on n_dims first and on type second.
df_selected = df_shatter.loc[
    df_shatter['n_dims'].isin([10, 50, 100])
    & df_shatter['type'].isin(selected_models)
].copy()

In [6]:
# Sanity check: which model types survived the selection (order of first appearance).
print(pd.unique(df_selected['type']).tolist())

In [7]:
# Test 'poisson' against every other model type present in the selection,
# grouping by n_dims and type.
# NOTE(review): 'fdr_bh' is presumably Benjamini-Hochberg FDR correction at
# alpha=0.01 — confirm against analysis.stats.ttest.
df_agg, df_ttest = ttest(
    df=df_selected,
    value='accuracy',
    by=['n_dims', 'type'],
    test_conds=[
        ('poisson', other)
        for other in df_selected['type'].unique()
        if other != 'poisson'
    ],
    method='fdr_bh',
    alpha=0.01,
)

In [8]:
# Display the t-test results for the shatter data (second value returned by ttest above).
df_ttest

Unnamed: 0,n_dims,cond1,cond2,t,pvals,pvals_fdr_bh,reject
0,10,poisson,categorical,0.662967,0.507473,0.507473,False
1,10,poisson,gaussian,16.687499,1.236669e-56,1.426926e-56,True
2,10,poisson,gaussian-exp,28.113196,2.300337e-135,3.136824e-135,True
3,10,poisson,gaussian-relu,15.893258,5.624889e-52,6.0266670000000006e-52,True
4,10,poisson,laplace,19.559841,1.331953e-74,1.664942e-74,True
5,50,poisson,categorical,42.717303,3.587684e-247,5.3815270000000006e-247,True
6,50,poisson,gaussian,119.982773,0.0,0.0,True
7,50,poisson,gaussian-exp,111.222322,0.0,0.0,True
8,50,poisson,gaussian-relu,109.848941,0.0,0.0,True
9,50,poisson,laplace,100.845083,0.0,0.0,True


In [9]:
# Reshape to one row per compared model (cond2) and one column per n_dims,
# showing only the `reject` flag.
df_ttest.pivot(
    columns='n_dims',
    index='cond2',
    values='reject',
)

n_dims,10,50,100
cond2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
categorical,False,True,True
gaussian,True,True,True
gaussian-exp,True,True,True
gaussian-relu,True,True,True
laplace,True,True,True


In [10]:
# Peek at the first (key, value) pair of df_agg: print the key and the value's
# length. Nothing is printed if df_agg has no items.
for index, values in list(df_agg.items())[:1]:
    print(index, len(values))

### ci

In [11]:
%%time

for dim in [10, 50, 100]:
    table = table_entry_shatter(dim, df_agg, n_resamples=int(1e5), fmt='0.3f')
    print(f"——— dim: {dim} ———\n")
    print(table)
    print('\n\n\n')

CPU times: user 39.2 s, sys: 9.67 s, total: 48.9 s
Wall time: 48.9 s


## KNN

### ttest

In [12]:
# Model types to keep for the KNN comparison.
selected_models = [
    'poisson',
    'categorical',
    'laplace',
    'gaussian',
    'gaussian-relu',
    'gaussian-exp',
]
# Keep rows whose n_dims is 10, 50 or 100 AND whose type is a selected model.
# A single combined mask selects the same rows (same order and index) as
# filtering on n_dims first and on type second.
# Note: this rebinds `df_selected`, which held the shatter selection before.
df_selected = df_knn.loc[
    df_knn['n_dims'].isin([10, 50, 100])
    & df_knn['type'].isin(selected_models)
].copy()

In [13]:
# Sanity check: which model types survived the selection (order of first appearance).
print(pd.unique(df_selected['type']).tolist())

In [14]:
# Test 'poisson' against every other model type present in the selection,
# grouping by n_dims, size and type.
# NOTE(review): 'fdr_bh' is presumably Benjamini-Hochberg FDR correction at
# alpha=0.01 — confirm against analysis.stats.ttest.
# Note: this rebinds `df_agg` / `df_ttest`, which held the shatter results before.
df_agg, df_ttest = ttest(
    df=df_selected,
    value='accuracy',
    by=['n_dims', 'size', 'type'],
    test_conds=[
        ('poisson', other)
        for other in df_selected['type'].unique()
        if other != 'poisson'
    ],
    method='fdr_bh',
    alpha=0.01,
)

In [15]:
# Display the t-test results for the KNN data (second value returned by ttest above).
df_ttest

Unnamed: 0,n_dims,size,cond1,cond2,t,pvals,pvals_fdr_bh,reject
0,10,200,poisson,categorical,169.881202,0.0,0.0,True
1,10,200,poisson,gaussian,182.39875,0.0,0.0,True
2,10,200,poisson,gaussian-exp,165.188618,0.0,0.0,True
3,10,200,poisson,gaussian-relu,145.069923,0.0,0.0,True
4,10,200,poisson,laplace,82.033128,0.0,0.0,True
5,10,1000,poisson,categorical,240.189356,0.0,0.0,True
6,10,1000,poisson,gaussian,221.416368,0.0,0.0,True
7,10,1000,poisson,gaussian-exp,203.872618,0.0,0.0,True
8,10,1000,poisson,gaussian-relu,161.395089,0.0,0.0,True
9,10,1000,poisson,laplace,132.399649,0.0,0.0,True


In [16]:
# Reshape to one row per (n_dims, compared model) and one column per size,
# showing only the `reject` flag.
df_ttest.pivot(
    columns='size',
    index=['n_dims', 'cond2'],
    values='reject',
)

Unnamed: 0_level_0,size,200,1000,5000
n_dims,cond2,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
10,categorical,True,True,True
10,gaussian,True,True,True
10,gaussian-exp,True,True,True
10,gaussian-relu,True,True,True
10,laplace,True,True,False
50,categorical,True,True,True
50,gaussian,True,True,True
50,gaussian-exp,True,True,True
50,gaussian-relu,True,True,True
50,laplace,True,True,True


In [17]:
# Peek at the first (key, value) pair of df_agg: print the key and the value's
# length. Nothing is printed if df_agg has no items.
for index, values in list(df_agg.items())[:1]:
    print(index, len(values))

### ci=0.99

In [18]:
%%time

for dim in [10, 50, 100]:
    table = table_entry_knn(dim, df_agg, n_resamples=int(1e5), fmt='0.3f')
    print(f"——— dim: {dim} ———\n")
    print(table)
    print('\n\n\n')

CPU times: user 2min 37s, sys: 37.6 s, total: 3min 14s
Wall time: 3min 14s
