# (24) Untangle -- NVAE

**Motivation**: <br>

In [1]:
# HIDE CODE


import os, sys
from IPython.display import display

# Project directories: everything lives under $HOME/Dropbox/git
git_dir = os.path.join(os.environ['HOME'], 'Dropbox/git')
extras_dir, fig_base_dir, tmp_dir = (
    os.path.join(git_dir, rel_path)
    for rel_path in ('jb-MTMST/_extras', 'jb-MTMST/figs', 'jb-MTMST/tmp')
)

# Code path
sys.path.insert(0, '/home/hadi/Documents/MTMST/code')
from vae.train_vae import TrainerVAE, ConfigTrainVAE
from vae.vae2d import VAE, ConfigVAE
from figures.fighelper import *
from analysis.glm import *


# warnings, tqdm, & style
warnings.filterwarnings('ignore', category=DeprecationWarning)
from rich.jupyter import print
%matplotlib inline
set_style()

## Check the cNVAE vs. VAE vs. NVAE vs. cNAE numbers

In [2]:
# Load the targets for 'fixate1' together with the selected label names,
# then print a quick sanity check: per-key array shapes, and whether the
# labels equal list(LBL2TEX).
g, select_lbl = prep_rofl('fixate1')
_shapes = {key: arr.shape for key, arr in g.items()}
_labels_match = select_lbl == list(LBL2TEX)
print(_shapes, _labels_match)

In [3]:
%%time

# Saved fits to compare, keyed by a short model label.
#   * str   -> name passed to results_dir(); loaded with load_model_lite
#   * tuple -> forwarded as positional arguments to load_model
models = {
    'cNVAE': 'fixate1_nf-420_beta-0.15_(2023_04_27,20:20)',
    'VAE': 'vanilla_fixate1_nf-420_beta-0.15_(2023_05_02,15:15)',
    'NVAE': (
        'fixate1_k-32_z-1x[2,3,6]_enc(3x3)-dec(2x2)-pre(1x3)-post(1x3)_noncmprs',
        'NVAE_ep160-b600-lr(0.002)_beta(0.15:0x0.5)_lamb(0.0001)_gr(250.0)_(2023_08_07,10:30)'),
    'cNAE': 'fixate1_nf-420_beta-ae_(2023_05_12,07:31)',
}

# Only the first element returned by each loader is kept
# (presumably the trainer object; the rest is discarded).
trainers = {}
for mod_name, spec in models.items():
    if isinstance(spec, str):
        trainers[mod_name] = load_model_lite(
            path=pjoin(results_dir(spec), 'Trainer'),
            device='cuda',
            strict=False,
        )[0]
    elif isinstance(spec, tuple):
        trainers[mod_name] = load_model(*spec, device='cuda')[0]
    else:
        # Fail loudly rather than silently skipping the entry, which would
        # only surface later as a KeyError on `trainers`.
        raise TypeError(
            f"models[{mod_name!r}] must be a str or a tuple, "
            f"got {type(spec).__name__}"
        )

CPU times: user 1min 35s, sys: 4.86 s, total: 1min 40s
Wall time: 14.9 s


In [4]:
%%time

# For each model: fit a linear map from latents z to the targets g on the
# 'vld' split, then score it on the 'tst' split with one R^2 per output.
r2_scores = {}
for mod_name, tr in trainers.items():
    z = {}
    for split in ['vld', 'tst']:
        z[split] = tr.forward(split, True)[0]['z']

    lr = sk_linear.LinearRegression()
    lr.fit(z['vld'], g['vld'])

    raw_r2 = sk_metric.r2_score(
        y_true=g['tst'],
        y_pred=lr.predict(z['tst']),
        multioutput='raw_values',
    )
    # Label each raw score with its entry in select_lbl
    r2_scores[mod_name] = dict(zip(select_lbl, raw_r2))

CPU times: user 2min 26s, sys: 14.2 s, total: 2min 40s
Wall time: 2min 24s


In [5]:
# Persist r2_scores under <tmp_dir>/fig4_untangle_material
_r2_save_dir = pjoin(tmp_dir, 'fig4_untangle_material')
save_obj(
    obj=r2_scores,
    file_name='r2_all_mods',
    save_dir=_r2_save_dir,
    mode='npy',
);

In [6]:
# Show the full {model: {label: R^2}} table
print(r2_scores)

In [7]:
# Average R^2 over all labels, one number per model
_mean_r2 = {
    name: np.mean(list(per_label.values()))
    for name, per_label in r2_scores.items()
}
print(_mean_r2)

In [8]:
# Name reported by the loaded NVAE model's config; it should match the first
# element of the 'NVAE' entry in `models`.
trainers['NVAE'].model.cfg.name()

'fixate1_k-32_z-1x[2,3,6]_enc(3x3)-dec(2x2)-pre(1x3)-post(1x3)_noncmprs'

## Check cNVAE (beta = 0.8) vs. VAE (beta = 5.0) performance

In [2]:
path = pjoin(tmp_dir, 'results_combined')

# Read one fit name per line from fits.txt, dropping the trailing newline and
# skipping any line that contains the '# computer name' marker.
fits = []
with open(pjoin(path, 'fits.txt'), 'r') as f:
    fits.extend(
        line.partition('\n')[0]
        for line in f
        if '# computer name' not in line
    )

In [3]:
# Fits to exclude explicitly from the comparison
skip_fits = [
    'fixate1_nf-420_beta-0.8_(2023_05_07,04:15)',
]
# Keep only the 'fixate1' fits that are not excluded, in sorted order
fits = sorted(
    name for name in fits
    if 'fixate1' in name and name not in skip_fits
)

In [4]:
# Load one summary DataFrame per fit and stack them into `df`;
# `perf` maps each category label to that fit's perf values.
_frames = []
perf = {}
for fit_name in fits:
    if 'reservoir' in fit_name:
        continue
    # NOTE: read_pickle executes arbitrary code on load; only use it on
    # trusted, locally produced files.
    _summary_file = pjoin(
        tmp_dir, 'results_combined', f"summary_{fit_name}.df")
    _df = pd.read_pickle(_summary_file)
    # Every summary is expected to have exactly 141 rows
    # (presumably one per cell — TODO confirm).
    assert len(_df) == 141
    # .item() raises unless the frame holds exactly one category
    k = _df['category'].unique().item()
    if 'vanilla' in fit_name:
        # Relabel vanilla fits so they do not collide with the others
        k = f"vanilla-{k}"
        _df['category'] = k
    perf[k] = _df.perf.values
    _frames.append(_df)
df = pd.concat(_frames)

In [5]:
# Mean of every numeric column for each (category, beta) group.
# numeric_only=True is passed explicitly: pandas >= 2.0 no longer drops
# non-numeric columns silently and raises a TypeError on them instead.
df.groupby(['category', 'beta']).mean(numeric_only=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,nf,cell,perf,max_perf,log_alpha,best_lag
category,beta,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
fixate1,0.01,420.0,5.439716,0.474176,0.850839,1.297872,4.070922
fixate1,0.1,420.0,5.439716,0.453431,0.850839,2.22695,4.41844
fixate1,0.15,420.0,5.439716,0.484689,0.850839,2.326241,4.304965
fixate1,0.2,420.0,5.439716,0.482407,0.850839,2.106383,4.29078
fixate1,0.3,420.0,5.439716,0.497061,0.850839,1.156028,4.156028
fixate1,0.4,420.0,5.439716,0.49274,0.850839,1.51773,4.48227
fixate1,0.5,420.0,5.439716,0.506407,0.850839,1.609929,4.163121
fixate1,0.6,420.0,5.439716,0.502729,0.850839,0.283688,4.177305
fixate1,0.7,420.0,5.439716,0.479763,0.850839,0.702128,4.312057
fixate1,0.8,420.0,5.439716,0.517225,0.850839,-3.595745,4.134752


In [16]:
# perf values of the two conditions compared below:
#   a -> category 'fixate1',         beta 0.8
#   b -> category 'vanilla-fixate1', beta 5.0
_rows_a = (df['category'] == 'fixate1') & (df['beta'] == 0.8)
a = df.loc[_rows_a, 'perf'].values

_rows_b = (df['category'] == 'vanilla-fixate1') & (df['beta'] == 5.0)
b = df.loc[_rows_b, 'perf'].values

In [22]:
# Paired (related-samples) t-test on the two perf vectors.
# NOTE(review): this is only meaningful if a[i] and b[i] refer to the same
# unit in the same order — confirm the row ordering of the summary frames.
sp_stats.ttest_rel(a, b)

TtestResult(statistic=1.178167748329819, pvalue=0.2407284723498203, df=140)

## Save bash scripts

In [2]:
# One CLI flag per (lesion site, size) pair: all encoder lesions first, then
# all decoder lesions, followed by a final None entry (no extra argument).
args = [
    f'--lesion_{site} {s}'
    for site in ('enc', 'dec')
    for s in [2, 4, 8]
]
args.append(None)
args

['--lesion_enc 2',
 '--lesion_enc 4',
 '--lesion_enc 8',
 '--lesion_dec 2',
 '--lesion_dec 4',
 '--lesion_dec 8',
 None]

In [3]:
# Repeat the same fit path once per entry in `args`, so the two lists stay
# the same length. The count was previously a hard-coded 7 that had to be
# kept in sync with `args` by hand.
# NOTE(review): assumes save_script_neural pairs fits[i] with args[i] — confirm.
fits = ['/'.join([
    'fixate1_k-32_z-20x[3,6,12]_enc(1x3)-dec(1x2)-pre(1x3)-post(1x3)',
    'ep160-b600-lr(0.002)_beta(0.5:0x0.5)_lamb(0.0001)_gr(250.0)_(2023_04_29,17:52)',
])] * len(args)

save_script_neural(
    fits=fits,
    args=args,
    device='cuda',
)

In [3]:
# (model name, fit name) pair identifying the saved run to load
_fit_id = (
    'fixate1_k-32_z-20x[3,6,12]_enc(1x3)-dec(1x2)-pre(1x3)-post(1x3)',
    'ep160-b600-lr(0.002)_beta(0.5:0x0.5)_lamb(0.0001)_gr(250.0)_(2023_04_29,17:52)',
)
tr, meta = load_model(*_fit_id)

In [4]:
# Inspect the second value returned by load_model
meta

{'checkpoint': None,
 'global_step': None,
 'file': 'VAE+TrainerVAE_(2023_04_30,20:06).pt',
 'path': '/home/hadi/Documents/MTMST/models/fixate1_k-32_z-20x[3,6,12]_enc(1x3)-dec(1x2)-pre(1x3)-post(1x3)/ep160-b600-lr(0.002)_beta(0.5:0x0.5)_lamb(0.0001)_gr(250.0)_(2023_04_29,17:52)'}

In [2]:
# Build a 'fixate1' VAE with compress=True and report its total latent count
_cfg = ConfigVAE('fixate1', compress=True, save=False)
vae = VAE(_cfg)
vae.total_latents()

420

In [4]:
# Take the second item returned by sample(), flatten and concatenate it with
# flat_cat, then show the resulting size.
_sampled = vae.sample()
latents = _sampled[1]
z = flat_cat(latents)
z.size()

torch.Size([1024, 420])

In [5]:
# Build a 'fixate1' VAE with compress=False and report its total latent count
_cfg = ConfigVAE('fixate1', compress=False, save=False)
vae = VAE(_cfg)
vae.total_latents()

17520

In [6]:
# Take the second item returned by sample(), flatten and concatenate it with
# flat_cat, then show the resulting size.
_sampled = vae.sample()
latents = _sampled[1]
z = flat_cat(latents)
z.size()

torch.Size([1024, 17520])