# (12) Jobs: KNN

**Motivation**: Make fig (```knn```, only conv+mnist) job runners as txt files. <br>

In [1]:
# HIDE CODE


import os, sys
from IPython.display import display

# tmp & extras dir
# expanduser('~') resolves to $HOME when that variable is set, but unlike
# os.environ['HOME'] it does not raise KeyError when HOME is undefined.
git_dir = os.path.join(os.path.expanduser('~'), 'Dropbox/git')
extras_dir = os.path.join(git_dir, 'jb-vae/_extras')
fig_base_dir = os.path.join(git_dir, 'jb-vae/figs')
tmp_dir = os.path.join(git_dir, 'jb-vae/tmp')

# GitHub
sys.path.insert(0, os.path.join(git_dir, '_PoissonVAE'))
from analysis.eval import sparse_score
from figures.fighelper import *
from vae.train_vae import *

# warnings, tqdm, & style
warnings.filterwarnings('ignore', category=DeprecationWarning)
warnings.filterwarnings('ignore', category=FutureWarning)
from rich.jupyter import print
%matplotlib inline
set_style()

## Setup

In [2]:
from analysis.helper import job_runner_script


def _cleanup(path, host=None):
    for f in os.listdir(path):
        cond = f.endswith('.txt')
        if host is not None:
            cond = cond and host in f
        if cond:
            os.remove(pjoin(path, f))


def _name(host, gpu_i, fit_i):
    return f"{host}-cuda{gpu_i}-fit{fit_i}"


def divide_list(lst, n):
    """Split ``lst`` into ``n`` contiguous chunks of near-equal length.

    The first ``len(lst) % n`` chunks receive one extra element. When
    ``n`` exceeds ``len(lst)`` the trailing chunks are empty.

    Args:
        lst: sliceable sequence to split.
        n: number of chunks to produce.

    Returns:
        A list of ``n`` slices of ``lst``, in order.
    """
    base, extra = divmod(len(lst), n)
    chunks = []
    start = 0
    for idx in range(n):
        # Chunks with index below `extra` are one element longer
        stop = start + base + (1 if idx < extra else 0)
        chunks.append(lst[start:stop])
        start = stop
    return chunks

In [3]:
# Job files go into the scripts/ folder of the _PoissonVAE checkout.
# Derived from `git_dir` (defined in the first cell) so the repository root is
# specified in one place; the resulting path is the same as before.
save_dir = pjoin(git_dir, '_PoissonVAE/scripts')
os.makedirs(save_dir, exist_ok=True)

# Start clean: remove every previously generated .txt file, for all hosts
_cleanup(save_dir)

# Show what is left in the directory after the cleanup
print(sorted(os.listdir(save_dir)))

In [4]:
# Every configuration is repeated once per seed; seeds are 1-based (1..n_seeds)
n_seeds = 5
seeds = range(1, 1 + n_seeds)

## KNN experiments

This is a reduced set of experiments.
- Only MNIST
- Only CONV

Explores model dimensionality: ```n_latents = 5, 10, 20, 50, 100```

In [5]:
# Target host for this batch; clear its previously generated job files first
host = 'mach'
_cleanup(save_dir, host)

# Settings shared by every job in this section
dataset = 'MNIST'
archi = 'conv+b|conv+b'

# Swept settings: every model type at every latent size
model_types = ['poisson', 'categorical', 'gaussian', 'laplace']
n_latents = [5, 10, 20, 50, 100]

# Running job count and per-GPU buckets of generated scripts
tot = 0
scripts_mach = collections.defaultdict(list)

In [6]:
# Each model type is pinned to the GPU whose index equals its position in
# `model_types`; every (model, latent size) pair is repeated once per seed.
combos_outer = itertools.product(enumerate(model_types), n_latents)
for (gpu_i, m), k in combos_outer:
    # Categorical jobs pass the swept size as --n_categories with a single latent
    arg = (
        f"--n_latents 1 --n_categories {k}"
        if m == 'categorical' else
        f"--n_latents {k}"
    )
    arg = f"{arg} --cudnn_bench"
    for s in seeds:
        scripts_mach[gpu_i].append(
            job_runner_script(
                device=gpu_i, dataset=dataset, archi=archi,
                args=arg, model=m, seed=s,
            )
        )
        tot += 1

In [7]:
# Sanity check on the job count: 4 model types x 5 latent sizes x 5 seeds = 100
print(tot)

In [8]:
# Convert the defaultdict to a plain dict, then show how many jobs each GPU
# received (expected: 25 on each of GPUs 0-3)
scripts_mach = dict(scripts_mach)
print({gpu: len(jobs) for gpu, jobs in scripts_mach.items()})

In [9]:
# Number of job files written per GPU
n_fits = 5

for gpu_i, scripts in scripts_mach.items():
    # Split this GPU's jobs into n_fits near-equal chunks; each chunk becomes
    # one text file with its scripts joined by ' && '.
    scripts_divided = divide_list(scripts, n_fits)
    for fit_i, s in enumerate(scripts_divided):
        file_name = _name(host, gpu_i, fit_i)
        combined = ' && '.join(s)
        save_obj(obj=combined, file_name=file_name, save_dir=save_dir, mode='txt')

In [10]:
# Preview one generated job file, one script per line. `combined` is left over
# from the loop in the previous cell, so this is the last chunk written.
print(combined.replace('&& ', '&& \n'))

In [12]:
# Full dump of the GPU index -> job list mapping
print(scripts_mach)

## Gaussian w/ activation function

- Apply relu, softplus, square, and exp.
- Only do ```k=10, k=50```, use solo or chewie

In [5]:
# Target host for this batch; clear its previously generated job files first
host = 'chewie'
_cleanup(save_dir, host)

# Swept settings. NOTE: this rebinds `n_latents` and `model_types`, which the
# KNN section above also defines and reads.
n_latents = [10, 50]
model_types = ['gaussian', 'laplace']
activations = ['relu', 'softplus', 'square', 'exp']

# Running job count and per-GPU buckets of generated scripts
tot = 0
scripts_chewie = collections.defaultdict(list)

In [6]:
# Each latent size is pinned to the GPU whose index equals its position in
# `n_latents`; every (latent size, activation) pair runs for all seeds and models.
combos_outer = itertools.product(enumerate(n_latents), activations)
for (gpu_i, k), fn in combos_outer:
    # The activation name doubles as the run comment; 'exp' additionally
    # passes --init_scale 0.0001
    arg = f"--n_latents {k} --latent_act {fn} --comment {fn}"
    arg = f"{arg} --init_scale 0.0001" if fn == 'exp' else arg

    combos_inner = itertools.product(seeds, model_types)
    for s, m in combos_inner:
        scripts_chewie[gpu_i].append(
            job_runner_script(
                device=gpu_i, archi='conv+b|conv+b', dataset='MNIST',
                args=arg, model=m, seed=s,
            )
        )
        tot += 1

In [7]:
# Sanity check on the job count:
# 2 latent sizes x 4 activations x 5 seeds x 2 model types = 80
print(tot)

In [8]:
# Convert the defaultdict to a plain dict, then show how many jobs each GPU
# received (expected: 40 on each of GPUs 0-1)
scripts_chewie = dict(scripts_chewie)
print({gpu: len(jobs) for gpu, jobs in scripts_chewie.items()})

In [9]:
# Number of job files written per GPU
n_fits = 5

for gpu_i, scripts in scripts_chewie.items():
    # Split this GPU's jobs into n_fits near-equal chunks; each chunk becomes
    # one text file with its scripts joined by ' && '.
    scripts_divided = divide_list(scripts, n_fits)
    for fit_i, s in enumerate(scripts_divided):
        file_name = _name(host, gpu_i, fit_i)
        combined = ' && '.join(s)
        save_obj(obj=combined, file_name=file_name, save_dir=save_dir, mode='txt')

In [10]:
# Preview one generated job file, one script per line. `combined` is left over
# from the loop in the previous cell, so this is the last chunk written.
print(combined.replace('&& ', '&& \n'))

In [11]:
# Full dump of the GPU index -> job list mapping
print(scripts_chewie)

## Gaussian w/ activation function

- Apply relu and exp
- Only do ```k=100```
- use mach

In [5]:
# NOTE(review): the KNN section above also writes its files under host 'mach'
# with the same `_name` pattern, so this cleanup deletes those files and the
# files written below reuse the same names. Run only one 'mach' section per batch.
host = 'mach'
_cleanup(save_dir, host)

# Running job count and per-GPU buckets of generated scripts
tot = 0
scripts_mach = collections.defaultdict(list)

In [6]:
# Single latent size for this section
k = 100

for fn in ['relu', 'exp']:
    # The activation name doubles as the run comment; 'exp' additionally
    # passes --init_scale 0.0001
    arg = f"--n_latents {k} --latent_act {fn} --comment {fn}"
    arg = f"{arg} --init_scale 0.0001" if fn == 'exp' else arg
    arg = f"{arg} --cudnn_bench"

    for s in seeds:
        # Round-robin over device indices 0-3 using the running job count
        # (presumably this host has 4 GPUs -- TODO confirm)
        gpu_i = tot % 4
        scripts_mach[gpu_i].append(
            job_runner_script(
                device=gpu_i, archi='conv+b|conv+b', dataset='MNIST',
                model='gaussian', args=arg, seed=s,
            )
        )
        tot += 1

In [7]:
# Sanity check on the job count: 2 activations x 5 seeds = 10
print(tot)

In [8]:
# Convert the defaultdict to a plain dict, then show how many jobs each GPU
# received (expected: 3, 3, 2, 2 for GPUs 0-3 from the round-robin assignment)
scripts_mach = dict(scripts_mach)
print({gpu: len(jobs) for gpu, jobs in scripts_mach.items()})

In [9]:
# One file per job here (no chunking): `fit_i` is the job's position in its
# GPU's list.
for gpu_i, scripts in scripts_mach.items():
    for fit_i, s in enumerate(scripts):
        file_name = _name(host, gpu_i, fit_i)
        save_obj(obj=s, file_name=file_name, save_dir=save_dir, mode='txt')

In [10]:
# Preview one generated job. `s` is left over from the loop in the previous
# cell, so this is the last script written.
print(s)

In [11]:
# Full dump of the GPU index -> job list mapping
print(scripts_mach)