# (11) Jobs: beta fits

**Motivation**: Generate job runners for the beta / n_latent sweeps and save them as txt files. <br>

In [1]:
# HIDE CODE


import os, sys
from IPython.display import display

# tmp & extras dir
# All paths are rooted at $HOME/Dropbox/git.
# NOTE(review): requires the HOME environment variable to be set (KeyError otherwise).
git_dir = os.path.join(os.environ['HOME'], 'Dropbox/git')
# Output locations under the jb-vae directory: extras, figures, and scratch files.
extras_dir = os.path.join(git_dir, 'jb-vae/_extras')
fig_base_dir = os.path.join(git_dir, 'jb-vae/figs')
tmp_dir = os.path.join(git_dir, 'jb-vae/tmp')

# GitHub
sys.path.insert(0, os.path.join(git_dir, '_PoissonVAE'))
from analysis.eval import sparse_score
from figures.fighelper import *
from vae.train_vae import *

# warnings, tqdm, & style
# NOTE(review): `warnings` is not imported explicitly in this cell; presumably it is
# re-exported by one of the star imports above -- confirm.
warnings.filterwarnings('ignore', category=DeprecationWarning)
warnings.filterwarnings('ignore', category=FutureWarning)
# From here on, `print` refers to rich's Jupyter print rather than the built-in.
from rich.jupyter import print
%matplotlib inline
# `set_style` is not defined in this notebook; presumably it comes from
# figures.fighelper -- confirm.
set_style()

## Setup

In [2]:
from analysis.helper import job_runner_script


def _cleanup(path, host=None):
    for f in os.listdir(path):
        cond = f.endswith('.txt')
        if host is not None:
            cond = cond and host in f
        if cond:
            os.remove(pjoin(path, f))


def _name(host, gpu_i, fit_i):
    return f"{host}-cuda{gpu_i}-fit{fit_i}"


def divide_list(lst, n):
    """Split ``lst`` into ``n`` contiguous slices of near-equal length.

    The first ``len(lst) % n`` slices receive one extra element. When ``n``
    exceeds ``len(lst)`` the trailing slices are empty.

    Args:
        lst: Any sliceable sequence.
        n: Number of slices to produce.

    Returns:
        A list of ``n`` slices of ``lst``, in order.
    """
    base, extra = divmod(len(lst), n)
    pieces = []
    start = 0
    for i in range(n):
        # Slices before index `extra` absorb the remainder, one element each.
        stop = start + base + (1 if i < extra else 0)
        pieces.append(lst[start:stop])
        start = stop
    return pieces

In [3]:
# Directory (under $HOME) that receives the generated job files.
save_dir = pjoin(os.environ['HOME'], 'Dropbox/git/_PoissonVAE/scripts')
os.makedirs(save_dir, exist_ok=True)

# delete existing job runners? (host=None removes every .txt file in save_dir)
_cleanup(save_dir, host=None)

print(sorted(os.listdir(save_dir)))

## Betas (mach)

```<lin|lin>```

In [4]:
host = 'mach'
# Remove .txt job files in save_dir whose name contains this host.
_cleanup(save_dir, host)

# GPU index -> list of job_runner_script(...) results assigned to that GPU.
scripts_mach = collections.defaultdict(list)
# Running count of generated jobs.
tot = 0

In [5]:
# Seeds 1..n_seeds (inclusive); every (beta, latent size) pair is run once per seed.
n_seeds = 5
seeds = range(1, n_seeds + 1)

# Values swept for --kl_beta and --n_latents (256, 512, 768, 1024).
betas = [0.01, 0.2, 0.4, 0.6, 0.8, 1.0, 1.5, 4.0]
latent_dim = [i * 256 for i in [1, 2, 3, 4]]

In [6]:
# Sweep betas (slowest), then latent sizes, then seeds (fastest).
for idx, b in enumerate(betas):
    # Consecutive pairs of betas share a device: 8 betas -> devices 0..3.
    gpu_i = idx // 2
    for k in latent_dim:
        # This (beta, n_latents) setting is excluded from the sweep.
        # NOTE(review): presumably fitted elsewhere already -- confirm.
        if b == 1.0 and k == 512:
            continue
        for s in seeds:
            arg = f"--kl_beta {b} --n_latents {k} --comment k{k}-b{b:0.2g}"
            scripts_mach[gpu_i].append(
                job_runner_script(
                    device=gpu_i,
                    dataset='DOVES',
                    model='poisson',
                    archi='lin|lin',
                    seed=s,
                    args=arg,
                )
            )
            tot += 1

In [7]:
# Sanity check: total number of jobs generated by the loop above.
print(tot)

In [8]:
# Convert to a plain dict so looking up an unknown GPU index no longer creates an entry.
scripts_mach = dict(scripts_mach)
# Number of jobs queued per GPU index.
print({k: len(v) for k, v in scripts_mach.items()})

### Save

In [9]:
# Number of job files each GPU's jobs are split into.
n_fits = 8

for gpu_i, scripts in scripts_mach.items():
    # Contiguous, near-equal chunks of this GPU's jobs.
    # NOTE(review): when a GPU has fewer jobs than n_fits, divide_list returns empty
    # chunks, and an empty string is saved for each of them.
    scripts_divided = divide_list(scripts, n_fits)
    for fit_i, s in enumerate(scripts_divided):
        # Chain the chunk's entries with '&&' (in a shell, each command then runs
        # only if the previous one succeeded).
        combined = ' && '.join(s)
        # NOTE(review): save_obj is defined outside this notebook; presumably
        # mode='txt' writes <file_name>.txt into save_dir -- confirm.
        save_obj(
            obj=combined,
            file_name=_name(host, gpu_i, fit_i),
            save_dir=save_dir,
            mode='txt',
        )

Print one to check

In [10]:
# `combined` is left over from the final iteration of the save loop, i.e. the last
# job file written; break after each '&&' so every command sits on its own line.
print(combined.replace('&& ', '&& \n'))

In [11]:
# Debug dump of every job, grouped by GPU index (long output).
print(scripts_mach)

In [12]:
# `scripts_divided` is left over from the save loop: the chunks of the last GPU processed.
print(scripts_divided)

## Betas (solo + chewie)

```<conv|lin>```

### solo

In [4]:
host = 'solo'
# Remove .txt job files in save_dir whose name contains this host.
_cleanup(save_dir, host)

# GPU index -> list of job_runner_script(...) results assigned to that GPU.
scripts_solo = collections.defaultdict(list)
# Running count of generated jobs (reset for this host).
tot = 0

In [5]:
# Seeds 1..n_seeds (inclusive); every (beta, latent size) pair is run once per seed.
n_seeds = 5
seeds = range(1, n_seeds + 1)

# Values swept for --kl_beta and --n_latents; this host takes the larger
# latent sizes only (768, 1024).
betas = [0.01, 0.2, 0.4, 0.6, 0.8, 1.0, 1.5, 4.0]
latent_dim = [i * 256 for i in [3, 4]]  # [1, 2, 3, 4]]

In [6]:
# Sweep betas (slowest), then latent sizes, then seeds (fastest).
for idx, b in enumerate(betas):
    # Blocks of four consecutive betas share a device: 8 betas -> devices 0..1.
    gpu_i = idx // 4
    for k in latent_dim:
        # This (beta, n_latents) setting is excluded from the sweep; with the
        # latent sizes configured for this host (768, 1024) it never triggers.
        if b == 1.0 and k == 512:
            continue
        for s in seeds:
            arg = f"--kl_beta {b} --n_latents {k} --comment k{k}-b{b:0.2g}"
            scripts_solo[gpu_i].append(
                job_runner_script(
                    device=gpu_i,
                    dataset='DOVES',
                    model='poisson',
                    archi='conv+b|lin',
                    seed=s,
                    args=arg,
                )
            )
            tot += 1

In [7]:
# Sanity check: total number of jobs generated for this host.
print(tot)

In [8]:
# Convert to a plain dict so looking up an unknown GPU index no longer creates an entry.
scripts_solo = dict(scripts_solo)
# Number of jobs queued per GPU index.
print({k: len(v) for k, v in scripts_solo.items()})

#### Save

In [9]:
# Number of job files each GPU's jobs are split into.
n_fits = 5

for gpu_i, scripts in scripts_solo.items():
    # Contiguous, near-equal chunks of this GPU's jobs.
    # NOTE(review): when a GPU has fewer jobs than n_fits, divide_list returns empty
    # chunks, and an empty string is saved for each of them.
    scripts_divided = divide_list(scripts, n_fits)
    for fit_i, s in enumerate(scripts_divided):
        # Chain the chunk's entries with '&&' (in a shell, each command then runs
        # only if the previous one succeeded).
        combined = ' && '.join(s)
        # NOTE(review): save_obj is defined outside this notebook; presumably
        # mode='txt' writes <file_name>.txt into save_dir -- confirm.
        save_obj(
            obj=combined,
            file_name=_name(host, gpu_i, fit_i),
            save_dir=save_dir,
            mode='txt',
        )

Print one to check

In [10]:
# `combined` is left over from the final iteration of the save loop, i.e. the last
# job file written; break after each '&&' so every command sits on its own line.
print(combined.replace('&& ', '&& \n'))

In [11]:
# Debug dump of every job, grouped by GPU index (long output).
print(scripts_solo)

### chewie

In [4]:
host = 'chewie'
# Remove .txt job files in save_dir whose name contains this host.
_cleanup(save_dir, host)

# GPU index -> list of job_runner_script(...) results assigned to that GPU.
scripts_chewie = collections.defaultdict(list)
# Running count of generated jobs (reset for this host).
tot = 0

In [5]:
# Seeds 1..n_seeds (inclusive); every (beta, latent size) pair is run once per seed.
n_seeds = 5
seeds = range(1, n_seeds + 1)

# Values swept for --kl_beta and --n_latents; this host takes the smaller
# latent sizes only (256, 512).
betas = [0.01, 0.2, 0.4, 0.6, 0.8, 1.0, 1.5, 4.0]
latent_dim = [i * 256 for i in [1, 2]]  # [1, 2, 3, 4]]

In [6]:
# Sweep betas (slowest), then latent sizes, then seeds (fastest).
for idx, b in enumerate(betas):
    # Blocks of four consecutive betas share a device: 8 betas -> devices 0..1.
    gpu_i = idx // 4
    for k in latent_dim:
        # This (beta, n_latents) setting is excluded from the sweep.
        # NOTE(review): presumably fitted elsewhere already -- confirm.
        if b == 1.0 and k == 512:
            continue
        for s in seeds:
            arg = f"--kl_beta {b} --n_latents {k} --comment k{k}-b{b:0.2g}"
            scripts_chewie[gpu_i].append(
                job_runner_script(
                    device=gpu_i,
                    dataset='DOVES',
                    model='poisson',
                    archi='conv+b|lin',
                    seed=s,
                    args=arg,
                )
            )
            tot += 1

In [7]:
# Sanity check: total number of jobs generated for this host.
print(tot)

In [8]:
# Convert to a plain dict so looking up an unknown GPU index no longer creates an entry.
scripts_chewie = dict(scripts_chewie)
# Number of jobs queued per GPU index.
print({k: len(v) for k, v in scripts_chewie.items()})

#### Save

In [9]:
# Number of job files each GPU's jobs are split into.
n_fits = 5

for gpu_i, scripts in scripts_chewie.items():
    # Contiguous, near-equal chunks of this GPU's jobs.
    # NOTE(review): when a GPU has fewer jobs than n_fits, divide_list returns empty
    # chunks, and an empty string is saved for each of them.
    scripts_divided = divide_list(scripts, n_fits)
    for fit_i, s in enumerate(scripts_divided):
        # Chain the chunk's entries with '&&' (in a shell, each command then runs
        # only if the previous one succeeded).
        combined = ' && '.join(s)
        # NOTE(review): save_obj is defined outside this notebook; presumably
        # mode='txt' writes <file_name>.txt into save_dir -- confirm.
        save_obj(
            obj=combined,
            file_name=_name(host, gpu_i, fit_i),
            save_dir=save_dir,
            mode='txt',
        )

Print one to check

In [10]:
# `combined` is left over from the final iteration of the save loop, i.e. the last
# job file written; break after each '&&' so every command sits on its own line.
print(combined.replace('&& ', '&& \n'))

In [11]:
# Debug dump of every job, grouped by GPU index (long output).
print(scripts_chewie)