# (26) lca - retrieve

**Motivation**: host = ```mach```, device = ```cuda:2``` <br>

In [1]:
# HIDE CODE


import os, sys
from IPython.display import display

# tmp & extras dir
git_dir = os.path.join(os.environ['HOME'], 'Dropbox/git')
extras_dir = os.path.join(git_dir, 'jb-vae/_extras')
fig_base_dir = os.path.join(git_dir, 'jb-vae/figs')
tmp_dir = os.path.join(git_dir, 'jb-vae/tmp')

# GitHub
sys.path.insert(0, os.path.join(git_dir, '_PoissonVAE'))
from analysis.eval import sparse_score
from figures.fighelper import *
from vae.train_vae import *

# warnings, tqdm, & style
warnings.filterwarnings('ignore', category=DeprecationWarning)
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=UserWarning)
from rich.jupyter import print
%matplotlib inline
set_style()

In [3]:
device_idx = 2
device = f'cuda:{device_idx}'

print(f"device: {device}  ———  host: {os.uname().nodename}")

In [11]:
def sc_extract_info(fname: str):
    type, lamb, lr, iter, seed = fname.split('_')
    if type == 'fista':
        type = 'ista'
        lr = lr.split(':')[-1]
    else:
        lr = lr.split('-')[1]
    info = dict(
        type=str(type),
        lr=float(lr),
        iter=int(iter.split('-')[1]),
        lamb=float(lamb.split(':')[-1]),
        seed=int(seed.split('.')[0].split('-')[1]),
    )
    return info


def analyze_fits_sc(fits: str):
    df_sc = collections.defaultdict(list)
    for fname in tqdm(fits, ncols=60):
        vals = sc_extract_info(fname)
        if fname in best_lca_seeds:
            quality = 'best'
        elif fname.replace('.npz', '') in selected_lca:
            quality = 'good'
        else:
            quality = 'bad'
        vals['quality'] = quality
        # load
        run = run = np.load(pjoin(root, fname))
        y, z = run['recons'], run['activations']
        # recon
        vals['mse_map'] = tr.model.loss_recon(
            y=tr.to(y.reshape(-1, 1, 16, 16)),
            x=tr.dl_vld.dataset.tensors[0],
        ).mean().item()
        # sparse score
        lifetime, population, _ = sparse_score(
            z.astype('float32'))
        vals['lifetime'] = lifetime.mean()
        vals['population'] = population.mean()
        # add values
        for k, v in vals.items():
            df_sc[k].append(v)
    
    df_sc = pd.DataFrame(df_sc)
    return df_sc

## Load LCA results

In [12]:
root = add_home('Dropbox/chkpts/LCA')
fits_lca = sorted(os.listdir(root))

print({'lca': len(os.listdir(root))})

In [13]:
selected_lca = os.listdir(pjoin(fig_base_dir, 'selected_ista_lca'))
selected_lca = [f.replace('.png', '') for f in sorted(selected_lca)]
selected_lca = [f for f in selected_lca if f.startswith('lca')]

print(len(selected_lca))

In [14]:
best_lca = ['lca_lamb-0.05:0.7:0.1_lr-0.01_iter-900_seed-1']

best_lca_seeds = []
for target_string in best_lca:
    pattern = target_string.rsplit('_', 1)[0] + '_seed-\d+'
    matches = [s for s in fits_lca if re.match(pattern, s)]
    best_lca_seeds.extend(matches)
print(best_lca_seeds)

In [15]:
tr = TrainerVAE(
    MODEL_CLASSES['poisson'](CFG_CLASSES['poisson'](dataset='vH16', save=False)),
    ConfigTrainVAE(),
    device=device,
)

In [16]:
df_lca = analyze_fits_sc(fits_lca)

  0%|                               | 0/330 [00:00<?, ?it/s]


RuntimeError: The size of tensor a (28224) must match the size of tensor b (25811) at non-singleton dimension 0

In [17]:
tr.dl_vld.dataset.tensors[0].shape

torch.Size([25811, 1, 16, 16])