In [None]:
import wandb
import os
os.environ["WANDB_SILENT"] = "true"

import numpy as np
import pandas as pd

# import sys
# sys.path.insert(0, "../..")

%cd ../..

from helpers import load_variational_mgt_model
from model import GrooveTransformerEncoderVAE
import torch

from umap import UMAP

from bokeh.palettes import inferno, Category20b
from bokeh.core.enums import MarkerType
from bokeh.plotting import figure, show, save
from bokeh.io import output_notebook, reset_output
# output_notebook()

from helpers.VAE.eval_utils import UMapper

def generate_umap(
        groove_transformer_vae, test_dataset, subset_name,
        collapse_tapped_sequence):
    """
    Fit a UMAP projection on the model's latent vectors for a dataset and return its plot.

    Every sequence in ``test_dataset.hvo_sequences`` is flattened into an input groove,
    the grooves are passed through ``groove_transformer_vae.predict`` and the resulting
    latent vectors are handed to ``UMapper`` together with each sequence's
    ``style_primary`` metadata tag.

    Args:
        :param groove_transformer_vae: The model to be evaluated (must be a GrooveTransformerEncoderVAE)
        :param test_dataset: Dataset object exposing ``hvo_sequences``; each sequence must provide
                ``flatten_voices(reduce_dim=...)`` and a ``metadata["style_primary"]`` entry
        :param subset_name: The name of the subset to be evaluated (passed to UMapper and used
                as the prefix of the returned dictionary key)
        :param collapse_tapped_sequence: Whether to collapse the tapped sequence or not (input will have 1 voice only);
                forwarded as ``reduce_dim`` to ``flatten_voices``

    Returns:
        dictionary ``{f"{subset_name}_umap": plot}`` where ``plot`` is whatever ``UMapper.plot``
        returns with ``prepare_for_wandb=False`` (i.e. it is NOT wrapped for wandb logging here)

    Raises:
        AssertionError: if ``groove_transformer_vae`` is not a GrooveTransformerEncoderVAE
    """

    # make sure the model is of the expected type
    assert isinstance(groove_transformer_vae, GrooveTransformerEncoderVAE)

    # Stack the flattened grooves of all sequences into a single float32 tensor
    in_groove = torch.tensor(
        np.array([hvo_seq.flatten_voices(reduce_dim=collapse_tapped_sequence)
                  for hvo_seq in test_dataset.hvo_sequences]), dtype=torch.float32)

    # One style tag per sequence, in the same order as the rows of in_groove
    tags = [hvo_seq.metadata["style_primary"] for hvo_seq in test_dataset.hvo_sequences]

    # Only the latent vectors are needed and they are converted to numpy below,
    # so there is no reason to record an autograd graph during the forward pass.
    with torch.no_grad():
        _, _, _, latents_z = groove_transformer_vae.predict(in_groove, return_concatenated=True)

    umapper = UMapper(subset_name)
    umapper.fit(latents_z.detach().cpu().numpy(), tags_=tags)
    p = umapper.plot(show_plot=False, prepare_for_wandb=False)
    return {f"{subset_name}_umap": p}


# Download and load the model

In [14]:
# Checkpoint to inspect: the epoch number feeds both the run label and the artifact name.
epoch = 160
run_name = f"apricot-sweep-56_ep{epoch}"
artifact_path = f"nime2022_anon/beta_annealing_study/model_epoch_{epoch}:v60"

# Re-derive the epoch from the artifact path (text between "model_epoch_" and ":").
# NOTE: this rebinds `epoch` from an int to a str; later cells only use it inside f-strings.
epoch = artifact_path.rsplit("model_epoch_", 1)[-1].partition(":")[0]

# Start a W&B run, fetch the model artifact and load the checkpoint stored inside it.
run = wandb.init()
artifact = run.use_artifact(artifact_path, type='model')
artifact_dir = artifact.download()
checkpoint_path = os.path.join(artifact_dir, f"{epoch}.pth")
model = load_variational_mgt_model(checkpoint_path)

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.03347656726837158, max=1.0)…

INFO:helpers/VAE/modelLoader.py:Model was loaded to cpu!!!


Offset activation is sigmoid, bias is initialized to 0.5


# Inspect Model (Visualize UMAP)

In [17]:
from data.src.dataLoaders import MonotonicGrooveDataset

# Load the "test" subset described by the 4/4 GMD beats settings file.
# The settings path is relative to the directory selected by the `%cd ../..` magic above.
# collapse_tapped_sequence=True matches the flag passed to generate_umap in the next cell.
test_dataset = MonotonicGrooveDataset(
    dataset_setting_json_path="data/dataset_json_settings/4_4_Beats_gmd.json",
    subset_tag="test",
    max_len=32,
    tapped_voice_idx=2,
    collapse_tapped_sequence=True,
    down_sampled_ratio=None,
    move_all_to_gpu=False,
)

INFO:data.Base.dataLoaders:Loading gmd dataset
INFO:data.Base.dataLoaders:Loading Cached Version from: data/gmd/resources/cached/beat_division_factor_[4]/drum_mapping_label_ROLAND_REDUCED_MAPPING/beat_type_['beat']_time_signature_['4-4']
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2054/2054 [00:00<00:00, 2328.57it/s]
INFO:data.Base.dataLoaders:Loaded 2054 sequences


In [18]:
# Build the UMAP plot for the loaded model on the test set (tapped sequence collapsed).
umap = generate_umap(model, test_dataset, run_name, True)
umap_figure = umap[f'{run_name}_umap']

try:
    # Clear bokeh's current output settings before switching to inline notebook output.
    # NOTE(review): presumably needed because UMapper.plot configures file output — confirm.
    reset_output()
    output_notebook()
    show(umap_figure)
except Exception:
    # Best-effort fallback: retry without resetting bokeh's output state.
    # `Exception` (not a bare `except`) so KeyboardInterrupt/SystemExit still propagate.
    output_notebook()
    show(umap_figure)


INFO:bokeh.io.state:Session output file 'misc/apricot-sweep-56_ep160_umap.html' already exists, will be overwritten.


# Serialize the model if it meets your requirements

In [19]:
# Write the model into a folder named after the run label and the epoch.
serialization_folder = f"{run_name}_{epoch}"
model.serialize(save_folder=serialization_folder)