In [2]:
import wandb
import os
os.environ["WANDB_SILENT"] = "true"

import numpy as np
import pandas as pd

# import sys
# sys.path.insert(0, "../..")

%cd ..

from helpers import load_model
from model import GenreClassifier
from data.src.dataLoaders import Groove2Drum2BarDataset
from model import BaseVAE, MuteVAE

import torch


from bokeh.palettes import inferno, Category20b
from bokeh.core.enums import MarkerType
from bokeh.plotting import figure, show, save
from bokeh.io import output_notebook, reset_output
# output_notebook()

In [3]:

# NOTE(review): `down_sampled_ratio` is not referenced by any cell visible in
# this notebook; confirm whether it is still needed.
down_sampled_ratio=None
# load dataset as torch.utils.data.Dataset
# The path below is relative, so it resolves against the current working
# directory (changed by the `%cd ..` in the first cell).
dataset = Groove2Drum2BarDataset(
    dataset_setting_json_path="data/dataset_json_settings/Balanced_6000_performed.json",
    subset_tag="test",  # only the "test" subset is loaded here
    max_len=32,  # presumably the sequence length in time steps -- TODO confirm
    tapped_voice_idx=2,
    collapse_tapped_sequence=True,
    num_voice_density_bins=3,
    num_tempo_bins=6,
    num_global_density_bins=7,
    augment_dataset=False,
    force_regenerate=False
)


# Download model, load and Serialize

In [4]:
from helpers import download_model_from_wandb, predict_using_model, load_model
    
# One-off downloads of the trained checkpoints from Weights & Biases.
# They are left commented out; the load_model calls below read the files
# from ./trained_models/, so those files must already exist locally.
# download_model_from_wandb("45", 3, "driven-frost-24", GenreClassifier, new_path="./trained_models/genre_classifier.pth")
# download_model_from_wandb("155", 1, "lively-pond-9", BaseVAE, new_path="./trained_models/base_vae_beta_0_2.pth")
# download_model_from_wandb("405", 0, "polished-pyramid-1", MuteVAE, new_path="./trained_models/mute_vae_beta_0_2.pth")

# Load each checkpoint with its matching model class. The paths are relative
# to the current working directory.
# NOTE(review): the "_0_2" suffix presumably denotes beta = 0.2 -- confirm.
genre_classifier = load_model("./trained_models/genre_classifier.pth", GenreClassifier)
model_BaseVAE_0_2 = load_model("./trained_models/base_vae_beta_0_2.pth", BaseVAE)
model_MuteVAE_0_2 = load_model("./trained_models/mute_vae_beta_0_2.pth", MuteVAE)

# model_MuteVAE


# model.serialize(save_folder=f"{run_name}", filename=f"Gen_{run_name}_{epoch}_serialized__{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.pt")

In [5]:
from eval.UMAP import TSNEMapper, UMapper
from helpers import predict_using_model

In [6]:
# tsne_mapper = TSNEMapper(identifier_="BaseVAE_beta_0_2")
# _, latents_z = predict_using_model(model_BaseVAE_0_2, dataset)


In [7]:
# tsne_mapper.fit(latents_z, tags_ = [hvo_seq.metadata["style_primary"] for hvo_seq in dataset.hvo_sequences])



In [8]:
# p = tsne_mapper.plot(save_plot=False, show_plot=False)
# from bokeh.plotting import figure, output_file, save, show, reset_output


In [9]:
# show(p)

In [24]:
# NOTE(review): the disabled UMapper below is tagged "BaseVAE_beta_0_2", but the
# latents computed in this cell come from the MuteVAE model -- update the
# identifier if the mapper is re-enabled.
#umap_mapper = UMapper(identifier_="BaseVAE_beta_0_2", embedding_dims=2, metric="euclidean", n_neighbors=100)
# Run the MuteVAE over the loaded dataset. The first returned value is
# discarded; only the second one (bound to `latents_z`) is used by later cells.
_, latents_z = predict_using_model(model_MuteVAE_0_2, dataset)

In [28]:
import matplotlib.pyplot as plt
import numpy as np

def plot_latent_distributions(latents, expected_dims=128):
    """
    Plot boxplot distributions of latent values at each dimension.

    One box is drawn per column of `latents`, so the x axis enumerates the
    latent dimensions and the y axis shows the values observed in each one.

    Parameters:
    - latents: Array-like of shape (n_samples, n_dimensions) where each row represents
               a latent vector and each column corresponds to a dimension.
    - expected_dims: Number of columns `latents` must have. Defaults to 128.
                     Pass None to accept any number of dimensions.

    Raises:
    - ValueError: If `latents` is not 2D, or if its number of columns differs
                  from `expected_dims` (when `expected_dims` is not None).
    """
    # Accept any array-like input; the data is only read, so no copy is needed.
    latents = np.asarray(latents)

    # Validate before touching matplotlib so bad input never opens a figure.
    if expected_dims is None:
        if latents.ndim != 2:
            raise ValueError("Latents should be a 2D NumPy array.")
    elif latents.ndim != 2 or latents.shape[1] != expected_dims:
        raise ValueError(
            f"Latents should be a 2D NumPy array with {expected_dims} dimensions."
        )

    # Plotting (boxes are vertical by default: one box per column of `latents`)
    plt.figure(figsize=(10, 5))
    plt.boxplot(latents, notch=True)
    plt.title('Boxplot Distribution of Latent Values at Each Dimension')
    # With vertical boxes the dimensions run along x and the values along y.
    plt.xlabel('Dimension Index')
    plt.ylabel('Latent Dimension Value')
    plt.grid(True)
    plt.show()
    
# Draw the per-dimension boxplots for the MuteVAE latents.
# NOTE(review): `.numpy()` presumably requires `latents_z` to be a CPU tensor
# without gradient tracking -- confirm what predict_using_model returns.
plot_latent_distributions(latents_z.numpy())

# plot

In [23]:
from bokeh.models.glyphs import Line, Patch
from bokeh.io import show, output_notebook
from bokeh.layouts import gridplot
import numpy as np
from scipy.stats import gaussian_kde
from bokeh.models import ColumnDataSource, FixedTicker, Grid, LinearAxis, Plot, Range1d

def make_ridge_plot(latents, start_dim, end_dim, width=900, height=200):
    """
    Build one ridge-style panel covering latent dimensions [start_dim, end_dim).

    A Gaussian KDE is fitted to every selected column. Each density is scaled
    by -0.75 and offset by its 1-based position within the panel, then drawn
    as a filled patch with a black outline. All curves share one x axis that
    spans the min/max of the selected columns.

    Parameters:
    - latents: 2D NumPy array of latent vectors (rows are samples).
    - start_dim, end_dim: First (inclusive) and last (exclusive) column index.
    - width, height: Size passed to the Bokeh Plot.

    Returns:
    - The populated Bokeh Plot (not shown here; the caller displays it).
    """
    # Fit every density first, so a failing column aborts before any plot is built.
    densities = []
    for dim in range(start_dim, end_dim):
        densities.append(gaussian_kde(latents[:, dim]))

    # Shared horizontal extent, taken from the selected columns only.
    selected = latents[:, start_dim:end_dim]
    lowest = np.min(selected)
    highest = np.max(selected)

    n_selected = end_dim - start_dim
    panel = Plot(
        width=width,
        height=height,
        x_range=Range1d(start=lowest, end=highest),
        y_range=Range1d(start=-3, end=n_selected + 1),
        title=None,
    )

    # Every curve is evaluated on the same 100 sample points.
    sample_points = np.linspace(lowest, highest, 100)

    position = 1
    for density in densities:
        # Negative scale plus an integer offset stacks the curves for visibility.
        curve = density(sample_points) * -0.75 + position
        curve_source = ColumnDataSource(data=dict(x=sample_points, y=curve))

        # Filled area first, then the outline on top of it.
        panel.add_glyph(curve_source, Patch(x='x', y='y', fill_alpha=0.6, line_color=None))
        panel.add_glyph(curve_source, Line(x='x', y='y', line_color="black", line_width=2))
        position += 1

    return panel

def plot_ridge_grid(latents, dimensions=128, rows=4, width=900, height=200, ncols=4):
    """
    Create a grid of ridge plots for the latent space dimensions.

    The first `dimensions` columns of `latents` are split into at most `rows`
    consecutive groups. Each group becomes one panel built by
    `make_ridge_plot`, and the panels are laid out with Bokeh's `gridplot`.

    Parameters:
    - latents: 2D array of latent vectors, shape (n_samples, n_dimensions).
    - dimensions: Total number of dimensions to plot (128 for this use case).
    - rows: Maximum number of panels the dimensions are split into.
    - width, height: Width and height of each panel.
    - ncols: Number of panels per grid row. Defaults to 4, the value that was
             previously hard-coded.

    Raises:
    - ValueError: If `rows` or `dimensions` is smaller than 1, or if `latents`
                  is not 2D with at least `dimensions` columns.
    """
    # Fail early with a clear message instead of a ZeroDivisionError or a
    # zero range step further down.
    if rows < 1:
        raise ValueError(f"rows must be at least 1, got {rows}.")
    if dimensions < 1:
        raise ValueError(f"dimensions must be at least 1, got {dimensions}.")

    latents_shape = np.shape(latents)
    if len(latents_shape) != 2 or latents_shape[1] < dimensions:
        raise ValueError(
            f"latents must be 2D with at least {dimensions} columns, "
            f"got shape {latents_shape}."
        )

    # Ceiling division: never produces more than `rows` panels, and the step
    # is always >= 1 even when rows > dimensions.
    dims_per_panel = -(-dimensions // rows)

    panels = [
        make_ridge_plot(latents, first, min(first + dims_per_panel, dimensions), width, height)
        for first in range(0, dimensions, dims_per_panel)
    ]

    # Each panel is already sized by make_ridge_plot, so no size keywords are
    # passed here; this avoids the plot_width/plot_height keyword names that
    # differ between Bokeh versions.
    grid_layout = gridplot(panels, ncols=ncols)

    # Show plot
    output_notebook()  # Display plot within a Jupyter Notebook. Comment out if not using Jupyter.
    show(grid_layout)
    
# Ridge plots for all 128 latent dimensions, split into 4 panels of 32
# dimensions each; every panel is 100 wide and 400 tall.
# NOTE(review): `.numpy()` presumably requires `latents_z` to be a CPU tensor
# without gradient tracking -- confirm what predict_using_model returns.
plot_ridge_grid(latents_z.numpy(), dimensions=128, rows=4, width=100, height=400)

In [0]:
#umap_mapper.fit(latents_z, tags_ = [hvo_seq.metadata["style_primary"] for hvo_seq in dataset.hvo_sequences])
#show(umap_mapper.plot(save_plot=False, show_plot=False))
