In [15]:
import torch
import os
from helpers.VAE.modelLoader import load_variational_mgt_model
from bokeh.io import output_notebook
from bokeh.plotting import show
import IPython.display as ipd
import sys
import pickle
import bz2
import plotly.express as px

from hvo_sequence.hvo_seq import HVO_Sequence
from hvo_sequence.drum_mappings import ROLAND_REDUCED_MAPPING
from data.src.dataLoaders import load_gmd_hvo_sequences

import numpy as np
import pandas as pd

import uuid

# Configure Bokeh so that figures are rendered inline in the notebook's cell outputs.
output_notebook()

In [26]:
# Load the bz2-compressed pickle into `data` (later cells read its 'test' entry).
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only open this archive if it comes from a trusted source.
with bz2.BZ2File('data/i2dgd/guitar2drum.bz2pickle', 'rb') as file:
    data = pickle.load(file)

In [34]:
# Restore the model from a saved checkpoint; the loader's log output below
# reports that it was placed on the CPU.
model = load_variational_mgt_model("models/drawn-sweep-18/190.pth")

INFO:helpers/VAE/modelLoader.py:Model was loaded to cpu!!!


Offset activation is sigmoid, bias is initialized to 0.5


In [44]:
# Pull the held-out split out of the loaded dataset.
test = data['test']

# Parallel collections for the test split; `outputs_hvos` holds HVO_Sequence
# objects (see the repr below).
# NOTE(review): presumably index i of each refers to the same example - confirm.
inputs = test['inputs']
outputs = test['outputs']
outputs_hvos = test['outputs_hvo_seqs']
# NOTE(review): this displays the whole list of object reprs; consider showing
# only a slice or the length to keep the output short.
outputs_hvos

[<hvo_sequence.hvo_seq.HVO_Sequence at 0x2c967a877f0>,
 <hvo_sequence.hvo_seq.HVO_Sequence at 0x2c967a87a90>,
 <hvo_sequence.hvo_seq.HVO_Sequence at 0x2c967a87d30>,
 <hvo_sequence.hvo_seq.HVO_Sequence at 0x2c967a87fd0>,
 <hvo_sequence.hvo_seq.HVO_Sequence at 0x2c967a8f2b0>,
 <hvo_sequence.hvo_seq.HVO_Sequence at 0x2c967a9d370>,
 <hvo_sequence.hvo_seq.HVO_Sequence at 0x2c967a9d610>,
 <hvo_sequence.hvo_seq.HVO_Sequence at 0x2c967a9d8b0>,
 <hvo_sequence.hvo_seq.HVO_Sequence at 0x2c967a9db50>,
 <hvo_sequence.hvo_seq.HVO_Sequence at 0x2c967a9ddf0>,
 <hvo_sequence.hvo_seq.HVO_Sequence at 0x2c967aa6250>,
 <hvo_sequence.hvo_seq.HVO_Sequence at 0x2c967aa64f0>,
 <hvo_sequence.hvo_seq.HVO_Sequence at 0x2c967aa6790>,
 <hvo_sequence.hvo_seq.HVO_Sequence at 0x2c967aa6a30>,
 <hvo_sequence.hvo_seq.HVO_Sequence at 0x2c967aa6cd0>,
 <hvo_sequence.hvo_seq.HVO_Sequence at 0x2c967aa6f70>,
 <hvo_sequence.hvo_seq.HVO_Sequence at 0x2c967aad250>,
 <hvo_sequence.hvo_seq.HVO_Sequence at 0x2c967aad4f0>,
 <hvo_sequ

In [38]:
## Utility Methods
def fix_dimensions(hvo_tensor, target_steps=32):
  """Truncate or zero-pad an HVO array along its time axis.

  Works for input of shape (T, N*3) and for batched input of shape
  (B, T, N*3), where T is the number of time steps, N the number of voices
  and B the batch size. Both torch tensors and numpy arrays are accepted,
  because get_flattened_version passes the result of `flatten_voices`
  straight into this function.

  Args:
    hvo_tensor: torch.Tensor or numpy.ndarray to adjust.
    target_steps: Desired length of the time axis (defaults to 32).

  Returns:
    An array of the same type as the input whose time axis has exactly
    `target_steps` entries. Longer inputs are truncated (a slice of the
    input is returned); shorter inputs are padded at the end with zeros of
    the input's dtype (and device, for tensors). Inputs that already have
    the right length are returned unchanged.
  """
  # 2-D input is (T, F); anything else is treated as batched, time on axis 1.
  time_axis = 0 if len(hvo_tensor.shape) == 2 else 1
  number_of_steps = hvo_tensor.shape[time_axis]

  if number_of_steps > target_steps:
    keep = [slice(None)] * len(hvo_tensor.shape)
    keep[time_axis] = slice(0, target_steps)
    return hvo_tensor[tuple(keep)]

  if number_of_steps < target_steps:
    # Build the padding from the input's own shape so any batch size works.
    pad_shape = list(hvo_tensor.shape)
    pad_shape[time_axis] = target_steps - number_of_steps
    if torch.is_tensor(hvo_tensor):
      # new_zeros inherits dtype and device, so the concatenation neither
      # silently promotes the dtype nor fails on a device mismatch.
      padding = hvo_tensor.new_zeros(pad_shape)
      return torch.cat((hvo_tensor, padding), dim=time_axis)
    padding = np.zeros(pad_shape, dtype=hvo_tensor.dtype)
    return np.concatenate((hvo_tensor, padding), axis=time_axis)

  return hvo_tensor

def get_flattened_version(sample_hvo_seq):
  """Build a new hvo_sequence object holding the flattened pattern.

  The voices of `sample_hvo_seq` are flattened (with `reduce_dim=False`),
  the result is length-adjusted by `fix_dimensions`, and stored on a fresh
  object obtained from `copy_zero()`.
  --> Useful for plotting and generating audio
  """
  flattened_seq = sample_hvo_seq.copy_zero()
  flat_hvo = sample_hvo_seq.flatten_voices(reduce_dim=False)
  flattened_seq.hvo = fix_dimensions(flat_hvo)
  return flattened_seq


def plot_and_synthesize(hvo_seq_sample):
  """Show the piano roll of an hvo_sequence object and synthesize it.

  Returns whatever `synthesize` produces for the pattern, rendered with the
  TamaRockSTAR soundfont.
  """
  soundfont_path = "hvo_sequence/soundfonts/TamaRockSTAR.sf2"
  # Draw first, then render the audio with the bundled soundfont.
  hvo_seq_sample.piano_roll(show_figure=True)
  return hvo_seq_sample.synthesize(sf_path=soundfont_path)

def encode_into_latent_z(model_, in_groove):
  """Encode a groove and sample a latent vector Z for it.

  `in_groove` is expected to have shape (1, 32, 3).
  """
  # Parameters of the latent distribution for this groove.
  mean, log_variance = model_.encode_to_mu_logvar(in_groove)
  # Draw z from that distribution via the model's reparametrization step.
  return model_.reparametrize(mean, log_variance)

def generate_from_groove(model_, in_groove, sampling_thresh=0.4):
  """Generate a drum pattern from an input groove.

  The groove is encoded into a latent vector with `encode_into_latent_z`
  and that vector is decoded with `model_.sample`.

  Args:
    model_: Model used for both encoding and sampling.
    in_groove: Input groove passed to the encoder.
    sampling_thresh: Threshold handed to `model_.sample` via
      `voice_thresholds`.

  Returns:
    The value returned by `model_.sample` (called with
    `return_concatenated=True`).
  """
  latent_z = encode_into_latent_z(model_, in_groove)

  # Decode with the model that was passed in; the previous code called the
  # notebook-global `model`, silently ignoring the `model_` argument.
  # NOTE(review): the threshold and max-count lists have a single entry here,
  # while decode_z_into_drums passes nine - confirm which the model expects.
  return model_.sample(
      latent_z=latent_z,
      voice_thresholds=[sampling_thresh],
      voice_max_count_allowed=[32],
      return_concatenated=True,
      sampling_mode=0)


def decode_z_into_drums(model_, latent_z, sampling_thresh=0.5):
  """Decode a latent encoding Z into a full drum pattern.

  `latent_z` is converted to a float32 tensor; the same threshold and a
  maximum count of 32 are supplied for each of the nine voices.
  """
  voice_count = 9
  z_tensor = torch.tensor(latent_z, dtype=torch.float32)
  thresholds = [sampling_thresh for _ in range(voice_count)]
  max_counts = [32 for _ in range(voice_count)]
  return model_.sample(
      latent_z=z_tensor,
      voice_thresholds=thresholds,
      voice_max_count_allowed=max_counts,
      return_concatenated=True,
      sampling_mode=0)


def save_to_midi(hvo_seq_pattern, filename=None):
  """Write the score held by an hvo_seq_pattern object to a MIDI file.

  `filename` is used without extension (".mid" is appended); when it is
  omitted, a random UUID4 string is used instead. The chosen path is
  printed after saving.
  """
  filename = str(uuid.uuid4()) if filename is None else filename
  hvo_seq_pattern.save_hvo_to_midi(f"{filename}.mid")
  print(f"saved to {filename}.mid")

In [41]:
# Convert the first test input into a float32 tensor and inspect its shape.
groove = torch.tensor(inputs[0], dtype=torch.float32)
groove.shape



torch.Size([32, 3])

In [49]:
# Create the groove: flatten the voices of the second target sequence, wrap
# it in a list to add a leading batch axis, and force 32 time steps.
groove = fix_dimensions(torch.tensor(
    [outputs_hvos[1].flatten_voices(reduce_dim=True)], 
    dtype=torch.float32))
print(f"The shape of the groove tensor is: {groove.shape}")
  
  
# NOTE(review): the recorded output shows an IndexError raised after the shape
# was printed, i.e. somewhere in the statements below - investigate before
# relying on this cell.
generated = generate_from_groove(model, groove)
generated.shape

The shape of the groove tensor is: torch.Size([1, 32, 3])


IndexError: index 1 is out of bounds for dimension 2 with size 1

In [23]:
import pandas as pd

# Placeholder records used to prototype the comparison plot; the values are
# dummy data, not real grooves.
data_hits = {
    'Guitar Groove': ['John', 'Alice', 'Bob', 'Alice', 'John'],
    'Original Drum Groove': [25, 30, 27, 25, 28],
    'Predicted Drum Groove': ['New York', 'London', 'Paris', 'London', 'New York']
}

df = pd.DataFrame(data_hits)
# A bare `df` in the middle of a cell is not rendered, so display it explicitly.
ipd.display(df)

# px.box must be given column names that exist in `df`; the previous
# "City"/"Age" names did not. The categorical column goes on x and the
# numeric column on y.
fig = px.box(df, x="Predicted Drum Groove", y="Original Drum Groove")
fig.show()