# Generating Rhythms with VAEDER

#### Install required packages and utility functions

In [None]:
!apt install fluidsynth
!pip install note-seq
!pip install pyfluidsynth
!pip install wandb
!pip install bokeh==2.4.3
!pip install umap

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
fluidsynth is already the newest version (2.2.5-1).
0 upgraded, 0 newly installed, 0 to remove and 16 not upgraded.
Collecting umap
  Downloading umap-0.1.1.tar.gz (3.2 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: umap
  Building wheel for umap (setup.py) ... [?25l[?25hdone
  Created wheel for umap: filename=umap-0.1.1-py3-none-any.whl size=3541 sha256=a65aa8c0e116691020f6a56090d15d3e8e70570e7749bf967d11ada18256e21c
  Stored in directory: /root/.cache/pip/wheels/15/f1/28/53dcf7a309118ed35d810a5f9cb995217800f3f269ab5771cb
Successfully built umap
Installing collected packages: umap
Successfully installed umap-0.1.1


In [None]:
# Fetch the GrooveTransformer sources from the dev/VAE_Control_Classifiers branch.
# If the directory already exists, git reports a fatal error and leaves it untouched.
!git clone -b dev/VAE_Control_Classifiers https://github.com/behzadhaki/GrooveTransformer.git
# Work from the repository root: the cells below import repo packages
# (helpers, hvo_sequence, data, model) and use repo-relative file paths.
%cd GrooveTransformer/

fatal: destination path 'GrooveTransformer' already exists and is not an empty directory.
/content/GrooveTransformer


In [None]:

import json
import os
import sys
import uuid

import IPython.display as ipd
import numpy as np
import pandas as pd
import torch
from bokeh.io import output_notebook
from bokeh.plotting import show

from helpers.VAE.modelLoader import load_variational_mgt_model
from hvo_sequence.hvo_seq import HVO_Sequence
from hvo_sequence.drum_mappings import ROLAND_REDUCED_MAPPING
from data.src.dataLoaders import load_gmd_hvo_sequences

# Configure Bokeh to render its figures inline in the notebook output.
output_notebook()

In [None]:
# Utility methods
from model import GrooveControl_VAE


def plot_and_synthesize(hvo_seq_sample,
                        sf_path="hvo_sequence/soundfonts/TamaRockSTAR.sf2"):
  """Plot the piano roll of a sequence and synthesize it to audio.

  Args:
    hvo_seq_sample: object exposing `piano_roll(show_figure=...)` and
      `synthesize(sf_path=...)` (an HVO_Sequence in this notebook).
    sf_path: path of the soundfont handed to `synthesize`. Defaults to the
      soundfont inside the cloned repository, so the path is relative to
      the repository root.

  Returns:
    Whatever `hvo_seq_sample.synthesize` returns; the notebook passes it
    to `ipd.Audio` for playback.
  """
  # Display the piano roll first so the figure appears above the audio widget.
  hvo_seq_sample.piano_roll(show_figure=True)
  return hvo_seq_sample.synthesize(sf_path=sf_path)


def save_to_midi(hvo_seq_pattern, filename=None):
  """Export the score held by `hvo_seq_pattern` to a MIDI file.

  Args:
    hvo_seq_pattern: object exposing `save_hvo_to_midi(path)`.
    filename: target name WITHOUT the ".mid" extension (it is always
      appended). When None, a random UUID4 string is used instead.
  """
  # Fall back to a freshly generated UUID when the caller gives no name.
  filename = str(uuid.uuid4()) if filename is None else filename
  hvo_seq_pattern.save_hvo_to_midi(f"{filename}.mid")
  print(f"saved to {filename}.mid")

def load_vaeder_model(model_path, params_dict=None, is_evaluating=True, device=None,
                      genre_json_path=None):
    """Instantiate a GrooveControl_VAE and restore its weights from a checkpoint.

    Args:
        model_path: path of a checkpoint readable by `torch.load`. It must
            contain "model_state_dict" and, unless `params_dict` is given,
            a "params" entry.
        params_dict: model hyper-parameters, either as a dictionary or as the
            path of a JSON file holding one. When None, the "params" entry of
            the checkpoint is used.
        is_evaluating: when True, the model is switched to eval mode.
        device: optional `map_location` forwarded to `torch.load`.
        genre_json_path: optional path of a JSON file whose content is stored
            under `params_dict['genre_dict']`. It is only consulted when the
            parameters come from the checkpoint (i.e. `params_dict` is None).

    Returns:
        The GrooveControl_VAE instance with the checkpoint weights loaded.

    Raises:
        Exception: if `params_dict` is None and the checkpoint has no "params".
    """
    # NOTE(review): torch.load unpickles the file; only load checkpoints from
    # a trusted source.
    try:
        if device is not None:
            loaded_dict = torch.load(model_path, map_location=device)
        else:
            loaded_dict = torch.load(model_path)
    except Exception:
        # Best-effort fallback: retry with the tensors mapped onto the CPU.
        # `except Exception` (not a bare except) so that KeyboardInterrupt
        # and SystemExit are not swallowed and retried.
        loaded_dict = torch.load(model_path, map_location=torch.device('cpu'))

    if params_dict is None:
        if 'params' not in loaded_dict:
            raise Exception(f"Could not instantiate model as params_dict is not found. "
                            f"Please provide a params_dict either as a json path or as a dictionary")
        params_dict = loaded_dict['params']

        # Optionally replace the genre mapping stored with the checkpoint.
        if isinstance(genre_json_path, str):
            with open(genre_json_path, 'r') as f:
                params_dict['genre_dict'] = json.load(f)

    # A string is interpreted as the path of a JSON file with the parameters.
    if isinstance(params_dict, str):
        with open(params_dict, 'r') as f:
            params_dict = json.load(f)

    model = GrooveControl_VAE(params_dict)
    model.load_state_dict(loaded_dict["model_state_dict"])
    if is_evaluating:
        model.eval()

    return model

def create_genre_onehot_tensor(genre_label, genre_dict):
  """Encode a genre label as a float32 one-hot row.

  Args:
    genre_label: key to look up in `genre_dict`.
    genre_dict: mapping from genre label to integer class index.

  Returns:
    A float32 tensor of shape (1, len(genre_dict)) with a single 1.0 at the
    index of `genre_label`.
  """
  genre_index = torch.tensor([genre_dict[genre_label]])
  one_hot = torch.nn.functional.one_hot(genre_index, num_classes=len(genre_dict))
  return one_hot.to(dtype=torch.float32)

def get_flattened_version(sample_hvo_seq):
  """Return a new sequence object holding the flattened version of the input.

  The result comes from `sample_hvo_seq.copy_zero()` and its `hvo` field is
  set to `sample_hvo_seq.flatten_voices(reduce_dim=False)`.
  Useful for plotting and generating audio.
  """
  flattened_seq = sample_hvo_seq.copy_zero()
  flattened_hvo = sample_hvo_seq.flatten_voices(reduce_dim=False)
  flattened_seq.hvo = flattened_hvo
  return flattened_seq


def slice_and_convert_to_tensor(arr, n_steps=32):
    """Pad or truncate a 2D array to `n_steps` rows and wrap it in a tensor.

    Args:
        arr: 2D numpy array; rows are padded/truncated, columns are kept.
        n_steps: number of rows of the result. Defaults to 32, the length
            that used to be hard-coded here.

    Returns:
        A tensor of shape (n_steps, arr.shape[1]) with the dtype of `arr`.
        Shorter inputs are zero-padded at the end; longer inputs keep their
        first `n_steps` rows. When no padding is needed the tensor shares
        memory with `arr` (`torch.from_numpy` does not copy).

    Raises:
        ValueError: if `arr` is not a 2D numpy array or `n_steps` is negative.
    """
    if not isinstance(arr, np.ndarray):
        raise ValueError("Input should be a numpy array")

    if arr.ndim != 2:
        raise ValueError("Input should be a 2D numpy array")

    if n_steps < 0:
        raise ValueError("n_steps should be non-negative")

    n_rows = arr.shape[0]
    if n_rows < n_steps:
        # Append zero rows at the end; columns are left untouched.
        arr = np.pad(arr, ((0, n_steps - n_rows), (0, 0)), mode='constant', constant_values=0)
    elif n_rows > n_steps:
        arr = arr[:n_steps, :]

    return torch.from_numpy(arr)



## Download a model

You can find the selection of final ablation models [here](https://wandb.ai/mmil_julian/ControlAdversarial/reports/Classic-31-Final-Evals--Vmlldzo1MTk2MDg0)

There is also a full selection of VAEDER models available [here](https://wandb.ai/mmil_julian/ControlAdversarial)

For this demo, we use "earthy_armadillo_149" at epoch 210, which is presented as the **base model** in the evaluations.





In [None]:
# Direct Weights & Biases artifact URL of the epoch-210 checkpoint.
url = "https://api.wandb.ai/artifactsV2/gcp-us/mmil_julian/QXJ0aWZhY3Q6NTQyNjA4OTI0/c5b01b5529031465d9c77277a0793a00/210.pth"
# Download the checkpoint; {url} is interpolated from the Python variable above.
!wget {url}
# Rename it to the file name used when the model is loaded.
!mv 210.pth vaeder.pth
# List the working directory to confirm the checkpoint is present.
!ls

--2023-09-07 14:38:59--  https://api.wandb.ai/artifactsV2/gcp-us/mmil_julian/QXJ0aWZhY3Q6NTQyNjA4OTI0/c5b01b5529031465d9c77277a0793a00/210.pth
Resolving api.wandb.ai (api.wandb.ai)... 35.186.228.49
Connecting to api.wandb.ai (api.wandb.ai)|35.186.228.49|:443... connected.
HTTP request sent, awaiting response... 307 Temporary Redirect
Location: https://storage.googleapis.com/wandb-artifacts-prod/wandb_artifacts/85038362/542608924/c5b01b5529031465d9c77277a0793a00?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gorilla-files-url-signer-man%40wandb-production.iam.gserviceaccount.com%2F20230907%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20230907T143859Z&X-Goog-Expires=3599&X-Goog-Signature=31444a93c1dc96a1e9878a232599192c52cabcb65c452fca345417f3e978bbec45475ab1fa25346d3034c283dcfd76600e5cfe1b32cbfea4bef2f36294c55667c0560174eabff49fa82e0d96306354b658f725cf31342a4d92959733417f92e4f333aeea36284e02ffb0651ca4233e471e6227828c9f2536d53b76f350e6ea4747b61221a93a555176b03296b621b0232e24a9e13f0c

# Random Generation
Now that the model is downloaded, let's load it and initialize a random latent vector `z`; in the next cell we specify the density, intensity and genre control parameters.

In [None]:
# Load the downloaded checkpoint and read the sizes needed to build the inputs.
model = load_vaeder_model("vaeder.pth")
latent_dim = model.get_latent_dim()
genre_dict = model.get_genre_dict()
# Sample the latent vector from a standard normal distribution.
# torch.rand would draw from U[0, 1) and never yield negative coordinates.
# NOTE(review): assumes the usual N(0, I) VAE prior; confirm against the
# training code.
z = torch.randn(1, latent_dim, dtype=torch.float32)


Using In-Attention: 1


In [None]:
# Control inputs for the decoder: edit the values and re-run the generation
# cell to hear how the model responds.
genre = create_genre_onehot_tensor("rock", genre_dict)
intensity = torch.tensor([0.5], dtype=torch.float32)
density = torch.tensor([0.65], dtype=torch.float32)


In [None]:

# Decode the latent vector under the chosen controls and join the three
# decoder outputs along the last axis into one score array.
h, v, o = model.decode(z, density, intensity, genre)
hvo = torch.cat((h, v, o), dim=-1).squeeze().cpu().numpy()
print(hvo.shape)

# Wrap the score in an HVO_Sequence (4/4 at 120 qpm) so it can be plotted
# and synthesized.
hvo_seq = HVO_Sequence(
    beat_division_factors=[4],
    drum_mapping=ROLAND_REDUCED_MAPPING,
)
hvo_seq.add_time_signature(time_step=0, numerator=4, denominator=4)
hvo_seq.add_tempo(time_step=0, qpm=120)
hvo_seq.hvo = hvo

audio = plot_and_synthesize(hvo_seq)
ipd.Audio(audio, rate=44100, autoplay=False)

(32, 27)


# Tap2Drum

Now we will load a drum loop from our test set, flatten it into a single-voice groove, and feed it through our full model.

In [None]:
# Load every pattern of the test subset as HVO_Sequence objects.
# An HVO_Sequence holds the score (the .hvo field) and any metadata of the
# sample (the .metadata field), and offers built-in helpers for inspecting,
# plotting and synthesizing the score.
test_set = load_gmd_hvo_sequences(
    dataset_setting_json_path="data/dataset_json_settings/4_4_Beats_gmd.json",
    subset_tag="test",
    force_regenerate=False,
)

# Pick one sample and show its metadata.
ix = 25
gt_sample = test_set[ix]
print(gt_sample.metadata)

# Plot the ground-truth pattern and play it back.
audio_gt = plot_and_synthesize(gt_sample)
ipd.Audio(audio_gt, rate=44100, autoplay=False)

{'Source': 'Groove MIDI Dataset', 'drummer': 'drummer1', 'session': 'eval_session', 'loop_id': 'drummer1/eval_session/1:010', 'master_id': 'drummer1/eval_session/1', 'style_primary': 'funk', 'style_secondary': 'groove1', 'bpm': '138', 'beat_type': 'beat', 'time_signature': '4-4', 'full_midi_filename': 'drummer1/eval_session/1_funk-groove1_138_beat_4-4.mid', 'full_audio_filename': 'drummer1/eval_session/1_funk-groove1_138_beat_4-4.wav'}


In [None]:
# Flatten the ground-truth sample, then plot it and play it back.
groove_hvo_seq = get_flattened_version(gt_sample)
audio_groove = plot_and_synthesize(groove_hvo_seq)
ipd.Audio(data=audio_groove, rate=44100, autoplay=False)

#### Prepare our Model Inputs

In [None]:
from copy import deepcopy

# Control inputs for the Tap2Drum example.
genre = create_genre_onehot_tensor("jazz", genre_dict)
intensity = torch.tensor([0.25], dtype=torch.float32)
density = torch.tensor([0.8], dtype=torch.float32)

# Flatten a copy of the ground-truth sample, so gt_sample is never touched,
# then fit it to the model input length and add a leading batch dimension.
tapped_seq = deepcopy(gt_sample).flatten_voices(reduce_dim=True)
tapped_input = slice_and_convert_to_tensor(tapped_seq)
tapped_input = tapped_input.unsqueeze(dim=0).to(dtype=torch.float32)
print(tapped_input.shape)



torch.Size([1, 32, 3])
torch.Size([1, 32, 27])


#### Run inference and visualize the results

In [None]:
# Run the full model on the tapped input; only the first of the four returned
# values (the concatenated score) is used.
hvo, _, _, _ = model.predict(
    tapped_input, density, intensity, genre,
    return_concatenated=True,
)
print(hvo.shape)

# Reuse a copy of the ground-truth sequence as the container for the
# predicted score, then plot it and play it back.
output_seq = deepcopy(gt_sample)
output_seq.hvo = hvo.squeeze().cpu().numpy()

audio_groove = plot_and_synthesize(output_seq)
ipd.Audio(audio_groove, rate=44100, autoplay=False)

torch.Size([1, 32, 27])
