<a href="https://colab.research.google.com/github/jmhuer/shift_invariant_dictionary_learning/blob/main/teporal_struc.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pretty_midi
!git clone https://github.com/jmhuer/ModularSparseAutoencoder
!git clone https://github.com/music-x-lab/POP909-Dataset
%cd /content/POP909-Dataset/data_process
!pip install libfmp

Collecting pretty_midi
  Downloading pretty_midi-0.2.9.tar.gz (5.6 MB)
[K     |████████████████████████████████| 5.6 MB 13.8 MB/s 
Collecting mido>=1.1.16
  Downloading mido-1.2.10-py2.py3-none-any.whl (51 kB)
[K     |████████████████████████████████| 51 kB 7.5 MB/s 
Building wheels for collected packages: pretty-midi
  Building wheel for pretty-midi (setup.py) ... [?25l[?25hdone
  Created wheel for pretty-midi: filename=pretty_midi-0.2.9-py3-none-any.whl size=5591953 sha256=8a16807346cf9c1fd68d7fd74a00a95923b9dba14337c77bc03cf67b8b664e13
  Stored in directory: /root/.cache/pip/wheels/ad/74/7c/a06473ca8dcb63efb98c1e67667ce39d52100f837835ea18fa
Successfully built pretty-midi
Installing collected packages: mido, pretty-midi
Successfully installed mido-1.2.10 pretty-midi-0.2.9
Cloning into 'ModularSparseAutoencoder'...
remote: Enumerating objects: 163, done.[K
remote: Counting objects: 100% (163/163), done.[K
remote: Compressing objects: 100% (109/109), done.[K
remote: Total 163 (d

In [None]:
#@title Pytorch for DL

import torch.nn.functional as F
import torch.optim as optim
from torch import nn
import torch
from torch.nn.utils import weight_norm
import numpy as np
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print("Using device: ", device)


def get_model_parameters(model):
    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])
    return params

In [None]:

import pickle
import os
import sys
# import utils
from processor import MidiEventProcessor
import pretty_midi 
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms



class Pop909(Dataset):
    def __init__(self, midi_paths, parts="all", traspose=True):
      "todo: transpose"
      import csv
      from os import path
      self.total = 0
      self.all_data = self.preprocess_pop909(midi_paths=midi_paths, 
                                               parts=parts, 
                                               traspose=traspose).double()
      print("all_data ", self.all_data.shape)
      self.transform = self.make_transform()
    def __len__(self):
        return len(self.all_data)
    def __getitem__(self, idx):
        piano_roll_slice = self.all_data[idx,:]
        return self.transform["norm"](piano_roll_slice[None][None])
    def make_transform(self):
      mean = self.all_data.mean()
      std = self.all_data.std() * 2
      # print("mean ", mean.shape)
      # print("std ", std.shape)
      tensor_transform = {
        'norm':
            transforms.Compose([
                transforms.Normalize([mean], [std])  # Imagenet standards
            ]),
        "inverse_norm":
              transforms.Normalize(
                mean= [-m/s for m, s in zip([mean], [std])],
                std= [1/s for s in [std]]
               )
           }
      return tensor_transform
    def preprocess_midi(self, path, parts="all"):
        get_index = {
            "all"   : [0,3],
            "melody": [0,1],  
            "bridge": [1,2],  
            "chords": [2,3],  
        }
        index = get_index[parts]
        data = pretty_midi.PrettyMIDI(path)
        main_notes = []
        acc_notes = []
        for ins in data.instruments[index[0]:index[1]]:
            acc_notes.extend(ins.notes)
        for i in range(len(main_notes)):
            main_notes[i].start = round(main_notes[i].start,2)
            main_notes[i].end = round(main_notes[i].end,2)
        for i in range(len(acc_notes)):
            acc_notes[i].start = round(acc_notes[i].start,2)
            acc_notes[i].end = round(acc_notes[i].end,2)
        main_notes.sort(key = lambda x:x.start)
        acc_notes.sort(key = lambda x:x.start)

        piano_program = pretty_midi.instrument_name_to_program('Acoustic Grand Piano')
        piano = pretty_midi.Instrument(program=piano_program)
        piano.notes.extend(acc_notes)
        # score = prettyn
        # mpr = MidiEventProcessor()
        # repr_seq = mpr.encode([main_notes, acc_notes])
        self.total += 1
        return piano.get_piano_roll()
    def preprocess_pop909(self, midi_paths, parts="all", traspose=True):
        save_py = []
        i = 0
        out_fmt = '{}-{}.data'
        for path in midi_paths:
            if path[len(path)-5:len(path)] != ".xlsx":
              # print(' ', end='[{}]'.format(path), flush=True)
              filename = path + "/"+ path[10:14] + ".mid"
              try:
                  data = torch.tensor(self.preprocess_midi(filename, parts=parts))
              except KeyboardInterrupt:
                  print(' Abort')
                  return
              except EOFError:
                  print('EOF Error')
                  return
              save_py.append(data.T)
        return torch.cat(save_py)


def load_data(midi_paths, parts, num_workers=0, batch_size=32, random_seed = 40):
    '''
    this data loading proccedure assumes dataset/train/ dataset/val/ folders
    also assumes transform dictionary with train and val
    '''
    total = len(midi_paths) 
    index = int(len(midi_paths)*0.9) #_0% of data
                
    dataset_train = Pop909(midi_paths[0:index],parts="all") 
    dataset_val = Pop909(midi_paths[index:total],parts="all") 

    print("Size of train dataset: ",len(dataset_train))
    print("Size of val dataset: ",len(dataset_val))

    dataloaders = {
        'train': DataLoader(dataset_train, batch_size=batch_size, shuffle=False, drop_last=True),
        'val': DataLoader(dataset_val, batch_size=batch_size, shuffle=False, drop_last=True)
    }
    return dataloaders



##only tensor transforms



In [None]:

midi_paths = ["../POP909/" + d for d in os.listdir("../POP909/")] #not index

dataset = load_data(midi_paths=midi_paths, parts="all", batch_size=128)
print(len(dataset["val"])*128 )


In [None]:
""

def piano_roll_to_pretty_midi(piano_roll, fs=100, program=0):
    '''Convert a Piano Roll array into a PrettyMidi object
     with a single instrument.
    Parameters
    ----------
    piano_roll : np.ndarray, shape=(128,frames), dtype=int
        Piano roll of one instrument
    fs : int
        Sampling frequency of the columns, i.e. each column is spaced apart
        by ``1./fs`` seconds.
    program : int
        The program number of the instrument.
    Returns
    -------
    midi_object : pretty_midi.PrettyMIDI
        A pretty_midi.PrettyMIDI class instance describing
        the piano roll.
    '''
    notes, frames = piano_roll.shape
    pm = pretty_midi.PrettyMIDI()
    instrument = pretty_midi.Instrument(program=program)

    # pad 1 column of zeros so we can acknowledge inital and ending events
    piano_roll = np.pad(piano_roll, [(0, 0), (1, 1)], 'constant')

    # use changes in velocities to find note on / note off events
    velocity_changes = np.nonzero(np.diff(piano_roll).T)

    # keep track on velocities and note on times
    prev_velocities = np.zeros(notes, dtype=int)
    note_on_time = np.zeros(notes)

    for time, note in zip(*velocity_changes):
        # use time + 1 because of padding above
        velocity = piano_roll[note, time + 1]
        time = time / fs
        if velocity > 0:
            if prev_velocities[note] == 0:
                note_on_time[note] = time
                prev_velocities[note] = velocity
        else:
            pm_note = pretty_midi.Note(
                velocity=prev_velocities[note],
                pitch=note,
                start=note_on_time[note],
                end=time)
            instrument.notes.append(pm_note)
            prev_velocities[note] = 0
    pm.instruments.append(instrument)
    return pm




In [None]:
import IPython.display

# cello_c_chord.write('cello-C-chord.mid')
#lets play a batch 
tr = dataset["val"].dataset.transform["inverse_norm"]
g = tr(next(iter(chord_train_dataset["val"])))[:,0,0,:].T
print(g)
batch = None
for i, ba in enumerate(chord_train_dataset["val"]): 
    if i==101: batch = ba
batch_piano_roll = tr(batch)[:,0,0,:].T
print(batch_piano_roll.shape)
pm = piano_roll_to_pretty_midi(batch_piano_roll)
IPython.display.Audio(pm.synthesize(fs=16000), rate=16000)



#were in bussiness

In [None]:
import argparse
import datetime
import json
import os

import pandas as pd
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim



def get_args():
    parser = argparse.ArgumentParser()

    # Architecture Flags
    parser.add_argument('--intermediate_dim', type=int, default=250)
    parser.add_argument('--stripe_dim', type=int, default=5)
    parser.add_argument('--num_stripes', type=int, default=30)
    parser.add_argument('--num_active_neurons', type=int, default=15)
    parser.add_argument('--num_active_stripes', type=int, default=3)
    parser.add_argument('--layer_sparsity_mode', type=str, default='none')  # Set to none, ordinary, boosted, or lifetime.
    parser.add_argument('--stripe_sparsity_mode', type=str, default='routing')  # Set to none, ordinary, or routing.
    parser.add_argument('--distort_prob', type=float, default=.4)  # Probability of stripe sparsity mask bits randomly flipping.
    parser.add_argument('--distort_prob_decay', type=float, default=.025)  # Lowers distort_prob by this amount every epoch.

    # Boosting Flags - Only necessary when layer_sparsity_mode is set to boosted.
    parser.add_argument('--alpha', type=float, default=.8)
    parser.add_argument('--beta', type=float, default=1.2)

    # Routing Flags - Only necessary when stripe_sparsity_mode is set to routing.
    parser.add_argument('--routing_l1_regularization', type=float, default=0.)
    parser.add_argument('--log_average_routing_scores', type=bool, default=True)

    # Lifetime Stripe Flag - Only necessary when stripe_sparsity_mode is set to lifetime.
    # Within a stripe, this specifies the proportion of samples that may activate the stripe.
    parser.add_argument('--active_stripes_per_batch', type=float, default=1.)

    # Training Flags
    parser.add_argument('--lr', type=float, default=.01)
    parser.add_argument('--momentum', type=float, default=.9)
    parser.add_argument('--num_epochs', type=int, default=20)
    parser.add_argument('--batch_size', type=int, default=8)
    parser.add_argument('--data_path', type=str, default='data.csv')
    parser.add_argument('--log_path', type=str, default='logs')
    parser.add_argument('--log_class_specific_losses', type=bool, default=False)
    parser.add_argument('--log_average_activations', type=bool, default=True)
    parser.add_argument('--use_cuda_if_available', type=bool, default=True)

    args, unknown = parser.parse_known_args()
    return vars(args)
args = get_args()
print(args["intermediate_dim"])

In [27]:
from ModularSparseAutoencoder.model import Net
from ModularSparseAutoencoder.train import train

dataset = train_dataset

num_stripes = args['num_stripes']
num_epochs = args['num_epochs']
batch_size = args['batch_size']

device = torch.device('cuda' if torch.cuda.is_available() and args['use_cuda_if_available'] else 'cpu')

net = Net(args['intermediate_dim'],
          args['stripe_dim'],
          args['num_stripes'],
          args['num_active_neurons'],
          args['num_active_stripes'],
          args['layer_sparsity_mode'],
          args['stripe_sparsity_mode'],
          args['distort_prob'],
          args['alpha'],
          args['beta'],
          args['active_stripes_per_batch'],
          device)
criterion = nn.MSELoss()
optimizer = optim.SGD(net.parameters(),
                      lr=args['lr'],
                      momentum=args['momentum'])

timestamp = str(datetime.datetime.now()).replace(' ', '_')
root_path = os.path.join(args['log_path'],
                          args['layer_sparsity_mode'],
                          args['stripe_sparsity_mode'],
                          timestamp)
print(f'Logging results to path:  {root_path}')

distort_prob_decay = args['distort_prob_decay']
routing_l1_regularization = (args['routing_l1_regularization'] if args['stripe_sparsity_mode'] == 'routing' else 0)
log_class_specific_losses = args['log_class_specific_losses']
should_log_average_routing_scores = (
            args['stripe_sparsity_mode'] == 'routing' and args['log_average_routing_scores'])

train(net,
      criterion,
      optimizer,
      root_path,
      dataset,
      num_stripes,
      num_epochs,
      distort_prob_decay,
      routing_l1_regularization,
      log_class_specific_losses,
      should_log_average_routing_scores)

if args['log_average_activations']:
    average_activations_path = os.path.join(root_path, 'average_activations.json')
    with open(average_activations_path, 'w') as f:
        average_activations = net.get_average_activations(X_test, Y_test, device=device).tolist()
        f.write(json.dumps(average_activations))

flag_values_path = os.path.join(root_path, 'experiment_config.json')
with open(flag_values_path, 'w') as f:
    f.write(json.dumps(args))

Logging results to path:  logs/none/routing/2021-09-27_05:29:24.688949


TypeError: ignored

In [None]:
#@title KWTA


class SparsifyBase(nn.Module):
    def __init__(self, sparse_ratio=0.5):
        super(SparsifyBase, self).__init__()
        self.sr = sparse_ratio
        self.preact = None
        self.act = None
    def get_activation(self):
        def hook(model, input, output):
            self.preact = input[0].cpu().detach().clone()
            self.act = output.cpu().detach().clone()
        return hook
    def record_activation(self):
        self.register_forward_hook(self.get_activation())


class Sparsify1D_kactive(SparsifyBase):
    def __init__(self, k=1):
        super(Sparsify1D_kactive, self).__init__()
        self.k = k
    def forward(self, x):
        m = torch.zeros(x.shape).to(device)
        for i in range(self.k):
            # print("shape", x.shape)
            indeces = x.topk(self.k, dim=1)[1][:, i]
            m += torch.mul(torch.zeros(x.shape).to(device).scatter(1, indeces.unsqueeze(1), 1), x)
            # print("\n hi", m )
        return m.double()


class Sparsify2D_vol(SparsifyBase):
    '''cross channel sparsify'''
    def __init__(self, sparse_ratio=0.1):
        super(Sparsify2D_vol, self).__init__()
        self.sr = sparse_ratio
    def forward(self, x):
        # print("x size", x.shape)
        size = x.shape[1]*x.shape[2]*x.shape[3]
        k = int(self.sr*size)
        tmpx = x.view(x.shape[0], -1)
        # print("size after view",tmpx.shape )
        topval = tmpx.topk(k, dim=1)[0][:,-1]
        topval = topval.repeat(tmpx.shape[1], 1).permute(1,0).view_as(x)
        comp = (x>=topval).to(x)
        return comp*x

In [None]:
#@title TCN 

class Chomp1d(nn.Module):
    def __init__(self, chomp_size):
        super(Chomp1d, self).__init__()
        self.chomp_size = chomp_size

    def forward(self, x):
        return x[:, :, :-self.chomp_size].contiguous()


class TemporalBlock(nn.Module):
    def __init__(self, n_inputs, n_outputs, kernel_size, stride, dilation, padding, dropout=0.2):
        super(TemporalBlock, self).__init__()
        self.conv1 = weight_norm(nn.Conv1d(n_inputs, n_outputs, kernel_size,
                                           stride=stride, padding=padding, dilation=dilation))
        self.chomp1 = Chomp1d(padding)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout)

        self.conv2 = weight_norm(nn.Conv1d(n_outputs, n_outputs, kernel_size,
                                           stride=stride, padding=padding, dilation=dilation))
        self.chomp2 = Chomp1d(padding)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(dropout)

        self.net = nn.Sequential(self.conv1, self.chomp1, self.relu1, self.dropout1,
                                 self.conv2, self.chomp2, self.relu2, self.dropout2)
        self.downsample = nn.Conv1d(n_inputs, n_outputs, 1) if n_inputs != n_outputs else None
        self.relu = nn.ReLU()
        self.init_weights()

    def init_weights(self):
        self.conv1.weight.data.normal_(0, 1)
        self.conv2.weight.data.normal_(0, 1)
        if self.downsample is not None:
            self.downsample.weight.data.normal_(0, 1)

    def forward(self, x):
        # print("block ", x.size())
        out = self.net(x)
        res = x if self.downsample is None else self.downsample(x)
        return self.relu(out + res)


class TemporalConvNet(nn.Module):
    def __init__(self, num_inputs, num_channels, kernel_size=2, dropout=0.2):
        super(TemporalConvNet, self).__init__()
        layers = []
        num_levels = len(num_channels)
        for i in range(num_levels):
            dilation_size = 2 ** i
            in_channels = num_inputs if i == 0 else num_channels[i-1]
            out_channels = num_channels[i]
            layers += [TemporalBlock(in_channels, out_channels, kernel_size, stride=1, dilation=dilation_size,
                                     padding=(kernel_size-1) * dilation_size, dropout=dropout)]

        self.network = nn.Sequential(*layers)


        # print("last layer conv", self.network[-1].conv2.weight.data[:,0,:].size())
        # print("last layer conv", self.network[-1].conv2.weight.data[:,0,:])

    def forward(self, x):
        return self.network(x)


In [None]:
#@title TCN - Autoeconder 

class TCNAutoencoder(nn.Module):
    def __init__(self, kernel_size, sparse_ratio=0.1):
        super(TCNAutoencoder, self).__init__()
        self.wta = Sparsify2D_vol(sparse_ratio=sparse_ratio)
        # self.feature = TemporalConvNet(128, [128], kernel_size=64, dropout=dropout).double()
        self.encoder = torch.nn.Conv2d(in_channels=1, out_channels=88, kernel_size=(128,64), padding=0, bias=False, stride=64)
        self.decoder = torch.nn.ConvTranspose2d(in_channels=88, out_channels=1, kernel_size=(128,64), padding=0, bias=False, stride=64)
        # self.encoder2 = torch.nn.Conv2d(in_channels=1, out_channels=24, kernel_size=(128,12), padding=0, bias=False, stride=12)
        # self.decoder2 = torch.nn.ConvTranspose2d(in_channels=24, out_channels=1, kernel_size=(128,12), padding=0, bias=False, stride=12)
        # self.encoder2.weight.data.normal_(30)
        # self.encoder.weight.data.normal_(30)
        # self.decoder.weight.data.normal_(300)
        self.relu1 = nn.ReLU()
        self.sig = nn.Sigmoid()
        self.code = None
        # torch.nn.init.xavier_uniform(self.encoder.weight)
        # torch.nn.init.xavier_uniform(self.decoder.weight)
    def get_kernels(self):
        return self.decoder.weight.data[:,0,:]
    def feature_map(self, x):
        code = self.code
        return code
    def forward(self, x):
        # x needs to have dimension (N, C, L) in order to be passed into CNN
        # output = self.feature(x)
        self.code = self.wta(self.sig(self.encoder(x)))
        output = self.decoder(self.code)
        # output = self.decoder(self.code )
        return output

# t = torch.rand((1, 1,128,1000))
# b = TCNAutoencoder(kernel_size = 128, wta_k=5)
# b(t)

# print(a.shape)
# np.savetxt("foo.csv", b.code[0,:,0,:].detach().numpy(), delimiter=",")


In [None]:
#@title GO

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print("Using device: ", device)
kernel_size = 64



model = TCNAutoencoder(kernel_size=kernel_size, 
                       sparse_ratio = 0.05).to(device).double()
print("TCNAutoencoder trainable parameters: ", get_model_parameters(model))

# model.load_state_dict(torch.load("/content/model.pth"))


loss_fn = torch.nn.MSELoss().to(device)
# optimizer = optim.SGD(model.parameters(), lr=.0008, weight_decay = 0.00001, momentum=0.05) ##this has weight decay just like you implemented
optimizer = optim.Adam(model.parameters(), lr=.001,  betas=(0.8, 0.999), eps=1e-08, weight_decay=0, amsgrad=True) ##this has weight decay just like you implemented
epochs = 200
history = {"loss": []}


calc = []
total_len = 0
for piano_roll_batch in train_dataset["train"]:
    piano_roll_batch = piano_roll_batch[:,0, 0,:]
    #preprocess
    optimizer.zero_grad()
    output = model(piano_roll_batch)

    loss = loss_fn(piano_roll_batch, output)
    loss.backward()
    # torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
    optimizer.step()
    history["loss"].append(float(loss)*len(chord))
    # print("Loss : {} ".format(float(loss)))
    total_len += len(chord)
    print("Epoch : {} \t Loss : {} ".format(i, round(float(np.mean(history["loss"], axis=0)),9)))
    history["loss"] = []
    total_len = 0

# print(len(calc))
# print(np.mean(calc, axis=0))
# print(np.std(calc, axis=0))


Using device:  cuda
TCNAutoencoder trainable parameters:  1441792
Epoch : 0 	 Loss : 3.257e-06 
Epoch : 1 	 Loss : 2.683e-06 
Epoch : 2 	 Loss : 2.477e-06 
Epoch : 3 	 Loss : 2.251e-06 
Epoch : 4 	 Loss : 2.178e-06 
Epoch : 5 	 Loss : 1.942e-06 
Epoch : 6 	 Loss : 1.864e-06 
Epoch : 7 	 Loss : 1.832e-06 
Epoch : 8 	 Loss : 1.776e-06 
Epoch : 9 	 Loss : 1.678e-06 
Epoch : 10 	 Loss : 1.654e-06 
Epoch : 11 	 Loss : 1.604e-06 
Epoch : 12 	 Loss : 1.569e-06 
Epoch : 13 	 Loss : 1.511e-06 
Epoch : 14 	 Loss : 1.464e-06 
Epoch : 15 	 Loss : 1.463e-06 
Epoch : 16 	 Loss : 1.431e-06 
Epoch : 17 	 Loss : 1.39e-06 
Epoch : 18 	 Loss : 1.368e-06 
Epoch : 19 	 Loss : 1.355e-06 
Epoch : 20 	 Loss : 1.329e-06 
Epoch : 21 	 Loss : 1.316e-06 
Epoch : 22 	 Loss : 1.292e-06 
Epoch : 23 	 Loss : 1.274e-06 
Epoch : 24 	 Loss : 1.267e-06 
Epoch : 25 	 Loss : 1.249e-06 
Epoch : 26 	 Loss : 1.237e-06 
Epoch : 27 	 Loss : 1.223e-06 
Epoch : 28 	 Loss : 1.203e-06 
Epoch : 29 	 Loss : 1.192e-06 
Epoch : 30 	 Lo

KeyboardInterrupt: ignored

In [None]:
#try 2d conv

index_example = 0
# index_example = 876

size = 4000

print("orginal 1")

raw_input =  chord_train_dataset[index_example][None,:, 0:size]

pm = piano_roll_to_pretty_midi(raw_input.cpu().numpy()[0].astype(int))
IPython.display.Audio(pm.synthesize(fs=16000), rate=16000)




orginal 1


In [None]:
raw_input =  chord_train_dataset[index_example][None, None,:, 0:size]


In [None]:
model = TCNAutoencoder(kernel_size=kernel_size, 
                       sparse_ratio = 0.05).to(device).double()
print("TCNAutoencoder trainable parameters: ", get_model_parameters(model))

model.load_state_dict(torch.load("/content/model.pth"))


TCNAutoencoder trainable parameters:  5767168


<All keys matched successfully>

In [None]:


######reconstructed
print("reconstructed")


raw_input = raw_input  / 111.14747885919755
raw_input = raw_input.to(device).double()
# print("train_data size", train_datase.shape)
model_out = model(raw_input)
model_out = model_out * 111.14747885919755
# model_out[model_out < 0] = 0
# print(model_out.cpu().detach().numpy().astype(int))

model_out = model_out.cpu().detach().numpy()[0].astype(int)

model_out = model_out[0]
model_out = (model_out>10) *100

print("model_out size", model_out)

pm = piano_roll_to_pretty_midi(model_out)
IPython.display.Audio(pm.synthesize(fs=16000), rate=16000)


reconstructed
model_out size [[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


In [None]:

torch.save(model.state_dict(), "model.pth")
from google.colab import files
files.download('model.pth')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# model Eval

In [None]:
#few helpers
def get_code(model,input):
    model(input)
    return model.code


def play_example(input):
    input = input.cpu().detach().numpy()[0].astype(int)

    # model_out = model_out[model_out>=0]
    # print("model_out size", model_out)

    pm = piano_roll_to_pretty_midi(input)
    IPython.display.Audio(pm.synthesize(fs=16000), rate=16000)

#make it a keep top n 
def exchange_max_rows(A,B):
    maxrowA = torch.argmax(A.sum(1))
    maxrowB = torch.argmax(B.sum(1))
    rowA = A[maxrowA:maxrowA+1,].clone()
    rowB = B[maxrowB:maxrowB+1,].clone()
    # print("rowA ",rowA )
    # print("rowB ",rowB )
    A[maxrowB:maxrowB+1,] =  rowB
    B[maxrowA:maxrowA+1,] =  rowA
    return A,B

#make it a keep top n 
def keep_topk(A,k):
    mask = torch.zeros(A.shape).to(device)
    v, i  = torch.topk(A.sum(1), k)
    print("\n index is", i)
    mask[i, ] = True
    return mask * A


import torch.nn.functional as F

def get_tokcos(X, y, k, similar=True, ):
    '''
    X : Array Rows of possibilities 
    Y : vector of intrest 
    '''
    cos = nn.CosineSimilarity(eps=1e-6)
    dist = F.cosine_similarity(X, y, dim=-1)
    index_sorted = torch.argsort(dist, descending=similar)

    # we dont want identical vec
    if similar: top_k = index_sorted[1:k+1] 
    else: top_k = index_sorted[0:k] 

    return top_k

In [None]:
index_example = 747

raw_input = chord_train_dataset[index_example][None, None,:,0:5000].to(device).double()
raw_input2 = chord_train_dataset[index_example+1][None, None,:,0:5000].to(device).double()
raw_input = raw_input  / 111.14747885919755
raw_input2 = raw_input2  / 111.14747885919755

#get raw_input
# raw_input = train_data[index_example:index_example+1,:,:] 
# raw_input2 = train_data[index_example+1:index_example+2,:,:] 



#example 1
print("Orginal 1")
input1 = torch.cat([raw_input], axis=-1)
# play_example(input1)
code1 = get_code(model, input1)




#example 2
print("Orginal 2")
input2 = torch.cat([raw_input2 ], axis=-1)
# play_example(input2)
code2 = get_code(model, input2)



# New music
print("New")
print(code1.shape)
print(code2.shape)
# new_code1, new_code2  = exchange_max_rows(code1[0],code2[0])

new_code1 = (code1[0] + code2[0])


input = model.decoder(new_code1[None]) * 111.14747885919755
# input = (raw_input + raw_input2) * 111.14747885919755
# input = input.cpu().detach().numpy()[0].astype(int) 
input = input.cpu().detach().numpy()[0].astype(int) 


input = input[0]
input = (input>50) *100

# input = input[input>=0]
# print("model_out size", model_out)

pm = piano_roll_to_pretty_midi(input)
IPython.display.Audio(pm.synthesize(fs=16000), rate=16000)




Orginal 1
Orginal 2
New
torch.Size([1, 88, 1, 396])
torch.Size([1, 88, 1, 396])


In [None]:
#@title only play most active n_atoms
print(new_code1.shape)
n_atoms = 2
onekernel = keep_topk(new_code1[:,0,:].clone(), n_atoms)
input = model.decoder(onekernel[None,:,None,:]) * 111.14747885919755
input = input.cpu().detach().numpy()[0].astype(int) 

input = input[0]
input = (input>50) *100

pm = piano_roll_to_pretty_midi(input)
IPython.display.Audio(pm.synthesize(fs=16000), rate=16000)



torch.Size([88, 1, 396])

 index is tensor([11, 86], device='cuda:0')


In [None]:
import libfmp.c1
score = libfmp.c1.midi_to_list(pm)

libfmp.c1.visualize_piano_roll(score, figsize=(8, 3))



# PCA to find temporal structure 


In [None]:
output = torch.zeros((1,128))
kernel_size = 64
for chord in  chord_train_dataset:
      # calc.extend(train_data.flatten().numpy())
      #normalize 
      chord = chord / 111.14747885919755 
      #preprocess
      chord = chord[None, None, 0:(len(chord)//kernel_size)*kernel_size].to(device).double()
      # melody = melody[None, None,0:(len(melody)//kernel_size)*kernel_size].to(device).double()
      # singleoutput = model(chord)[0][0].T
      # code1 = get_code(model, chord)[0,:,0,:].T
      # print("code1 ", code1.shape)
      print("singleoutput ", chord[0][0].T.shape)
      output = torch.cat((output, chord[0][0].T.detach().cpu()), 0)

print(output.shape)



NameError: ignored

In [None]:
#@title Train PCA 2 lower dimensions 

from sklearn.decomposition import PCA

def get_code(model,input):
    model(input)
    return model.code



#train_data PCA
# output = train_data.flatten(start_dim=1, end_dim=2)
pca = PCA(n_components=2)
pca_result = pca.fit_transform(output.detach().cpu())

pca_plot  = {"legend": "original", 
              "x": pca_result[0:6000,1], #first song
              "y": pca_result[0:6000,0],
              "marker_symbol": 'star'}
plot([pca_plot], "PCA on Drum Train Data")


from numpy import diff
dx = 1
dy = np.abs(diff(pca_result[0:6000,0])/dx)
pca_plot  = {"legend": "original", 
              "x": list(range(0,len(dy))), #first song
              "y": dy,
              "marker_symbol": 'star'}
plot([pca_plot], "PCA on Drum Train Data")


NameError: ignored

# explore kernels


In [None]:
import plotly.graph_objects as graph
def plot(all_history:list, title:str, log = False):
    """
    input:
        all_history: list of dicts to plot
    ret:
        None: show plotly fig
    """
    #symbol_sequence= ['circle-open', 'circle', 'circle-open-dot', 'square']
    fig = graph.Figure(layout = graph.Layout(title=graph.layout.Title(text=title))) 
    for i in range(len(all_history)):
        fig.add_trace(graph.Scatter(x = all_history[i]["x"], 
                                    y = all_history[i]["y"],
                                    name = all_history[i]["legend"],
                                    mode='markers',
                                    marker_size=5,
                                    marker_symbol=all_history[i]["marker_symbol"])) 
    if log: fig.update_xaxes(type="log")
    fig.show()

In [None]:
print(model.get_kernels().shape)


kernel1 = model.get_kernels().cpu().numpy()[1].tolist()
kernel0 = model.get_kernels().cpu().numpy()[0].tolist()
kernel6 = model.get_kernels().cpu().numpy()[6].tolist()


kernels6_plot  = {"legend": "original", 
                 "x": list(range(0,len(kernel6))), 
                 "y": kernel6,
                 "marker_symbol": 'triangle-up-open'}

#perfect plot
kernels1_plot  = {"legend": "original", 
                 "x": list(range(0,len(kernel1))), 
                 "y": kernel1,
                 "marker_symbol": 'star'}

plot([kernels6_plot], "kernels_plot")