# Project File - APS360 Team 25
Divided into the following section: 
# 
1) Library imports
2) Data imports
3) Model architecture definition
4) Training function definition
5) Model training
6) Model testing

## Library imports 
(Place all library imports here)

In [2]:
#import torchvision
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F

import time # Tracking model training time.

In [3]:
# Install mido for Data importing
!pip install mido;

import mido
from mido import MidiFile, Message, MidiTrack, MetaMessage
import os
import random



In [4]:
#mount googledrive
from google.colab import drive
drive.mount('/content/gdrive')

# location on Google Drive
master_path = '/content/gdrive/My Drive/APS360/Project/'

#Set working directory if required:
%cd /content/gdrive/My\ Drive/APS360/Project/

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
/content/gdrive/My Drive/APS360/Project


## Data imports
#### MIDI reading functions

In [5]:
def CountTracks(directory):          #Count files and tracks in folder
    trackCount = 0
    fileCount = 0
    for file in os.listdir(directory):
        if file.endswith(".midi"):
            fileCount += 1
            midiDir = MidiFile(directory+"/"+file)
            for track in midiDir.tracks:
                trackCount += 1
    print(fileCount+" files")
    print(trackCount+" tracks")

    
def PrintMessages(mid):                # print midi messages
    for i, track in enumerate(mid.tracks):
        print('Track {}: {}'.format(i, track.name))
        for msg in track:
            print(msg)

            
def PrintSomeMessages(mid):             #print first 200 midi messages
    track = mid.tracks[1]
    for i,msg in enumerate(track):
        if i < 200:
            print(msg)
            
def PrintMetaMessages(mid):             #print fmeta messages
    track = mid.tracks[0]
    for i,msg in enumerate(track):
        print(msg)

def cleanupMessages(mid):              #removes non-note messages by force
    track = mid.tracks[1]
    track2 = []
    for msg in track:
        if msg.type == "note_on":
            track2.append(msg)
    mid.tracks[1] = track2

#### MIDI to Numpy code

In [6]:
def Midi2NumpyNoSustain(mid):                                #converts to numpy array removing non-note messages
    track = mid.tracks[1]                           #0th track only contains meta-messages, all notes on 1st track
    notes = np.empty([0,4])
    time = 0
    for msg in track:
        if msg.type == "note_on":                   # only count "note" messages - other inputs i.e. foot pedals are ignored
            notes = np.append(notes,np.array([[msg.note, msg.velocity, msg.time + time, 0]]),axis=0)         # (note, velocity, time, sustain)
            time = 0
        else:
            time += msg.time                        #adjust time when removing other messages
    return notes


def NumpyGetSustain(note):
    notes = np.copy(note)
    for i, msg in enumerate(notes):
        if msg[1] > 0:                            # if velocity is not 0
            j = 1
            sustain = 0
            while msg[0] != notes[i+j][0]:        # while note values are different
                sustain += notes[i+j][2]
                j += 1                            #search for next message with same note i.e. message telling that note was released
            notes[i,3] = sustain + notes[i+j][2]
    time = 0
    for i, msg in enumerate(notes):
        if msg[1] > 0:
            notes[i,2] += time
            time = 0
        else:
            time += msg[2]                        #adjust time
    notes = notes[notes[:,1] > 0]                 #filter for notes with positive velocities (note presses)
    return notes

def NumpyNormalize(note, oneHot=False):                         #normalize all values to 0-1
    notes = np.copy(note)
    
    if oneHot:
        notes[:,12] /= 11
        notes[:,13] /= 128
        notes[:,14] /= 40000
        notes[:,15] /= 40000
    else:
        notes[:,0] /= 128
        notes[:,1] /= 128
        notes[:,2] /= 40000
        notes[:,3] /= 40000       
    return notes

def NumpyOneHot(note):
    notes = np.copy(note)
    oneHot = np.zeros([len(notes),16])
    oneHot[:, 13:] = notes[:, 1:]
    names = notes[:,0]
    namesOct = names%12
    oneHot[:,12] = (names-(namesOct))/12
    
    for i, name in enumerate(namesOct):
        oneHot[i,name.astype(int)] = 1
    
    return oneHot

def Midi2Numpy(path, oneHot=False): # full midi to numpy conversion
    mid = MidiFile(path)
    notes = Midi2NumpyNoSustain(mid)
    cleanNotes = NumpyGetSustain(notes)
    
    if oneHot:
        cleanNotes = NumpyOneHot(cleanNotes)
    
    normNotes = NumpyNormalize(cleanNotes, oneHot=oneHot)
    return normNotes

#### Numpy to MIDI code

In [7]:
def NumpyDenormalize(note): # interpret all values from 0-1 to normal values
    notes = np.copy(note)    
    if notes.shape[1] == 16: # if encode as one-hot
        notes[:,12] *= 11
        notes[:,13] *= 128
        notes[:,14] *= 40000
        notes[:,15] *= 40000
        
        notes = NumpyEncode(notes) #encode back as original 4-variable format
    else:
        notes[:,0] *= 128
        notes[:,1] *= 128
        notes[:,2] *= 40000
        notes[:,3] *= 40000       
    return notes.astype(int)

def NumpyEncode(note): # convert back from one-hot encoding
    notes = np.copy(note)
    encoded = np.zeros([len(notes),4])
    encoded[:, 1:] = notes[:, 13:]
    encoded[:, 0] = notes[:,12]*12
    
    for i in range(len(notes)):
        encoded[i,0] += np.argmax(notes[i,:12])
    
    return encoded

def NumpySequence(notes): # put all notes into a "timeline" i.e.: time values of [10, 20, 10, 30] become [10, 30, 40, 70]
    sequenced = np.copy(notes)                      # this allows us to easily add vel=0 notes in any order since we can later sort them by time
    for i, msg in enumerate(sequenced):
        if i > 0:
            sequenced[i,2] += sequenced[i-1,2]
    return sequenced

def NumpyAddOffNotes(sequenced): # add vel=0 notes from sustain into sequenced timeline
    withOff = np.copy(sequenced)
    for msg in sequenced:
        offNote = np.array([[msg[0], 0, msg[2] + msg[3], 0]])
        withOff = np.append(withOff, offNote, axis=0)
    #withOff = np.sort(withOff,axis=0)
    withOff = withOff[withOff[:,2].argsort()] # sort by time
    return withOff

def NumpyUnsequence(notes): # revert time value to "time since last message"
    unsequenced = np.copy(notes)
    for i, msg in reversed(list(enumerate(unsequenced))):
        unsequenced[i,3] = 0
        if i > 0:
            unsequenced[i,2] -= unsequenced[i-1,2]
    return unsequenced

def Numpy2MidiDirect(array):    #make MIDI object from numpy
    #Start with initializing a new Mido Track:
    mid = MidiFile()
    track0 = MidiTrack()
    track1 = MidiTrack()
    
    track0.append(MetaMessage('set_tempo', tempo=500000, time=0)) #MetaMessages not necessary but are present in used files
    track0.append(MetaMessage('time_signature', numerator=4, denominator=4, clocks_per_click=24, notated_32nd_notes_per_beat=8, time=0))
    track0.append(MetaMessage('end_of_track', time=1))
    
    track1.append(Message('program_change', channel=0, program=0, time=0))
    
    for i,note in enumerate(array):         # Get the index and the note. Array must be int array
        j = 1
        track1.append(Message('note_on',note = array[i,0], velocity = array[i,1],time = array[i,2])) # Add the note to the track.

    mid.tracks.append(track0)
    mid.tracks.append(track1)
    return mid

def Numpy2Midi(notes, name): # full numpy to midi conversion, saving result to [name].midi
    denorm = NumpyDenormalize(notes)
    seq = NumpySequence(denorm)
    off = NumpyAddOffNotes(seq)
    unseq = NumpyUnsequence(off)
    mid = Numpy2MidiDirect(unseq)
    mid.save(name + ".midi")

#### Generatng tensor dataset from CSVs

In [8]:
def Numpy2Dataset(notes,num=20,skip=10): # make list of sumpy arrays
    samples = []
    i = 0
    while i+num <= len(notes):
        samples.append(notes[i:i+num])
        i += skip
    return samples

def SampleAllNumpy(dataPath): # generate samples from all saved CSVs
    allSamples = []

    for i,f in enumerate(os.listdir(dataPath)):
        notes = np.genfromtxt(dataPath+f, delimiter=',')
        allSamples += Numpy2Dataset(notes)
        if i % 100 == 0:
            print(i)
    
    return allSamples

def SaveSamplesTensor(samples, outputPath): # save tensor
    tens = torch.Tensor(samples)
    torch.save(samples, outputPath+"Notes_Dataset.pt")
    return tens   

def SaveAllSamples(dataPath, outputPath): # save dataset tensor
    samples = SampleAllNumpy(dataPath)
    SaveSamplesTensor(samples, outputPath)

#### Bulk data conversion code - COMMENT OUT IF NOT IN USE!!!

In [9]:
#SaveAllSamples("data/numpy_files/","data/") #save all into tensor

In [10]:
# IMPORTANT: COMMENT OUT IF NOT IN USE TO AVOID ACCIDENTS!!!!!!!

# Getting CSVs from MIDI data and processed data from CSVs
# Processed MIDI does not contain program messages and so are a good measure of what output SHOULD look like in a perfect world

# dataPath = "data/MIDI_files_original/"
# outputPath = "data/numpy_files/"
# processedPath = "data/MIDI_files_processed/"

# for i,f in enumerate(os.listdir(dataPath)):
#     notes = Midi2Numpy(dataPath+f)
#     np.savetxt(outputPath + "MIDI_{:04d}.csv".format(i),notes,delimiter=",")
#     Numpy2Midi(notes, processedPath + "MIDI_{:04d}".format(i))
    
#     if i % 100 == 0:
#         print(i)

In [11]:
# dataPath = "data/numpy_files/"  # one-hot encoding on CSVs
# outputPath = "data/numpy_onehot"

# for i,f in enumerate(os.listdir(dataPath)):
#     notes = np.genfromtxt(dataPath+f, delimiter=',')
#     notes = NumpyDenormalize(notes)
#     notes = NumpyOneHot(notes)
#     notes = NumpyNormalize(notes, oneHot=True)
#     np.savetxt(outputPath + "MIDI_{:04d}.csv".format(i),notes,delimiter=",")
#     if i % 100 == 0:
#         print(i)

## Baseline Model Code
#### getting available notes

In [12]:
def GetAllNotesMajor(root):# Get all used notes in major scale of root=root
    notes = []
    intervals = [2,2,1,2,2,2,1]
    
    while root > 24: #bring down to lowest used octave
        root -= 12
    
    n = root
    notes.append(n)
    while n < 84: #up to higherst used note
        for i in intervals:
            n += i
            notes.append(n)   
    return notes    


def GetRangeMajor(notes, low, high): # Get all notes within range
    lowIndex = notes.index(low)
    highIndex = notes.index(high)
    
    return notes[lowIndex:highIndex+1]   

#### Piece Class
##### represents whole output from all 4 voices

In [13]:
class Piece: # Entire baseline model compostion - composed of 4 voices soprano, alto, tenor, bass (SATB)
    def __init__(self, barNum=16, root=60):# 16 bars in C major
        self.root = root # root note
        self.allNotes = GetAllNotesMajor(self.root) # all notes on major scale
        self.barNum = barNum # number of bars
        
        self.soprano = Voice(self.allNotes,60,84,speed=8) # SATB
        self.alto = Voice(self.allNotes,48,72)
        self.tenor = Voice(self.allNotes,36,60)
        self.bass = Voice(self.allNotes,24,48)
          
        self.notes = np.empty([0,4]) #notes output
        
        self.pieceChords = [] # chords
        
        self.chords = np.array([ # common classical C major chords
            [ 0,  4,  7,  0],# I
            [ 2,  5,  9,  2],# ii
            [ 4,  7, 11,  4],# iii
            [ 5,  9, 0,  5],# IV
            [ 7, 11, 2,  7],# V
            [ 9, 0, 4,  9],# vi
            [11, 2, 5, 11],# vii dim
            [ 2,  5,  9, 0],# ii7
            [ 5,  9, 0, 4],# IVmaj7
            [ 7, 11, 2, 5],# V7
            [11, 2, 5, 9]])# vii7 half-dim
        
    def GenerateSoprano(self): # Generate soprano line
        self.soprano.GenerateLine(self.soprano.speed*self.barNum)
        
    def GenerateAlto(self): # Generate alto line from chords
        self.alto.GenerateChordLine(self.pieceChords)
        
    def GenerateTenor(self): # see alto
        self.tenor.GenerateChordLine(self.pieceChords)
        
    def GenerateBass(self): # see alto
        self.bass.GenerateChordLine(self.pieceChords)
        
        
    
    def ChooseChord(self, sopNote): # Choose a fitting chord for soprano note
        while sopNote >= 12:
            sopNote -= 12
        
        goodChords = np.empty([0,4])
        
        for chord in self.chords:
            if (chord==sopNote).sum() > 0:
                goodChords = np.append(goodChords,[chord],axis=0)
        
        chosenChord = goodChords[random.randint(0,len(goodChords)-1)]
        chosenChord = np.sort(np.unique(chosenChord))
        
        i = 12
        chordNotes = chosenChord
        while i < 120:
            chordNotes = np.append(chordNotes, chosenChord+i)
            i += 12
        
        return(chordNotes)
    
    def GetChords(self): # select all chords in piece
        for i, note in enumerate(self.soprano.notes):
            if i % 2 == 0:
                sopNote = note[0]
                chord = self.ChooseChord(sopNote)
                self.pieceChords.append(chord)
                
    def Normalize(self): # normalize all values to 0-1
        for i, msg in enumerate(self.notes):
            self.notes[i,0] = msg[0]/128
            self.notes[i,1] = msg[1]/128
            self.notes[i,2] = msg[2]/40000
            self.notes[i,3] = msg[3]/40000
                
    def GenerateLines(self): # Generate all SATB lines and joins them - entire baseline model
        self.GenerateSoprano()
        self.GetChords()
        self.GenerateAlto()
        self.GenerateTenor()
        self.GenerateBass()
        self.joinLines()
        self.OffsetTime(20)
        self.Normalize()
        
        return self.notes
        
    def InsertLine(self, starting, inserted, startIndex, skipIndex): # join 2 lines
        base = np.copy(starting)
        ins = np.copy(inserted)
        
        for i,note in enumerate(ins):
            base = np.insert(base, (i*skipIndex)+startIndex, [note], axis=0)
            
        return base
        
    def joinLines(self): # join all SATB lines
        #self.notes = np.copy(self.soprano)
        self.notes = self.InsertLine(self.soprano.notes, self.alto.notes, 1, 3)
        self.notes = self.InsertLine(self.notes, self.tenor.notes, 2, 4)
        self.notes = self.InsertLine(self.notes, self.bass.notes, 3, 5)
        
    def OffsetTime(self, maxChange): # adds random time offsets to make output sound more organic
        for note in self.notes:
            note[2] += random.randint(0,maxChange)
        

#### Voice class
##### Represents individual voices

In [14]:
class Voice: # individual voices
    def __init__(self, allNotes, lowNote, highNote, jump=3, speed=4, time=4096, velocity=64):
        self.range = GetRangeMajor(allNotes,lowNote,highNote) #available ntoes
        self.jump = jump #maximum pitch interval between notes
        self.speed = speed #note length i.e. 4 for quarter, 8 for eighth etc.
        self.time = time #song speed
        self.velocity = velocity #note volume
        self.notes = np.empty([0,4]) #notes output
        self.lowNote = lowNote # lowest note
        self.highNote = highNote # highest note
        self.allNotes = allNotes # all notes in scale
            
        self.duration = self.time / self.speed # time between notes
        
        
    def RandomStartNote(self): # Generate Random first note (for soprano)
        note = random.choice(self.range)
        self.notes = np.append(self.notes,np.array([[note, self.velocity, 0, self.duration]]),axis=0)
        
        
    def RandomJump(self): # Generate Random next note (for soprano)
        lastNote = self.notes[len(self.notes)-1][0] # find last played note
        lastIndex = self.range.index(lastNote)
        
        newIndex = -1
        while newIndex < 0 or newIndex >= len(self.range): # stay in range
            newIndex = lastIndex + random.randint(-self.jump,self.jump)
            
        newNote = self.range[newIndex]
        self.notes = np.append(self.notes,np.array([[newNote, self.velocity, self.duration, self.duration]]),axis=0)
        
        
    def GenerateLine(self, length): # Generate random line (for soprano)
        self.RandomStartNote()
        
        for n in range(length-1):
            self.RandomJump()
            
            
    def clearNotes(self):
        self.notes = np.empty([0,4])
        
    def GetChordNotes(self, chordNotes): # Get useful notes from all chord notes
        chordNotes = chordNotes[chordNotes >= self.lowNote]
        chordNotes = chordNotes[chordNotes <= self.highNote]
        return chordNotes
    
    def ChooseStartChordNote(self, chordNotes): # Choose Random note in chord
        note = random.choice(chordNotes)
        self.notes = np.append(self.notes,np.array([[note, self.velocity, 0, self.duration]]),axis=0)
        
    def ChooseChordNote(self,chordNotes): # Choose suitable next note in chord
        lastNote = self.notes[len(self.notes)-1][0] # find last played note
        
        chordNotes = chordNotes[chordNotes >= lastNote - (self.jump*2)]
        chordNotes = chordNotes[chordNotes <= lastNote + (self.jump*2)]
        newNote = random.choice(chordNotes)
        
        self.notes = np.append(self.notes,np.array([[newNote, self.velocity, 0, self.duration]]),axis=0)
        
    def GenerateChordLine(self, chords): # Generate A/T/B lines
        
        firstChord = self.GetChordNotes(chords[0])
        self.ChooseStartChordNote(firstChord)
        
        for c in chords[1:]:
            chord = self.GetChordNotes(c)
            self.ChooseChordNote(chord)

## Model architecture definition

Set the hyperparameters below:


## Classification Model

In [15]:
class LSTMEncoder(nn.Module):
    def __init__(self, n_features, emb_dim, num_layers, dropout):
        super(LSTMEncoder, self).__init__()
        self.num_layers = num_layers
        self.lstm = nn.LSTM(n_features, emb_dim, num_layers, batch_first=True)

    def forward(self, x):
        out, (hn, cn) = self.lstm(x)
        return hn

class LSTMDecoder(nn.Module):
    def __init__(self, n_features, emb_dim, num_layers, dropout):
        super(LSTMDecoder, self).__init__()
        self.num_layers, self.hidden_dim = num_layers, 2*emb_dim
        self.lstm = nn.LSTM(emb_dim, self.hidden_dim, num_layers, batch_first=True,
                            dropout=dropout)
        self.fc = torch.nn.Linear(self.hidden_dim, n_features)
        
    def forward(self, x):
        out, (hn, cn) = self.lstm(x)
        out = torch.sigmoid(self.fc(out))
        return out


class LSTMAutoEncoder(nn.Module):
    def __init__(self, n_features, emb_dim, num_layers, dropout, batch_size):
        super(LSTMAutoEncoder, self).__init__()
        self.name = "LSTMAutoEncoder"
        self.encoder = LSTMEncoder(n_features, emb_dim, num_layers, dropout)
        self.decoder = LSTMDecoder(n_features, emb_dim, num_layers, dropout)

    def forward(self, x):
        encoded_x = self.encoder(x)
        decoded_x = self.decoder(encoded_x)
        return decoded_x[-1]

print('Model class created succesfully')

Model class created succesfully


## Training function

In [16]:
#To help us save the model easier...
def get_model_name(name, batch_size, learning_rate, epoch):
    """ Generate a name for the model consisting of all the hyperparameter values

    Args:
        config: Configuration object containing the hyperparameters
    Returns:
        path: A string with the hyperparameter name and value concatenated
    """
    path = "model_{0}_bs{1}_lr{2}_epoch{3}".format(name,
                                                   batch_size,
                                                   learning_rate,
                                                   epoch)
    return path

In [17]:
def get_accuracy(model, data): #Accuracy on note selection...
    correct = 0
    total = 0

    for sample in data:
        excerpt = sample[:,:-1,:] # Extracts all but the last row (model will predict last note)
        true_note = sample[:,-1,:].detach().numpy() # Extracts the last row (what we want it to predict)
        pred_note = model(excerpt).detach().numpy()

        diff = np.abs(pred_note - true_note)
        correct = len(diff[np.where(diff <= 1e-2)])
        total = pred_note.shape[0]*pred_note.shape[1]
    return correct / total

In [18]:
def fit(model, train_loader, criterion, num_epochs, batch_size, learning_rate, class_regr):
    losses, accuracy = [], []

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=learning_rate, 
                                 weight_decay=1e-5) # <-- Sometimes Adam converges faster than SGD

    for epoch in range(num_epochs):
        for i, data in enumerate(train_loader):
            excerpt = data[:,:-1] # Extracts all but the last row 
            true_note = data[:,-1]  # Extracts the last row (what we want it to predict)
            out = model(excerpt)             # forward pass

            if class_regr == "classify":
                # Use classification for Note
                pred = out[:,:12]
                target = np.argmax(true_note[:,:12], axis=1)
            else:
                # Regression - Octave, Velocity, Time, Sustain
                pred = out[:,12:]
                target = true_note[:,12:]

            loss = criterion(pred, target) # compute the total loss

            loss.backward()               # backward pass (compute parameter updates)
            optimizer.step()              # make the updates for each parameter
            optimizer.zero_grad()         # a clean up step for PyTorch

            losses.append(float(loss)/batch_size)             # compute *average* loss
        
        accurate = get_accuracy(model, train_loader)
        accuracy.append(accurate)
        
        print('Epoch:{}, Loss:{:.4f}'.format(epoch+1, float(loss)))
        #Checkpoint the model every epoch
        model_path = get_model_name(model.name, batch_size, learning_rate, epoch) #Returns the model name for 
        #the save file.
        torch.save(model.state_dict(), model_path) #Saves the current model with the weights.
    return losses, accuracy

In [19]:
def train(model_classify, model_regr, train_data, num_epochs=5, batch_size=64, learning_rate=1e-3):
    torch.manual_seed(1000) #Fixed. Make sure we use this throughout...
    criterion_classfy = nn.CrossEntropyLoss()
    criterion_regr = nn.MSELoss()
    
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size)
    
    n = 0 # the number of iterations
    start_time=time.time() #Start of training

    # Classification - Note
    loss_classify, accuracy_classify = fit(model_classify, train_loader, criterion_classfy,
                        num_epochs, batch_size, learning_rate, class_regr="classify")
    # Regression - Octave, Velocity, Time, Sustain
    loss_regr, accuracy_regr = fit(model_regr, train_loader, criterion_regr,
                    num_epochs, batch_size, learning_rate, class_regr="regression")

    end_time= time.time()

    return loss_classify, accuracy_classify, loss_regr, accuracy_regr

## Model Training

In [20]:
# Load saved tensor dataset
data = torch.load(r'/content/gdrive/My Drive/APS360/Project/Notes_Dataset.pt')

# Prevent type errors
data = torch.tensor(data).float()

In [21]:
def Get88NoteOneHot(note):
    notes = np.copy(note)
    oneHot = np.zeros([len(notes),16])
    oneHot[:, 13:] = notes[:, 1:]
    # The lowest pitch in piano roll
    min = 21
    names = notes[:,0]-min
    namesOct = names%12
    oneHot[:,12] = (names-(namesOct))/12
    
    for i, name in enumerate(namesOct):
        oneHot[i,name.astype(int)] = 1
    
    # if a note is played later than 8 seconds after, just set it to 8 seconds
    oneHot[:,14][(oneHot[:,14] > 960*8)] = 960*8
    # if a note is for longer than 8 seconds, just set it to 8 seconds
    oneHot[:,15][(oneHot[:,15] > 960*8)] = 960*8

    oneHot[:,12] /= 7
    oneHot[:,13] /= 128
    oneHot[:,14] /= 960*8
    oneHot[:,15] /= 960*8

    return oneHot

In [22]:
# Split into smaller dataset for training
train_data = data[:40]
test_train_data = train_data.view(-1, train_data.shape[-1]).detach().numpy()

denorm = NumpyDenormalize(test_train_data)
note_onehot = Get88NoteOneHot(denorm)

print(note_onehot.shape)

(800, 16)


In [23]:
sample_len = 11
train_samples = []
for i in range(len(note_onehot) - sample_len):
	# grab from i to i + sample_len
	sample = note_onehot[i:i+sample_len]
	train_samples.append(sample)
 
train_samples = np.array(train_samples)
train_samples = torch.tensor(train_samples).float()
print(train_samples.shape)

torch.Size([789, 11, 16])


In [24]:
# For LSTM: https://pytorch.org/docs/stable/generated/torch.nn.LSTM.html #torch.nn.LSTM 
# Hyperparameters
BATCH_SIZE = 64
N_FEATURES = 16
N_LAYERS = 2
EMB_DIM = 64
DROPOUT = 0.3

# Train the model
model_classify = LSTMAutoEncoder(N_FEATURES, EMB_DIM, N_LAYERS, DROPOUT, BATCH_SIZE)
model_regr = LSTMAutoEncoder(N_FEATURES, EMB_DIM, N_LAYERS, DROPOUT, BATCH_SIZE)

loss_classify, accuracy_classify, loss_regr, accuracy_regr = train(model_classify, model_regr, train_samples, num_epochs=30, batch_size=BATCH_SIZE, learning_rate=0.001)

Epoch:1, Loss:2.4650
Epoch:2, Loss:2.4283
Epoch:3, Loss:2.4283
Epoch:4, Loss:2.4229
Epoch:5, Loss:2.4181
Epoch:6, Loss:2.4158
Epoch:7, Loss:2.4130
Epoch:8, Loss:2.4121
Epoch:9, Loss:2.4112
Epoch:10, Loss:2.4097
Epoch:11, Loss:2.4099
Epoch:12, Loss:2.4070
Epoch:13, Loss:2.4066
Epoch:14, Loss:2.4054
Epoch:15, Loss:2.3987
Epoch:16, Loss:2.3979
Epoch:17, Loss:2.3989
Epoch:18, Loss:2.4001
Epoch:19, Loss:2.3983
Epoch:20, Loss:2.3988
Epoch:21, Loss:2.3965
Epoch:22, Loss:2.3968
Epoch:23, Loss:2.3967
Epoch:24, Loss:2.3935
Epoch:25, Loss:2.3936
Epoch:26, Loss:2.3663
Epoch:27, Loss:2.3179
Epoch:28, Loss:2.3123
Epoch:29, Loss:2.2746
Epoch:30, Loss:2.2409
Epoch:1, Loss:0.0247
Epoch:2, Loss:0.0071
Epoch:3, Loss:0.0055
Epoch:4, Loss:0.0048
Epoch:5, Loss:0.0048
Epoch:6, Loss:0.0048
Epoch:7, Loss:0.0048
Epoch:8, Loss:0.0049
Epoch:9, Loss:0.0050
Epoch:10, Loss:0.0049
Epoch:11, Loss:0.0049
Epoch:12, Loss:0.0050
Epoch:13, Loss:0.0051
Epoch:14, Loss:0.0049
Epoch:15, Loss:0.0048
Epoch:16, Loss:0.0048
Epoch:

## Model Testing

Since our model is 'tested' with people listening to it, we need to just generate some samples.

In [25]:
# Split into smaller dataset for training
test_data = data[:100].detach().numpy()

for i, sample in enumerate(test_data):
    if i == 0:
        all_test_data = sample
    else:
        all_test_data = np.append(all_test_data, sample, axis=0)

test_denorm = NumpyDenormalize(all_test_data)
test_note_onehot = Get88NoteOneHot(test_denorm)

sample_len = 11
test_samples = []
for i in range(len(test_note_onehot) - sample_len):
	# grab from i to i + sample_len
	sample = test_note_onehot[i:i+sample_len]
	test_samples.append(sample)
 
test_samples = np.array(test_samples)
test_samples = torch.tensor(test_samples).float()

In [36]:
def GetNormalizedOutput(note):
    notes = np.copy(note)

    minNote = 21
    names = np.argmax(notes[:,:12], axis = 1)
    octave = np.round((notes[:,12]*7))
    octave_offset = 12*octave
    notes[:,12] = (names + octave_offset + minNote)/128
    notes[:,14] *= (960*8/40000)
    notes[:,15] *= (960*8/40000)

    return notes[:,12:]

In [37]:
test_loader = torch.utils.data.DataLoader(test_samples, 
                                           batch_size=1, 
                                           shuffle=True)

song_length = 50

for i, sample in enumerate(test_loader):
    # Get note from classification model
    output_note = model_classify(sample)[0].detach().numpy()
    # Get other features (octave, velocity, time, sustain) from regression model
    output_other = model_regr(sample)[0].detach().numpy()

    # Combine the two outputs
    output = np.append(output_note[:12], output_other[12:])
    new_sample = np.expand_dims(output, axis=0)

    if i == 0:
        new_song = new_sample
    else:
        new_song = np.append(new_song, new_sample, axis=0)
    if len(new_song) >= song_length:
        break

new_song = GetNormalizedOutput(new_song)

# new_excerpt = new_excerpt.type(torch.int64)
print('new_song: (Watch for the same notes appearing...)',new_song)
print('new_song.shape: ',new_song.shape)

mid = Numpy2Midi(new_song, "Autoencoder")

new_song: (Watch for the same notes appearing...) [[0.515625   0.47161615 0.01534053 0.02393847]
 [0.4921875  0.4708471  0.0164914  0.02399817]
 [0.515625   0.46877015 0.01679269 0.02397291]
 [0.515625   0.47043896 0.01638414 0.02427922]
 [0.4921875  0.4657025  0.0204136  0.02753453]
 [0.515625   0.4686741  0.01738581 0.025387  ]
 [0.515625   0.4726493  0.01560819 0.02421252]
 [0.515625   0.4682168  0.01615406 0.02424193]
 [0.4921875  0.4725493  0.01963989 0.02789339]
 [0.515625   0.47048658 0.01692159 0.02474781]
 [0.4921875  0.46667624 0.01719424 0.02473126]
 [0.515625   0.472897   0.0153676  0.02329619]
 [0.515625   0.47229734 0.01472211 0.02244608]
 [0.515625   0.47279614 0.0160325  0.0239346 ]
 [0.515625   0.4695932  0.01791228 0.02569024]
 [0.515625   0.4640951  0.01625742 0.0237198 ]
 [0.4921875  0.47062704 0.01520271 0.02268414]
 [0.515625   0.46943563 0.0162066  0.02399646]
 [0.515625   0.46803594 0.01989923 0.02778774]
 [0.515625   0.46935043 0.01714952 0.02411349]
 [0.492187