In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import numpy as np
import torch
import torch.utils.data
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torchvision import datasets, transforms
from torchvision.utils import make_grid , save_image

In [3]:
from constants import *

In [149]:
import matplotlib.pyplot as plt

### Old way of pulling out corpus

In [9]:
with open(f'{CONCAT_DIR}/concat_corpus.utf') as f:
    train_contents = f.read()

In [10]:
train_contents[0:10]

'їPÿ\x07{\x919\x05)\x1c'

In [11]:
import h5py    
import numpy as np    
import json
concat_h5 = h5py.File(f'{CONCAT_DIR}/concat_corpus.h5','r+') 

concat_json = json.load(open(f'{CONCAT_DIR}/concat_corpus.json', 'rb'))

In [84]:
t = concat_h5['train']

In [89]:
t[:10]

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10], dtype=uint8)

### One hot encoding

In [221]:
def one_hot(a,c): 
    return np.eye(c)[a]

In [222]:
class MusicDataset(torch.utils.data.Dataset):
    """Face Landmarks dataset."""

    def __init__(self, h5_file, set_type, json_file, timesteps, root_dir):
        self.concat_h5 = h5py.File(f'{root_dir}/{h5_file}','r+')
        self.dataset = self.concat_h5[set_type]
        self.concat_json = json.load(open(f'{root_dir}/{json_file}', 'rb'))
        self.vocab_size = len(self.concat_json['idx_to_token'])+1
        self.data_length = self.dataset.shape[0]
        self.timesteps = timesteps

    def __len__(self):
#         print((self.data_length - self.timesteps)//10)
#         return (self.data_length - self.timesteps)//10
        return (self.data_length // self.timesteps)

    def __getitem__(self, idx):
#         print(np.arange(10)[0:8]) # example
#         print(np.arange(10)[8])
        # (AS) Should not have duplicate sequences. 
        # RBMs do not actually use target value, so no point in repeating next char
#         x = self.dataset[idx:idx+timesteps]
#         y = self.dataset[idx+timesteps]
        
        start = idx*self.timesteps
        x = self.dataset[start:start+self.timesteps]
        y = self.dataset[start+self.timesteps]
#         x_hot = one_hot(x, self.vocab_size)
        return x, y


In [135]:
md = MusicDataset(h5_file='concat_corpus.h5', set_type='train', json_file='concat_corpus.json', timesteps=15, root_dir=CONCAT_DIR)

In [136]:
batch_size = 64
train_loader = torch.utils.data.DataLoader(md,
    batch_size=batch_size)

### Dataset sanity test

In [106]:
train_iter = enumerate(train_loader)

In [107]:
i, (x, y) = next(train_iter)
i2, (x2, y2) = next(train_iter)

In [109]:
md.dataset[:100]

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10,  6,  6,  6, 11,  3, 12, 13,
        6, 14,  8, 15, 16,  6, 14,  8,  4, 16,  6, 14,  8,  9, 16,  6,  2,
       17, 18, 19,  6,  7, 20, 21, 22,  6, 23, 20, 21, 22,  6, 24, 20, 21,
       22,  6, 25,  3, 26, 27,  6, 28,  8, 29, 30,  6, 23,  8, 29, 30,  6,
       24,  8, 29, 30,  6,  2, 17, 18, 31,  6,  7, 20, 21, 32,  6,  7, 20,
        4,  5,  6,  7, 20,  9, 10,  6, 23, 26, 12, 18,  6, 24, 29],
      dtype=uint8)

In [112]:
# a = np.argmax(x[11], axis=1)[:-1]
# b = np.argmax(x[10], axis=1)[1:]
# np.testing.assert_array_equal(a, b)

In [114]:
# a = np.argmax(x2[0], axis=1)[:-1]
# b = np.argmax(x[-1], axis=1)[1:]
# np.testing.assert_array_equal(a, b)

### Model

In [137]:
def repackage_var(h):
    """Wraps h in new Variables, to detach them from their history."""
    return torch.autograd.Variable(h.data).cuda() if type(h) == torch.autograd.Variable else tuple(repackage_var(v) for v in h)

In [138]:
class StatefulLSTM(torch.nn.Module):
    def __init__(self, scale_size, n_hidden, n_factors, bs, nl):
        super().__init__()
        self.scale_size = scale_size
        self.nl = nl
        self.embedding = torch.nn.Embedding(scale_size, n_factors)
        self.rnn = torch.nn.LSTM(n_factors, n_hidden, nl, dropout=0.5)
        self.l_out = torch.nn.Linear(n_hidden, scale_size)
        self.n_hidden = n_hidden
        self.init_hidden(bs)
        
    def forward(self, notes):
        bs = notes[0].shape[0]
        if self.h[0].size(1) != bs: 
            print('batch size is not same size as original:', bs)
            self.init_hidden(bs)
        emb = self.embedding(notes)
        outp,h = self.rnn(emb, self.h)
        self.h = repackage_var(h)
#         return torch.nn.functional.log_softmax(self.l_out(outp), dim=-1).view(-1, self.scale_size)
        return torch.nn.functional.log_softmax(self.l_out(outp[:, -1, :]), dim=-1)
#         return torch.nn.functional.softmax(self.l_out(outp[:, -1, :]), dim=-1)
    
    def init_hidden(self, bs):
        h1 = torch.autograd.Variable(torch.zeros(self.nl, bs, self.n_hidden))
        h2 = torch.autograd.Variable(torch.zeros(self.nl, bs, self.n_hidden))
        if self._cuda():
            self.h = (h1.cuda(), h2.cuda())
        else:
            self.h = (h1, h2)
            
    def _cuda(self):
        return next(self.parameters()).is_cuda

### Training

In [139]:
m = StatefulLSTM(md.vocab_size, n_hidden=256, n_factors=10, bs=batch_size, nl=2).cuda()

In [140]:
train_op = torch.optim.Adam(m.parameters(), lr=1e-3)

In [141]:
loss_fn = torch.nn.NLLLoss()

In [148]:
display_step = 100
training_steps = 20
for step in range(training_steps):
# for step in tqdm(range(training_steps)):
    for i, (data,target) in enumerate(train_loader):
        data, target = torch.autograd.Variable(data.long().cuda()), torch.autograd.Variable(target.long().cuda())
        m.zero_grad()
        forward = m(data)
        loss = loss_fn(forward, target)
        loss.backward()
        train_op.step()
#     if (step % display_step == 0):
    print(f'Step: {step} Loss: {loss.data[0]}')

Step: 0 Loss: 2.3156449794769287
Step: 1 Loss: 2.235625743865967
Step: 2 Loss: 2.2792012691497803
Step: 3 Loss: 2.2708115577697754


### Generate music

In [None]:
GEN_SEED_RANDOMLY = False # Use a random snippet as a seed for generating the new song.
if GEN_SEED_RANDOMLY:
    ind = np.random.randint(NUM_SONGS)
else:
    ind = 41 # "How Deep is Your Love" by Calvin Harris as a starting seed
    
gen_song = encoded_songs[ind][:timesteps].tolist() # TODO explore different (non-random) seed options

Need to change sequence_length to timesteps  
Need to have unknown state 0?

In [164]:
timesteps = md.timesteps
output_size = md.vocab_size

In [165]:
gen_song = md.dataset[:timesteps].tolist()

In [227]:
# generate music!
m.init_hidden(batch_size)
for i in range(500):
    seed = np.array([gen_song[-timesteps:]])
    # Use our RNN for prediction using our seed! 
    '''TODO: Write an expression to use the RNN to get the probability for the next note played based on the seed.
    Remember that we are now using the RNN for prediction, not training.'''
    # old way
    seed_v = torch.autograd.Variable(torch.from_numpy(seed).long()).cuda()

#     seed_v = torch.autograd.Variable(torch.from_numpy(np.argmax(seed, axis=1)).long()).cuda()
    predict_probs = m(seed_v)
    
    percentage_prob = (np.e ** predict_probs.data.cpu().numpy())
    # Define output vector for our generated song by sampling from our predicted probability distribution
    '''TODO: Sample from the predicted distribution to determine which note gets played next.
    You can use a function from the numpy.random library to do this.
    Hint 1: range(x) produces a list of all the numbers from 0 to x
    Hint 2: make sure what you pass in has the "shape" you expect.'''
#     sampled_note = np.random.choice(range(md.vocab_size), p=percentage_prob[0]) # TODO
    sampled_note = np.argmax(percentage_prob)
#     print('Sampled_note:', sampled_note)
    gen_song.append(sampled_note)

# noteStateMatrixToMidi(gen_song, name="generated/gen_song_0")
# noteStateMatrixToMidi(encoded_songs[ind], name="generated/base_song_0")
# print("saved generated song! seed ind: {}".format(ind))

batch size is not same size as original: 15


In [228]:
gen_song[:10]

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

In [229]:
idx2token = md.concat_json['idx_to_token']
test = list(map(lambda x: idx2token[str(x)] if str(x) in idx2token else '', gen_song))
# test = [idx2token[f'{x}'] for x in seq_arr]; test

In [230]:
test_str = ''.join(test)

In [186]:
test_str

'їPÿ\x07{\x919\x05)\x1c\x91\x91\x91Ûÿ:*±Ù}Là\x9d{}\x91Õ)æãHBÈÔ%\x91H~\x91+~\x91\x84%\x91\x91,æ\x1c%\x91\x84\x91\x84~\x91ê\x8f\x91\x05àÔîä\x91\x84\x91¤t\x9a\x91ï\x1cHh%p\x91æí\x91\x84\x91)\x9a\x8f\x91ÿ)êúèä\x91àµæè%\x91áï{\x91\x84\x91\x84\x91þ\x1cä\x05\x91\x91\x84\x91\x84\x91ÿí\x84\x91Õ{~Lä\x91¤ïÊî)èãBÙ\x91\x84\x91îtÈ\x91ÕÔ\x91\x84\x91Õ\x9a\x91\x9d\x07±\x91\x84kæ\x8f\x91áïæ\x9a\x91\x84\x91\x84t\x9a\x91)è\x1c{kt\x8fÙ\x91\x05h\x91\x84\x91){B\x1cèäú\x91\x05){~\x91\x91ÿµ%\x91\x84\x91\x91Õ\x1c\x1c\x91\x91tL\x91\x84\x91\x05ÐB\x91,î)ì\x9a\x91Õ)\x07+\x1cHBè~\x91ïæ\x91ïµè\x91\x84±\x91\x91ï{\x91\x17Õ\x9e{HïãÐ\x11\x91\x84\x91\x17Õït\x9a\x91\x84L\x91),\x9eã\x9d\x1cH\x9eÈ\x91\x91æ\x07ãÿú\x91\x05ïæÈ\x91\x84\x91\x05\x1c\x9a\x91\x84L\x91\x84Ù\x91æí\x91\x17p\x91\x84\x91\x84Ù%\x91\x84\x91î\x07{\x9a\x91ïè\x91\x84þÕBÕ)Ðãµîúÿ~\x91\x84\x91\x84\x91\x84äp\x91\x05\x07\x1cúàî\x9e,ïtèp\x91æäÕê\x91\x84\x91\x84\x91.úÕ))L\x9a,+\x07äÕ\x1c\x91\x17ï)~\x91\x91\x91Õæ)æ\x9dï\x07í\x91\x84\x91ç\x05ÐÕ){ãhBè±ä\x91)t\x91\x91\x

In [232]:
import decode

In [233]:
with open(f'{SCRATCH_DIR}/utf_to_txt.json', 'r') as f:
    utf_to_txt = json.load(f)

In [239]:
test_str = ''.join(train_contents[:200])

In [244]:
test_str = open(f'{SCRATCH_DIR}/BWV-400-nomask-fermatas.utf', 'r').read()[:200]

In [245]:
score, m21s = decode.decode_string(utf_to_txt, test_str)

In [249]:
m21s.show()

SubConverterFileIOException: png file of xml not found. Is your file >999 pages?

In [248]:
score

[(False, [(67, False), (60, False), (52, False), (48, False)]),
 (False, [(67, True), (60, True), (52, True), (48, True)]),
 (False, []),
 (False, []),
 (False, [(64, False), (60, False), (55, False), (48, False)]),
 (False, [(64, True), (60, True), (55, True), (48, True)]),
 (False, []),
 (False, []),
 (False, [(69, False), (60, False), (53, False), (41, False)]),
 (False, [(69, True), (60, True), (53, True), (41, True)]),
 (False, [(69, True), (60, True), (52, False), (41, True)]),
 (False, [(69, True), (60, True), (52, True), (41, True)]),
 (True, [(67, False), (59, False), (50, False), (41, False)]),
 (True, [(67, True), (59, True), (50, True), (41, True)]),
 (True, []),
 (True, []),
 (True, []),
 (True, []),
 (True, []),
 (True, []),
 (False, [(60, False), (55, False), (52, False)]),
 (False, [(60, True), (55, True), (52, True)]),
 (False, []),
 (False, []),
 (False, [(62, False), (59, False), (53, False), (50, False)]),
 (False, [(62, True), (59, True), (53, True), (50, True)]),


In [247]:
fp = m21s.write('midi', fp=f'{OUT_DIR}/testout3.midi')

In [243]:
from IPython.lib.display import FileLink
FileLink('../data/bachbot/out/testout.midi')