In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import numpy as np
import torch
import torch.utils.data
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torchvision import datasets, transforms
from torchvision.utils import make_grid , save_image

In [3]:
from constants import *

In [4]:
import matplotlib.pyplot as plt

### Old way of pulling out corpus

In [5]:
with open(f'{CONCAT_DIR}/concat_corpus.utf') as f:
    train_contents = f.read()

In [6]:
train_contents[0:10]

'їPÿ\x07{\x919\x05)\x1c'

In [7]:
import h5py    
import numpy as np    
import json
concat_h5 = h5py.File(f'{CONCAT_DIR}/concat_corpus.h5','r+') 

with open(f'{CONCAT_DIR}/concat_corpus.json', 'rb') as f:
    concat_json = json.load(f)

  from ._conv import register_converters as _register_converters


In [8]:
t = concat_h5['train']

In [9]:
t[:10]

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10], dtype=uint8)

### One hot encoding

In [10]:
def one_hot(a, c):
    """Return one-hot rows of width `c` for the index (or array of indices) `a`.

    Row `i` of the `c x c` identity matrix is the one-hot vector for class `i`,
    so indexing the identity with `a` yields the encoding for every entry of `a`.
    """
    identity = np.eye(c)
    return identity[a]

In [11]:
class MusicDataset(torch.utils.data.Dataset):
    """Next-token prediction windows over an encoded corpus stored in HDF5.

    The corpus is a 1-D sequence of token indices. Item `idx` is the pair
    `(x, y)` where `x` is the `idx`-th non-overlapping window of `timesteps`
    tokens and `y` is the same window shifted one token to the right.

    Args:
        h5_file: name of the HDF5 file inside `root_dir`.
        set_type: key of the dataset inside the HDF5 file (e.g. 'train').
        json_file: name of the JSON metadata file inside `root_dir`; must
            contain an 'idx_to_token' mapping.
        timesteps: window length in tokens.
        root_dir: directory containing both files.
    """

    def __init__(self, h5_file, set_type, json_file, timesteps, root_dir):
        # NOTE(review): the file is opened read/write ('r+') although this class
        # only reads from it; kept as-is because another cell opens the same
        # file with the same mode.
        self.concat_h5 = h5py.File(f'{root_dir}/{h5_file}','r+')
        self.dataset = self.concat_h5[set_type]
        with open(f'{root_dir}/{json_file}', 'rb') as f:
            self.concat_json = json.load(f)
        # +1 leaves room for one index beyond those listed in idx_to_token.
        self.vocab_size = len(self.concat_json['idx_to_token'])+1
        self.data_length = self.dataset.shape[0]
        self.timesteps = timesteps

    def __len__(self):
        # Every window needs one extra trailing token for its shifted target,
        # so only (data_length - 1) tokens are available as window starts.
        # Without the -1, a corpus whose length is an exact multiple of
        # `timesteps` yields a final `y` that is one token short.
        return max(0, (self.data_length - 1) // self.timesteps)

    def __getitem__(self, idx):
        n_items = len(self)
        if idx < 0:
            idx += n_items
        if not 0 <= idx < n_items:
            # Lets plain `for x, y in dataset` iteration terminate.
            raise IndexError(f'index {idx} out of range for {n_items} windows')
        start = idx*self.timesteps
        x = self.dataset[start:start+self.timesteps]
        y = self.dataset[start+1:start+self.timesteps+1]
        return x, y


In [12]:
batch_size = 64
timesteps = 64
md = MusicDataset(h5_file='concat_corpus.h5', set_type='train', json_file='concat_corpus.json', timesteps=timesteps, root_dir=CONCAT_DIR)

In [13]:
train_loader = torch.utils.data.DataLoader(md,
    batch_size=batch_size)

### Dataset sanity test

In [14]:
train_iter = enumerate(train_loader)

In [15]:
i, (x, y) = next(train_iter)
i2, (x2, y2) = next(train_iter)

In [16]:
md.dataset[:100]

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10,  6,  6,  6, 11,  3, 12, 13,
        6, 14,  8, 15, 16,  6, 14,  8,  4, 16,  6, 14,  8,  9, 16,  6,  2,
       17, 18, 19,  6,  7, 20, 21, 22,  6, 23, 20, 21, 22,  6, 24, 20, 21,
       22,  6, 25,  3, 26, 27,  6, 28,  8, 29, 30,  6, 23,  8, 29, 30,  6,
       24,  8, 29, 30,  6,  2, 17, 18, 31,  6,  7, 20, 21, 32,  6,  7, 20,
        4,  5,  6,  7, 20,  9, 10,  6, 23, 26, 12, 18,  6, 24, 29],
      dtype=uint8)

### Model

In [17]:
cuda_enabled = torch.cuda.is_available()

In [18]:
def repackage_var(h):
    """Detach hidden state(s) from their autograd history.

    Args:
        h: a single Variable/tensor, or an arbitrarily nested iterable of
            them (e.g. the `(h, c)` pair returned by an LSTM).

    Returns:
        A new Variable sharing `h`'s data but with no history, or a tuple of
        such Variables mirroring the structure of `h`. Results are moved to
        the GPU when the module-level `cuda_enabled` flag is set.
    """
    # isinstance instead of an exact type() comparison: it also accepts
    # Variable subclasses and, on PyTorch versions where Variable and Tensor
    # are merged, plain tensors (which would otherwise fall into the tuple
    # branch and be iterated element by element).
    if isinstance(h, torch.autograd.Variable):
        v = torch.autograd.Variable(h.data)
        return v.cuda() if cuda_enabled else v
    return tuple(repackage_var(v) for v in h)

In [19]:
class StatefulLSTM(torch.nn.Module):
    """Embedding -> three stacked LSTM modules -> linear -> log-softmax.

    The model is stateful: the (h, c) pair of each LSTM is stored on the
    instance (`h1`, `h2`, `h3`) and reused as the initial state of the next
    `forward` call, after being detached from the autograd graph.
    """

    def __init__(self, scale_size, n_hidden, n_factors, bs, nl):
        """
        Args:
            scale_size: vocabulary size; used for both the embedding table and
                the output layer width.
            n_hidden: hidden size of every LSTM.
            n_factors: embedding dimension.
            bs: batch size used to allocate the initial hidden states.
            nl: number of layers inside each of the three LSTM modules.
        """
        super().__init__()
        self.scale_size = scale_size
        self.nl = nl
        self.embedding = torch.nn.Embedding(scale_size, n_factors)
        
        self.rnn1 = torch.nn.LSTM(n_factors, n_hidden, nl, dropout=0.5, batch_first=True)
        self.rnn2 = torch.nn.LSTM(n_hidden, n_hidden, nl, dropout=0.5, batch_first=True)
        self.rnn3 = torch.nn.LSTM(n_hidden, n_hidden, nl, dropout=0.5, batch_first=True)
        
        if cuda_enabled:
            self.rnn1 = self.rnn1.cuda()
            self.rnn2 = self.rnn2.cuda()
            self.rnn3 = self.rnn3.cuda()
        
        # Despite the `bn` prefix these are not batch-norm layers: each is the
        # result of applying weight normalisation to the `weight_hh_l0` and
        # `weight_ih_l0` parameters of the corresponding LSTM. The printed
        # module repr lists bn1..bn3 as LSTMs next to rnn1..rnn3, so each LSTM
        # is registered under two attribute names.
        # NOTE(review): renaming or dropping either name would presumably
        # change the state_dict keys of saved checkpoints - confirm before
        # refactoring.
        self.bn1 = nn.utils.weight_norm(self.rnn1, 'weight_hh_l0')
        self.bn1 = nn.utils.weight_norm(self.bn1, 'weight_ih_l0')
        self.bn2 = nn.utils.weight_norm(self.rnn2, 'weight_hh_l0')
        self.bn2 = nn.utils.weight_norm(self.bn2, 'weight_ih_l0')
        self.bn3 = nn.utils.weight_norm(self.rnn3, 'weight_hh_l0')
        self.bn3 = nn.utils.weight_norm(self.bn3, 'weight_ih_l0')
        
        # pytorch rnn does not currently work with batchnorm
        self.l_out = torch.nn.Linear(n_hidden, scale_size)
        self.n_hidden = n_hidden
        self.reset_all_hidden(bs)
        # Stored for reference only; no method of this class reads it.
        self.bs = bs
        
    def forward(self, notes):
        """Run one batch through the network and update the stored states.

        Args:
            notes: integer token indices with the batch on the first axis
                (the LSTMs are built with batch_first=True).

        Returns:
            Log-probabilities reshaped to (-1, scale_size): one row per input
            position, flattened over batch and time.
        """
        bs = notes.shape[0]
        # Hidden states are laid out (nl, batch, n_hidden); start from zeros
        # again whenever the incoming batch size differs from the stored one.
        if self.h1[0].size(1) != bs: 
            self.reset_all_hidden(bs)
        emb = self.embedding(notes)
#         outp1,h1 = self.rnn1(emb, self.h1)
        outp1,h1 = self.bn1(emb, self.h1)
        outp2,h2 = self.bn2(outp1, self.h2)
        outp3,h3 = self.bn3(outp2, self.h3)
        # Keep the values for the next call but cut the autograd history so
        # backward() does not reach into previous batches.
        self.h1 = repackage_var(h1)
        self.h2 = repackage_var(h2)
        self.h3 = repackage_var(h3)
        return torch.nn.functional.log_softmax(self.l_out(outp3), dim=-1).view(-1, self.scale_size)
#         return torch.nn.functional.log_softmax(self.l_out(outp[:, -1, :]), dim=-1)
#         return torch.nn.functional.softmax(self.l_out(outp[:, -1, :]), dim=-1)
    
    def reset_all_hidden(self, bs):
        """Replace the stored state of all three LSTMs with zeros for batch size `bs`."""
        self.h1 = self.init_hidden(bs)
        self.h2 = self.init_hidden(bs)
        self.h3 = self.init_hidden(bs)
        
    def init_hidden(self, bs):
        """Return a pair of zero tensors of shape (nl, bs, n_hidden).

        The pair is passed to an LSTM as its initial state; the local names
        `h1`/`h2` are the two members of that pair, not states of different
        LSTM modules. Both are moved to the GPU when `cuda_enabled` is set.
        """
        h1 = torch.autograd.Variable(torch.zeros(self.nl, bs, self.n_hidden))
        h2 = torch.autograd.Variable(torch.zeros(self.nl, bs, self.n_hidden))
        if cuda_enabled:
            return (h1.cuda(), h2.cuda())
        return h1, h2

### Training

In [20]:
m = StatefulLSTM(md.vocab_size, n_hidden=256, n_factors=10, bs=batch_size, nl=2)
if cuda_enabled:
    m = m.cuda()

In [21]:
train_op = torch.optim.Adam(m.parameters(), lr=1e-3)

In [22]:
loss_fn = torch.nn.NLLLoss()

In [23]:
(2+1) % 3

0

In [24]:
display_step = 3
training_steps = 20
# Later cells put the model in eval mode (m.eval() / beam_search). Switch back
# to training mode explicitly so that dropout is active whenever this cell is
# run or re-run.
m.train()
# NOTE(review): the loader yields consecutive windows inside one batch, so the
# hidden state carried from batch k to batch k+1 for a given row does not come
# from the window that precedes it in the corpus - confirm this is intended.
for step in range(training_steps):
    for i, (data,target) in enumerate(train_loader):
        # Embedding lookups and NLLLoss targets both need int64 indices.
        data, target = torch.autograd.Variable(data.long()), torch.autograd.Variable(target.long())
        if cuda_enabled:
            data, target = data.cuda(), target.cuda()
        m.zero_grad()
        forward = m(data)
        # The model returns one row per (batch, timestep) position; flatten
        # the targets the same way.
        loss = loss_fn(forward, target.view(-1))
        loss.backward()
        train_op.step()
        if ((i+1) % display_step == 0):
            print(f'Iteration: {i+1} Loss: {loss.data[0]}')
    print(f'Step: {step} Loss: {loss.data[0]}')

Iteration: 3 Loss: 4.6451921463012695
Iteration: 6 Loss: 4.152252197265625
Iteration: 9 Loss: 3.6559832096099854
Iteration: 12 Loss: 3.4879775047302246


KeyboardInterrupt: 

### Saving model

In [25]:
model_path = f'{OUT_DIR}/../models/bachbot_stackedlstm_rnn_t64.h5'

In [None]:
torch.save(m.state_dict(), model_path)

In [26]:
if cuda_enabled:
    m.load_state_dict(torch.load(model_path))
else:
    m.load_state_dict(torch.load(model_path, map_location=lambda storage, loc: storage))

### Generate music

Open question: do we need to reserve index 0 for an "unknown" token/state?

In [27]:
timesteps = md.timesteps

In [28]:
gen_song = md.dataset[:timesteps]

In [271]:
def generate_sequence(song, seq_length):
    """Greedily extend `song` by `seq_length` tokens with the global model `m`.

    Args:
        song: seed token indices; must provide `.tolist()` (e.g. a numpy array).
        seq_length: number of tokens to append.

    Returns:
        A Python list holding the seed followed by the generated tokens.
    """
    full_song = song.tolist()
    # Inference mode, consistent with beam_search: without this the LSTM
    # dropout stays active whenever the model was last used for training.
    m.eval()
    m.reset_all_hidden(batch_size)
    for _ in range(seq_length):
        # The model input is always the most recent `timesteps` tokens.
        seed = np.array([full_song[-timesteps:]])
        seed_v = torch.autograd.Variable(torch.from_numpy(seed).long())
        if cuda_enabled:
            seed_v = seed_v.cuda()
        predict_probs = m(seed_v)

        # The model emits one row of log-probabilities per input timestep;
        # only the last row predicts the token that follows the seed.
        _, idx = torch.max(torch.exp(predict_probs[-1]), 0)
        full_song.append(idx.data[0])
    return full_song
    


In [267]:
# def get_sequence_probabilities()


def get_x_input(partial):
    """Build the model input for one beam-search partial.

    `partial` is a 4-tuple whose last element is the token sequence so far.
    The final `timesteps` tokens are wrapped as a batch of one LongTensor
    Variable, moved to the GPU when `cuda_enabled` is set.
    """
    _it, _score, _probs, seq = partial
    window = seq[-timesteps:]
    batch = torch.LongTensor([window])
    input_var = torch.autograd.Variable(batch)
    return input_var.cuda() if cuda_enabled else input_var

# song = seed token indices (anything providing .tolist(), e.g. a numpy array)
# seq_length = number of tokens to generate
# beam_size = candidates expanded per partial, and partials kept after each step
def beam_search(song, seq_length, beam_size):
    """Extend `song` by `seq_length` tokens using beam search over model `m`.

    Each partial is a tuple
    `(iteration, mean probability, per-step probabilities, token sequence)`.
    After every step only the `beam_size` partials with the highest mean
    per-step probability are kept.

    Returns:
        The token list of the best-scoring partial (seed plus generated tokens).
    """
    full_song = song.tolist()
    m.reset_all_hidden(batch_size)
    partial_sequences = [(0, 0, [], full_song)]
    m.eval()

    for _ in range(seq_length):
        # Pass the beam width explicitly; it used to be read from a notebook
        # global that is only defined in a later cell.
        partial_sequences = find_partials(partial_sequences, beam_size)

    return partial_sequences[0][3]


def find_partials(partial_sequences, beam_size=3):
    """Expand every partial by one token and keep the best `beam_size` results.

    Args:
        partial_sequences: list of partial tuples as described in beam_search.
        beam_size: number of continuations tried per partial and number of
            partials returned. Defaults to 3, the width that used to be
            hard-coded in the pruning step.

    Returns:
        Up to `beam_size` partials sorted by descending mean probability.
    """
    partial_next = []
    for partial in partial_sequences:
        it, _, p_list, seq = partial
        x_input = get_x_input(partial)

        # NOTE(review): `m` keeps its hidden state between calls, so each
        # candidate's forward pass starts from the state left behind by the
        # previous candidate - confirm whether a reset per candidate is wanted.
        predict_probs = m(x_input)
        # Only the last output row predicts the token following the sequence.
        last_it_probs = torch.exp(predict_probs[-1:])
        # topk cannot return more entries than the vocabulary has.
        k = min(beam_size, last_it_probs.size(1))
        top, idxs = torch.topk(last_it_probs, k, 1)

        for i in range(k):
            # Coerce to plain Python scalars so scores and sequences stay
            # ordinary floats/ints regardless of what tensor indexing returns.
            prob = float(top.data[0][i])
            idx = int(idxs.data[0][i])
            new_p_list = p_list + [prob]
            partial_next.append((it + 1, np.mean(new_p_list), new_p_list, seq + [idx]))

    ranked = sorted(partial_next, key=lambda x: x[1], reverse=True)
    return ranked[:beam_size]

In [268]:
beam_search(gen_song, 10, 3)

[1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 6,
 6,
 6,
 11,
 3,
 12,
 13,
 6,
 14,
 8,
 15,
 16,
 6,
 14,
 8,
 4,
 16,
 6,
 14,
 8,
 9,
 16,
 6,
 2,
 17,
 18,
 19,
 6,
 7,
 20,
 21,
 22,
 6,
 23,
 20,
 21,
 22,
 6,
 24,
 20,
 21,
 22,
 6,
 25,
 3,
 26,
 27,
 6,
 28,
 8,
 29,
 30,
 6,
 23,
 6,
 35,
 6,
 4,
 6,
 12,
 6,
 12,
 6,
 25]

In [272]:
generate_sequence(gen_song, 10)

[1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 6,
 6,
 6,
 11,
 3,
 12,
 13,
 6,
 14,
 8,
 15,
 16,
 6,
 14,
 8,
 4,
 16,
 6,
 14,
 8,
 9,
 16,
 6,
 2,
 17,
 18,
 19,
 6,
 7,
 20,
 21,
 22,
 6,
 23,
 20,
 21,
 22,
 6,
 24,
 20,
 21,
 22,
 6,
 25,
 3,
 26,
 27,
 6,
 28,
 8,
 29,
 30,
 6,
 23,
 6,
 25,
 89,
 35,
 38,
 35,
 6,
 35,
 6,
 12]

### Beam search testing ground

In [234]:
song = md.dataset[:md.timesteps]
beam_size = 3
seq_length = 4
full_song = song.tolist()

In [244]:
# iteration, total_probability, probabilities, current sequence
partial_sequences = [(0, 0, [], full_song)]

In [245]:
def get_x_input(partial):
    """Build the model input for one beam-search partial.

    Re-definition of the helper of the same name from the beam-search cell
    earlier in the notebook. `partial` is a 4-tuple whose last element is the
    token sequence so far; its final `timesteps` tokens are wrapped as a batch
    of one LongTensor Variable, moved to the GPU when `cuda_enabled` is set.
    """
    _it, _score, _probs, seq = partial
    window = seq[-timesteps:]
    batch = torch.LongTensor([window])
    input_var = torch.autograd.Variable(batch)
    return input_var.cuda() if cuda_enabled else input_var

In [246]:
np.random.choice(
  ['pooh', 'rabbit', 'piglet', 'Christopher'], 
  5,
  p=[0.5, 0.1, 0.1, 0.3]
)

array(['pooh', 'Christopher', 'pooh', 'pooh', 'pooh'], dtype='<U11')

In [247]:
m.eval()

StatefulLSTM(
  (embedding): Embedding(109, 10)
  (rnn1): LSTM(10, 256, num_layers=2, batch_first=True, dropout=0.5)
  (rnn2): LSTM(256, 256, num_layers=2, batch_first=True, dropout=0.5)
  (rnn3): LSTM(256, 256, num_layers=2, batch_first=True, dropout=0.5)
  (bn1): LSTM(10, 256, num_layers=2, batch_first=True, dropout=0.5)
  (bn2): LSTM(256, 256, num_layers=2, batch_first=True, dropout=0.5)
  (bn3): LSTM(256, 256, num_layers=2, batch_first=True, dropout=0.5)
  (l_out): Linear(in_features=256, out_features=109, bias=True)
)

In [248]:
def random_choice(top, idxs):
    """Sample one index from `idxs`, weighted by the matching scores in `top`.

    Args:
        top: tensor/Variable of non-negative scores (e.g. top-k probabilities).
        idxs: tensor/Variable of candidate indices, same number of elements.

    Returns:
        A numpy array of length 1 holding the sampled index.
    """
    # .cpu() first: .numpy() is not available on CUDA tensors, and the rest of
    # the notebook moves model outputs to the GPU when cuda_enabled is set.
    candidates = idxs.data.cpu().numpy().reshape(-1)
    # Normalise in float64 so the probabilities sum to 1 within the tolerance
    # np.random.choice enforces.
    weights = top.data.cpu().numpy().reshape(-1).astype(np.float64)
    return np.random.choice(candidates, 1, p=weights / weights.sum())

In [249]:
top.data[0][0]

0.1388281136751175

In [263]:
# partial = partial_sequences[0]

# Scratch version of a single beam-search step, operating on the notebook
# globals `partial_sequences`, `beam_size`, `m` and `batch_size`.
m.reset_all_hidden(batch_size)

partial_next = []
for partial in partial_sequences:
    # partial = (iteration, score, per-step probabilities, token sequence)
    it, tot_p, p_list, seq = partial
    x_input = get_x_input(partial)

    predict_probs = m(x_input)
    # last_it_probs = torch.exp(predict_probs[-(it+1):]) # this is to predict the last few iterations
    # Only the last output row is used; exp turns log-probabilities into probabilities.
    last_it_probs = torch.exp(predict_probs[-1:])
    top, idxs = torch.topk(last_it_probs, beam_size, 1)

    for i in range(beam_size):
        prob = top.data[0][i]
        idx = idxs.data[0][i]
        new_p_list = p_list+[prob]
        # Candidates are scored by the mean of their per-step probabilities.
        partial_next.append((it+1, np.mean(new_p_list), new_p_list, seq+[idx]))

# NOTE(review): pruning keeps a hard-coded 3 partials rather than `beam_size`.
partial_sequences = sorted(partial_next, key=lambda x: x[1], reverse=True)[:3]

#1 get 3 probabilities
#2 find the probability of the whole timestep sequence
#3 prune branches to 3


# With multi output model, use only the last prediction. As it is predicting to n timesteps
# v, idx = torch.max(torch.exp(predict_probs[-1]), 0)
# torch.topk()
# full_song.append(idx.data[0])

### Question: is the model predicting at every timestep?

In [None]:
# Inspect the model's top-1 prediction at every input position (not only the
# last one) for the current best partial.
partial = partial_sequences[0]

m.reset_all_hidden(batch_size)

it, _, _, seq = partial
x_input = get_x_input(partial)

print(seq)
predict_probs = m(x_input)
# All output rows are kept here, one per input position.
last_it_probs = torch.exp(predict_probs) # this is to predict the last few iterations
# last_it_probs = torch.exp(predict_probs[-1:])
# k=1 along dim 1: the most probable token index for each row.
a, idxs = torch.topk(last_it_probs, 1, 1)
# NOTE(review): .numpy() presumably fails when the model runs on the GPU - confirm.
idxs.view(-1).data.numpy()
# np.random.choice(
#   idxs.data.numpy().reshape(-1), 
#   1,
#   p=(top/top.sum()).data.numpy().reshape(-1)
# )

#1 get 3 probabilities
#2 find the probability of the whole timestep sequence
#3 prune branches to 3


# With multi output model, use only the last prediction. As it is predicting to n timesteps
# v, idx = torch.max(torch.exp(predict_probs[-1]), 0)
# torch.topk()
# full_song.append(idx.data[0])

In [104]:
top, idxs

(Variable containing:
  0.2018  0.0695  0.0632
 [torch.FloatTensor of size 1x3], Variable containing:
  32  70  22
 [torch.LongTensor of size 1x3])

In [None]:
# Abandoned draft of the beam-search loop (never executed in this notebook).
# NOTE(review): every beam appends to the same `full_song` list and
# `partial_sequences` is never updated inside the loop, so this does not
# implement the three steps listed in the comments below.
full_song = []
for i in range(seq_length):
    for j in range(beam_size):
        partial = partial_sequences[j]
        x_input = get_x_input(partial)
        
        predict_probs = m(x_input)
        
        #1 get 3 probabilities
        #2 find the probability of the whole timestep sequence
        #3 prune branches to 3
        

        # With multi output model, use only the last prediction. As it is predicting to n timesteps
        v, idx = torch.max(torch.exp(predict_probs[-1]), 0)
        # NOTE(review): torch.topk is called without arguments here, which
        # presumably raises a TypeError; it looks like a placeholder.
        torch.topk()
        full_song.append(idx.data[0])

### Beam search end - Decoding time

In [None]:
import decode

In [None]:
def decode_output(output_idx):
    """Translate model output indices into tokens and decode them.

    Indices are looked up (as strings) in the dataset's `idx_to_token` table;
    indices missing from the table become empty strings. The resulting token
    list is handed to `decode_token`, whose return value is passed through.
    """
    idx2token = md.concat_json['idx_to_token']
    token_list = [idx2token.get(str(index), '') for index in output_idx]
    return decode_token(token_list)

def decode_token(token_list):
    """Join tokens into a corpus string and decode it into a score.

    Args:
        token_list: iterable of token strings; a plain string is accepted too
            and treated as a sequence of single-character tokens. The argument
            is never modified.

    Returns:
        `(token_str, score, stream)`, where `token_str` is the joined string
        (prefixed with START_DELIM when it did not already start with it) and
        `score`, `stream` are the two values returned by `decode.decode_string`.
    """
    # Work on a copy: inserting into the caller's list is a surprising side
    # effect, and str inputs have no insert() at all.
    tokens = list(token_list)
    if not tokens or tokens[0] != START_DELIM:
        tokens.insert(0, START_DELIM)
    token_str = ''.join(tokens)
    with open(f'{SCRATCH_DIR}/utf_to_txt.json', 'r') as f:
        utf_to_txt = json.load(f)
    score, stream = decode.decode_string(utf_to_txt, token_str)
    return token_str, score, stream

# test = [idx2token[f'{x}'] for x in seq_arr]; test

In [None]:
song_seed = md.dataset[:md.timesteps]
generated_idxs = generate_sequence(song_seed, 500)

In [None]:
token_str, score, stream = decode_output(generated_idxs)

In [None]:
token_str

In [None]:
score

### For testing stuff

In [None]:
with open(f'{SCRATCH_DIR}/utf_to_txt.json', 'r') as f:
    utf_to_txt = json.load(f)

In [None]:
test_str = train_contents[:200]

In [None]:
with open(f'{SCRATCH_DIR}/BWV-400-nomask-fermatas.utf', 'r') as f:
    test_str = f.read()[:200]

In [None]:
token_str, score, stream = decode_token(test_str)

In [None]:
stream.elements[:10]

### Evaluate stream

In [None]:
stream.show()

In [None]:
fp = stream.write('midi', fp=f'{OUT_DIR}/testout.midi')

In [None]:
fp = stream.write('xml', fp=f'{OUT_DIR}/testout7.xml')

In [None]:
from IPython.lib.display import FileLink
FileLink('../data/bachbot/out/testout7.xml')

In [None]:
music21.environment.get('musicxmlPath')

In [None]:
music21.environment.set('music')