In [1]:
import os
from tqdm import tqdm
import pickle

In [9]:
def flatten_list(l):
    flat_list = []
    for sublist in l:
        for item in sublist:
            flat_list.append(item)
    
    return flat_list

In [2]:
import pickle
def load_obj(name):
    with open(name + '.pkl', 'rb') as f:
        return pickle.load(f)
    
notation_to_idx = load_obj('notation_to_idx')
idx_to_notation = load_obj('idx_to_notation')

In [3]:
import torch
import torch.nn as nn
from torch.autograd import Variable


##creating the RNN class
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, n_layers=1):
        super(RNN, self).__init__()
        
        ##input size same as the number of unique characters
        self.input_size = input_size
        ##hidden size decides the number of LSTM units. The output size is same as the hidden size
        self.hidden_size = hidden_size
        self.output_size = output_size
        ##number of layers for the GRU which by default is 1
        self.n_layers = n_layers
        
        ##initialize the embedding layer to convert the words to their embedding vectors
        self.encoder = nn.Embedding(input_size, hidden_size)
        ###gru with the same input and hidden size
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers)
        ###decoder is the a linear layer whose output is of dimension output_size(generally the number of characters 
        ##because that is what we are predicting)
        self.decoder = nn.Linear(hidden_size, output_size)
    
    def forward(self, input, hidden):
        
        ##the forward propogation
        input = self.encoder(input.view(1, -1))
        output, hidden = self.gru(input.view(1, 1, -1), hidden)
        output = self.decoder(output.view(1, -1))
        return output, hidden

    def init_hidden(self):
        ##hidden vector initialized with all zeros
        return Variable(torch.zeros(self.n_layers, 1, self.hidden_size))

In [4]:
# Turn string into list of longs
def music_tensor(notations_list):
    tensor = torch.zeros(len(notations_list)).long()
    for c in range(len(notations_list)):
        tensor[c] = notation_to_idx[notations_list[c]]
    return Variable(tensor)

print(music_tensor(list("[")))

tensor([59])


In [5]:
###given a prime notation, that is character of starting strings predict the next characters in sequence
###the multinomial decides whether to sample from the last layer(in which case we can get 2nd or 3rd 
### best character as output as well) or use argmax of the last layer(basically consider the most probable output only)
def evaluate(prime_notation='[', predict_len=100, temperature=0.8, use_multinomial = True):
    hidden = decoder.init_hidden()
    prime_input = music_tensor(prime_notation)
    predicted = prime_notation

    # Use priming string to "build up" hidden state
    for p in range(len(prime_notation) - 1):
        _, hidden = decoder(prime_input[p], hidden)
    inp = prime_input[-1]
    
    
    for p in range(predict_len):
        if not use_multinomial:
            output, hidden = decoder(inp, hidden)
            _, index_top = torch.topk(output.view(-1),1)
            predicted_notation = idx_to_notation[index_top.numpy()[0]]
            predicted += predicted_notation
            inp = music_tensor([predicted_notation])

        else:
            output, hidden = decoder(inp, hidden)            
            # Sample from the network as a multinomial distribution
            output_dist = output.data.view(-1).div(temperature).exp()
            top_i = torch.multinomial(output_dist, 1)[0]
            # Add predicted character to string and use as next input
            predicted_notation = idx_to_notation[top_i.view(-1).numpy()[0]]
            predicted += predicted_notation
            inp = music_tensor([predicted_notation])

            
#     for p in range(predict_len):
#         output, hidden = decoder(inp, hidden)
        
#         # Sample from the network as a multinomial distribution
#         output_dist = output.data.view(-1).div(temperature).exp()
#         top_i = torch.multinomial(output_dist, 1)[0]
        
#         # Add predicted character to string and use as next input
#         predicted_char = all_characters[top_i]
#         predicted += predicted_char
#         inp = char_tensor(predicted_char)

    return "".join(predicted)

In [6]:
##load the pre-trained model
n_notations = len(notation_to_idx)
hidden_size = n_notations+50
n_layers = 1

decoder = RNN(n_notations, hidden_size, n_notations, n_layers)
decoder.load_state_dict(torch.load(r"C:\Users\chinn\Desktop\AppliedAI\DEEP LEARNING\Music Generation\music-generation-pytorch-abc-notation--master\music_gen.pth"))
decoder.eval()

RuntimeError: Error(s) in loading state_dict for RNN:
	size mismatch for encoder.weight: copying a param with shape torch.Size([78, 48]) from checkpoint, the shape in current model is torch.Size([78, 128]).
	size mismatch for gru.weight_ih_l0: copying a param with shape torch.Size([144, 48]) from checkpoint, the shape in current model is torch.Size([384, 128]).
	size mismatch for gru.weight_hh_l0: copying a param with shape torch.Size([144, 48]) from checkpoint, the shape in current model is torch.Size([384, 128]).
	size mismatch for gru.bias_ih_l0: copying a param with shape torch.Size([144]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for gru.bias_hh_l0: copying a param with shape torch.Size([144]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for decoder.weight: copying a param with shape torch.Size([78, 48]) from checkpoint, the shape in current model is torch.Size([78, 128]).

In [15]:
#generate random lyrics and paste the generated notation here: https://abcjs.net/abcjs-editor.html
##Note: EOS token separates one song notation from another
prime = """X: 39
T:"""
print(evaluate(prime, 1500, temperature=0.7, use_multinomial=True))

X: 39
T:Monton Domana
% Nottingham Music Database
S:Mick Peat
M:4/4
L:1/8
R:Hornpipe
K:D
P:A
|:A|"D"f3/2f/2a3/2f/2g/2a/2|"D"dcdf "A7"eA(3ABc|
"D"dAAA ABAA|"G"GABc dcdd|"Em"gagf "A7"afed|
"D"ABAF DEFA|"G"BABA "D"F2d2|"D"A2A2 A2D2|"A7"EAAd cBA2|
"D"DFA2a aAdc|"G"BAGF "A7"EGAB|"D"A2FA fdAd|
"G"edAB "D7"AFAF|"G"BdBd "D"fdfe|"G"dede "A7"EAef|"D"f2ed AFAd|"G"B2dB cBA|"G"d4ddd d2dB|1"A7/c+"cBAA ABcA|"D"d2A2a agfd|"Em"gfeB dcBc|"Em"B2d2e2|"A7"efga agec|"D"dfdd "E7"edcB|"A7"ABAF "D"FAdcd|
"G/b"eded "E7"edfg|"A"efef edcd|
"D"f3fe dAFA|"G"d3 -d2d|"D"Adf2 fedc|
"G"d2gd "E7"BBd|"G"BAGF "D"A2f2|"Em"g2A BcBc|"G"B2BA d2dd|"G"d2d "A7"ABAG|
"D"FFA "D"ddddf|"A7"gefe edcd|
"D"fdfe "G"defe|"D"d2f2 A3/2f3/4a/4g/4f/4e/4d/4d/4c/4A/4A/4A/4A/4B/4A/4F/4F/4A/4B/4A/4(3aast
M:4/4
L:1/8
R:Hornpipe
K:D
P:A
(3D/2F/2|"G"GAGF dGBd|"Em"g2g2 fga|"D""D"fdcd "A7"EA(3ABc|"D"d3 d3:|
EOS

X: 204
T:Thce Dow Dewes Lours of Peen Bess
% Nottingham Music Database
S:AAAd FFTreassh, via EF
M:4/4
L:1/4
K:G
"G"GG "C"DE|"G"DG GF|"Em"GE 