In [1]:
import math
from typing import Tuple
import numpy as np
import copy
import torch
from torch import nn, Tensor
import torch.nn.functional as F
from torch.nn import TransformerEncoder, TransformerEncoderLayer
from torch.utils import data
from torch.autograd import Variable

# Transformer model

## embedding 

In [2]:
class Embedder(nn.Module):
    """Look-up table that maps integer token ids to `d_model`-sized vectors."""

    def __init__(self,num_token,d_model):
        super().__init__()
        # one learnable row per token id
        self.embed = nn.Embedding(num_embeddings=num_token, embedding_dim=d_model)

    def forward(self,x):
        """Return the embedding rows selected by the integer tensor `x`."""
        embedded = self.embed(x)
        return embedded

## positional encoder

In [4]:
class PositionalEncoder(nn.Module):
    """Scale embeddings by sqrt(d_model) and add a fixed sinusoidal position table.

    Args:
        d_model: size of the last (embedding) dimension of the input.
        max_seq_len: number of positions precomputed in the table; inputs
            whose dimension 1 is longer than this cannot be encoded.

    The table is stored as the buffer ``pe`` with shape
    ``(1, max_seq_len, d_model)``, so it is saved in the state dict and moves
    with the module on ``.to(device)`` / ``.cuda()``.
    """

    def __init__(self, d_model, max_seq_len=2048):
        super().__init__()
        self.d_model = d_model

        # Create the constant positional-encoding matrix "pe"; every entry
        # depends only on the position `pos` and the embedding index `i`.
        # NOTE(review): `i` already advances in steps of 2 and is doubled again
        # in the exponent, which looks different from the usual sinusoidal
        # formula. Left as-is so the table matches previously saved
        # checkpoints - confirm before changing.
        pe = torch.zeros(max_seq_len,d_model)
        for pos in range(max_seq_len):
            for i in range(0,d_model,2):
                pe[pos,i] = math.sin(pos/(10000** ((2*i)/d_model)))
                # an odd d_model has no cosine partner for its last column
                if i + 1 < d_model:
                    pe[pos,i+1] = math.cos(pos/(10000** ((2*(i+1))/d_model)))
        pe = pe.unsqueeze(0)
        self.register_buffer("pe", pe)

    def forward(self,x):
        """Return ``x * sqrt(d_model) + pe[:, :x.size(1)]``."""
        # make the embedding larger so the added position signal does not dominate it
        x = x * math.sqrt(self.d_model)

        # Add the constant table. The buffer already lives on the module's
        # device, so no Parameter wrapper or explicit .cuda() is needed.
        seq_len = x.size(1)
        return x + self.pe[:, :seq_len]

## MultiHeadAttention

In [5]:
class MultiHeadAttention(nn.Module):
    """Multi-head attention built on the module-level `attention` function.

    Args:
        heads: number of attention heads.
        d_model: size of the last dimension of q/k/v; must be divisible by `heads`.
        dropout: dropout probability applied to the attention weights.

    Raises:
        ValueError: if `d_model` is not a multiple of `heads`.
    """

    def __init__(self, heads, d_model, dropout=0.2):
        super().__init__()

        # Without this check the .view() calls in forward() can either fail
        # or silently regroup elements across positions.
        if d_model % heads != 0:
            raise ValueError(
                f"d_model ({d_model}) must be divisible by heads ({heads})"
            )

        self.d_model = d_model
        self.d_k = d_model // heads
        self.h = heads

        # projections that compute the Q, K, V representation of each token
        self.q_linear = nn.Linear(d_model,d_model)
        self.v_linear = nn.Linear(d_model,d_model)
        self.k_linear = nn.Linear(d_model,d_model)
        self.dropout = nn.Dropout(dropout)
        self.out = nn.Linear(d_model,d_model)

    def forward(self,q,k,v,mask=None):
        """Attend from `q` over `k`/`v`.

        `mask` (optional) is handed to `attention`, which fills positions
        where ``mask == 0`` with a large negative score before the softmax.
        Returns a tensor of shape ``(bs, -1, d_model)`` where ``bs = q.size(0)``.
        """
        bs = q.size(0)

        # perform the linear projections and split the last dim into h heads
        k = self.k_linear(k).view(bs,-1,self.h,self.d_k)
        q = self.q_linear(q).view(bs,-1,self.h,self.d_k)
        v = self.v_linear(v).view(bs,-1,self.h,self.d_k)

        # transpose to get dimensions bs * heads * sequence_len * d_k
        k = k.transpose(1,2)
        q = q.transpose(1,2)
        v = v.transpose(1,2)

        # Forward the caller's mask; previously None was passed here, so any
        # mask given to this module was silently ignored.
        scores = attention(q, k, v, self.d_k, mask, self.dropout)

        # concatenate heads and put through the final linear layer
        concat = scores.transpose(1,2).contiguous().view(bs,-1,self.d_model)

        output = self.out(concat)

        return output
    
    # calculate attention
def attention(q, k, v, d_k, mask=None, dropout=None):
    """Scaled dot-product attention.

    Computes ``softmax(q @ k^T / sqrt(d_k)) @ v`` over the last two
    dimensions. When `mask` is given it gets a singleton dimension inserted
    at position 1 and every score where the mask equals 0 is replaced by
    -1e9 before the softmax. `dropout`, when truthy, is applied to the
    attention weights.
    """
    scale = math.sqrt(d_k)
    weights = torch.matmul(q, k.transpose(2, -1)) / scale

    if mask is not None:
        blocked = mask.unsqueeze(1) == 0
        weights = weights.masked_fill(blocked, -1e9)

    weights = weights.softmax(dim=-1)

    if dropout:
        weights = dropout(weights)

    return torch.matmul(weights, v)

# Feed-forward Network

In [6]:
class FeedForward(nn.Module):
    """Position-wise two-layer MLP: Linear -> ReLU -> Dropout -> Linear.

    Args:
        d_model: input and output feature size.
        d_ff: hidden feature size.
        dropout: dropout probability applied after the ReLU.
    """

    def __init__(self,d_model,d_ff = 2048, dropout=0.2):
        super().__init__()
        self.linear1 = nn.Linear(in_features=d_model, out_features=d_ff)
        self.dropout = nn.Dropout(p=dropout)
        self.linear2 = nn.Linear(in_features=d_ff, out_features=d_model)
        self.relu = nn.ReLU()

    def forward(self,x):
        """Apply the MLP to the last dimension of `x`."""
        hidden = self.linear1(x)
        hidden = self.relu(hidden)
        hidden = self.dropout(hidden)
        return self.linear2(hidden)

# Layernorm

In [7]:
class Norm(nn.Module):
    """Layer normalisation over the last dimension with learnable scale and shift.

    Uses ``x.std`` with its default settings and adds `eps` to the standard
    deviation (not to the variance) to avoid division by zero.
    """

    def __init__(self, d_model, eps = 1e-6):
        super().__init__()

        self.size = d_model
        self.eps = eps

        # two learnable parameters that calibrate the normalised output
        self.alpha = nn.Parameter(torch.ones(self.size))
        self.bias = nn.Parameter(torch.zeros(self.size))

    def forward(self,x):
        """Return ``alpha * (x - mean) / (std + eps) + bias``."""
        mean = x.mean(dim=-1, keepdim=True)
        std = x.std(dim=-1, keepdim=True)
        centred = x - mean
        return self.alpha * centred / (std + self.eps) + self.bias

# Transformer

## Encoder and Decoder

In [8]:
# build one encoder layer with one multi-head attention layer and one feed-forward layer
class EncoderLayer(nn.Module):
    """One pre-norm encoder block: self-attention then feed-forward, each with a residual.

    Args:
        d_model: feature size of the input.
        heads: number of attention heads.
        dropout: dropout probability used on both residual branches and
            inside the attention and feed-forward sublayers.
    """

    def __init__(self,d_model,heads,dropout=0.2):
        super().__init__()
        self.norm1 = Norm(d_model)
        self.norm2 = Norm(d_model)
        # Pass `dropout` down; previously the sublayers always used their own
        # default (0.2) regardless of the value given here.
        self.attn = MultiHeadAttention(heads, d_model, dropout=dropout)
        self.ff = FeedForward(d_model, dropout=dropout)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

    def forward(self,x,mask):
        """Apply the block to `x`.

        `x` is handed to MultiHeadAttention, which treats dimension 0 as the
        batch and the last dimension as d_model. `mask` is passed through to
        the attention layer unchanged.
        """
        x2 = self.norm1(x)
        x = x + self.dropout1(self.attn(x2,x2,x2,mask))  # self-attention residual
        x2 = self.norm2(x)
        x = x + self.dropout2(self.ff(x2))  # feed-forward residual
        return x

# a decoder layer with two multi-head attention layers and one feed-forward layer
class DecoderLayer(nn.Module):
    """One pre-norm decoder block: self-attention, cross-attention, feed-forward.

    Args:
        d_model: feature size of the input.
        heads: number of attention heads.
        dropout: dropout probability used on the three residual branches and
            inside the attention and feed-forward sublayers.
    """

    def __init__(self,d_model,heads,dropout=0.2):
        super().__init__()
        self.norm1 = Norm(d_model)
        self.norm2 = Norm(d_model)
        self.norm3 = Norm(d_model)

        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)
        self.dropout3 = nn.Dropout(dropout)

        self.attn1 = MultiHeadAttention(heads,d_model,dropout=dropout)
        self.attn2 = MultiHeadAttention(heads,d_model,dropout=dropout)
        # No .cuda() here: device placement is the caller's decision (as in
        # EncoderLayer), and a hard-coded .cuda() fails on CPU-only machines.
        self.ff = FeedForward(d_model,dropout=dropout)

    def forward(self,x,e_outputs,src_mask,trg_mask):
        """Apply the block.

        Args:
            x: decoder-side activations.
            e_outputs: encoder output used as keys and values of the second
                attention layer.
            src_mask: mask passed to the cross-attention layer.
            trg_mask: mask passed to the self-attention layer.
        """
        x2 = self.norm1(x)
        x = x + self.dropout1(self.attn1(x2,x2,x2,trg_mask))  # self-attention
        x2 = self.norm2(x)
        x = x + self.dropout2(self.attn2(x2,e_outputs,e_outputs,src_mask))  # cross-attention
        x2 = self.norm3(x)
        x = x + self.dropout3(self.ff(x2))  # feed-forward
        return x
    
    

In [9]:
# define a function that generate multiple layers:
def get_clones(module,N):
    """Return an ``nn.ModuleList`` holding `N` independent deep copies of `module`."""
    clones = nn.ModuleList()
    for _ in range(N):
        clones.append(copy.deepcopy(module))
    return clones

# encoder and decoder
class Encoder(nn.Module):
    """Token embedding + positional encoding followed by `N` encoder layers and a final Norm.

    Args:
        num_tokens: vocabulary size for the embedding table.
        d_model: embedding / hidden feature size.
        N: number of stacked EncoderLayer copies.
        heads: number of attention heads per layer.
    """

    def __init__(self,num_tokens,d_model,N,heads):
        super().__init__()
        self.N = N
        self.embed = Embedder(num_tokens,d_model) # num_tokens is vocabulary size
        self.pe = PositionalEncoder(d_model)
        self.layers = get_clones(EncoderLayer(d_model,heads), N)
        self.norm = Norm(d_model)

    def forward(self,src,mask):
        """Encode integer token ids `src`; `mask` is forwarded to every layer."""
        x = self.embed(src)  # adds a trailing d_model dimension to src
        x = self.pe(x)
        # Use this instance's layer count. The previous `range(N)` read a
        # module-level global, which raises NameError when no such global
        # exists and runs the wrong number of layers when it differs.
        for i in range(self.N):
            x = self.layers[i](x,mask)
        return self.norm(x)

class Decoder(nn.Module):
    """Token embedding + positional encoding followed by `N` decoder layers and a final Norm.

    Args:
        num_tokens: vocabulary size for the embedding table.
        d_model: embedding / hidden feature size.
        N: number of stacked DecoderLayer copies.
        heads: number of attention heads per layer.
    """

    def __init__(self,num_tokens,d_model,N,heads):
        super().__init__()
        self.N = N
        self.embed = Embedder(num_tokens, d_model)
        self.pe = PositionalEncoder(d_model)
        self.layers = get_clones(DecoderLayer(d_model, heads), N)
        self.norm = Norm(d_model)

    def forward(self, trg, e_outputs, src_mask, trg_mask):
        """Decode `trg` token ids against the encoder output `e_outputs`."""
        hidden = self.pe(self.embed(trg))
        layer_idx = 0
        while layer_idx < self.N:
            block = self.layers[layer_idx]
            hidden = block(hidden, e_outputs, src_mask, trg_mask)
            layer_idx += 1
        return self.norm(hidden)

## Transformer

In [10]:
class Transformer(nn.Module):
    """Encoder-only model: an Encoder followed by a linear projection to the target vocabulary.

    Args:
        src_num_tokens: vocabulary size of the input tokens.
        trg_num_tokens: size of the output projection (number of logits).
        d_model: hidden feature size.
        N: number of encoder layers.
        heads: number of attention heads per layer.
    """

    def __init__(self,src_num_tokens,trg_num_tokens,d_model,N,heads):
        super().__init__()
        self.encoder = Encoder(src_num_tokens, d_model, N, heads)
        # The decoder is intentionally not instantiated:
        #self.decoder = Decoder(trg_num_tokens,d_model,N,heads)
        self.out = nn.Linear(d_model, trg_num_tokens)

    def forward(self, src, src_mask):
        """Return logits of size `trg_num_tokens` for every position in `src`."""
        encoded = self.encoder(src, src_mask)
        #d_output = self.decoder(trg,e_outputs,src_mask,trg_mask)
        logits = self.out(encoded)
        return logits

# customized dataset

In [11]:
class customizedDataset(data.Dataset):
    """Dataset over a `.npy` array whose rows are token sequences.

    Each item is an ``(input, target)`` pair taken from one row: the input is
    the row without its last element and the target is the row without its
    first element (i.e. the input shifted by one position).
    """

    def __init__(self,npy_path):
        # load the whole array into memory
        self.data = np.load(npy_path)
        print("Dataset shape: ", self.data.shape)

    def __len__(self):
        """Number of rows in the array."""
        n_rows = self.data.shape[0]
        return n_rows

    def __getitem__(self,index):
        """Return ``(row[:-1], row[1:])`` for the row at `index`."""
        row = self.data[index]
        return (row[:-1], row[1:])

## dataloaders

In [11]:
# Training split: customizedDataset yields (input, target) pairs per row;
# batches of 2 are drawn in shuffled order.
train_dataset = customizedDataset("Customized Dataset - Pop/Train/customized_training_512.npy")
train_dataloader = data.DataLoader(train_dataset,batch_size=2,shuffle=True)

Dataset shape:  (24613, 512)


In [12]:

# Validation split, divided into two interleaved halves (even / odd row
# indices) so each periodic validation pass only has to run over one half.
val_dataset = customizedDataset("Customized Dataset - Pop/Val/customized_val_512.npy")

# row indices of each half
set1 = list(range(0,len(val_dataset),2))
set2 = list(range(1,len(val_dataset),2))
#set3 = list(range(2,len(val_dataset),4))
#set4 = list(range(3,len(val_dataset),4))

val1 = data.Subset(val_dataset,set1)
val2 = data.Subset(val_dataset,set2)
#val3 = data.Subset(val_dataset,set3)
#val4 = data.Subset(val_dataset,set4)

# validation batch size
size=2

val_dataloader1 = data.DataLoader(val1,batch_size=size,shuffle=True)
val_dataloader2 = data.DataLoader(val2,batch_size=size,shuffle=True)
#val_dataloader3 = data.DataLoader(val3,batch_size=size,shuffle=True)
#val_dataloader4 = data.DataLoader(val4,batch_size=size,shuffle=True)

# train() indexes into this list to pick the loader for one validation pass
val_dataloader = [val_dataloader1,
               val_dataloader2]

Dataset shape:  (2272, 512)


In [13]:
# check: pull one training batch and confirm input and target have the same shape
X_train = next(iter(train_dataloader))
print(X_train[0].shape) # input seq
print(X_train[1].shape) # output seq

torch.Size([2, 511])
torch.Size([2, 511])


In [14]:
# check: same shape check for one batch from the first validation loader
X_val = next(iter(val_dataloader[0]))
print(X_val[0].shape) # input seq
print(X_val[1].shape) # output seq

torch.Size([2, 511])
torch.Size([2, 511])


In [15]:
# number of batches in one pass over the training loader
len(train_dataloader)

12307

# Train and Validation Routine

In [20]:
def create_mask(seq):
    """Build a "no-peek" mask for a sequence batch.

    Returns a boolean tensor of shape ``(1, L, L)`` with ``L = seq.size(1)``
    that is True on and below the main diagonal and False above it.
    """
    length = seq.size(1)
    lower_triangle = np.tril(np.ones((1, length, length), dtype=bool))
    return torch.from_numpy(lower_triangle)
    

In [21]:
def create_mask_pad(seq,paded_seq):
    """Return a mask that is True wherever `seq` differs from `paded_seq`.

    Args:
        seq: tensor to compare.
        paded_seq: value (or broadcastable tensor) to compare against;
            presumably the padding token id - confirm with callers.

    Returns:
        ``(seq != paded_seq)`` with a singleton dimension inserted at position 1.
    """
    # The body previously referenced `padded_seq`, a name that is not the
    # parameter, so every call raised NameError. The parameter keeps its
    # original spelling to leave the signature unchanged.
    return (seq != paded_seq).unsqueeze(1)

In [22]:
# scratch: element-wise bitwise AND of two integer tensors
# (identity-like matrix & a matrix filled with 3)
a = torch.zeros((3,3),dtype=torch.long)
a[0,0] = 1
a[1,1] = 1
a[2,2] = 1
b = torch.ones((3,3),dtype=torch.long)
b = b * 3
c = a & b
c

tensor([[1, 0, 0],
        [0, 1, 0],
        [0, 0, 1]])

In [23]:
def evaluate(model,val_dataloader,device,loss_fn,num_vocab):
    """Compute the mean per-batch loss of `model` over `val_dataloader`.

    The model is switched to eval mode (and left there) and is called with a
    mask of None. Each batch is an ``(input, target)`` pair; the model output
    is flattened to ``(-1, num_vocab)`` and the target to ``(-1,)`` before
    `loss_fn` is applied. The mean is printed and returned as a float.
    """
    model.eval()
    batch_losses = []  # one scalar per batch
    with torch.no_grad():
        for inputs, targets in val_dataloader:
            inputs = inputs.long().to(device)
            targets = targets.long().to(device)
            logits = model(inputs, None)
            flat_logits = logits.view(-1, num_vocab)
            flat_targets = targets.view(-1)
            batch_losses.append(loss_fn(flat_logits, flat_targets).item())
    loss = sum(batch_losses) / len(batch_losses)
    print(f"Validation loss: {loss:.6f}\n")
    return loss

In [29]:
from tqdm.auto import tqdm
def train(model,train_dataloader,val_dataloader,ep,print_frequency,loss_fn,
          optimizer,device,run,grad_clip=1,scheduler=None,
          num_vocab = 218,kind="Pop"):
    """Train `model`, periodically validating and saving checkpoints.

    Args:
        model: module called as ``model(input_seq, None)``.
        train_dataloader: iterable of ``(input, target)`` batches; must support ``len()``.
        val_dataloader: sequence of validation loaders; one is picked at
            random for every validation pass.
        ep: number of passes over `train_dataloader`.
        print_frequency: desired number of report/validation points over the
            whole call.
        loss_fn: loss applied to logits flattened to ``(-1, num_vocab)`` and
            targets flattened to ``(-1,)``.
        optimizer: optimizer stepped once per batch.
        device: device the batches are moved to.
        run: 1-based index of this call; used in checkpoint file names and in
            the scheduler cut-off.
        grad_clip: max gradient norm passed to ``clip_grad_norm_``.
        scheduler: optional LR scheduler, stepped at the end of an epoch
            while ``e + ep*(run-1) <= 3``.
        num_vocab: size of the model's output vocabulary.
        kind: prefix used in checkpoint file names.

    Returns:
        ``(train_loss, val_loss)``: lists with one averaged training loss and
        one validation loss per report point.

    Side effects:
        Writes ``TransformerWeights/{kind}Transformer-run-{run}-val_loss_BEST.pth``
        when the validation loss improves within this call, otherwise
        ``..._LAST.pth``.

    NOTE(review): the model is called without a mask while the targets are
    the inputs shifted by one position, so every position may attend to the
    token it is asked to predict - confirm that this is intended.
    """
    model.train()
    val_best = float("inf")

    # total number of training steps; len(train_dataloader) is the number of
    # batches in one epoch
    num_steps = ep * len(train_dataloader)
    progress = tqdm(range(num_steps))

    # number of batches between reports; clamped to 1 so that the modulo
    # below cannot divide by zero when print_frequency exceeds num_steps
    print_every = max(1, math.floor(num_steps/print_frequency))

    # running state
    steps = 0
    train_loss = []
    val_loss = []
    current_loss = 0

    for e in range(ep):
        for batch in train_dataloader:
            steps += 1
            input_seq, output_seq = batch
            input_seq, output_seq = input_seq.long().to(device), output_seq.long().to(device)

            optimizer.zero_grad()
            output = model(input_seq,None) # no padding in the data, so no padding mask
            output_flatten = output.view(-1,num_vocab)
            loss = loss_fn(output_flatten,output_seq.view(-1))
            current_loss += loss.item()

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(),grad_clip)
            optimizer.step()

            progress.update(1)

            # report, validate and checkpoint
            if steps % print_every == 0:
                current_loss = current_loss / print_every
                print(f"EPOCHS  : {e+1}/{ep} Loss: {current_loss:.6f}")
                train_loss.append(current_loss)
                current_loss = 0

                # Pick one validation loader at random. randint's upper bound
                # is exclusive, so the former randint(0,1) always returned 0
                # and the remaining loaders were never used.
                which_val = np.random.randint(0,len(val_dataloader))
                current_val_loss = evaluate(model,val_dataloader[which_val],device,loss_fn,num_vocab)
                val_loss.append(current_val_loss)

                # save model
                if current_val_loss < val_best:
                    val_best = current_val_loss
                    torch.save(model.state_dict(),f"TransformerWeights/{kind}Transformer-run-{run}-val_loss_BEST.pth")
                else:
                    torch.save(model.state_dict(),f"TransformerWeights/{kind}Transformer-run-{run}-val_loss_LAST.pth")

                # evaluate() leaves the model in eval mode; switch back
                model.train()

        # step the scheduler only while the global epoch index
        # (e plus the epochs of earlier runs) is at most 3
        if scheduler and (e + (ep*(run-1)) <= 3):
            scheduler.step()

    progress.close()
    return train_loss, val_loss
    

In [21]:
# scratch: check fixed-point formatting with three decimals in an f-string
a = 0.01
print(f"{a:.3f}")

0.010


# Train! - Complex Pop Transformer

In [25]:
# get vocabulary size
from miditok import REMI, get_midi_programs
from miditoolkit import MidiFile
# default REMI tokenizer; the size of its vocabulary is used below as both
# the input and the output vocabulary size of the model
tokenizer = REMI()
num_vocab = len(tokenizer.vocab)

In [26]:
# display the vocabulary size
num_vocab

218

In [27]:
# initialize model
# model hyper-parameters and loss (module-level names used by the cells below)
d_model = 1024
N = 12 # num of transformer encoder layers within transformer encoder
heads = 8
loss_fn = nn.CrossEntropyLoss()
# the same REMI vocabulary is used for input tokens and output logits
src_num_tokens = num_vocab
trg_num_tokens = num_vocab




## first 20 epochs - 2 layer, 4 head, batch_size=32

In [28]:
lr = 1e-6 # learning rate
# build the model on the GPU together with its optimizer and LR scheduler
PopTransformer = Transformer(src_num_tokens,trg_num_tokens,d_model,N,heads).cuda()
optimizer = torch.optim.Adam(PopTransformer.parameters(),lr=lr)
# NOTE(review): the recorded training log shows the learning rate changing
# from 1e-06 to 5e-06 after the first epoch, which does not match
# gamma=0.25 - confirm which value was actually used.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer,1,gamma=0.25,verbose=True)

# initializes the parameters with a range of values that stops the signal fading or 
#  getting too big; only parameters with more than one dimension are
#  re-initialised (in place)
for p in PopTransformer.parameters():
    if p.dim() > 1:
        nn.init.xavier_uniform_(p)

Adjusting learning rate of group 0 to 1.0000e-06.


In [27]:
# run 1: 5 epochs with print_frequency=50 report/validation points
train_loss, val_loss = train(PopTransformer,train_dataloader,val_dataloader,5,50,
      loss_fn,optimizer,'cuda',1,grad_clip=1,scheduler=scheduler,
                             num_vocab=num_vocab,kind="ModifiedSuperComplexPop")

  0%|          | 0/61535 [00:00<?, ?it/s]

EPOCHS  : 1/5 Loss: 3.319919
Validation loss: 2.893273

EPOCHS  : 1/5 Loss: 2.926236
Validation loss: 2.823462

EPOCHS  : 1/5 Loss: 2.861809
Validation loss: 2.782711

EPOCHS  : 1/5 Loss: 2.834690
Validation loss: 2.752697

EPOCHS  : 1/5 Loss: 2.780121
Validation loss: 2.666330

EPOCHS  : 1/5 Loss: 2.701588
Validation loss: 2.579406

EPOCHS  : 1/5 Loss: 2.623405
Validation loss: 2.448453

EPOCHS  : 1/5 Loss: 2.484658
Validation loss: 2.214432

EPOCHS  : 1/5 Loss: 2.315582
Validation loss: 2.022099

EPOCHS  : 1/5 Loss: 2.181854
Validation loss: 1.836695

Adjusting learning rate of group 0 to 5.0000e-06.
EPOCHS  : 2/5 Loss: 2.064630
Validation loss: 1.735756

EPOCHS  : 2/5 Loss: 1.982906
Validation loss: 1.605459

EPOCHS  : 2/5 Loss: 1.888807
Validation loss: 1.488358

EPOCHS  : 2/5 Loss: 1.790541
Validation loss: 1.379290

EPOCHS  : 2/5 Loss: 1.701470
Validation loss: 1.280403

EPOCHS  : 2/5 Loss: 1.623118
Validation loss: 1.187325

EPOCHS  : 2/5 Loss: 1.542547
Validation loss: 1.106253

In [28]:
# persist the loss curves returned by run 1
np.save("TransformerLoss/ModifiedSuperComplexPopTransformer_run1_trainloss.npy",np.array(train_loss))
np.save("TransformerLoss/ModifiedSuperComplexPopTransformer_run1_valloss.npy",np.array(val_loss))

In [30]:
# restore weights from a saved checkpoint before continuing training
# NOTE(review): this file name is not one that train() writes (it saves
# ..._BEST.pth / ..._LAST.pth); presumably the file was renamed by hand.
PopTransformer.load_state_dict(torch.load("TransformerWeights/ModifiedSuperComplexPopTransformer-run-1-val_loss_0.36.pth"))

<All keys matched successfully>

In [31]:
# run 2: another 5 epochs with the same optimizer and scheduler objects;
# with run=2 the scheduler cut-off inside train() is never satisfied,
# so the learning rate is not stepped during this call
train_loss, val_loss = train(PopTransformer,train_dataloader,val_dataloader,5,50,
      loss_fn,optimizer,'cuda',2,grad_clip=1,scheduler=scheduler,
                             num_vocab=num_vocab,kind="ModifiedSuperComplexPop")

  0%|          | 0/61535 [00:00<?, ?it/s]

EPOCHS  : 1/5 Loss: 0.786824
Validation loss: 0.335687

EPOCHS  : 1/5 Loss: 0.758714
Validation loss: 0.307433

EPOCHS  : 1/5 Loss: 0.732214
Validation loss: 0.283259

EPOCHS  : 1/5 Loss: 0.706173
Validation loss: 0.256800

EPOCHS  : 1/5 Loss: 0.676352
Validation loss: 0.233120

EPOCHS  : 1/5 Loss: 0.649305
Validation loss: 0.210312

EPOCHS  : 1/5 Loss: 0.610089
Validation loss: 0.185629

EPOCHS  : 1/5 Loss: 0.578247
Validation loss: 0.164072

EPOCHS  : 1/5 Loss: 0.551488
Validation loss: 0.149189

EPOCHS  : 1/5 Loss: 0.524380
Validation loss: 0.134440

EPOCHS  : 2/5 Loss: 0.502593
Validation loss: 0.123661

EPOCHS  : 2/5 Loss: 0.482453
Validation loss: 0.114390

EPOCHS  : 2/5 Loss: 0.467371
Validation loss: 0.105669

EPOCHS  : 2/5 Loss: 0.442775
Validation loss: 0.097588

EPOCHS  : 2/5 Loss: 0.428977
Validation loss: 0.091310

EPOCHS  : 2/5 Loss: 0.418423
Validation loss: 0.085575

EPOCHS  : 2/5 Loss: 0.398248
Validation loss: 0.080494

EPOCHS  : 2/5 Loss: 0.390843
Validation loss: 0.

In [32]:
# persist the loss curves returned by run 2
np.save("TransformerLoss/ModifiedSuperComplexPopTransformer_run2_trainloss.npy",np.array(train_loss))
np.save("TransformerLoss/ModifiedSuperComplexPopTransformer_run2_valloss.npy",np.array(val_loss))

# Generate music

In [16]:
def generate_music(model,name,input_seq_len,output_seq_len,start=0):
    """Greedily continue a tokenized MIDI file with `model`.

    The file ``Generated Music/Input/{name}.midi`` is tokenized with a default
    REMI tokenizer and the first returned token sequence is used. A prompt of
    up to `input_seq_len` tokens is taken from it, starting at the nearest
    token with id 1 at or before `start`. The model is then called
    `output_seq_len` times; after every call the arg-max token of the last
    position is appended to the result and to the prompt, and the oldest
    prompt token is dropped so the prompt length stays constant.

    Args:
        model: module called as ``model(tokens.unsqueeze(0), None)`` whose
            output has the vocabulary on its last dimension.
        name: file name (without extension) inside ``Generated Music/Input/``.
        input_seq_len: maximum number of prompt tokens.
        output_seq_len: number of tokens to generate.
        start: index from which to search backwards for a token with id 1.

    Returns:
        List of ints: a leading 1 followed by the generated token ids.

    Raises:
        ValueError: if no token with id 1 exists at or before `start`.
    """
    # get the tokenized REMI representation of the music
    tokenizer = REMI()
    folder = "Generated Music/Input/"
    midi = MidiFile(folder+name+".midi")
    events_MIDI = tokenizer.midi_to_tokens(midi)
    events_MIDI = events_MIDI[0]

    # Find the last bar at or before `start` (token id 1 is treated as the
    # bar event here). Stop at index 0 instead of wrapping around to
    # negative indices, which would make the slice below meaningless.
    while start >= 0 and events_MIDI[start] != 1:
        start -= 1
    if start < 0:
        raise ValueError("no token with id 1 found at or before the requested start index")

    # the prompt starts with a bar
    input_seq = events_MIDI[start:start+input_seq_len]
    print("input length: ",len(input_seq))

    predicted_seq = [1]

    # run on whatever device the model lives on instead of a hard-coded "cuda"
    device = next(model.parameters()).device
    input_seq = torch.tensor(input_seq,dtype=torch.long,device=device)
    model.eval()

    # inference only: do not record the autograd graph
    with torch.no_grad():
        for _ in range(output_seq_len):
            logits = model(input_seq.unsqueeze(0),None)
            # arg-max over the vocabulary at the last position; softmax is
            # monotonic, so it is not needed to pick the most likely token
            next_token = logits[0,-1].argmax()
            predicted_seq.append(next_token.item())
            # slide the window: drop the oldest token, append the new one
            input_seq = torch.cat((input_seq[1:],next_token.view(1)))

    print(predicted_seq)

    return predicted_seq


In [12]:
# scratch: append a 0-dim tensor to a 1-D tensor by wrapping it in a list first
a = torch.tensor([1,2,3])
b = torch.tensor(3)

print(b.shape)
a = torch.cat((a,torch.tensor([b])))
a

torch.Size([])


tensor([1, 2, 3, 3])

In [13]:
# scratch: prepend a value to a NumPy array with np.insert
a = np.array([1,2,3])
a = np.insert(a,0,10)


## initialize model

In [13]:
# get vocabulary size
from miditok import REMI, get_midi_programs
from miditoolkit import MidiFile
# default REMI tokenizer; the size of its vocabulary sets the model's
# input and output vocabulary size below
tokenizer = REMI()
num_vocab = len(tokenizer.vocab)

In [14]:
# initialize model
# hyper-parameters for the generation model; they have to match the
# checkpoint that is loaded in the next cell
d_model = 1024
N = 12 # num of transformer encoder layers within transformer encoder
heads = 8
loss_fn = nn.CrossEntropyLoss()
src_num_tokens = num_vocab
trg_num_tokens = num_vocab

# created on the CPU here; it is moved to the GPU when generate_music is called
transformer = Transformer(src_num_tokens,trg_num_tokens,d_model,N,heads)


In [15]:
# load the trained weights used for generation
transformer.load_state_dict(torch.load("TransformerWeights/SecondFinestComplexPopTransformer-run-3-val_loss_0.11.pth"))

<All keys matched successfully>

In [19]:
# input file name (without extension) inside "Generated Music/Input/"
music = "001"
#transformer = PopTransformer
# prompt of up to 1024 tokens, 500 generation steps, bar search starting at index 0
output_seq = generate_music(transformer.cuda(),music,1024,500,0)

input length:  1024
[1, 76, 208, 43, 81, 86, 108, 57, 187, 179, 71, 201, 76, 208, 43, 81, 86, 108, 57, 187, 179, 71, 201, 76, 208, 43, 81, 86, 108, 57, 187, 179, 71, 201, 76, 208, 43, 81, 86, 108, 57, 187, 179, 71, 201, 76, 208, 43, 81, 86, 108, 57, 187, 179, 71, 201, 76, 208, 43, 81, 86, 108, 57, 187, 179, 71, 201, 76, 208, 43, 81, 86, 108, 57, 187, 179, 71, 201, 76, 208, 43, 81, 86, 108, 57, 187, 179, 71, 201, 76, 208, 43, 81, 86, 108, 57, 187, 179, 71, 201, 76, 208, 43, 81, 86, 108, 57, 187, 179, 71, 201, 76, 208, 43, 81, 86, 108, 57, 187, 179, 71, 201, 76, 208, 43, 81, 86, 108, 57, 187, 179, 71, 201, 76, 208, 43, 81, 86, 108, 57, 187, 179, 71, 201, 76, 208, 43, 81, 86, 108, 57, 187, 179, 71, 201, 76, 208, 43, 81, 86, 108, 57, 187, 179, 71, 201, 76, 208, 43, 81, 86, 108, 57, 187, 179, 71, 201, 76, 208, 43, 81, 86, 108, 57, 187, 179, 71, 201, 76, 208, 43, 81, 86, 108, 57, 187, 179, 71, 201, 76, 208, 43, 81, 86, 108, 57, 187, 179, 71, 201, 76, 208, 43, 81, 86, 108, 57, 187, 179, 71, 2

In [127]:
# number of tokens in the generated sequence
len(output_seq)

1011

In [20]:
# pieces of the output file name
# NOTE(review): the weights loaded above come from a file named
# "SecondFinestComplexPopTransformer-run-3-..."; confirm that this folder
# name and run label are the intended ones.
model_name = "FinestComplexPopTransformer"
run = "1"
mode = "0.11"

output_path = "Generated Music/"+model_name+"/run-"+run+"-"+music+"-"+mode+".midi"
#music_path = "Generated Music/Input/" + music + ".mid" # classical
music_path = "Generated Music/Input/" + music + ".midi"
tokenizer = REMI()
# the input file is re-read so its programs can be passed to tokens_to_midi
midi = MidiFile(music_path)
output_midi = tokenizer.tokens_to_midi([output_seq],get_midi_programs(midi))

# write the generated MIDI file to disk
output_midi.dump(output_path)

Generated Music/FinestComplexPopTransformer/run-1-001-0.11.midi
