In [1]:
from model import GPT, GPTConfig
import torch
from utils import *
import importlib

from mingpt_utils import set_seed
from mingpt_utils import sample_new, sample

import numpy as np
# Ask PyTorch's CUDA caching allocator to release cached blocks it is not
# currently using, so a re-run of this notebook starts with as much free GPU
# memory as possible. It does not free tensors that are still referenced.
torch.cuda.empty_cache()

In [2]:
# Load the token vocabulary plus the tokenised training set and its aligned
# MIDI representation, in that order.
# NOTE(review): allow_pickle=True executes pickled code embedded in the file;
# only load .npy files that come from a trusted source.
tokens, train, midi_train = (
    np.load(npy_path, allow_pickle=True)
    for npy_path in (
        '../data/formatted/tokens.npy',
        '../data/shuffled/dataset_train.npy',
        '../data/shuffled/midi_train.npy',
    )
)
# Sanity check: both arrays should share their first two dimensions.
print(train.shape, midi_train.shape)

(43272, 1024) (43272, 1024, 8)


In [3]:
# Context length handed to both the dataset and the model config; it equals
# the second dimension of `train` printed by the previous cell (1024).
block_size = 1024
# TokenDatasetMidi is not imported by name -- presumably it arrives through
# `from utils import *` (TODO confirm). The resulting object exposes the
# `stoi` / `itos` lookups that generateSample uses to encode and decode tokens.
dataset = TokenDatasetMidi(train, midi_train, block_size, tokens)

data has 43272 pieces, 198 unique tokens.


In [4]:
# --- Training-run settings -------------------------------------------------
# epochs and batch_size are baked into the checkpoint file name below, so they
# must match the run that produced the checkpoint. learning_rate and
# num_workers are not referenced by any cell visible in this notebook.
epochs = 270
batch_size = 128
learning_rate = 3e-5
num_workers = 4

# --- Model architecture ----------------------------------------------------
embedding = 256
heads = 4
layers = 4
midi_vocab = 128
token_size = len(tokens)

mconf = GPTConfig(
    token_size,
    block_size,
    midi_vocab,
    n_layer=layers,
    n_head=heads,
    n_embd=embedding,
)
session_model = GPT(mconf)

# Checkpoint path; the literal "->" separators are part of the file name.
MODEL_NAME = f"../models/model_epochs->{epochs}_heads->{heads}_embd->{embedding}_batch->{batch_size}_new_midi_embeddings"

# Replace the freshly initialised model with whatever load_model returns for
# this checkpoint.
session_model = load_model(MODEL_NAME, session_model)

03/27/2024 10:01:34 - INFO - model -   number of parameters: 3.269632e+06


Checkpoint loaded ../models/model_epochs->270_heads->4_embd->256_batch->128_new_midi_embeddings


In [5]:
import formats as fmt
import voicing as vc
# Module-level Voicing instance. generateSample does not take it as a
# parameter; it looks up this global `voicing` at call time, so rebinding the
# name in a later cell changes which instance the function uses.
voicing = vc.Voicing()

def generateSample(context, duration, style, tonality, session_model, dataset, split = True, temperature=1.0, sample=True, top_k=None, top_p=0.99, max_steps=200, device='cuda'):
    """Autoregressively extend a chord sequence with the trained model.

    The function relies on the notebook-level globals ``fmt``, ``voicing``,
    ``torch`` and ``sample_new``.

    Args:
        context: When ``split`` is true, the seed chords handed to
            ``fmt.getArrayOfElementsInChord`` together with ``duration``.
            When ``split`` is false, an already tokenised sequence that is
            used as the prompt unchanged.
        duration: Per-chord durations; only used when ``split`` is true.
        style: Style token placed right after ``'<style>'`` in the prompt.
        tonality: Tonality token placed right after ``'Tonality'``.
        session_model: Model passed through to ``sample_new``.
        dataset: Object providing ``stoi`` (token -> id) and ``itos``
            (id -> token) lookups.
        split: Whether to build the prompt from ``context`` (see above).
        temperature, sample, top_k, top_p: Forwarded to ``sample_new``.
        max_steps: Maximum number of sampling iterations. One token is
            requested per iteration. Defaults to the previously hard-coded 200.
        device: Device the input tensors are moved to. Defaults to the
            previously hard-coded ``'cuda'``; it has to match the device the
            model lives on.

    Returns:
        The list of token strings after generation stopped, either because
        ``max_steps`` was reached or because ``voicing.get_midi`` reported a
        failure.

    Raises:
        KeyError: If a token of the prompt is missing from ``dataset.stoi``.
    """
    if split:
        data, _ = fmt.getArrayOfElementsInChord(context, duration)
        print(data)
        # Prompt layout: style header, tonality header, start marker, first bar.
        data = ['<style>', style, 'Tonality', tonality, '<start>', '|'] + data
    else:
        data = context

    midi, _ = voicing.get_midi(data)

    for _step in range(max_steps):
        x = torch.tensor([dataset.stoi[s] for s in data], dtype=torch.long)[None, ...].to(device)
        m = torch.tensor(midi, dtype=torch.long)[None, ...].to(device)

        # Request exactly one additional token, then decode the whole sequence.
        y = sample_new(session_model, x, m, 1, temperature=temperature, sample=sample, top_k=top_k, top_p=top_p)[0]

        # Decode ids back to tokens, discarding any that map to a falsy entry.
        decoded = (dataset.itos[int(token_id)] for token_id in y)
        data = [token for token in decoded if token]

        # Reject an immediately repeated token.
        if len(data) > 2 and data[-1] == data[-2]:
            print("Duplicated element: ", data[-1], data[-2])
            data = data[:-1]

        # The two grammar checks below look at the last two tokens. The length
        # guards keep a very short sequence from raising IndexError.
        # A '.' must be followed by a known duration; otherwise drop both.
        if len(data) >= 2 and data[-2] == '.' and data[-1] not in voicing.durations:
            print("Durations are not correct: ", data[-1], data[-2])
            data = data[:-2]

        # A duration must be followed by a known note; otherwise drop both.
        if len(data) >= 2 and data[-2] in voicing.durations and data[-1] not in voicing.all_notes:
            print("Note is not correct: ", data[-1], data[-2])
            data = data[:-2]

        midi, status = voicing.get_midi(data)
        # The type of `status` is not visible here, so the original equality
        # comparison is kept instead of a truthiness test.
        # NOTE(review): on failure the offending tokens are left in `data`;
        # an older comment here said "erase the last element" -- confirm
        # whether the caller expects the sequence to be trimmed.
        if status == False:  # noqa: E712
            print("Error creating the MIDI format")
            break

    print(data)
    return data

In [24]:
# Seed progression: every chord lasts 4.0 (one value per entry in `context`).
context = ['Fmaj7', 'Fmaj7', 'Bbmaj7']
duration = np.full(len(context), 4.0, dtype=float)
myStyle = 'Pop'
tonality = 'F major'
divide = True  # let generateSample build the prompt from the chord symbols

# NOTE(review): sampling is stochastic and `set_seed` is imported but never
# called, so re-running this cell produces a different continuation each time.
data = generateSample(
    context,
    duration,
    myStyle,
    tonality,
    session_model,
    dataset,
    divide,
    temperature=1.2,
    sample=True,
    top_k=None,
    top_p=0.933,
)

# Show the generated tokens 20 per row: a newline before each row, and every
# token followed by a single space.
for row_start in range(0, len(data), 20):
    print()
    print(*data[row_start:row_start + 20], end=' ')

['.', '4.0', 'F', 'maj7', '.', '4.0', 'F', 'maj7', '.', '4.0', 'Bb', 'maj7']
['<style>', 'Pop', 'Tonality', 'F major', '<start>', '|', '.', '4.0', 'F', 'maj7', '.', '4.0', 'F', 'maj7', '.', '4.0', 'Bb', 'maj7', '|', '.', '4.0', 'Bb', 'maj7', '|', '.', '2.0', 'F', 'maj7', '.', '2.0', 'Bb', 'dom7', 'Form_A', '|', '.', '4.0', 'A', 'm', 'add 11', '|', '.', '4.0', 'D', 'dom7', 'add 11', '|', '.', '2.0', 'G', 'm7', 'add 9', '.', '2.0', 'D', 'dom7', 'add b9', '|', '.', '2.0', 'G', 'm7', 'add 9', '.', '1.0', 'D', 'm7', '/', 'C', '.', '1.0', 'G', 'dom7', '|', '.', '2.0', 'G', 'm7', '.', '2.0', 'D', 'dom7', 'add b9', '|', '.', '2.0', 'G', 'm7', 'add 9', '.', '1.0', 'D', 'm7', '/', 'C', '.', '1.0', 'G', 'dom7', '|', '.', '2.0', 'G', 'm7', '.', '2.0', 'D', 'dom7', 'add b9', '|', '.', '2.0', 'G', 'm7', 'add 9', '.', '1.0', 'D', 'm7', '/', 'C', '.', '1.0', 'G', 'dom7', 'add 9', '|', '.', '2.0', 'G', 'm7', '.', '2.0', 'D', 'dom7', 'add b9', '|', '.', '2.0', 'G', 'm7', 'add 9', '.', '1.0', 'D', 'm7', 

In [25]:
# Re-import the voicing module so edits made to it on disk take effect without
# restarting the kernel, then rebuild the instance from the reloaded class.
# Rebinding the global `voicing` also changes the instance generateSample uses
# on any later call, because the function looks the name up at call time.
importlib.reload(vc)
voicing = vc.Voicing()

# data[1] is the style token only when `data` starts with the
# '<style>', <style name>, ... header that generateSample builds for split=True.
myStyle = data[1]
# Second return value is discarded here.
midi, _ = voicing.convert_chords_to_voicing(data)

# Judging by the cell output, export_to_midi prefixes the given name with a
# timestamp and writes a .mid file alongside a .txt file -- TODO confirm in voicing.py.
voicing.export_to_midi(midi, "generated_"+myStyle)

song: 111311_27_3_2024_generated_Pop.mid
file: 111311_27_3_2024_generated_Pop.txt
MIDI file created! 
---------------------------------
