In [1]:
# Project-local model definition and its configuration object.
from model import GPT, GPTConfig
import torch
# NOTE(review): the star import hides where names come from. TokenDatasetMidi and
# load_model, used in later cells, are presumably provided by it — confirm and
# import them explicitly.
from utils import *
import importlib

from mingpt_utils import set_seed
# NOTE(review): generateSample declares a parameter that is also called `sample`,
# so inside that function the name refers to the argument, not to this import.
from mingpt_utils import sample_new, sample

import numpy as np
# Release unoccupied cached GPU memory held by PyTorch's caching allocator.
torch.cuda.empty_cache()

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the token vocabulary and the shuffled training split (token sequences and
# their per-step MIDI features) in a single pass.
# NOTE(review): allow_pickle=True can execute arbitrary code while unpickling;
# only load these files from a trusted source.
tokens, train, midi_train = (
    np.load(npy_path, allow_pickle=True)
    for npy_path in (
        '../data/formatted/tokens.npy',
        '../data/shuffled/dataset_train.npy',
        '../data/shuffled/midi_train.npy',
    )
)
print(train.shape, midi_train.shape)

(43272, 1024) (43272, 1024, 8)


In [3]:
# Sequence length per example; equals the second dimension of `train` as
# printed by the loading cell (43272, 1024).
block_size = 1024
# TokenDatasetMidi is not defined in this notebook (presumably supplied by the
# `utils` star import — confirm). generateSample later reads its stoi/itos maps.
dataset = TokenDatasetMidi(train, midi_train, block_size, tokens)

data has 43272 pieces, 198 unique tokens.


In [4]:
# Hyperparameters. epochs, heads, embedding and batch_size are baked into the
# checkpoint name below; learning_rate and num_workers are not used in this cell.
epochs, batch_size = 100, 128
embedding, heads, layers = 512, 4, 4
learning_rate = 3e-5
num_workers = 4
midi_vocab = 128
token_size = len(tokens)

# Checkpoint path prefix, derived from the hyperparameters above.
MODEL_NAME = (
    f"../models/model_epochs->{epochs}_heads->{heads}_embd->{embedding}"
    f"_batch->{batch_size}_new_midi_embeddings"
)

mconf = GPTConfig(
    token_size,
    block_size,
    midi_vocab,
    n_layer=layers,
    n_head=heads,
    n_embd=embedding,
)

# Instantiate a fresh model from the config and hand it to load_model, whose
# return value becomes the model used for generation.
session_model = load_model(MODEL_NAME, GPT(mconf))

05/20/2024 10:05:06 - INFO - model -   number of parameters: 1.283021e+07


Checkpoint loaded ../models/model_epochs->100_heads->4_embd->512_batch->128_new_midi_embeddings


In [5]:
import formats as fmt
import voicing as vc
# Module-level Voicing helper. generateSample reads it as a global
# (get_midi, durations, all_notes) rather than taking it as an argument.
voicing = vc.Voicing()

def generateSample(context, duration, style, tonality, session_model, dataset, split=True, temperature=1.0, sample=True, top_k=None, top_p=0.99, max_steps=90, device='cuda'):
    """Extend a chord context token-by-token with the model and return the tokens.

    Each iteration encodes the current token list with ``dataset.stoi``, asks
    ``sample_new`` for a continuation, decodes it with ``dataset.itos``, trims
    trailing tokens that fail simple validity checks, and rebuilds the MIDI
    conditioning with the module-level ``voicing`` helper.

    Args:
        context: Chord names (when ``split`` is True) or an already tokenised
            sequence (when ``split`` is False).
        duration: Per-chord durations, passed to
            ``fmt.getArrayOfElementsInChord`` when ``split`` is True.
        style: Style token placed after ``'<style>'`` in the prompt header.
        tonality: Tonality token placed after ``'Tonality'`` in the header.
        session_model: Model forwarded to ``sample_new``.
        dataset: Object exposing ``stoi`` and ``itos`` token lookups.
        split: If True, tokenise ``context`` and prepend the prompt header;
            otherwise use ``context`` as the token list unchanged.
        temperature, sample, top_k, top_p: Forwarded to ``sample_new``.
            Note that ``sample`` shadows any imported function of that name
            inside this function.
        max_steps: Maximum number of generation iterations (default 90, the
            previously hard-coded value).
        device: Device the input tensors are moved to (default ``'cuda'``,
            the previously hard-coded value).

    Returns:
        The list of token strings after the last iteration. If MIDI
        conversion fails, the loop stops early and the tokens that triggered
        the failure are returned as-is (they are not trimmed).
    """
    if split:
        data, _ = fmt.getArrayOfElementsInChord(context, duration)
        print(data)
        data = ['<style>', style, 'Tonality', tonality, '<start>', '|'] + data
    else:
        data = context

    midi, _ = voicing.get_midi(data)

    for _ in range(max_steps):
        x = torch.tensor([dataset.stoi[s] for s in data], dtype=torch.long)[None, ...].to(device)
        m = torch.tensor(midi, dtype=torch.long)[None, ...].to(device)

        # The positional 1 is presumably the number of new tokens per call — TODO confirm.
        y = sample_new(session_model, x, m, 1, temperature=temperature, sample=sample, top_k=top_k, top_p=top_p)[0]

        # Decode ids back to tokens, dropping any that map to an empty/falsy token.
        decoded = (dataset.itos[int(idx)] for idx in y)
        data = [tok for tok in decoded if tok]

        # Drop an immediately repeated trailing token.
        if len(data) > 2 and data[-1] == data[-2]:
            print("Duplicated element: ", data[-1], data[-2])
            data = data[:-1]

        # The checks below index data[-2]; guard the length so a short
        # sequence cannot raise IndexError.
        if len(data) >= 2 and data[-2] == '.' and data[-1] not in voicing.durations:
            print("Durations are not correct: ", data[-1], data[-2])
            data = data[:-2]

        if len(data) >= 2 and data[-2] in voicing.durations and data[-1] not in voicing.all_notes:
            print("Note is not correct: ", data[-1], data[-2])
            data = data[:-2]

        midi, status = voicing.get_midi(data)
        if not status:
            # Stop generating; the tokens are returned untouched.
            print("Error creating the MIDI format")
            break

    return data

In [6]:
# Seed progression handed to the model as chord names.
context = ['Cmaj7', 'Dm7', 'Em7 add 9']

divide = True
# Every seed chord gets the same duration of 4.0.
duration = np.full(len(context), 4.0, dtype=float)
myStyle = 'Jazz'
tonality = 'Bb major'

# Sampling is stochastic: fix the RNG state so Restart & Run All reproduces the
# same sequence. Change SEED to explore other generations.
SEED = 42
set_seed(SEED)
data = generateSample(context, duration, myStyle, tonality, session_model, dataset, divide, temperature=1.1, sample=True, top_k=None, top_p=0.999)

# Print the generated tokens 20 per line; each line starts with a newline and
# every token is followed by a single space.
TOKENS_PER_LINE = 20
for start in range(0, len(data), TOKENS_PER_LINE):
    print()
    print(*data[start:start + TOKENS_PER_LINE], end=' ')

print('\n-------------------------\n')
# Collapse the token stream back into chord names.
seq = voicing.convertChordsFromOutput(data)
print(seq)

['.', '4.0', 'C', 'maj7', '.', '4.0', 'D', 'm7', '.', '4.0', 'E', 'm7', 'add 9']

<style> Jazz Tonality Bb major <start> | . 4.0 C maj7 . 4.0 D m7 . 4.0 E m7 add 9 | 
. 4.0 F m7 add 9 | . 4.0 A m7 add 9 | . 4.0 Ab dom7 alter #5 | . 4.0 
C# m7 add 9 | . 4.0 C# m7 add 9 | . 4.0 Bb m7 | . 4.0 B m7 add 9 
| . 4.0 E dom7 alter #5 | . 4.0 E m7 add 9 | . 4.0 E# dom7 add 9 | . 
4.0 E m7 add 9 | . 4.0 A dom7 add 13 | . 4.0 G# m7 add 9 | . 4.0 G 
dom7 add #9 Form_Coda b|| . 4.0 D dom7 add b9 
-------------------------

['Cmaj7', 'Dm7', 'Em7 add 9', 'Fm7 add 9', 'Am7 add 9', 'Ab7 alter #5', 'C#m7 add 9', 'C#m7 add 9', 'Bbm7', 'Bm7 add 9', 'E7 alter #5', 'Em7 add 9', 'E#7 add 9', 'Em7 add 9', 'A7 add 13', 'G#m7 add 9', 'G7 add #9']


In [7]:
# Reload the voicing module and rebuild the helper so the cell uses the
# current on-disk version of the module.
importlib.reload(vc)
voicing = vc.Voicing()

# data[1] is the style token: with split=True generateSample builds the
# sequence as ['<style>', style, 'Tonality', tonality, ...].
myStyle = data[1]
midi, _ = voicing.convert_chords_to_voicing(data)

# `name` is reused by the playback cell to locate the exported file.
name = voicing.export_to_midi(midi, "generated_"+myStyle)

song: 120508_20_5_2024_generated_Jazz.mid
file: 120508_20_5_2024_generated_Jazz.txt
MIDI file created! 
---------------------------------


In [24]:
# NOTE(review): this cell's execution count (24) is out of sequence and the
# import repeats an earlier one; verify the notebook under Restart & Run All.
import voicing as vc
importlib.reload(vc)
voicing = vc.Voicing()

# NOTE(review): hard-coded absolute directory; assumes export_to_midi wrote
# `name` under /workspace/data/midi_files/ — confirm.
path = "/workspace/data/midi_files/"+name
voicing.play_midi(path)

In [25]:
# Calls Voicing.MidiChord with no arguments; the recorded output reports a
# generated "detuned_Cmaj_chord" MIDI file.
voicing.MidiChord()

MIDI file generated:  122600_20_5_2024_detuned_Cmaj_chord.mid
