In [1]:
from data_preprocessing import get_midis_by_composer, process_midis_to_text

composer = "mozart"

# Loaded MIDIs grouped by dataset split; the loader's log lists the splits in the
# order train, val, test.
mozart_midis = get_midis_by_composer(composer)

# Later cells address the splits by position (0 = train, 1 = val, 2 = test), so
# stop here with a clear message if the loader returns a different number of splits
# instead of silently leaving a split empty or overrunning a fixed-size list.
assert len(mozart_midis) == 3, (
    f"expected train/val/test splits, got {len(mozart_midis)}"
)

# [[train texts], [val texts], [test texts]]
mozart_texts = [
    process_midis_to_text(split_midis, composer) for split_midis in mozart_midis
]

Now loading MIDIs from data\train.
Could not load data\train\mozart-piano_sonatas-nueva_carpeta-k281_piano_sonata_n03_3mov.mid: Could not decode key with 2 flats and mode 2
Could not load data\train\unknown_artist-i_o-mozart_k550.mid: MThd not found. Probably not a MIDI file
Loaded 231 MIDI files from data\train
Now loading MIDIs from data\val.
Loaded 20 MIDI files from data\val
Now loading MIDIs from data\test.
Could not load data\test\unknown_artist-i_o-mozart_q1_2.mid: MThd not found. Probably not a MIDI file
Loaded 22 MIDI files from data\test
273 MIDI files retrieved.
Successfully processed 231 MIDIs into text.
Successfully processed 20 MIDIs into text.
Successfully processed 22 MIDIs into text.


In [2]:
from data_preprocessing import VocabBuilder
import torch

# The first split holds the training texts
mozart_training_texts = mozart_texts[0]

# The vocabulary is built from the training texts only; the builder also exposes
# the encoded training data as `train_ids`
vb = VocabBuilder(mozart_training_texts)
train_ids = vb.train_ids


def _encode_split(texts):
    """Encode each text with `vb` and join all ids into one torch.long tensor."""
    flat_ids = []
    for text in texts:
        flat_ids.extend(vb.encode(text))
    return torch.tensor(flat_ids, dtype=torch.long)


# Validation and test texts are encoded with the training vocabulary
val_ids = _encode_split(mozart_texts[1])
test_ids = _encode_split(mozart_texts[2])

Vocabulary size (train only): 607


In [13]:
import importlib
import model_helpers, models

# Reload the project modules so edits made to model_helpers.py / models.py on disk
# take effect without restarting the kernel. Names previously pulled in with
# `from models import ...` keep pointing at the old objects until re-imported.
importlib.reload(model_helpers)
# The trailing semicolon stops the notebook from echoing the module repr, which
# would otherwise write this machine's absolute file path into the saved output.
importlib.reload(models);

<module 'models' from 'c:\\Users\\racer\\OneDrive - Duke University\\CS372\\project_clone\\classical-music-generation-model\\models.py'>

In [9]:
from models import MidiTextTransformer, train_midi_text_transformer

# Size of the vocabulary built from the training texts
vocab_size = vb.vocab_size

# Architecture hyperparameters, grouped so they can be tuned in one place
model_kwargs = dict(
    d_model=256,
    n_head=4,
    n_layer=6,
    dim_ff=512,
    block_size=512,
)
model = MidiTextTransformer(vocab_size=vocab_size, **model_kwargs)

# Optimisation schedule; the training log reports train/val loss and accuracy
# at multiples of `eval_interval`
training_kwargs = dict(
    max_iters=6000,
    eval_interval=250,
    lr=3e-4,
)

# NOTE(review): whether `trained_model` is the same object as `model` depends on
# train_midi_text_transformer, which is not visible here - confirm.
trained_model = train_midi_text_transformer(
    model,
    train_ids=train_ids,
    val_ids=val_ids,
    vocab_size=vocab_size,
    **training_kwargs,
)

step 0: train loss 6.085, acc 0.049 | val loss 6.089, acc 0.051
step 250: train loss 2.220, acc 0.439 | val loss 2.321, acc 0.435
step 500: train loss 1.751, acc 0.525 | val loss 1.929, acc 0.501
step 750: train loss 1.640, acc 0.547 | val loss 1.777, acc 0.526
step 1000: train loss 1.561, acc 0.564 | val loss 1.720, acc 0.540
step 1250: train loss 1.419, acc 0.592 | val loss 1.614, acc 0.564
step 1500: train loss 1.435, acc 0.590 | val loss 1.617, acc 0.565
step 1750: train loss 1.428, acc 0.597 | val loss 1.649, acc 0.553
step 2000: train loss 1.362, acc 0.612 | val loss 1.518, acc 0.587
step 2250: train loss 1.401, acc 0.600 | val loss 1.466, acc 0.594
step 2500: train loss 1.309, acc 0.618 | val loss 1.554, acc 0.582
step 2750: train loss 1.303, acc 0.628 | val loss 1.500, acc 0.592
step 3000: train loss 1.267, acc 0.634 | val loss 1.452, acc 0.599
step 3250: train loss 1.255, acc 0.637 | val loss 1.484, acc 0.593
step 3500: train loss 1.296, acc 0.626 | val loss 1.415, acc 0.608
s

In [19]:
from models import generate_midi_tokens_with_transformer
from data_preprocessing import SEQ_SOS, SEQ_EOS

# Vocabulary ids of the sequence start / end markers
SOS_ID, EOS_ID = vb.stoi[SEQ_SOS], vb.stoi[SEQ_EOS]

# Prompt: the first 50 encoded tokens of the first piece in the test split
first_test_piece = mozart_texts[2][0]
seed_tokens = vb.encode(first_test_piece)[:50]

# NOTE(review): generation uses `model`, not the `trained_model` returned by the
# training call - presumably the same object trained in place; confirm.
generation_kwargs = dict(
    sos_id=SOS_ID,
    eos_id=EOS_ID,
    start_tokens=seed_tokens,
    max_new_tokens=10000,
)
generated_ids = generate_midi_tokens_with_transformer(model, **generation_kwargs)

# Map the generated ids back to their token text
generated_text = vb.decode(generated_ids)

# Show only a short preview; the full text can be very long
preview = generated_text[:200]
print("First 200 chars of generated text:\n")
print(preview)


First 200 chars of generated text:

<SOS> COMPOSER_mozart KEY_G TIME_SIGNATURE_2/4 TEMPO_BPM_110 MEASURE BEAT POS_0 NOTE_55 DUR_48 VEL_7 NOTE_71 DUR_48 VEL_7 NOTE_79 DUR_48 VEL_7 BEAT POS_0 NOTE_50 DUR_48 VEL_7 NOTE_79 DUR_12 VEL_7 POS_


In [20]:
from midi_conversion import text_to_midi
import os

# Convert the generated token text into the object returned by text_to_midi,
# which is saved to disk below
mid = text_to_midi(generated_text)

# Create output directory if it doesn't exist
output_dir = "generated"
os.makedirs(output_dir, exist_ok=True)

# Name the file after `composer` so that switching composers in the first cell
# does not overwrite (or mislabel) another composer's output; for "mozart" this
# is still generated/mozart_output.mid.
output_path = os.path.join(output_dir, f"{composer}_output.mid")
mid.save(output_path)