In [1]:
import random

import numpy as np
from torch.utils.data import DataLoader
import torch
from src.model.model import MusicTransformer
from pathlib import Path

from src.utils.data.guitar_dataset import GuitarDataset
from src.utils.data.random_guitar_seq_dataset import RandomGuitarSeqDataset
from src.utils.hyperparameters import BATCH_SIZE, BLOCK_SIZE, EMBEDDING_DIM, N_LAYER, N_HEAD, DROPOUT, VOCAB_SIZE, \
    LEARNING_RATE

In [2]:
data_root = Path("../data/")
midi_folder = data_root / "midi/"
augmented_folder = data_root / "augmented/"
tokenized_folder = data_root / "tokenized/"
splits_folder = data_root / "splits/"
train_tok_folder = tokenized_folder / "train-aug/"
val_tok_folder = tokenized_folder / "val/"
train_midi_folder = data_root / "train-midi/"
val_midi_folder = data_root / "val-midi/"

In [3]:
# hyper-parameters
batch_size = BATCH_SIZE

block_size = BLOCK_SIZE
n_embd = EMBEDDING_DIM
vocab_size = VOCAB_SIZE
n_layer = N_LAYER
n_head = N_HEAD
dropout = DROPOUT

learning_rate = LEARNING_RATE
training_split = 0.8

In [4]:
# setting the device
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [5]:
# AMP setup, works only on CUDA
use_amp = (device == "cuda")
if use_amp and hasattr(torch.cuda, "is_bf16_supported") and torch.cuda.is_bf16_supported():
    amp_dtype = torch.bfloat16
else:
    amp_dtype = torch.float16

scaler = torch.amp.GradScaler('cuda', enabled=use_amp)

In [6]:
print("Using device", device)

Using device cuda


In [7]:
# prepare datasets
train_files = sorted(train_tok_folder.glob("*.json"))
val_files = sorted(val_tok_folder.glob("*.json"))
random.shuffle(train_files)

# train_ds = GuitarDataset(block_size=block_size, stride=block_size // 2, file_list=train_files)
# val_ds = GuitarDataset(block_size=block_size, stride=block_size // 2, file_list=val_files)
train_ds = RandomGuitarSeqDataset(block_size=block_size, epoch_len=2000, file_list=train_files)
val_ds = RandomGuitarSeqDataset(block_size=block_size,  epoch_len=400, file_list=val_files)

In [8]:
train_dl = DataLoader(train_ds, batch_size=32, shuffle=True, drop_last=True)
val_dl = DataLoader(val_ds, batch_size=32, shuffle=True, drop_last=True)

In [9]:
print("Training tokens count", train_dl.dataset.total_tokens)
print("Validation tokens count", val_dl.dataset.total_tokens)

Training tokens count 470450
Validation tokens count 56855


In [10]:
model = MusicTransformer(
    vocab_size=vocab_size,
    n_embd=n_embd,
    n_head=n_head,
    n_layer=n_layer,
    block_size=block_size,
    dropout=dropout
).to(device)

In [11]:
# using AdamW optimisation
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, betas=(0.9, 0.95), weight_decay=0.1)

In [12]:
print("Parameter count: ", sum([p.numel() for p in model.parameters()]))
print("Training on ", device)
print("Using amp?", use_amp)

Parameter count:  30877368
Training on  cuda
Using amp? False


In [13]:
# from src.model.model import Head
# import numpy as np
# x_test, y_test = train_ds.__getitem__(56)
# x_test, y_test = x_test.view(1,-1).to(device), y_test.view(1,-1).to(device)
#
print(-np.log(1/8000))
device
#
# logits, loss = model(x_test, y_test)
# loss

8.987196820661973


device(type='cuda')

In [14]:
epochs = 20
V = vocab_size
lnV = np.log(V)

for epoch in range(epochs):
    # ---- train -----
    model.train()
    for x, y in train_dl:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad(set_to_none=True)

        # using torch.autocast here with device_type to avoid backend-specific contexts
        with torch.amp.autocast('cuda', enabled=torch.cuda.is_available()):
            logits, loss = model(x, y)

        if use_amp:
            scaler.scale(loss).backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()

    # ----- validate -----
    model.eval()
    val_loss, total_tokens = 0.0, 0
    with torch.no_grad():
        for x, y in val_dl:
            x, y = x.to(device), y.to(device)
            _, l = model(x, y) # loss here is already mean loss per token for this batch
            # as we have already reshaped logits in forward when computing loss against targets
            num_tokens = y.numel()
            val_loss += l.item() * num_tokens
            total_tokens += num_tokens

    avg_loss = val_loss / total_tokens
    ppl = np.exp(avg_loss)
    bpc = avg_loss / np.log(2)
    improv_ratio = V / ppl
    delta_nats = lnV - avg_loss

    print(
        f"epoch {epoch:03d} "
        f"train {loss.item():.4f} "
        f"val_loss {avg_loss:.4f}  ppl {ppl:.0f}  "
        f"bpc {bpc:.3f}  Δnats {delta_nats:.3f}  x-better {improv_ratio:.2f}x  (lnV {lnV:.3f})"
    )

OutOfMemoryError: CUDA out of memory. Tried to allocate 188.00 MiB. GPU 0 has a total capacity of 15.45 GiB of which 84.44 MiB is free. Including non-PyTorch memory, this process has 15.16 GiB memory in use. Of the allocated memory 14.87 GiB is allocated by PyTorch, and 34.79 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [16]:
x, y = val_dl.dataset.__getitem__(15)
temp = x[:500]
out = model.generate(temp.view(1, -1).to(device), max_new_tokens=2500).cpu()
out

tensor([[2258,  456,   23,  ..., 1368,  519,  921]])

In [17]:
from miditok import REMI, TokenizerConfig

tokenizer = REMI(params=Path("../data/tokenized/config/tokenizer.json"))
print("Is trained", tokenizer.is_trained)
out_midi = tokenizer.decode(out[0])
temp_midi = tokenizer.decode(temp)

Is trained True


  from .autonotebook import tqdm as notebook_tqdm
  super().__init__(tokenizer_config, params)


In [19]:
out_midi.dump_midi(data_root / "output" / "test_gen.mid")
temp_midi.dump_midi(data_root / "output" / "input_input.mid")