In [1]:
import os
import shutil

source_dir = './maestro-v2.0.0'  # Replace with the actual path if needed
destination_dir = './maestro_flat' # Directory to store flattened files

# Create the destination directory if it doesn't exist
if not os.path.exists(destination_dir):
    os.makedirs(destination_dir)

# Walk through the source directory
for root, _, files in os.walk(source_dir):
    for file in files:
        # Check if the file is a MIDI file
        if file.endswith('.midi') or file.endswith('.mid'):
            source_path = os.path.join(root, file)
            destination_path = os.path.join(destination_dir, file)
            # Move the file
            shutil.move(source_path, destination_path)

print(f"Flattening complete. MIDI files are now in '{destination_dir}'")

Flattening complete. MIDI files are now in './maestro_flat'


In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchaudio
import numpy as np
import os
from pathlib import Path
import matplotlib.pyplot as plt
from tqdm import tqdm
import math
import glob
from pathlib import Path
import torchaudio.transforms as T


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.2.6 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/opt/conda/lib/python3.11/runpy.py", line 198, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/opt/conda/lib/python3.11/runpy.py", line 88, in _run_code
    exec(code, run_globals)
  File "/home/aboomina/task3kernel/lib/python3.11/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/home/aboomina/task3kernel/lib/python3.11/site-packages/traitlets/config/application.py", line 1075, in launch_instance
    app.start()
  File "/home/aboomina/ta

In [2]:
MIDI_DIR = "./maestro_flat/"
SEQ_LEN = 512
BATCH_SIZE = 8
EPOCHS = 10
EMBED_DIM = 512
NUM_HEADS = 8
NUM_LAYERS = 6
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
TOKENIZER_DIR = "./tokenizer_config/"

In [3]:
from tqdm import tqdm
from miditok import REMI
from pathlib import Path

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
tokenizer = REMI()
tokenizer.save_params(Path(TOKENIZER_DIR))  # Save tokenizer config

# Tokenize all MIDI files
token_sequences = []
for file in os.listdir(MIDI_DIR):
    if file.endswith(".mid") or file.endswith(".midi"):
        tokens = tokenizer.midi_to_tokens(Path(MIDI_DIR) / file)
        token_ids = tokens[0].ids
        
        if len(token_ids) >= SEQ_LEN:
            token_sequences.append(token_ids)

  tokenizer.save_params(Path(TOKENIZER_DIR))  # Save tokenizer config
  tokens = tokenizer.midi_to_tokens(Path(MIDI_DIR) / file)


In [5]:
class MidiDataset(Dataset):
    def __init__(self, sequences, seq_len):
        self.data = []
        for seq in sequences:
            for i in range(0, len(seq) - seq_len, seq_len):
                chunk = seq[i:i+seq_len]
                self.data.append(chunk)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        x = torch.tensor(self.data[idx][:-1], dtype=torch.long)
        y = torch.tensor(self.data[idx][1:], dtype=torch.long)
        return x, y

dataset = MidiDataset(token_sequences, SEQ_LEN)
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

In [6]:
class MidiTransformer(nn.Module):
    def __init__(self, vocab_size, embed_dim, num_heads, num_layers):
        super().__init__()
        self.token_emb = nn.Embedding(vocab_size, embed_dim)
        self.pos_emb = nn.Parameter(torch.zeros(1, SEQ_LEN - 1, embed_dim))
        decoder_layer = nn.TransformerDecoderLayer(d_model=embed_dim, nhead=num_heads)
        self.transformer = nn.TransformerDecoder(decoder_layer, num_layers)
        self.fc_out = nn.Linear(embed_dim, vocab_size)

    def forward(self, x):
        # x shape: (batch, seq_len)
        x = self.token_emb(x) + self.pos_emb[:, :x.size(1), :]
        x = x.permute(1, 0, 2)  # (seq_len, batch, embed_dim)
        tgt_mask = nn.Transformer.generate_square_subsequent_mask(x.size(0)).to(x.device)
        out = self.transformer(x, x, tgt_mask=tgt_mask)
        out = self.fc_out(out)
        return out.permute(1, 0, 2)  # (batch, seq_len, vocab)

# --------- Training ---------


In [None]:
model = MidiTransformer(vocab_size=len(tokenizer), embed_dim=EMBED_DIM,
                        num_heads=NUM_HEADS, num_layers=NUM_LAYERS).to(DEVICE)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()

print(f"Training on {DEVICE}...")
for epoch in range(EPOCHS):
    model.train()
    epoch_loss = 0
    for batch in tqdm(dataloader):
        x, y = batch
        x, y = x.to(DEVICE), y.to(DEVICE)
        optimizer.zero_grad()
        output = model(x)
        loss = criterion(output.reshape(-1, output.size(-1)), y.reshape(-1))
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    print(f"Epoch {epoch+1}/{EPOCHS} - Loss: {epoch_loss/len(dataloader):.4f}")

torch.save(model.state_dict(), "midi_transformer.pth")

In [7]:
model = MidiTransformer(vocab_size=len(tokenizer), embed_dim=EMBED_DIM,
                        num_heads=NUM_HEADS, num_layers=NUM_LAYERS).to(DEVICE)
model.load_state_dict(torch.load("midi_transformer.pth", map_location=DEVICE))
model.eval()

MidiTransformer(
  (token_emb): Embedding(282, 512)
  (transformer): TransformerDecoder(
    (layers): ModuleList(
      (0-5): 6 x TransformerDecoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
        )
        (multihead_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
        )
        (linear1): Linear(in_features=512, out_features=2048, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=2048, out_features=512, bias=True)
        (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        (norm3): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
        (dropout3): Dropout(

In [9]:
import torch
from pathlib import Path
from miditok import REMI, TokSequence

# Initialize tokenizer
tokenizer = REMI(params=Path("tokenizer_config/tokenizer.json"))

# Tokenize MIDI file
tokenized_midi = tokenizer.midi_to_tokens("maestro_flat/seed1.midi")[0]  # Only use the first track

# Take the first N tokens as seed (e.g., 32)
generated = tokenized_midi.ids[:32]
print(generated)

# Define your constants (replace with actual values)
for _ in range(SEQ_LEN):
    input_seq = torch.tensor(generated[-(SEQ_LEN - 1):], dtype=torch.long).unsqueeze(0).to(DEVICE)
    with torch.no_grad():
        output = model(input_seq)
    next_token = torch.argmax(output[0, -1], dim=-1).item()
    generated.append(next_token)

# Convert Tokens to MIDI
sequence = TokSequence(ids=generated)
print(sequence)
print(type(sequence))

# Complete missing token info (like strings)
tokenizer.complete_sequence(sequence)
print(sequence)
print(type(sequence))

# Convert to MIDI
pls = Path("new")
tokenizer.decode(tokens=[sequence], programs=None, output_path=Path("new"))





  tokenized_midi = tokenizer.midi_to_tokens("maestro_flat/seed1.midi")[0]  # Only use the first track


[4, 206, 26, 110, 135, 215, 33, 112, 131, 4, 189, 41, 113, 126, 194, 41, 114, 127, 197, 33, 113, 128, 200, 26, 113, 126, 204, 26, 111, 130, 208, 33]
TokSequence(tokens=[], ids=[4, 206, 26, 110, 135, 215, 33, 112, 131, 4, 189, 41, 113, 126, 194, 41, 114, 127, 197, 33, 113, 128, 200, 26, 113, 126, 204, 26, 111, 130, 208, 33, 113, 126, 200, 26, 111, 130, 208, 26, 113, 126, 200, 26, 111, 130, 208, 26, 113, 126, 200, 26, 111, 130, 208, 26, 113, 126, 200, 26, 111, 130, 208, 26, 113, 126, 200, 26, 111, 130, 208, 26, 113, 126, 200, 26, 111, 130, 208, 26, 113, 126, 200, 26, 111, 130, 208, 26, 113, 126, 200, 26, 111, 130, 208, 26, 113, 126, 200, 26, 111, 130, 208, 26, 113, 126, 208, 26, 111, 130, 200, 26, 113, 126, 208, 26, 111, 130, 208, 26, 113, 126, 208, 26, 111, 126, 208, 26, 113, 130, 208, 26, 111, 126, 208, 26, 113, 126, 208, 26, 111, 126, 208, 26, 113, 126, 208, 26, 111, 126, 208, 26, 113, 126, 208, 26, 111, 126, 208, 26, 113, 208, 208, 208, 208, 26, 111, 126, 208, 208, 208, 208, 208, 208

RuntimeError: File not found