In [3]:
from miditok import REMI, TokenizerConfig
from miditok.pytorch_data import DatasetMIDI, DataCollator
from miditok.utils import split_files_for_training
from torch.utils.data import DataLoader
from pathlib import Path
from transformers import GPT2LMHeadModel, GPT2Config, get_linear_schedule_with_warmup
from tqdm.notebook import tqdm
import torch
from torch.optim import Adam

In [5]:
files_paths = list(Path("/Users/nicholasbarsi-rhyne/Projects/classical_music_generator/db").glob("**/*.mid"))

In [78]:
config = TokenizerConfig(num_velocities=16,
                         use_chords=True,
                         use_programs=True)
tokenizer = REMI(config)

if not Path("tokens").exists():
    Path("tokens").mkdir()

# Train the tokenizer with Byte Pair Encoding (BPE)
files_paths = list(Path("/Users/nicholasbarsi-rhyne/Projects/classical_music_generator/db").glob("**/*.mid"))
tokenizer.train(vocab_size=500, files_paths=files_paths)
tokenizer.save(Path("tokens", "tokenizer.json"))






In [79]:
tokenizer

500 tokens with ('T',) io format (one token stream), trained with BPE

In [137]:
dataset_chunks_dir = Path("tokens", "midi_chunks")
split_files_for_training(
    files_paths=files_paths,
    tokenizer=tokenizer,
    save_dir=dataset_chunks_dir,
    max_seq_len=100,
)

# Create a Dataset, a DataLoader and a collator to train a model
dataset = DatasetMIDI(
    files_paths=list(dataset_chunks_dir.glob("**/*.mid")),
    tokenizer=tokenizer,
    max_seq_len=100,
    bos_token_id=tokenizer["BOS_None"],
    eos_token_id=tokenizer["EOS_None"],
)
collator = DataCollator(tokenizer.pad_token_id, copy_inputs_as_labels=True)
dataloader = DataLoader(dataset, batch_size=16, collate_fn=collator, shuffle=True)

# Initialize GPT-2 configuration and model
config = GPT2Config(
    vocab_size=tokenizer.vocab_size,  # Match your tokenizer's vocab size
    n_positions=100,  # Maximum sequence length
    n_ctx=100,
    n_embd=300,  # Embedding dimension
    n_layer=3,  # Number of transformer layers
    n_head=3,  # Number of attention heads
    pad_token_id=tokenizer.pad_token_id
)

model = GPT2LMHeadModel(config)
model = model.to('cuda' if torch.cuda.is_available() else 'cpu')

# Setup training parameters
learning_rate = 5e-5
epochs = 20
batch_size = 64  # Adjust based on your GPU memory
warmup_steps = 1000

# Initialize optimizer and scheduler
optimizer = Adam(model.parameters(), lr=learning_rate)
scheduler = get_linear_schedule_with_warmup(
    optimizer, 
    num_warmup_steps=warmup_steps,
    num_training_steps=len(dataloader) * epochs
)

  split_files_for_training(


In [148]:
tokens = next(iter(dataloader))['input_ids'][0].tolist()
print(tokenizer.decode(tokens))
print(tokens)

Score(ttype=Tick, tpq=8, begin=0, end=69, tracks=1, notes=34, time_sig=1, key_sig=0, markers=0)
[415, 176, 446, 421, 179, 463, 421, 430, 421, 182, 446, 421, 185, 286, 25, 421, 430, 421, 189, 446, 421, 192, 430, 417, 37, 421, 195, 446, 410, 198, 479, 417, 58, 108, 136, 201, 447, 417, 55, 410, 204, 470, 417, 53, 410, 415, 176, 447, 417, 55, 410, 179, 479, 417, 50, 427, 182, 447, 417, 55, 410, 185, 470, 417, 53, 410, 188, 447, 417, 55, 410, 191, 473, 427, 286, 24, 421, 194, 431, 427, 198, 473, 427, 448, 421, 201, 431, 427, 204, 473, 427, 286, 24, 421, 0, 0, 0, 0, 0, 0]


In [17]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.train()

for epoch in range(epochs):
    total_loss = 0
    progress_bar = tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}")
    
    for batch in progress_bar:
        # Get inputs
        inputs = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        
        # Forward pass
        outputs = model(
            input_ids=inputs,
            attention_mask=attention_mask,
            labels=labels
        )
        
        loss = outputs.loss
        total_loss += loss.item()
        
        # Update progress bar
        progress_bar.set_postfix({"loss": f"{loss.item():.4f}"})
        
        # Backward pass
        loss.backward()
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()
    
    avg_loss = total_loss / len(dataloader)
    print(f'Epoch {epoch+1} average loss: {avg_loss:.4f}')

# Save the model
model.save_pretrained('midi_gpt2_model')
tokenizer.save_pretrained('midi_gpt2_model')

Epoch 1/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 1 average loss: 5.9409


Epoch 2/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 2 average loss: 5.2907


Epoch 3/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 3 average loss: 4.7903


Epoch 4/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 4 average loss: 4.3164


Epoch 5/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 5 average loss: 3.7934


Epoch 6/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 6 average loss: 3.3533


Epoch 7/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 7 average loss: 2.9972


Epoch 8/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 8 average loss: 2.7265


Epoch 9/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 9 average loss: 2.5251


Epoch 10/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 10 average loss: 2.3740


Epoch 11/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 11 average loss: 2.2586


Epoch 12/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 12 average loss: 2.1681


Epoch 13/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 13 average loss: 2.0953


Epoch 14/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 14 average loss: 2.0353


Epoch 15/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 15 average loss: 1.9846


Epoch 16/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 16 average loss: 1.9405


Epoch 17/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 17 average loss: 1.9085


Epoch 18/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 18 average loss: 1.8862


Epoch 19/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 19 average loss: 1.8710


Epoch 20/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 20 average loss: 1.8621


In [18]:
def generate_midi(
    model,
    tokenizer,
    prompt=None,
    max_length=512,
    temperature=0.7,
    num_return_sequences=1,
    top_k=50,
    top_p=0.95
):
    # Set the model to evaluation mode
    model.eval()
    
    # If no prompt is provided, start with the BOS token
    if prompt is None:
        # Use the correct way to access the BOS token
        input_ids = torch.tensor([[tokenizer["BOS_None"]]]).to(device)
    else:
        # For miditok tokenizers, encode might work differently
        tokens = tokenizer(prompt)
        input_ids = torch.tensor([tokens]).to(device)
    
    # Generate
    with torch.no_grad():
        outputs = model.generate(
            input_ids,
            max_length=max_length,
            temperature=temperature,
            num_return_sequences=num_return_sequences,
            top_k=top_k,
            top_p=top_p,
            pad_token_id=tokenizer['PAD_None'],  # Use the tokenizer's actual pad token ID
            eos_token_id=tokenizer["EOS_None"],   # Use the tokenizer's actual EOS token ID
            do_sample=True
        )
    
    # Decode the sequences to MIDI objects
    generated_midis = []
    for sequence in outputs:
        # Convert the sequence to a list first
        tokens_list = sequence.cpu().numpy().tolist()
        # Use the appropriate decode method for miditok
        midi_obj = tokenizer.decode(tokens_list)
        generated_midis.append(midi_obj)
    
    return generated_midis

# Load the saved model and tokenizer
model = GPT2LMHeadModel.from_pretrained('midi_gpt2_model')
model.to(device)

# Generate new music
sequences = generate_midi(
    model,
    tokenizer,
    prompt=None,  # Can provide a starting sequence if desired
    temperature=0.7,  # Adjust for more/less randomness
    num_return_sequences=15
)
sequences[1].dump_midi("generated_classical.mid")

In [81]:
import torch
import torch.nn as nn

class BidirectionalLSTMModel(nn.Module):
    def __init__(self, vocab_size, hidden_size1=512, hidden_size2=256, embedding_dim=100):
        super(BidirectionalLSTMModel, self).__init__()
        
        # Embedding layer to convert token IDs to vectors
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        
        # First Bidirectional LSTM layer
        self.lstm1 = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_size1,
            batch_first=True,
            bidirectional=True
        )
        
        self.dropout1 = nn.Dropout(0.1)
        
        # Second Bidirectional LSTM layer
        self.lstm2 = nn.LSTM(
            input_size=hidden_size1 * 2,
            hidden_size=hidden_size2,
            batch_first=True,
            bidirectional=True
        )
        
        self.dense1 = nn.Linear(hidden_size2 * 2, hidden_size2)
        self.dropout2 = nn.Dropout(0.1)
        
        # Output layer (predicts next token)
        self.dense2 = nn.Linear(hidden_size2, vocab_size)
        
    def forward(self, x):
        # Convert token IDs to embeddings
        x = self.embedding(x)
        
        # First LSTM layer
        lstm1_out, _ = self.lstm1(x)
        lstm1_out = self.dropout1(lstm1_out)
        
        # Second LSTM layer
        lstm2_out, _ = self.lstm2(lstm1_out)
        
        # For language modeling, we need predictions for each position
        # Reshape to (batch_size * seq_length, hidden_size)
        batch_size, seq_length, hidden_size = lstm2_out.shape
        lstm2_out = lstm2_out.contiguous().view(-1, hidden_size)
        
        # Dense layers
        dense1_out = self.dense1(lstm2_out)
        dense1_out = self.dropout2(dense1_out)
        
        # Output layer (logits for each token in vocabulary)
        logits = self.dense2(dense1_out)
        
        # Reshape back to (batch_size, seq_length, vocab_size)
        logits = logits.view(batch_size, seq_length, -1)
        
        return logits


In [82]:
tokenizer.vocab_size

500

In [83]:
model = BidirectionalLSTMModel(tokenizer.vocab_size)

In [84]:
def train_model(model, dataloader, epochs, device):
    # Move model to device
    model = model.to(device)
    
    # Define loss function and optimizer
    criterion = nn.CrossEntropyLoss()  # Ignore padded positions (-100)
    optimizer = Adam(model.parameters(), lr=1e-4)
    
    # Training loop
    for epoch in range(epochs):
        model.train()
        total_loss = 0
        progress_bar = tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}")
        
        for batch in progress_bar:
            # Get batch data and move to device
            input_ids = batch['input_ids'].to(device)
            labels = batch['labels'].to(device)
            
            # Zero gradients
            optimizer.zero_grad()
            
            # Forward pass
            outputs = model(input_ids)
            
            # Reshape for loss calculation
            _, _, vocab_size = outputs.shape
            outputs = outputs.view(-1, vocab_size)
            labels = labels.view(-1)
            
            # Calculate loss
            loss = criterion(outputs, labels)
            
            # Backward pass and optimize
            loss.backward()
            optimizer.step()
            
            # Update progress bar
            total_loss += loss.item()
            progress_bar.set_postfix({"loss": f"{loss.item():.4f}"})
        
        avg_loss = total_loss / len(dataloader)
        print(f'Epoch {epoch+1} average loss: {avg_loss:.4f}')
    
    return model

In [87]:
trained_model = train_model(model, dataloader, epochs, device)

Epoch 1/20:   0%|          | 0/1733 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
sample_input = torch.randint(0, tokenizer.vocab_size, (1, 100))  # Batch size 1, sequence length 512
sample_input = sample_input.to(device)

# Script the model using torch.jit.trace
scripted_model = torch.jit.trace(trained_model, sample_input)

# Save the scripted model
scripted_model.save("lstm_model_jit.pt")

Tensor-likes are not close!

Mismatched elements: 209397 / 209408 (100.0%)
Greatest absolute difference: 3.3013447523117065 at index (0, 485, 120) (up to 1e-05 allowed)
Greatest relative difference: 6183.479794332025 at index (0, 314, 307) (up to 1e-05 allowed)
  _check_trace(


In [106]:
def generate_music_lstm(
    model,
    tokenizer,
    prompt=None,
    max_length=100,
    temperature=0.7,
    device='cuda' if torch.cuda.is_available() else 'cpu'
):
    # Set model to evaluation mode
    model.eval()
    
    # Start with BOS token if no prompt is provided
    if prompt is None:
        current_sequence = torch.tensor([[tokenizer["BOS_None"]]]).to(device)
    else:
        tokens = tokenizer(prompt)
        current_sequence = torch.tensor([tokens]).to(device)
    
    generated_sequence = current_sequence.clone()
    
    # Generate tokens one by one
    with torch.no_grad():
        for _ in range(max_length):
            # Get model predictions
            outputs = model(current_sequence)
            
            # Get the predictions for the last token
            next_token_logits = outputs[:, -1, :]
            
            # Apply temperature
            next_token_logits = next_token_logits / temperature
            
            # Apply softmax to get probabilities
            probs = torch.softmax(next_token_logits, dim=-1)
            
            # Sample from the distribution
            next_token = torch.multinomial(probs, num_samples=1)
            
            # Append the new token to the sequence
            generated_sequence = torch.cat([generated_sequence, next_token], dim=1)
            current_sequence = generated_sequence
            
            # Stop if we generate an EOS token
            if next_token.item() == tokenizer["EOS_None"]:
                break
    
    # Convert the generated sequence to MIDI
    generated_tokens = generated_sequence.cpu().numpy().tolist()[0]

    print(generated_tokens)
    midi_obj = tokenizer.decode(generated_tokens)
    print(midi_obj)
    
    return midi_obj, generated_tokens

# Use the function to generate music
model.to(device)  # Make sure model is on the right device
generated_midi, tokens = generate_music_lstm(
    model,
    tokenizer,
    prompt=None,
    temperature=0.7  # Adjust this value to control randomness (lower = more conservative)
)

# Save the generated MIDI file
generated_midi.dump_midi("lstm_generated_classical.mid")

[1, 95, 183, 343, 201, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280]
Score(ttype=Tick, tpq=8, begin=0, end=0, tracks=0, notes=0, time_sig=1, key_sig=0, markers=0)


In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.train()

for epoch in range(epochs):
    total_loss = 0
    progress_bar = tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}")
    
    for batch in progress_bar:
        # Get inputs
        inputs = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        
        # Forward pass
        outputs = model(
            input_ids=inputs,
            attention_mask=attention_mask,
            labels=labels
        )
        
        loss = outputs.loss
        total_loss += loss.item()
        
        # Update progress bar
        progress_bar.set_postfix({"loss": f"{loss.item():.4f}"})
        
        # Backward pass
        loss.backward()
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()
    
    avg_loss = total_loss / len(dataloader)
    print(f'Epoch {epoch+1} average loss: {avg_loss:.4f}')

# Save the model
model.save_pretrained('midi_gpt2_model')
tokenizer.save_pretrained('midi_gpt2_model')

Epoch 1/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 1 average loss: 5.9409


Epoch 2/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 2 average loss: 5.2907


Epoch 3/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 3 average loss: 4.7903


Epoch 4/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 4 average loss: 4.3164


Epoch 5/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 5 average loss: 3.7934


Epoch 6/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 6 average loss: 3.3533


Epoch 7/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 7 average loss: 2.9972


Epoch 8/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 8 average loss: 2.7265


Epoch 9/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 9 average loss: 2.5251


Epoch 10/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 10 average loss: 2.3740


Epoch 11/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 11 average loss: 2.2586


Epoch 12/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 12 average loss: 2.1681


Epoch 13/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 13 average loss: 2.0953


Epoch 14/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 14 average loss: 2.0353


Epoch 15/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 15 average loss: 1.9846


Epoch 16/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 16 average loss: 1.9405


Epoch 17/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 17 average loss: 1.9085


Epoch 18/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 18 average loss: 1.8862


Epoch 19/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 19 average loss: 1.8710


Epoch 20/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 20 average loss: 1.8621


In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.train()

for epoch in range(epochs):
    total_loss = 0
    progress_bar = tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}")
    
    for batch in progress_bar:
        # Get inputs
        inputs = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        
        # Forward pass
        outputs = model(
            input_ids=inputs,
            attention_mask=attention_mask,
            labels=labels
        )
        
        loss = outputs.loss
        total_loss += loss.item()
        
        # Update progress bar
        progress_bar.set_postfix({"loss": f"{loss.item():.4f}"})
        
        # Backward pass
        loss.backward()
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()
    
    avg_loss = total_loss / len(dataloader)
    print(f'Epoch {epoch+1} average loss: {avg_loss:.4f}')

# Save the model
model.save_pretrained('midi_gpt2_model')
tokenizer.save_pretrained('midi_gpt2_model')

Epoch 1/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 1 average loss: 5.9409


Epoch 2/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 2 average loss: 5.2907


Epoch 3/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 3 average loss: 4.7903


Epoch 4/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 4 average loss: 4.3164


Epoch 5/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 5 average loss: 3.7934


Epoch 6/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 6 average loss: 3.3533


Epoch 7/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 7 average loss: 2.9972


Epoch 8/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 8 average loss: 2.7265


Epoch 9/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 9 average loss: 2.5251


Epoch 10/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 10 average loss: 2.3740


Epoch 11/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 11 average loss: 2.2586


Epoch 12/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 12 average loss: 2.1681


Epoch 13/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 13 average loss: 2.0953


Epoch 14/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 14 average loss: 2.0353


Epoch 15/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 15 average loss: 1.9846


Epoch 16/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 16 average loss: 1.9405


Epoch 17/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 17 average loss: 1.9085


Epoch 18/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 18 average loss: 1.8862


Epoch 19/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 19 average loss: 1.8710


Epoch 20/20:   0%|          | 0/69 [00:00<?, ?it/s]

Epoch 20 average loss: 1.8621


In [61]:
generated_midi

Score(ttype=Tick, tpq=8, begin=0, end=0, tracks=0, notes=0, time_sig=1, key_sig=0, markers=0)

In [None]:
# Generate multiple pieces with different temperatures
temperatures = [0.5, 0.7, 0.9, 1.0]
for i, temp in enumerate(temperatures):
    generated_midi = generate_music_lstm(
        model,
        tokenizer,
        temperature=temp
    )
    generated_midi.dump_midi(f"lstm_generated_classical_temp_{temp}.mid")