In [1]:
# Imports
import torch
from torch import nn
from src.utils.preprocess_utils import midi_to_multiclass_vectors, multiclass_vectors_to_midi
import numpy as np
import torch.nn.functional as F
from mido import MidiFile, MidiTrack, Message
from datetime import datetime

In [2]:
# Define model architecture
class TransformerModel(nn.Module):
    def __init__(self, vocab_size, embedding_size, num_heads, hidden_size, num_layers, dropout=0.2):
        super(TransformerModel, self).__init__()
        self.input_projection = nn.Linear(vocab_size, embedding_size)
        self.transformer = nn.Transformer(
            d_model=embedding_size,
            nhead=num_heads,
            num_encoder_layers=num_layers,
            dim_feedforward=hidden_size,
            dropout=dropout,
            batch_first=True
        )
        self.output_projection = nn.Linear(embedding_size, vocab_size)

    def forward(self, x):
        # x: [batch_size, sequence_length, vocab_size]
        embedded = self.input_projection(x)  # [batch_size, sequence_length, embedding_size]
        transformer_out = self.transformer.encoder(embedded)  # [batch_size, sequence_length, embedding_size]
        output = self.output_projection(transformer_out)  # [batch_size, sequence_length, vocab_size]
        return output

In [3]:
# Load model
model_name = "2025-01-08_11-05-32_e32_nh4_h128_l3_sl50_tr400"
model = torch.load(f'weights/model_4/{model_name}')
model.eval()

  model = torch.load(f'weights/model_4/{model_name}')


TransformerModel(
  (input_projection): Linear(in_features=85, out_features=32, bias=True)
  (transformer): Transformer(
    (encoder): TransformerEncoder(
      (layers): ModuleList(
        (0-2): 3 x TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=32, out_features=32, bias=True)
          )
          (linear1): Linear(in_features=32, out_features=128, bias=True)
          (dropout): Dropout(p=0.2, inplace=False)
          (linear2): Linear(in_features=128, out_features=32, bias=True)
          (norm1): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
          (norm2): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
          (dropout1): Dropout(p=0.2, inplace=False)
          (dropout2): Dropout(p=0.2, inplace=False)
        )
      )
      (norm): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
    )
    (decoder): TransformerDecoder(
      (layers): ModuleList(
        (0-5): 6 x

In [32]:
# Load input
input = midi_to_multiclass_vectors("data/input4.mid", 100)
sequence_length = 100  # Desired length of the generated sequence
noise_factor = 0.2
threshold = 0.45
print(len(input))
print(input)

15
[array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0

In [33]:
def generate_sequence(model, start_sequence, sequence_length, threshold=0.5, noise_factor=0.05):
    model.eval()

    start_tensor = torch.tensor(start_sequence, dtype=torch.float).unsqueeze(0).to(torch.device("cuda"))  # [1, sequence_length, input_size]

    generated_sequence = list(start_sequence)

    for i in range(sequence_length):
        with torch.no_grad():
            output = model(start_tensor)  # [1, i, output_size]
            logits = output.squeeze(0)  # Remove the batch dimension: [i, output_size]

            # Apply sigmoid individually to get probabilities for the next active notes
            output_probs = torch.sigmoid(logits)

            noise = torch.randn_like(output_probs) * noise_factor  # noise_factor controls the magnitude of randomness
            perturbed_probs = output_probs + noise
            perturbed_probs = torch.clamp(perturbed_probs, 0, 1)

            # Notes will be on if their probability is above specified threshold
            output_vector = (perturbed_probs[-1] > threshold).float()

            generated_sequence.append(output_vector)
            # generated_sequence_tensor = np.array(generated_sequence)
            start_tensor = torch.tensor(generated_sequence, dtype=torch.float32).unsqueeze(0).to(torch.device("cuda"))  # Add batch dimension
    
    return generated_sequence

generated_sequence = generate_sequence(model.to(torch.device("cuda")), input, sequence_length, threshold=threshold, noise_factor=noise_factor)

print("Generated Sequence:")
#print(generated_sequence)

Generated Sequence:


In [34]:
current_datetime = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
file_path = f"generations/model_4/{current_datetime}_nf{noise_factor}_w{sequence_length}_th{threshold}_M_{model_name}.mid"

multiclass_vectors_to_midi(generated_sequence, file_path, 400)

MIDI file saved to generations/model_4/2025-01-08_12-43-59_nf0.2_w100_th0.45_M_2025-01-08_11-05-32_e32_nh4_h128_l3_sl50_tr400.mid
