In [6]:
# Imports
import torch
from torch import nn
from src.utils.preprocess_utils import midi_to_multiclass_vectors, multiclass_vectors_to_midi
import numpy as np
import torch.nn.functional as F
from mido import MidiFile, MidiTrack, Message
from datetime import datetime

In [7]:
# Define model architecture
class TransformerModel(nn.Module):
    def __init__(self, vocab_size, embedding_size, num_heads, hidden_size, num_layers, dropout=0.2):
        super(TransformerModel, self).__init__()

        self.linear_in = nn.Linear(vocab_size, embedding_size)

        self.transformer = nn.Transformer(
            d_model=embedding_size,
            nhead=num_heads,
            num_encoder_layers=num_layers,
            num_decoder_layers=num_layers,
            dim_feedforward=hidden_size,
            dropout=dropout,
            batch_first=True
        )

        self.linear_out = nn.Linear(embedding_size, vocab_size)
        
    def forward(self, src, tgt):
        # src: [batch_size, sequence_length-1, vocab_size]
        # tgt: [batch_size, sequence_length-1, vocab_size]

        src_embedded = self.linear_in(src)  # [batch_size, seq_len, d_model]
        tgt_embedded = self.linear_in(tgt)  # [batch_size, seq_len, d_model]

        transformer_output = self.transformer(src_embedded, tgt_embedded)  # [batch_size, sequence_length-1, vocab_size]

        output = self.linear_out(transformer_output) # [batch_size, seq_len, vocab_size]

        return output

In [8]:
# Load model
model_name = "2025-01-13_17-52-44_e64_nh4_h128_l3_sl50_tr400"
model = torch.load(f'weights/model_5/{model_name}')
model.eval()

  model = torch.load(f'weights/model_5/{model_name}')


TransformerModel(
  (linear_in): Linear(in_features=85, out_features=64, bias=True)
  (transformer): Transformer(
    (encoder): TransformerEncoder(
      (layers): ModuleList(
        (0-2): 3 x TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
          )
          (linear1): Linear(in_features=64, out_features=128, bias=True)
          (dropout): Dropout(p=0.2, inplace=False)
          (linear2): Linear(in_features=128, out_features=64, bias=True)
          (norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
          (norm2): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
          (dropout1): Dropout(p=0.2, inplace=False)
          (dropout2): Dropout(p=0.2, inplace=False)
        )
      )
      (norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
    )
    (decoder): TransformerDecoder(
      (layers): ModuleList(
        (0-2): 3 x Transf

In [29]:
# Load input
input = midi_to_multiclass_vectors("data/input4.mid", 100)
sequence_length = 200  # Desired length of the generated sequence
noise_factor = 0.2
threshold = 0.45
print(len(input))

15


In [None]:
def generate_sequence(model, start_sequence, sequence_length, threshold=0.5, noise_factor=0.05):
    model.eval()

    start_tensor = torch.tensor(start_sequence, dtype=torch.float).unsqueeze(0).to(torch.device("cuda"))  # [1, sequence_length, input_size]

    generated_sequence = list(start_sequence)

    src = start_tensor
    tgt = start_tensor

    for i in range(sequence_length):
        with torch.no_grad():
            output = model(src, tgt)  # [1, i, output_size]
            logits = output.squeeze(0)  # [i, output_size]

            # Apply sigmoid individually to get probabilities for the next active notes
            output_probs = torch.sigmoid(logits)

            noise = torch.randn_like(output_probs) * noise_factor  # noise_factor controls the magnitude of randomness
            perturbed_probs = output_probs + noise
            perturbed_probs = torch.clamp(perturbed_probs, 0, 1)

            # Notes will be on if their probability is above specified threshold
            output_vector = (perturbed_probs[-1] > threshold).float()

            generated_sequence.append(output_vector)
            tgt = torch.cat((tgt, output_vector.unsqueeze(0).unsqueeze(0)), dim=1)
            src = tgt
    
    return generated_sequence

generated_sequence = generate_sequence(model.to(torch.device("cuda")), input, sequence_length, threshold=threshold, noise_factor=noise_factor)

print("Generated Sequence:")
print(generated_sequence)

Generated Sequence:
[array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [31]:
current_datetime = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
file_path = f"generations/model_5/{current_datetime}_nf{noise_factor}_w{sequence_length}_th{threshold}_M_{model_name}.mid"

multiclass_vectors_to_midi(generated_sequence, file_path, 400)

MIDI file saved to generations/model_5/2025-01-13_18-07-11_nf0.2_w200_th0.45_M_2025-01-13_17-52-44_e64_nh4_h128_l3_sl50_tr400.mid
