In [1]:
import os
import numpy as np
import pretty_midi
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader


In [2]:

# -----------------------------
# Data Preprocessing Functions
# -----------------------------

def midi_to_piano_roll(midi_file, fps=100):
    """
    Convert a MIDI file to a piano roll representation.
    Args:
        midi_file (str): Path to the MIDI file.
        fps (int): Frames per second for the piano roll.
    Returns:
        numpy.ndarray: Piano roll representation (time_steps, pitches).
    """
    midi_data = pretty_midi.PrettyMIDI(midi_file)
    piano_roll = midi_data.get_piano_roll(fs=fps).T  # Shape: (time_steps, pitches)
    piano_roll = (piano_roll > 0).astype(np.float32)  # Binary activation (note on/off)
    return piano_roll

def pad_or_crop(piano_roll, max_time_steps):
    """
    Ensure the piano roll has a fixed number of time steps by padding or cropping.
    """
    if piano_roll.shape[0] > max_time_steps:
        return piano_roll[:max_time_steps, :]
    else:
        padding = max_time_steps - piano_roll.shape[0]
        return np.pad(piano_roll, ((0, padding), (0, 0)), mode='constant')


In [3]:

# -----------------------------
# Custom Dataset for MIDI Data
# -----------------------------

class MIDIDataset(Dataset):
    def __init__(self, midi_dir, max_time_steps=128, fps=100):
        self.midi_files = [os.path.join(midi_dir, f) for f in os.listdir(midi_dir) if f.endswith('.mid')]
        self.max_time_steps = max_time_steps
        self.fps = fps

    def __len__(self):
        return len(self.midi_files)

    def __getitem__(self, idx):
        piano_roll = midi_to_piano_roll(self.midi_files[idx], fps=self.fps)
        piano_roll = pad_or_crop(piano_roll, self.max_time_steps)
        return torch.tensor(piano_roll, dtype=torch.float32)



In [4]:

# -----------------------------
# RNN Model
# -----------------------------

class RNNGenerator(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=2):
        super(RNNGenerator, self).__init__()

        self.rnn = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        rnn_out, _ = self.rnn(x)
        output = self.fc(rnn_out)
        return output


In [5]:

# -----------------------------
# Training Loop
# -----------------------------

def train_rnn(model, dataloader, num_epochs=10, lr=0.001):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    for epoch in range(num_epochs):
        model.train()
        epoch_loss = 0

        for piano_roll in dataloader:
            piano_roll = piano_roll.to(device)
            optimizer.zero_grad()

            # Forward pass
            output = model(piano_roll)
            loss = criterion(output, piano_roll)

            # Backward pass
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss / len(dataloader):.4f}")


In [6]:

# -----------------------------
# Ringtone Generation
# -----------------------------

def generate_ringtone(model, max_time_steps, device):
    """
    Generate a new ringtone from random input.
    """
    model.eval()
    random_input = torch.randn(1, max_time_steps, 128).to(device)  # Random input tensor

    with torch.no_grad():
        output = model(random_input)

    output_piano_roll = output.squeeze(0).cpu().numpy()
    return (output_piano_roll > 0.5).astype(np.float32)  # Binary thresholding

def piano_roll_to_midi(piano_roll, output_path, fps=100):
    """
    Convert a piano roll array back to a MIDI file.
    Args:
        piano_roll (numpy.ndarray): Piano roll array (time_steps, pitches).
        output_path (str): Path to save the MIDI file.
        fps (int): Frames per second (tempo).
    """
    midi_data = pretty_midi.PrettyMIDI()
    instrument = pretty_midi.Instrument(program=0)  # Default to Acoustic Grand Piano

    piano_roll = (piano_roll > 0.5).astype(int)  # Ensure binary
    for pitch in range(piano_roll.shape[1]):
        notes = np.where(piano_roll[:, pitch] == 1)[0]
        if len(notes) > 0:
            start_time = notes[0] / fps
            end_time = (notes[-1] + 1) / fps
            note = pretty_midi.Note(velocity=100, pitch=pitch, start=start_time, end=end_time)
            instrument.notes.append(note)

    midi_data.instruments.append(instrument)
    midi_data.write(output_path)
    print(f"Saved generated ringtone to {output_path}")


In [8]:

# -----------------------------
# Main Script
# -----------------------------

if __name__ == "__main__":
    # Configuration
    midi_dir = "../archive/"
    batch_size = 16
    max_time_steps = 300
    input_size = 128
    hidden_size = 256
    output_size = 128
    num_epochs = 25
    num_layers = 4

    # Dataset and DataLoader
    dataset = MIDIDataset(midi_dir, max_time_steps=max_time_steps)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    # Model
    model = RNNGenerator(input_size, hidden_size, output_size, num_layers)

    # Train
    train_rnn(model, dataloader, num_epochs=num_epochs)

    # Save Model
    torch.save(model.state_dict(), "rnn_midi_generator.pth")

    # Generate a Ringtone
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    ringtone_piano_roll = generate_ringtone(model, max_time_steps, device)

    # Save as MIDI
    piano_roll_to_midi(ringtone_piano_roll, "generated_ringtone.mid")

Epoch 1/25, Loss: 0.0205
Epoch 2/25, Loss: 0.0177
Epoch 3/25, Loss: 0.0157
Epoch 4/25, Loss: 0.0139
Epoch 5/25, Loss: 0.0123
Epoch 6/25, Loss: 0.0109
Epoch 7/25, Loss: 0.0096
Epoch 8/25, Loss: 0.0083
Epoch 9/25, Loss: 0.0071
Epoch 10/25, Loss: 0.0060
Epoch 11/25, Loss: 0.0051
Epoch 12/25, Loss: 0.0044
Epoch 13/25, Loss: 0.0038
Epoch 14/25, Loss: 0.0033
Epoch 15/25, Loss: 0.0030
Epoch 16/25, Loss: 0.0027
Epoch 17/25, Loss: 0.0023
Epoch 18/25, Loss: 0.0021
Epoch 19/25, Loss: 0.0018
Epoch 20/25, Loss: 0.0017
Epoch 21/25, Loss: 0.0016
Epoch 22/25, Loss: 0.0014
Epoch 23/25, Loss: 0.0012
Epoch 24/25, Loss: 0.0012
Epoch 25/25, Loss: 0.0011
Saved generated ringtone to generated_ringtone.mid
