In [1]:
import os
import random
import time as TimeLib

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch_geometric.data import Data, DataLoader
from torch_geometric.loader import DataLoader
from torch_geometric.nn import MessagePassing, global_mean_pool
from torch_geometric.utils import add_self_loops, degree, remove_self_loops, to_dense_adj, dense_to_sparse
from torch.nn import Linear, ReLU, BatchNorm1d, Module, Sequential
from torch_scatter import scatter
import torch_geometric.transforms as T
from torch_geometric.data import Data
from torch_geometric.data import Batch
from torch.optim import Adam
from torch.nn import MSELoss

import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt

from scipy.stats import ortho_group
from scipy.io.wavfile import write

import fluidsynth
import mido
import pretty_midi
import networkx as nx
from mido import MidiFile, MidiTrack, Message
NORMAL_GRAPH = 89
MINI_GRAPH = 21
SEQ_FEATURE_LENGTH = 20

In [2]:
# Get the number of available GPUs
num_gpus = torch.cuda.device_count()

# Print the number of available GPUs
print("Number of available GPUs: ", num_gpus)

# Print the name of each GPU
for i in range(num_gpus):
    print("GPU ", i, " name: ", torch.cuda.get_device_name(i))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Number of available GPUs:  1
GPU  0  name:  NVIDIA GeForce RTX 3070 Ti Laptop GPU


In [3]:
# Load the MIDI file
file = mido.MidiFile('example.midi')
msg_cnt = 0
data_tuples = []
for i, track in enumerate(file.tracks):
    print('Track {}: {}'.format(i, track.name))
    other_time = 0
    for msg in track:
        other_time += msg.time
        if msg.type == 'note_on':
            note_tuple = (msg.note - 20, msg.velocity, other_time)
            other_time = 0
            data_tuples.append(note_tuple)
            msg_cnt += 1
    print(msg_cnt)

velocity_values = [t[1] for t in data_tuples]
timestep_values = [t[2] for t in data_tuples]

velocity_mean = 0 #np.mean(velocity_values)
velocity_std = np.std(velocity_values)
timestep_mean = 0 #np.mean(timestep_values)
timestep_std = np.std(timestep_values)
print(data_tuples[:30])
initial_data_tuples = data_tuples.copy()
data_tuples = [(t[0], (t[1] - 0) / velocity_std, (t[2] - 0) / timestep_std) for t in data_tuples]
print(data_tuples[:30])

Track 0: 
0
Track 1: 
15788
[(51, 60, 1049), (51, 0, 93), (35, 44, 86), (51, 54, 9), (39, 55, 168), (35, 0, 32), (39, 0, 129), (42, 52, 2), (42, 0, 115), (52, 76, 32), (51, 0, 7), (47, 56, 9), (52, 0, 24), (37, 61, 149), (54, 68, 0), (47, 0, 16), (52, 77, 36), (54, 0, 58), (54, 51, 0), (52, 0, 8), (52, 60, 41), (47, 57, 22), (54, 0, 10), (47, 0, 94), (46, 58, 46), (46, 0, 174), (52, 0, 10), (37, 0, 3), (51, 68, 0), (44, 35, 34)]
[(51, 1.6886551024520846, 10.53108590593801), (51, 0.0, 0.9336425064368302), (35, 1.2383470751315289, 0.863368339285671), (51, 1.5197895922068763, 0.09035250062291905), (39, 1.547933843914411, 1.6865800116278225), (35, 0.0, 0.32125333554815666), (39, 0.0, 1.2950525089285065), (42, 1.4635010887918067, 0.02007833347175979), (42, 0.0, 1.154504174626188), (52, 2.1389631297726406, 0.32125333554815666), (51, 0.0, 0.07027416715115926), (47, 1.5760780956219458, 0.09035250062291905), (52, 0.0, 0.24094000166111748), (37, 1.7167993541596194, 1.4958358436461043), (54, 1.91

In [4]:
def create_piano_graph(num_nodes):
    edges = [(i, i + 1) for i in range(1, num_nodes - 1)] + [(i + 1, i) for i in range(1, num_nodes - 1)]
    edges += [(0, i) for i in range(1, num_nodes)] + [(i, 0) for i in range(1, num_nodes)]
    edge_index = torch.tensor(edges, dtype=torch.long, device='cuda:0').t().contiguous()

    edge_features = [1] * (len(edges) - 2 * (num_nodes - 1)) + [0] * (2 * (num_nodes - 1))
    edge_attr = torch.tensor(edge_features, dtype=torch.float, device='cuda:0').view(-1, 1)
    print(edge_attr.shape)
    return edge_index, edge_attr

In [5]:

edge_index, edge_attr = create_piano_graph(NORMAL_GRAPH)
edge_index_mini, edge_attr_mini = create_piano_graph(MINI_GRAPH)
x = torch.zeros((len(data_tuples), NORMAL_GRAPH, SEQ_FEATURE_LENGTH), dtype=torch.float)

torch.Size([350, 1])
torch.Size([78, 1])


In [6]:
for i, (note, velocity, timestep) in enumerate(data_tuples):
    if i > 0:
        x[i, :, :-1] = x[i-1, :, 1:]  # Shift all rows one to the front
        x[i, :, -1] = x[i-1, :, -1]
    # Normalize velocity and timestep
    x[i, note, -1] = velocity
    x[i, 0, -1] = timestep

In [7]:
graph_data_list = []

for i in range(len(x)):
    data = Data(x=x[i], edge_index=edge_index, edge_attr=edge_attr,  y=x[i].view(1, 89, 20))
    graph_data_list.append(data)
train_dataset = graph_data_list[:1000]
val_dataset = graph_data_list[1000:1250]
test_dataset = graph_data_list[1250:1500]

In [8]:
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

In [9]:
class MPNNLayer(MessagePassing):
    def __init__(self, emb_dim, edge_dim, aggr='add'):
        # Set the aggregation function
        super().__init__(aggr=aggr)

        self.emb_dim = emb_dim
        self.edge_dim = edge_dim

        self.mlp_msg = Sequential(
            Linear(2*emb_dim + edge_dim, emb_dim), BatchNorm1d(emb_dim), ReLU(),
            Linear(emb_dim, emb_dim), BatchNorm1d(emb_dim), ReLU()
          )

        self.mlp_upd = Sequential(
            Linear(2*emb_dim, emb_dim), BatchNorm1d(emb_dim), ReLU(),
            Linear(emb_dim, emb_dim), BatchNorm1d(emb_dim), ReLU()
          )

    def forward(self, h, edge_index, edge_attr):
        out = self.propagate(edge_index, h=h, edge_attr=edge_attr)
        return out

    def message(self, h_i, h_j, edge_attr):
        msg = torch.cat([h_i, h_j, edge_attr], dim=-1)
        return self.mlp_msg(msg)

    def aggregate(self, inputs, index):
        return scatter(inputs, index, dim=self.node_dim, reduce=self.aggr)

    def update(self, aggr_out, h):
        upd_out = torch.cat([h, aggr_out], dim=-1)
        return self.mlp_upd(upd_out)

    def __repr__(self) -> str:
        return (f'{self.__class__.__name__}(emb_dim={self.emb_dim}, aggr={self.aggr})')

In [24]:
class MPNNModelEnDecoder(Module):
    def __init__(self, num_layers, emb_dim, in_feature_dim, edge_dim, out_node_count, out_feature_dim):
        super().__init__()

        self.out_feature_dim = out_feature_dim
        self.out_node_count = out_node_count
        # Linear projection for initial node features
        # dim: d_n -> d
        self.lin_in = Linear(in_feature_dim, emb_dim)

        # Stack of MPNN layers
        self.convs = torch.nn.ModuleList()
        for layer in range(num_layers):
            self.convs.append(MPNNLayer(emb_dim, edge_dim, aggr='add'))

        # Global pooling/readout function `R` (mean pooling)
        # PyG handles the underlying logic via `global_mean_pool()`
        self.pool = global_mean_pool

        # Linear prediction head
        # dim: d -> num_nodes * out_dim
        self.lin_pred = Linear(emb_dim, out_node_count * out_feature_dim)
        # n nodes in the graph
        self.relu = nn.ReLU()

    def forward(self, data):
        print(" THE NUMBER OF TIMES WE ARE HEREEEEEEEEEEE")
        h = self.lin_in(data.x)  # (n, d_n) -> (n, d)
        print(data.edge_index.shape)
        for conv in self.convs:
            h = h + conv(h, data.edge_index, data.edge_attr)  # (n, d) -> (n, d)
            # Note that we add a residual connection after each MPNN layer

        print("we are here in pool:")
        h_graph = self.pool(h, data.batch)  # (n, d) -> (batch_size, d)
        print(h_graph.shape)

        out_feature_matrix = self.lin_pred(h_graph)  # (batch_size, d) -> (batch_size, num_nodes * out_dim)
        print("o:", out_feature_matrix.shape)
        # Reshape the output to the desired matrix shape (batch_size, num_nodes, out_dim)
        out_matrix = out_feature_matrix.view(-1, self.out_node_count, self.out_feature_dim)
        out_matrix[:, 0, :] = self.relu(out_matrix[:, 0, :])

        out_matrix[:, 1:, :] = out_matrix[:, 1:, :].clamp(0, 1)
        return out_matrix

In [25]:
# class Decoder(Module):
#     def __init__(self, in_feature_dim=20, out_dim=20, in_nodes=10, out_nodes=89):
#         super().__init__()
#
#         self.in_feature_dim = in_feature_dim
#         self.out_dim = out_dim
#         self.in_nodes = in_nodes
#         self.out_nodes = out_nodes
#
#         self.fc1 = Linear(in_feature_dim * in_nodes, 256)
#         self.fc2 = Linear(256, 512)
#         self.fc3 = Linear(512, out_dim * out_nodes)
#         self.relu = ReLU()
#
#     def forward(self, x):
#         x = x.view(-1, self.in_nodes * self.in_feature_dim)  # Flatten input matrix
#         x = self.relu(self.fc1(x))
#         x = self.relu(self.fc2(x))
#         x = self.fc3(x)
#
#         # Reshape the output to the desired matrix shape (batch_size, out_nodes, out_dim)
#         out_matrix = x.view(-1, self.out_nodes, self.out_dim)
#
#         return out_matrix


In [38]:
class GraphUNet(Module):
    def __init__(self, num_layers=4, emb_dim=64, in_feature_dim=SEQ_FEATURE_LENGTH, edge_dim=1, out_node_count = MINI_GRAPH, out_feature_dim=SEQ_FEATURE_LENGTH, in_node_count=NORMAL_GRAPH):
        super().__init__()
        self.encoder = MPNNModelEnDecoder(num_layers, emb_dim, in_feature_dim , edge_dim, out_node_count, out_feature_dim)
        self.decoder = MPNNModelEnDecoder(num_layers, emb_dim, out_feature_dim, edge_dim, in_node_count , in_feature_dim)

    def forward(self, data):
        h_encoded = self.encoder(data)
        print("wtf?",data.batch)

        data_decoded = data.clone()
        print(":::::::", data_decoded.edge_attr.shape)
        data_decoded.x = h_encoded.view(-1, self.encoder.out_feature_dim)
        data_decoded.batch = torch.tensor(([0] * MINI_GRAPH), dtype=torch.int64, device='cuda:0')
        data_decoded.edge_index = edge_index_mini
        data_decoded.edge_attr = edge_attr_mini

        h_decoded = self.decoder(data_decoded)

        return h_decoded


In [39]:
# 3. Initialize the model, optimizer, and loss function
model = GraphUNet().to(device)
optimizer = Adam(model.parameters(), lr=0.001)

def train(model, train_loader, optimizer, device):
    model.train()
    loss_all = 0

    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        y_pred = model(data)
        loss = F.mse_loss(y_pred, data.y)
        loss.backward()
        loss_all += loss.item() * data.num_graphs
        optimizer.step()
    return loss_all / len(train_loader.dataset)

def eval(model, loader, device):
    model.eval()
    error = 0

    for data in loader:
        data = data.to(device)
        with torch.no_grad():
            y_pred = model(data)
            # Mean Absolute Error using std (computed when preparing data)
            error += (y_pred * std - data.y * std).abs().sum().item()
    return error / len(loader.dataset)

In [40]:
def run_experiment(model, model_name, train_loader, val_loader, test_loader, n_epochs=100):

    print(f"Running experiment for {model_name}, training on {len(train_loader.dataset)} samples for {n_epochs} epochs.")

    print("\nModel architecture:")
    print(model)
    total_param = 0
    for param in model.parameters():
        total_param += np.prod(list(param.data.size()))
    print(f'Total parameters: {total_param}')
    model = model.to(device)

    # Adam optimizer with LR 1e-3
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # LR scheduler which decays LR when validation metric doesn't improve
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.9, patience=5, min_lr=0.00001)

    print("\nStart training:")
    best_val_error = None
    perf_per_epoch = [] # Track Test/Val MAE vs. epoch (for plotting)
    t = TimeLib.time()
    for epoch in range(1, n_epochs+1):
        # Call LR scheduler at start of each epoch
        lr = scheduler.optimizer.param_groups[0]['lr']

        # Train model for one epoch, return avg. training loss
        loss = train(model, train_loader, optimizer, device)

        # Evaluate model on validation set
        val_error = eval(model, val_loader, device)

        if best_val_error is None or val_error <= best_val_error:
            # Evaluate model on test set if validation metric improves
            test_error = eval(model, test_loader, device)
            best_val_error = val_error

        if epoch % 10 == 0:
            # Print and track stats every 10 epochs
            print(f'Epoch: {epoch:03d}, LR: {lr:5f}, Loss: {loss:.7f}, '
                  f'Val MAE: {val_error:.7f}, Test MAE: {test_error:.7f}')

        scheduler.step(val_error)
        perf_per_epoch.append((test_error, val_error, epoch, model_name))

    t = TimeLib.time() - t
    train_time = t/60
    print(f"\nDone! Training took {train_time:.2f} mins. Best validation MAE: {best_val_error:.7f}, corresponding test MAE: {test_error:.7f}.")

    return best_val_error, test_error, train_time, perf_per_epoch

In [41]:
model = GraphUNet(num_layers=4, emb_dim=64, in_feature_dim = SEQ_FEATURE_LENGTH, edge_dim=1, out_node_count = MINI_GRAPH, out_feature_dim=SEQ_FEATURE_LENGTH, in_node_count=89)
model_name = type(model).__name__
best_val_error, test_error, train_time, perf_per_epoch = run_experiment(
    model,
    model_name,
    train_loader,
    val_loader,
    test_loader,
    n_epochs=100
)
RESULTS[model_name] = (best_val_error, test_error, train_time)
df_temp = pd.DataFrame(perf_per_epoch, columns=["Test MAE", "Val MAE", "Epoch", "Model"])
DF_RESULTS = DF_RESULTS.append(df_temp, ignore_index=True)

Running experiment for GraphUNet, training on 1000 samples for 100 epochs.

Model architecture:
GraphUNet(
  (encoder): MPNNModelEnDecoder(
    (lin_in): Linear(in_features=20, out_features=64, bias=True)
    (convs): ModuleList(
      (0-3): 4 x MPNNLayer(emb_dim=64, aggr=add)
    )
    (lin_pred): Linear(in_features=64, out_features=420, bias=True)
    (relu): ReLU()
  )
  (decoder): MPNNModelEnDecoder(
    (lin_in): Linear(in_features=20, out_features=64, bias=True)
    (convs): ModuleList(
      (0-3): 4 x MPNNLayer(emb_dim=64, aggr=add)
    )
    (lin_pred): Linear(in_features=64, out_features=1780, bias=True)
    (relu): ReLU()
  )
)
Total parameters: 348952

Start training:
 THE NUMBER OF TIMES WE ARE HEREEEEEEEEEEE
torch.Size([2, 350])
we are here in pool:
torch.Size([1, 64])
o: torch.Size([1, 420])
wtf? tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0,

RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [1, 88, 20]], which is output 0 of AsStridedBackward0, is at version 2; expected version 1 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).

In [None]:
graph_output_list = []

# Process each data element and store the output
for data in graph_data_list:
    data = data.to(device)
    with torch.no_grad():
        output = model(data)
    graph_output_list.append(output.cpu())

In [None]:
def extract_tuples(x):
    extracted_data_tuples = []
    for i in range(0, len(x)):
        if i > 0:
            prev_x = x[i - 1].squeeze()
        else:
            prev_x = torch.zeros_like(x[i]).squeeze()
        curr_x = x[i].squeeze()
        diff_mask = torch.abs(prev_x[:, -1] - curr_x[:, -1])
        mask = torch.zeros_like(diff_mask)
        mask[1:] = 1  # exclude 0th index
        masked_diff = diff_mask * mask  # apply the mask
        changed_note = torch.argmax(masked_diff).item()
        # Get the velocity and timestep
        velocity = curr_x[changed_note, -1].item()
        timestep = curr_x[0, -1].item()

        extracted_data_tuples.append((changed_note, velocity, timestep))

    return extracted_data_tuples
all_data_tuples = []
all_data_tuples = extract_tuples(graph_output_list)

In [None]:
print(data_tuples[0:5])
print(all_data_tuples[0:5])

In [None]:
def tuples_to_midi(data_tuples, filename, velocity_std, timestep_std):
    # Initialize a new MIDI file and track
    midi_file = MidiFile()
    track = MidiTrack()
    midi_file.tracks.append(track)

    # Process tuples and add MIDI messages to the track
    COUNTER = 0
    for note, velocity, timestep in data_tuples:
        # De-normalize velocity and timestep
        velocity = round(velocity * velocity_std)
        timestep = round(timestep * timestep_std)

        ini_tuple = initial_data_tuples[COUNTER]

        if note != ini_tuple[0] or velocity != ini_tuple[1] or timestep != ini_tuple[2]:
            print(initial_data_tuples[COUNTER])
            print(note, velocity, timestep)
            print()
        COUNTER+=1
        # Adjust note range
        note = note + 20

        # Clamp values to the valid MIDI range
        note = max(21, min(108, note))
        velocity = max(0, min(127, velocity))
        #velocity = 64

        # Add MIDI messages to the track
        track.append(Message('note_on', note=note, velocity=velocity, time=timestep))

    # Save the MIDI file
    midi_file.save(filename)
tuples_to_midi(data_tuples, 'output.mid', velocity_std, timestep_std)