In [50]:
import numpy as np
from scipy.io.wavfile import write
import os
import partitura
import torch.nn as nn
import torch.optim as optim
import copy
import torch
import tqdm
from sklearn.model_selection import train_test_split
import pandas as pd
import math
import soundfile as sf
import re

def note_to_frequency(note_name):
    notes = ['A', 'A#', 'B', 'C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#']
    octave = int(note_name[-1])
    if note_name[:-1].upper()=="BB":
        note_index = 1
    else:
        note_index = notes.index(note_name[:-1].upper())
    
    midi_number = (octave + 1) * 12 + note_index - 9
    frequency = 440 * math.pow(2, (midi_number - 69) / 12)
    return frequency

def generate_sine_wave(frequency, duration, sample_rate=44100, amplitude=1.0):
    """Generates a sine wave at a given frequency for a specified duration."""
    t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)
    y = amplitude * np.sin(2 * np.pi * frequency * t)
    return y

def create_wav_from_notes(note_names, durations, filename="output.wav", sample_rate=44100):
    audio_data = np.array([])
    for note_name, duration in zip(note_names, durations):
        frequency = note_to_frequency(note_name)
        sine_wave = generate_sine_wave(frequency, duration, sample_rate)
        audio_data = np.concatenate([audio_data, sine_wave])

    sf.write(filename, audio_data, sample_rate)

In [51]:
all_score_ids = []
all_score_voice = []
all_score_staff = []
all_score_type = []
all_score_tie_group = []
all_score_pitch = []


for file in os.listdir("./XML_Files"):
    score = partitura.load_score("./XML_Files/"+file)
    # Iterate through the notes in the first part
    for note in score.parts[0].notes:
        try:
            note_attributes = str(note).split(" ")

            if len(note_attributes)==8:
                note_id = note_attributes[2].split("=")[-1]
                voice = note_attributes[3].split("=")[-1]
                staff = note_attributes[4].split("=")[-1]
                note_type = note_attributes[5].split("=")[-1]
                tie_group = note_attributes[6].split("=")[-1]
                pitch = note_attributes[7].split("=")[-1]
            elif len(note_attributes)==7:
                note_id = note_attributes[2].split("=")[-1]
                voice = note_attributes[3].split("=")[-1]
                staff = note_attributes[4].split("=")[-1]
                note_type = note_attributes[5].split("=")[-1]
                tie_group = None
                pitch = note_attributes[6].split("=")[-1]
                
                all_score_ids.append(note_id)
                all_score_voice.append(voice)
                all_score_staff.append(staff)
                all_score_type.append(note_type)
                all_score_tie_group.append(tie_group)
                all_score_pitch.append(pitch)
        except:
            pass




In [52]:
dictionary_pitches = {}
dictionary_types = {}

list_numeric_pitches = []
list_numeric_note_durations = []

def convert_categorical_to_numeric(df):

  df_numeric = df

  for index, val in enumerate(df_numeric[3].unique()):
    dictionary_types[val] = float(index)
  
  for i in df_numeric[3]:
     list_numeric_note_durations.append(dictionary_types[i])

  for index, val in enumerate(df_numeric[5].unique()):
    dictionary_pitches[val] = float(index)
  
  for i in df_numeric[5]:
     list_numeric_pitches.append(dictionary_pitches[i])

  for col in df_numeric.drop([3, 5], axis=1).columns:
      if pd.api.types.is_categorical_dtype(df_numeric[col]) or df_numeric[col].dtype == 'object':
        df_numeric[col] = pd.Categorical(df_numeric[col]).codes

  df_numeric[3] = list_numeric_note_durations
  df_numeric[5] = list_numeric_pitches
  
  return df_numeric

def reverse_dict(my_dict):
    return {value: key for key, value in my_dict.items()}

### Predict the next notes

In [53]:

df = pd.DataFrame([all_score_ids, all_score_voice, all_score_staff, all_score_type, all_score_tie_group, all_score_pitch]).transpose()
df2 = convert_categorical_to_numeric(df)

X = df2
y = list_numeric_pitches

# Define the model
model = nn.Sequential(
    nn.Linear(6, 24),
    nn.ReLU(),
    nn.Linear(24, 12),
    nn.ReLU(),
    nn.Linear(12, 6),
    nn.ReLU(),
    nn.Linear(6, 1)
)

# loss function and optimizer
loss_fn = nn.MSELoss()  # mean square error
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# train-test split of the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, shuffle=True)

X_train = torch.tensor(np.array(X_train), dtype=torch.float)
y_train = torch.tensor(y_train, dtype=torch.float)
X_test = torch.tensor(np.array(X_test), dtype=torch.float)
y_test = torch.tensor(y_test, dtype=torch.float)

# training parameters
n_epochs = 100   # number of epochs to run
batch_size = 10  # size of each batch
batch_start = torch.arange(0, len(X_train), batch_size)

# Hold the best model
best_mse = np.inf   # init to infinity
best_weights = None
history = []

# training loop
for epoch in range(n_epochs):
    model.train()
    with tqdm.tqdm(batch_start, unit="batch", mininterval=0, disable=True) as bar:
        bar.set_description(f"Epoch {epoch}")
        for start in bar:
            # take a batch
            X_batch = X_train[start:start+batch_size]
            y_batch = y_train[start:start+batch_size]
            # forward pass
            y_pred = model(X_batch)
            loss = loss_fn(y_pred, y_batch)
            # backward pass
            optimizer.zero_grad()
            loss.backward()
            # update weights
            optimizer.step()
            # print progress
            bar.set_postfix(mse=float(loss))
    # evaluate accuracy at end of each epoch
    model.eval()
    y_pred = model(X_test)
    mse = loss_fn(y_pred, y_test)
    mse = float(mse)
    history.append(mse)
    if mse < best_mse:
        best_mse = mse
        best_weights = copy.deepcopy(model.state_dict())

# restore model and return best accuracy
model.load_state_dict(best_weights)

  if pd.api.types.is_categorical_dtype(df_numeric[col]) or df_numeric[col].dtype == 'object':
  if pd.api.types.is_categorical_dtype(df_numeric[col]) or df_numeric[col].dtype == 'object':
  if pd.api.types.is_categorical_dtype(df_numeric[col]) or df_numeric[col].dtype == 'object':
  if pd.api.types.is_categorical_dtype(df_numeric[col]) or df_numeric[col].dtype == 'object':
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


<All keys matched successfully>

### Predict the next note type (duration)

In [54]:
X = df2
y = list_numeric_note_durations

# Define the model
model2 = nn.Sequential(
    nn.Linear(6, 24),
    nn.ReLU(),
    nn.Linear(24, 12),
    nn.ReLU(),
    nn.Linear(12, 6),
    nn.ReLU(),
    nn.Linear(6, 1)
)

# loss function and optimizer
loss_fn = nn.MSELoss()  # mean square error
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# train-test split of the dataset
X_train2, X_test2, y_train2, y_test2 = train_test_split(X, y, train_size=0.7, shuffle=True)

X_train2 = torch.tensor(np.array(X_train2), dtype=torch.float)
y_train2 = torch.tensor(y_train2, dtype=torch.float)
X_test2 = torch.tensor(np.array(X_test2), dtype=torch.float)
y_test2 = torch.tensor(y_test2, dtype=torch.float)

# training parameters
n_epochs = 100   # number of epochs to run
batch_size = 10  # size of each batch
batch_start = torch.arange(0, len(X_train2), batch_size)

# Hold the best model
best_mse = np.inf   # init to infinity
best_weights = None
history = []

# training loop
for epoch in range(n_epochs):
    model2.train()
    with tqdm.tqdm(batch_start, unit="batch", mininterval=0, disable=True) as bar:
        bar.set_description(f"Epoch {epoch}")
        for start in bar:
            # take a batch
            X_batch = X_train[start:start+batch_size]
            y_batch = y_train[start:start+batch_size]
            # forward pass
            y_pred = model(X_batch)
            loss = loss_fn(y_pred, y_batch)
            # backward pass
            optimizer.zero_grad()
            loss.backward()
            # update weights
            optimizer.step()
            # print progress
            bar.set_postfix(mse=float(loss))
    # evaluate accuracy at end of each epoch
    model2.eval()
    y_pred = model2(X_test2)
    mse = loss_fn(y_pred, y_test)
    mse = float(mse)
    history.append(mse)
    if mse < best_mse:
        best_mse = mse
        best_weights = copy.deepcopy(model.state_dict())

# restore model and return best accuracy
model2.load_state_dict(best_weights)

<All keys matched successfully>

In [55]:
predictions = model(X_test)
predictions_numpy = pd.Series(np.round(predictions.flatten().detach().numpy()))

pitches_index_dictionary = reverse_dict(dictionary_pitches)

predictions_pitch_categorical = []

for i in predictions_numpy:
    predictions_pitch_categorical.append(pitches_index_dictionary[i])

In [56]:
predictions2 = model2(X_test2)
predictions_numpy2 = pd.Series(np.round(predictions2.flatten().detach().numpy()))

duration_index_dictionary = reverse_dict(dictionary_types)

predictions_duration_categorical = []

for i in predictions_numpy2:
    try:
        predictions_duration_categorical.append(duration_index_dictionary[i])
    except:
        predictions_duration_categorical.append(duration_index_dictionary[3])

In [57]:
predictions_duration_transformed = []

for i in predictions_duration_categorical:
    if "16" in i:
        predictions_duration_transformed.append(1/16)
    elif "32" in i:
        predictions_duration_transformed.append(1/32)
    elif "128" in i:
        predictions_duration_transformed.append(1/128)
    elif "eigth" in i:
        predictions_duration_transformed.append(1/8)
    elif "half" in i:
        predictions_duration_transformed.append(1/2)
    elif "quarter" in i:
        predictions_duration_transformed.append(1/4)
    elif "whole" in i:
        predictions_duration_transformed.append(1)

In [58]:
predictions = model(X_test)
predictions_numpy = pd.Series(np.round(predictions.flatten().detach().numpy()))

pitches_index_dictionary = reverse_dict(dictionary_pitches)

predictions_pitch_categorical = []

for i in predictions_numpy:
    predictions_pitch_categorical.append(pitches_index_dictionary[i])

In [61]:
create_wav_from_notes(predictions_pitch_categorical[0:100], predictions_duration_transformed[0:100], "short_melody.wav")