In [1]:
pip install protobuf==3.20.*


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3.8 install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import numpy as np

  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [3]:
import glob
import os

import librosa
import librosa.display

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout

import music21
import mido
from mido import MidiFile, MidiTrack, Message

In [4]:
# Get a list of csv files
csv_files = glob.glob("data/csv/*.csv")

# Get a list of wav files
audio_files = glob.glob("data/wav/*.wav")

wav_file_names = {file.split("/")[-1].split(".")[0] for file in audio_files}

all_mfccs = []
all_labels = []

for csv_file in csv_files:
    
    csv_file_name = csv_file.split("/")[-1].split(".")[0]
    
    if csv_file_name in wav_file_names:
        # Load the CSV file that contains columns 'start', 'end', 'pitch', 'syllable'
        df = pd.read_csv(csv_file)
        
        # Load and process the audio file corresponding to the row
        audio_file = f'data/wav/{csv_file_name}.wav'  
        y, sr = librosa.load(audio_file, sr=None)

        # Iterate over each row in the DataFrame to process each audio file
        for index, row in df.iterrows():
            start_time = row['start']
            end_time = row['end']
            pitch = row['pitch']
            syllable = row['syllable']
           
            start_sample = int(start_time * sr)
            end_sample = int(end_time * sr)
            audio_segment = y[start_sample:end_sample]
            mfccs = librosa.feature.mfcc(y=audio_segment, sr=sr, n_mfcc=13)

            # Append MFCCs and labels to the lists
            all_mfccs.append(mfccs.T)  # Transpose to have time_steps x features
            all_labels.append([start_time, end_time, pitch])

# Convert lists to numpy arrays
all_mfccs = np.concatenate(all_mfccs, axis=0)
all_labels = np.array(all_labels)

print(all_mfccs)
print(all_labels)



[[-391.43073    155.47241     17.796213  ...   -4.6647406    3.875946
   -15.378936 ]
 [-393.21747    147.7681       4.0019064 ...   -0.7845236    8.864843
   -24.797276 ]
 [-419.03735    124.122314   -10.7456455 ...   -1.1820716   10.188527
   -24.609009 ]
 ...
 [-574.42633     98.704926    23.784882  ...   -7.4547267  -14.941587
   -18.76279  ]
 [-568.67847    112.97853     27.634674  ...  -12.145161   -15.410059
   -15.896633 ]
 [-547.70917    140.97972     35.77067   ...   -9.35202    -14.029166
   -16.116203 ]]
[[ 2.1984  2.4922 62.    ]
 [ 2.5883  2.957  62.    ]
 [ 2.9922  3.2016 62.    ]
 ...
 [73.7356 74.2529 64.    ]
 [74.2529 74.4396 67.    ]
 [74.5115 75.9195 65.    ]]


In [5]:
print(all_mfccs.shape)
print(all_labels.shape)

(313614, 13)
(8484, 3)


In [6]:
filtered_mfccs = all_mfccs[:all_labels.shape[0]]

print(filtered_mfccs.shape)

(8484, 13)


In [7]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(filtered_mfccs, all_labels, test_size=0.2, random_state=42)

In [8]:
# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Reshape features for LSTM input (n_samples, n_timesteps, n_features)
X_train_reshaped = X_train_scaled.reshape(X_train_scaled.shape[0], 1, X_train_scaled.shape[1])
X_test_reshaped = X_test_scaled.reshape(X_test_scaled.shape[0], 1, X_test_scaled.shape[1])

In [9]:
# Print shapes 
print("X_train shape:", X_train_reshaped.shape)
print("y_train shape:", y_train.shape)
print("X_test shape:", X_test_reshaped.shape)
print("y_test shape:", y_test.shape)

X_train shape: (6787, 1, 13)
y_train shape: (6787, 3)
X_test shape: (1697, 1, 13)
y_test shape: (1697, 3)


In [10]:
# Build the LSTM model with two LSTM layers 
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(None, X_train_reshaped.shape[2])))
model.add(Dropout(0.2))  # Optional Dropout layer for regularization
model.add(LSTM(50))
model.add(Dropout(0.2))
model.add(Dense(50, activation='tanh'))  
model.add(Dense(1)) 

# Compile the model
model.compile(loss='mse', optimizer='SGD')

# Train the model
model.fit(X_train_reshaped, y_train, validation_data=(X_test_reshaped, y_test),epochs=10, batch_size=32)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f877f8f2970>

In [14]:
output_directory = "midi-output/"

def audio_to_midi(audio_file_path, model, scaler):
    # Load and process the audio file
    y, sr = librosa.load(audio_file_path, sr=None)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    mfccs_scaled = scaler.transform(mfccs.T)  
    mfccs_reshaped = mfccs_scaled.reshape(1, mfccs_scaled.shape[0], mfccs_scaled.shape[1])

    # Predict the labels (e.g., start_time, end_time, pitch) using the model
    predictions = model.predict(mfccs_reshaped)

    # Post-process the predictions to MIDI format
    midi_notes = []
    print(predictions[0])
    for prediction in predictions[0]: 
        pitch = int(prediction)  
        midi_notes.append((pitch, 256)) 

    # Create a new MIDI file and track
    mid = MidiFile(ticks_per_beat=480)
    track = MidiTrack()
    mid.tracks.append(track)
    
    current_time = 0

    # Add notes to the MIDI track
    for note, duration in midi_notes:
        duration_ticks = int(duration * mid.ticks_per_beat)
        track.append(Message('note_on', note=note, velocity=64, time=current_time))
        track.append(Message('note_off', note=note, velocity=64, time=duration_ticks))
        current_time += duration_ticks

    # Save the MIDI file
    audio_file_name = os.path.basename(audio_file_path).replace('.wav', '.mid')
    midi_file_path = os.path.join(output_directory, audio_file_name)
    mid.save(midi_file_path)
    print(f"MIDI file saved: {midi_file_path}")

# Loop through each WAV file to generate a MIDI file for it
for audio_file in audio_files:
    audio_to_midi(audio_file, model, scaler)

[62.750164]
MIDI file saved: midi-output/en016b.mid
[62.396114]
MIDI file saved: midi-output/en020a.mid
[62.75805]
MIDI file saved: midi-output/en016a.mid
[61.491085]
MIDI file saved: midi-output/en020b.mid
[63.922276]
MIDI file saved: midi-output/en021b.mid
[62.88551]
MIDI file saved: midi-output/en017a.mid
[61.539154]
MIDI file saved: midi-output/en001a.mid
[63.264946]
MIDI file saved: midi-output/en021a.mid
[63.153893]
MIDI file saved: midi-output/en017b.mid
[60.62855]
MIDI file saved: midi-output/en001b.mid
[64.47045]
MIDI file saved: midi-output/en006a.mid
[64.240906]
MIDI file saved: midi-output/en010a.mid
[63.35501]
MIDI file saved: midi-output/en026b.mid
[63.571663]
MIDI file saved: midi-output/en006b.mid
[64.71169]
MIDI file saved: midi-output/en010b.mid
[64.25761]
MIDI file saved: midi-output/en030a.mid
[63.020054]
MIDI file saved: midi-output/en026a.mid
[62.06268]
MIDI file saved: midi-output/en027a.mid
[62.478737]
MIDI file saved: midi-output/en011b.mid
[54.1328]
MIDI file 

In [18]:
import subprocess

# Get a list of all midi files
midi_files = glob.glob("midi-output/*.mid")

for midi_file in midi_files:
    midi_file_name = midi_file.split("/")[-1].split(".")[0]
    midi_stream = music21.converter.parse(midi_file)

    # Convert MIDI file into a music21 Score object
    score = music21.stream.Score()
    for part in midi_stream.parts:
        # Instead of inserting at position 0, insert at the end of the score
        score.append(part)

    # Save it as a PDF 
    ly_file_path = f"lilypond-output/{midi_file_name}.ly"
    score.write('lilypond', ly_file_path)

    # Using subprocess (requires LilyPond installed on your system):
    output_file_name = f"sheet-music-output/{midi_file_name}_sheet"
    subprocess.run(['lilypond', '--pdf', '-o', output_file_name, ly_file_path])  # Convert to PDF