In [1]:
import librosa
from sklearn.preprocessing import StandardScaler
from keras.models import load_model
import numpy as np
import joblib
import pretty_midi

In [2]:
# idea is to 
# 1) extract on and offset
# 2) do the same feature extraction method as training
# 3) load the model and predict notes
# 4) use the onset offset info together convert to midi
# 5) convert to sheet

In [3]:
# Load audio file
audio_path = "../data/wav/en001b.wav"
audio_data, sr = librosa.load(audio_path)

In [4]:
# Extract onset and offset timings
onset_frames = librosa.onset.onset_detect(y=audio_data, sr=sr, backtrack=True)
onset_times = librosa.frames_to_time(onset_frames, sr=sr)
offset_times = librosa.frames_to_time(onset_frames[1:], sr=sr)  # Assuming offset is the next onset

In [5]:
# Extract tempo
tempo, _ = librosa.beat.beat_track(y=audio_data, sr=sr)

In [6]:
if len(onset_times)<len(offset_times):
    offset_times = offset_times[:len(onset_times)]
else:
    onset_times = onset_times[:len(offset_times)]

In [7]:
def extract_features(audio_data, onset, offset):
    #trim
    y_trimmed, _ = librosa.effects.trim(audio_data)
    # Extract the audio segment
    segment = y_trimmed[int(onset * sr):int(offset * sr)]
    # Extract features (e.g., MFCCs)
    features = librosa.feature.mfcc(y=segment, sr=sr, n_mfcc=13)
    return features.T  # Transpose to have shape (n_frames, n_mfcc)

In [8]:
features = []
for i in range(len(onset_times)):
    segment_features = extract_features(audio_data, onset_times[i], offset_times[i])
    features.append(segment_features)

features = np.vstack(features)



In [9]:
features.shape

(2763, 13)

In [10]:
# features.shape
scaler = joblib.load('scaler.pkl')
features_scaled = scaler.transform(features)
f_reshaped = features_scaled.reshape(features_scaled.shape[0],1,features_scaled.shape[1])

In [11]:
model = load_model('./model.h5')

pred = model.predict(f_reshaped)
# pred = model.predict(features_scaled)
pred = [int(i) for i in pred]
# pred[:50]



  pred = [int(i) for i in pred]


In [12]:
len(pred)
# pred

2763

In [13]:
def convert_to_midi(predicted_pitches, onset_times, offset_times, tempo=100):
    # Create a PrettyMIDI object
    midi_data = pretty_midi.PrettyMIDI(initial_tempo=tempo)

    # Create an Instrument instance for the piano
    piano_program = pretty_midi.instrument_name_to_program('Acoustic Grand Piano')
    piano = pretty_midi.Instrument(program=piano_program)

    # Convert predicted pitches to MIDI notes
    for pitch, onset, offset in zip(predicted_pitches, onset_times, offset_times):
        note = pretty_midi.Note(
            velocity=100, pitch=int(pitch), start=onset, end=offset
        )
        piano.notes.append(note)

    # Add the piano instrument to the PrettyMIDI object
    midi_data.instruments.append(piano)

    # Write the MIDI data to a file
    midi_data.write('output.mid')

In [14]:
convert_to_midi(pred, onset_times, offset_times, 100)

In [30]:
output_path = '../output/'

# Function to convert pitches to MIDI notes
def pitches_to_midi(onset_times, offset_times, pitches, output_file_path, tempo=100):
    midi = MidiFile()
    track = MidiTrack()
    midi.tracks.append(track)

    ticks_per_beat = 480 #standard MIDI ticks per beat #midi.ticks_per_beat

    track.append(MetaMessage('set_tempo', tempo=tempo))

    # Assign MIDI note numbers to pitches
    min_pitch = min(pred)  # MIDI note number for C4
    max_pitch = max(pred)  # MIDI note number for C5
    pitch_range = max_pitch - min_pitch

    interpolated_pitches = []
    for i in range(len(onset_times)-1):
        start_time = onset_times[i]
        end_time = offset_times[i]
        duration = end_time - start_time
        num_steps = int(duration * ticks_per_beat)

        if num_steps == 0:
            continue

        start_pitch = pitches[i]
        end_pitch = pitches[i+1]
        pitch_diff = end_pitch - start_pitch
        pitch_step = pitch_diff / num_steps

        for step in range(num_steps):
            interpolated_pitch = start_pitch + step * pitch_step
            interpolated_pitches.append(interpolated_pitch)

    current = 0
    for pitch in interpolated_pitches:
        # Calculate the MIDI note number
        predicted_pitch = min_pitch + int((pitch * pitch_range) % pitch_range)

        # Create a note-on message
        track.append(Message('note_on', note=predicted_pitch, velocity=100, time=current))

        # Create a note-off message (assuming a fixed duration for each note, adjust as needed)
        track.append(Message('note_off', note=predicted_pitch, velocity=100, time=current + ticks_per_beat))

        current += ticks_per_beat



    # Save the MIDI file
    midi.save(output_file_path + 'output.mid')

In [25]:
pitches_to_midi(onset_times, offset_times, pred, output_path)