In [1]:
import librosa
from sklearn.preprocessing import StandardScaler
from keras.models import load_model
import numpy as np
import joblib
from mido import MidiFile, MidiTrack, Message, MetaMessage
import pretty_midi

In [2]:
# idea is to 
# 1) extract on and offset
# 2) do the same feature extraction method as training
# 3) load the model and predict notes
# 4) use the onset offset info together convert to midi
# 5) convert to sheet

In [7]:
# Load audio file
audio_path = "../data/wav/en001b.wav"
audio_data, sr = librosa.load(audio_path)
mfccs = librosa.feature.mfcc(y=audio_data, sr=sr, n_mfcc=13)

In [8]:
# Extract onset and offset timings
onset_frames = librosa.onset.onset_detect(y=audio_data, sr=sr, backtrack=True)
onset_times = librosa.frames_to_time(onset_frames, sr=sr)
offset_times = onset_times[1:].tolist() + [librosa.get_duration(y=audio_data, sr=sr)]  # Assuming offset is the next onset

In [9]:
# Extract tempo
tempo, _ = librosa.beat.beat_track(y=audio_data, sr=sr)

In [10]:
if len(onset_times)<len(offset_times):
    offset_times = offset_times[:len(onset_times)]
else:
    onset_times = onset_times[:len(offset_times)]

In [12]:

def align_frames_with_times(mfccs, sr, max_length, onsets, offsets):
    frame_times = librosa.frames_to_time(np.arange(len(mfccs.T)), sr=sr)
    aligned_features = []

    for onset, offset in zip(onsets, offsets):
        onset_frame = np.argmax(frame_times >= onset)
        offset_frame = np.argmax(frame_times >= offset)
        feature_sequence = mfccs[:, onset_frame:offset_frame]

        # Pad or truncate feature sequence to the fixed length
        if feature_sequence.shape[1] < max_length:
            pad_width = max_length - feature_sequence.shape[1]
            padded_sequence = np.pad(feature_sequence, ((0, 0), (0, pad_width)), mode='constant')
            aligned_features.append(padded_sequence)
        else:
            truncated_sequence = feature_sequence[:, :max_length]
            aligned_features.append(truncated_sequence)

    # Convert aligned_features to a numpy array
    aligned_features = np.array(aligned_features)

    # Mask the padding values
    mask = (aligned_features.sum(axis=-1) != 0).astype(np.float32)[:, :, np.newaxis]
    masked_features = aligned_features * mask
    return masked_features

In [14]:
features = align_frames_with_times(mfccs, sr, 100, onset_times, offset_times)

In [15]:
features.shape


(119, 13, 100)

In [32]:
# features.shape
# scaler = joblib.load('scaler.pkl')
# features_scaled = scaler.transform(features)
# f_reshaped = features_scaled.reshape(features_scaled.shape[0],1,features_scaled.shape[1])
# f_reshaped = features.reshape(features.shape[0],1,features.shape[1])
# f_reshaped = features.reshape(features.shape[0],1,13)

In [21]:
# f_reshaped.shape

In [23]:
len(onset_times)

119

In [16]:
model = load_model('./model.h5')

# pred = model.predict(f_reshaped)
pred = model.predict(features)
pred = [int(i) for i in pred]
# pred[:50]



  pred = [int(i) for i in pred]


In [17]:
len(pred)
# max(pred)

119

In [24]:
def convert_to_midi(predicted_pitches, onset_times, offset_times, tempo=120):
    # Create a PrettyMIDI object
    midi_data = pretty_midi.PrettyMIDI(initial_tempo=tempo)

    # Create an Instrument instance for the piano
    piano_program = pretty_midi.instrument_name_to_program('Acoustic Grand Piano')
    piano = pretty_midi.Instrument(program=piano_program)

    # Convert predicted pitches to MIDI notes
    for pitch, onset, offset in zip(predicted_pitches, onset_times, offset_times):
        note = pretty_midi.Note(
            velocity=100, pitch=int(pitch), start=onset, end=offset
        )
        piano.notes.append(note)

    # Add the piano instrument to the PrettyMIDI object
    midi_data.instruments.append(piano)

    # Write the MIDI data to a file
    midi_data.write('output.mid')

In [25]:
convert_to_midi(pred, onset_times, offset_times, 100)

In [20]:
from mido import MidiFile, MidiTrack, Message

midi_file = MidiFile()
track = MidiTrack()
midi_file.tracks.append(track)

# Assuming each predicted pitch has a corresponding duration (e.g., 1 second)
for pitch in pred:
    track.append(Message('note_on', note=pitch, velocity=64, time=0))
    track.append(Message('note_off', note=pitch, velocity=64, time=int(sr)))  # Duration of 1 second

midi_file.save('predicted_output.mid')

In [18]:
output_path = '../output/'

# Function to convert pitches to MIDI notes
def pitches_to_midi(onset_times, offset_times, pitches, output_file_path, tempo=100):
    midi = MidiFile()
    track = MidiTrack()
    midi.tracks.append(track)

    ticks_per_beat = 480 #standard MIDI ticks per beat #midi.ticks_per_beat

    track.append(MetaMessage('set_tempo', tempo=tempo))

    # Assign MIDI note numbers to pitches
    min_pitch = min(pred)  # MIDI note number for C4
    max_pitch = max(pred)  # MIDI note number for C5
    pitch_range = max_pitch - min_pitch

    interpolated_pitches = []
    for i in range(len(onset_times)-1):
        start_time = onset_times[i]
        end_time = offset_times[i]
        duration = end_time - start_time
        num_steps = int(duration * ticks_per_beat)

        if num_steps == 0:
            continue

        start_pitch = pitches[i]
        end_pitch = pitches[i+1]
        pitch_diff = end_pitch - start_pitch
        pitch_step = pitch_diff / num_steps

        for step in range(num_steps):
            interpolated_pitch = start_pitch + step * pitch_step
            interpolated_pitches.append(interpolated_pitch)

    current = 0
    for pitch in interpolated_pitches:
        # Calculate the MIDI note number
        predicted_pitch = min_pitch + int((pitch * pitch_range) % pitch_range)

        # Create a note-on message
        track.append(Message('note_on', note=predicted_pitch, velocity=100, time=current))

        # Create a note-off message (assuming a fixed duration for each note, adjust as needed)
        track.append(Message('note_off', note=predicted_pitch, velocity=100, time=current + ticks_per_beat))

        current += ticks_per_beat



    # Save the MIDI file
    midi.save(output_file_path + 'output.mid')

In [19]:
pitches_to_midi(onset_times, offset_times, pred, output_path)

In [None]:
    # # Assign MIDI note numbers to pitches
    # # You may need to adjust this based on your model's output
    # min_pitch = 61  # MIDI note number for C4
    # max_pitch = 71  # MIDI note number for C5
    # pitch_range = max_pitch - min_pitch

    # for onset, offset, pitch in zip(onset_times, offset_times, pitches):
    #     # Get the predicted pitch
    #     predicted_pitch = min_pitch + (int(pitch * pitch_range)%pitch_range)

    #     # Create a note-on message
    #     track.append(Message('note_on', note=predicted_pitch, velocity=100, time=int(onset * ticks_per_beat)))

    #     # Create a note-off message
    #     track.append(Message('note_off', note=predicted_pitch, velocity=100, time=int(offset * ticks_per_beat)))
