In [1]:
import librosa
from sklearn.preprocessing import StandardScaler
from keras.models import load_model
import numpy as np
import joblib
import pretty_midi

In [2]:
# idea is to 
# 1) extract on and offset
# 2) do the same feature extraction method as training
# 3) load the model and predict notes
# 4) use the onset offset info together convert to midi
# 5) convert to sheet

In [3]:
# Load audio file
audio_path = "../data/wav/en001b.wav"
audio_data, sr = librosa.load(audio_path)

In [4]:
# Extract onset and offset timings
onset_frames = librosa.onset.onset_detect(y=audio_data, sr=sr, backtrack=True)
onset_times = librosa.frames_to_time(onset_frames, sr=sr)
offset_times = librosa.frames_to_time(onset_frames[1:], sr=sr)  # Assuming offset is the next onset

In [5]:
# Extract tempo
tempo, _ = librosa.beat.beat_track(y=audio_data, sr=sr)

In [6]:
if len(onset_times)<len(offset_times):
    offset_times = offset_times[:len(onset_times)]
else:
    onset_times = onset_times[:len(offset_times)]

In [7]:
def extract_features(audio_data, onset, offset):
    #trim
    y_trimmed, _ = librosa.effects.trim(audio_data)
    # Extract the audio segment
    segment = y_trimmed[int(onset * sr):int(offset * sr)]
    # Extract features (e.g., MFCCs)
    features = librosa.feature.mfcc(y=segment, sr=sr, n_mfcc=13)
    return features.T  # Transpose to have shape (n_frames, n_mfcc)

In [8]:
features = []
for i in range(len(onset_times)):
    segment_features = extract_features(audio_data, onset_times[i], offset_times[i])
    features.append(segment_features)

features = np.vstack(features)



In [9]:
features.shape

(2763, 13)

In [10]:
# features.shape
scaler = joblib.load('scaler.pkl')
features_scaled = scaler.transform(features)
f_reshaped = features_scaled.reshape(features_scaled.shape[0],1,features_scaled.shape[1])

In [11]:
model = load_model('./model.h5')

pred = model.predict(f_reshaped)
# pred = model.predict(features_scaled)
pred = [int(i) for i in pred]
# pred[:50]



  pred = [int(i) for i in pred]


In [14]:
len(pred)
# pred

2763

In [21]:
output_path = '../output/'

# Function to convert pitches to MIDI notes
def pitches_to_midi(pitches, output_file_path, tempo):
    # Create a PrettyMIDI object
    midi_data = pretty_midi.PrettyMIDI(initial_tempo=tempo)
    piano_program = pretty_midi.instrument_name_to_program('Acoustic Grand Piano')
    piano = pretty_midi.Instrument(program=piano_program)

    # Add notes to the piano instrument
    for i, pitch in enumerate(pitches):
        if pitch != 0:  # Skip rests (assuming pitch 0 represents a rest)
            note = pretty_midi.Note(velocity=127, pitch=int(pitch), start=i, end=i+1)  # Assuming each pitch lasts for one unit of time
            piano.notes.append(note)

    # Add the piano instrument to the MIDI data
    midi_data.instruments.append(piano)

    # Write the MIDI data to a file
    midi_data.write(output_file_path+'output.mid')

In [22]:
pitches_to_midi(pred, output_path, tempo)

In [30]:
# features.T.shape
# # Reshape MFCCs to match the model input shape
# # mfccs_reshaped = np.expand_dims(features.T, axis=0)

# scaler = joblib.load('scaler.pkl')
# mfccs_scaled = scaler.transform(features.T)
# # mfccs_scaled.shape
# mfccs_reshaped = np.expand_dims(features.T, axis=0)
# # mfccs_reshaped.shape
# X_actual_reshaped = mfccs_reshaped.reshape(mfccs_reshaped.shape[1],1,mfccs_reshaped.shape[2])

In [25]:
# model = load_model('./model.h5')

# pred = model.predict(X_actual_reshaped)
# pred = [int(i) for i in pred]
# pred[:50]