In [1]:
import pandas as pd
import librosa
import numpy as np
import joblib

In [2]:
# Load ground truth annotations from CSV file
ground_truth_path = "../data/csv/en001a.csv"
ground_truth = pd.read_csv(ground_truth_path)

# Load audio file
audio_path = "../data/wav/en001a.wav"
audio_data, sr = librosa.load(audio_path)

In [3]:
# Define a function to extract features from an audio segment
def extract_features(audio_data, onset, offset):
    # trim
    y_trimmed, _ = librosa.effects.trim(audio_data)
    # Extract the audio segment
    segment = y_trimmed[int(onset * sr):int(offset * sr)]
    # Extract features (e.g., MFCCs)
    features = librosa.feature.mfcc(y=segment, sr=sr, n_mfcc=13)
    return features.T  # Transpose to have shape (n_frames, n_mfcc)

In [4]:
# Extract features according to ground truth annotations
features = []
pitch_values = []
for _, row in ground_truth.iterrows():
    onset, offset, pitch = row['start'], row['end'], row['pitch']
    segment_features = extract_features(audio_data, onset, offset)
    features.append(segment_features)
    pitch_values.extend([pitch] * segment_features.shape[0])

# Convert features and pitch_values to numpy arrays
features = np.vstack(features)
pitch_values = np.array(pitch_values)




In [5]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.models import save_model
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.optimizers import Adadelta

In [6]:
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(features, pitch_values, test_size=0.2, random_state=42)

# Scale the features
s_scaler = StandardScaler()
X_train_scaled = s_scaler.fit_transform(X_train)
X_test_scaled = s_scaler.transform(X_test)

# Reshape y_train to 2D array for fitting the scaler
y_train_reshaped = y_train.reshape(-1, 1)

# Fit the MinMaxScaler on the y_train_reshaped
scaler = MinMaxScaler(feature_range=(0, 127)) # MIDI notes range from 0 to 127
scaler.fit(y_train_reshaped)

# Reshape features for LSTM input (assuming LSTM expects input shape of (n_samples, n_timesteps, n_features))
X_train_reshaped = X_train_scaled.reshape(X_train_scaled.shape[0], 1, X_train_scaled.shape[1])
X_test_reshaped = X_test_scaled.reshape(X_test_scaled.shape[0], 1, X_test_scaled.shape[1])

In [7]:
X_train_reshaped.shape

(1373, 1, 13)

In [1]:
# Define the LSTM model
model = Sequential([
    LSTM(128, input_shape=(X_train_reshaped.shape[1], X_train_reshaped.shape[2]), return_sequences=True),
    # LSTM(128, input_shape=(X_train_reshaped.shape[1], ), return_sequences=True),
    Dropout(0.2),
    LSTM(128),
    Dropout(0.2),
    Dense(128, activation='tanh'),
    Dense(1)  # Output layer for pitch prediction
])
# Adadelta optimizer
# optimizer = Adadelta(learning_rate=1.0)
# Compile the model
# model.compile(optimizer=optimizer, loss='mse')
model.compile(optimizer='SGD', loss='mse')

# Train the model
model.fit(X_train_reshaped, y_train, validation_data=(X_test_reshaped, y_test), epochs=35, batch_size=5)

NameError: name 'Sequential' is not defined

In [10]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Make predictions on the test set
y_pred = model.predict(X_test_reshaped).flatten()
y_pred = [int(i) for i in y_pred]

# Calculate mean squared error
mse = mean_squared_error(y_test, y_pred)

# Calculate mean absolute error
mae = mean_absolute_error(y_test, y_pred)

print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)


Mean Squared Error: 3.130813953488372
Mean Absolute Error: 1.2761627906976745


In [11]:
print(y_pred)
print(y_test[:10])

[62, 67, 70, 69, 65, 67, 69, 64, 67, 63, 67, 69, 67, 66, 65, 67, 67, 69, 68, 64, 63, 66, 66, 66, 65, 67, 65, 67, 66, 67, 68, 67, 67, 66, 67, 63, 66, 67, 64, 67, 66, 68, 63, 65, 69, 66, 66, 62, 66, 65, 67, 67, 68, 63, 67, 64, 64, 65, 68, 65, 64, 67, 65, 62, 68, 66, 64, 67, 67, 65, 68, 61, 65, 61, 65, 68, 65, 64, 67, 64, 68, 68, 69, 68, 65, 66, 68, 68, 64, 67, 66, 66, 68, 67, 65, 67, 67, 65, 65, 67, 69, 67, 65, 66, 68, 65, 68, 68, 65, 66, 67, 67, 66, 66, 65, 66, 65, 62, 67, 64, 67, 61, 69, 69, 61, 67, 68, 65, 65, 65, 67, 61, 69, 67, 61, 64, 67, 65, 67, 66, 67, 65, 61, 67, 64, 64, 68, 65, 61, 68, 68, 68, 64, 69, 62, 66, 65, 68, 65, 65, 68, 67, 68, 66, 67, 69, 63, 67, 66, 66, 65, 65, 68, 68, 65, 65, 66, 67, 66, 65, 66, 68, 68, 64, 68, 66, 64, 65, 66, 65, 66, 67, 68, 65, 67, 61, 63, 62, 63, 66, 66, 64, 69, 67, 65, 61, 67, 67, 63, 66, 65, 66, 68, 67, 65, 68, 67, 62, 68, 64, 66, 69, 68, 66, 65, 67, 66, 67, 65, 68, 65, 65, 65, 65, 68, 66, 67, 66, 65, 67, 67, 66, 69, 68, 66, 64, 68, 67, 66, 65,

In [12]:
# Save the model
save_model(model, 'model.h5')

In [13]:
joblib.dump(s_scaler, 'scaler.pkl')

['scaler.pkl']