In [1]:
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import soundfile as sf
from keras.layers import LSTM, Bidirectional, Dense
from keras.models import Sequential
from sklearn.model_selection import train_test_split

In [None]:
# Load the recording; sr=None keeps the file's native sampling rate.
y, sr = librosa.load('BWR.wav', sr=None)

# Frame-level spectral descriptors: 13 MFCCs and the STFT magnitude.
mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
stft = np.abs(librosa.stft(y))

# Linear-prediction coefficients of the whole signal, with any
# singleton axes squeezed away.
lpc_order = 10
lpc = np.squeeze(librosa.lpc(y, order=lpc_order))

In [None]:
# Averaging features
mfcc_avg = np.mean(mfcc, axis=1)
stft_avg = np.mean(stft, axis=1)
# Concatenate features
features = np.concatenate((mfcc_avg, stft_avg, lpc), axis=0)
X = np.repeat(np.expand_dims(features, axis=0), 10, axis=0)
y = np.ones(10)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train = np.expand_dims(X_train, axis=1)
X_test = np.expand_dims(X_test, axis=1)

In [None]:
# LSTM classifier: one 128-unit recurrent layer feeding a single sigmoid unit.
# The input shape is read from the training array (dimensions 1 and 2).
timesteps, n_features = X_train.shape[1], X_train.shape[2]
model_lstm = Sequential([
    LSTM(128, input_shape=(timesteps, n_features)),
    Dense(1, activation='sigmoid'),
])

In [None]:
# Configure the LSTM for binary classification (Adam + binary cross-entropy),
# then train for 10 epochs, validating on the held-out split.
model_lstm.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model_lstm.fit(
    X_train,
    y_train,
    epochs=10,
    validation_data=(X_test, y_test),
)

In [None]:
# Bidirectional variant: a 128-unit LSTM wrapped in Bidirectional, followed by
# a single sigmoid unit. Input shape is read from the training array.
timesteps, n_features = X_train.shape[1], X_train.shape[2]
model_bilstm = Sequential([
    Bidirectional(LSTM(128), input_shape=(timesteps, n_features)),
    Dense(1, activation='sigmoid'),
])

In [None]:
# Configure the Bi-LSTM for binary classification (Adam + binary
# cross-entropy), then train for 10 epochs, validating on the held-out split.
model_bilstm.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model_bilstm.fit(
    X_train,
    y_train,
    epochs=10,
    validation_data=(X_test, y_test),
)

In [None]:
# Waveform view: draw `y` (expected to hold the audio samples) against
# sample index on a wide figure.
fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(y)
ax.set_title('Waveform of Original Audio')
plt.show()

In [None]:
# Plot mel spectrogram of original audio.
# Compute an actual mel-scaled power spectrogram. Passing a linear-frequency
# STFT to specshow with y_axis='mel' makes specshow treat the STFT rows as mel
# bands, which mislabels the frequency axis.
mel_power = librosa.feature.melspectrogram(y=y, sr=sr)
mel_db = librosa.power_to_db(mel_power, ref=np.max)  # dB relative to the peak

fig, ax = plt.subplots(figsize=(14, 5))
img = librosa.display.specshow(mel_db, sr=sr, x_axis='time', y_axis='mel', ax=ax)
ax.set_title('Mel Spectrogram of Original Audio')
fig.colorbar(img, ax=ax, format='%+2.0f dB')
plt.show()

In [None]:

# Plot waveform of trimmed audio.
# `y_trim` was referenced but never defined in this notebook, so the cell
# raised NameError on a fresh kernel. It is derived here by removing leading
# and trailing silence with librosa's default threshold.
# NOTE(review): confirm this matches the trimming originally intended.
y_trim, trim_interval = librosa.effects.trim(y)

fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(y_trim)
ax.set_title('Waveform of Trimmed Audio')
plt.show()

In [None]:
# Plot mel spectrogram of trimmed audio (`y_trim` must already exist in the
# kernel when this cell runs).
# Compute an actual mel-scaled power spectrogram. Passing a linear-frequency
# STFT to specshow with y_axis='mel' makes specshow treat the STFT rows as mel
# bands, which mislabels the frequency axis.
mel_power_trim = librosa.feature.melspectrogram(y=y_trim, sr=sr)
mel_db_trim = librosa.power_to_db(mel_power_trim, ref=np.max)  # dB relative to the peak

fig, ax = plt.subplots(figsize=(14, 5))
img = librosa.display.specshow(mel_db_trim, sr=sr, x_axis='time', y_axis='mel', ax=ax)
ax.set_title('Mel Spectrogram of Trimmed Audio')
fig.colorbar(img, ax=ax, format='%+2.0f dB')
plt.show()