In [20]:
import numpy as np
import librosa
import IPython.display as ipd
import matplotlib.pyplot as plt
import os
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, InputLayer

In [21]:
def extract_features_and_labels(directory, label):
    files = os.listdir(directory)
    features = []
    labels = []
    max_len = 0
    for file in files:
        audio_file = os.path.join(directory, file)
        y, sr = librosa.load(audio_file)
        mfccs = librosa.feature.mfcc(y=y, n_mfcc=13, sr=sr)
        max_len = max(max_len, mfccs.shape[1])
        features.append(mfccs.T)
        labels.append(label)
    return features, labels, max_len

# Extract features and labels for positive samples
positive_features, positive_labels, pos_max_len = extract_features_and_labels(positive_directory, 1)

# Extract features and labels for negative samples
negative_features, negative_labels, neg_max_len = extract_features_and_labels(negative_directory, 0)

# Determine the maximum length for padding
max_len = max(pos_max_len, neg_max_len)

def pad_features(features, max_len):
    padded_features = []
    for feature in features:
        if feature.shape[0] < max_len:
            pad_width = max_len - feature.shape[0]
            padded_feature = np.pad(feature, ((0, pad_width), (0, 0)), mode='constant')
        else:
            padded_feature = feature[:max_len, :]
        padded_features.append(padded_feature)
    return np.array(padded_features)

# Pad the features
positive_features = pad_features(positive_features, max_len)
negative_features = pad_features(negative_features, max_len)

# Convert labels to numpy arrays
positive_labels = np.array(positive_labels)
negative_labels = np.array(negative_labels)

# Print the shapes
print(f"Positive features shape: {positive_features.shape}")
print(f"Positive labels shape: {positive_labels.shape}")
print(f"Negative features shape: {negative_features.shape}")
print(f"Negative labels shape: {negative_labels.shape}")

Positive features shape: (72, 1221, 13)
Positive labels shape: (72,)
Negative features shape: (71, 1221, 13)
Negative labels shape: (71,)


In [36]:
features = np.concatenate((positive_features, negative_features), axis=0)
labels = np.concatenate((positive_labels, negative_labels), axis=0)

# Print the shapes to verify
print(f"Features shape: {features.shape}")
print(f"Labels shape: {labels.shape}")


Features shape: (143, 1221, 13)
Labels shape: (143,)


In [37]:
num_samples = features.shape[0]
num_rows = features.shape[1]
num_cols = features.shape[2]
# Flatten each matrix into a vector
features = features.reshape(num_samples, num_rows * num_cols)

# Now features shape will be (num_samples, num_rows * num_cols)
print("Features shape after flattening:", features.shape)

Features shape after flattening: (143, 15873)


In [41]:
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

In [59]:
tf.random.set_seed(1234)

# Define input layer separately

model = Sequential([
    tf.keras.layers.InputLayer((15873,)),
    Dense(30, activation="sigmoid", name="L1"),
    Dense(10, activation="sigmoid", name="L2"),
], name="my_model")

# Print model summary
model.summary()

In [60]:
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])


history = model.fit(x=X_train, 
                    y=y_train, 
                    epochs=10,
                    validation_data=(X_test, y_test))

test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f'Test Loss: {test_loss}')
print(f'Test Accuracy: {test_accuracy}')

Epoch 1/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 51ms/step - accuracy: 0.3110 - loss: 1.3156 - val_accuracy: 0.9310 - val_loss: 0.6680
Epoch 2/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.9558 - loss: 0.6528 - val_accuracy: 0.9310 - val_loss: 0.6756
Epoch 3/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.9801 - loss: 0.6532 - val_accuracy: 0.8966 - val_loss: 0.6608
Epoch 4/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.9857 - loss: 0.6188 - val_accuracy: 0.8966 - val_loss: 0.6212
Epoch 5/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.9857 - loss: 0.5901 - val_accuracy: 0.9310 - val_loss: 0.5936
Epoch 6/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.9857 - loss: 0.5641 - val_accuracy: 0.9310 - val_loss: 0.5826
Epoch 7/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━