In [1]:
import os
import numpy as np
import librosa
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.utils import to_categorical
from sklearn.utils.class_weight import compute_class_weight
from sklearn.model_selection import train_test_split

In [18]:
def load_audio_data(dataset_path):
    X, y = [], []
    # Iterate through splits: training, validation, testing
    for split in ["training", "validation", "testing"]:
        split_path = os.path.join(dataset_path, split)
        if not os.path.exists(split_path):
            print(f"Split path does not exist: {split_path}")
            continue
        # Iterate through labels: fake, real
        for label in ["fake", "real"]:
            label_path = os.path.join(split_path, label)
            if not os.path.exists(label_path):
                print(f"Label path does not exist: {label_path}")
                continue
            print(f"Processing files in: {label_path}")
            # Iterate through audio files
            for file in os.listdir(label_path):
                file_path = os.path.join(label_path, file)
                try:
                    audio, sr = librosa.load(file_path, sr=16000)
                    if len(audio) < 16000:  # Pad to 1 second if shorter
                        audio = np.pad(audio, (0, 16000 - len(audio)), mode="constant")
                    else:
                        audio = audio[:16000]  # Trim to 1 second
                    mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=40)
                    mfcc = np.mean(mfcc.T, axis=0)  # Mean across time axis
                    X.append(mfcc)
                    y.append(label)
                except Exception as e:
                    print(f"Error loading {file_path}: {e}")
    return np.array(X), np.array(y)


In [22]:
dataset_path = r"C:\Desktop\Jupyter Files\Deepfake\for-2sec\for-2seconds"

# Load the data
X, y = load_audio_data(dataset_path)

print(f"Loaded {len(X)} samples.")

Processing files in: C:\Desktop\Jupyter Files\Deepfake\for-2sec\for-2seconds\training\fake
Processing files in: C:\Desktop\Jupyter Files\Deepfake\for-2sec\for-2seconds\training\real
Processing files in: C:\Desktop\Jupyter Files\Deepfake\for-2sec\for-2seconds\validation\fake
Processing files in: C:\Desktop\Jupyter Files\Deepfake\for-2sec\for-2seconds\validation\real
Processing files in: C:\Desktop\Jupyter Files\Deepfake\for-2sec\for-2seconds\testing\fake
Processing files in: C:\Desktop\Jupyter Files\Deepfake\for-2sec\for-2seconds\testing\real
Loaded 17870 samples.


In [24]:
# Encode labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)
y = to_categorical(y)

In [25]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)


In [26]:
# Compute class weights to handle imbalance
class_weights = compute_class_weight(class_weight="balanced", classes=np.unique(np.argmax(y, axis=1)), y=np.argmax(y, axis=1))
class_weights = {i: class_weights[i] for i in range(len(class_weights))}

In [27]:
# Define the model
model = Sequential([
    Dense(256, activation="relu", input_shape=(X_train.shape[1],)),
    BatchNormalization(),
    Dropout(0.4),
    Dense(128, activation="relu"),
    BatchNormalization(),
    Dropout(0.4),
    Dense(64, activation="relu"),
    Dropout(0.3),
    Dense(2, activation="softmax")
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [28]:
# Compile the model
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

In [29]:
# Callbacks for learning rate scheduling and early stopping
callbacks = [
    EarlyStopping(monitor="val_loss", patience=7, restore_best_weights=True),
    ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=3, min_lr=1e-6)
]

In [30]:
# Train the model
history = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=100,
    batch_size=32,
    class_weight=class_weights,
    callbacks=callbacks,
    verbose=2
)

Epoch 1/100
358/358 - 7s - 20ms/step - accuracy: 0.7673 - loss: 0.5354 - val_accuracy: 0.9220 - val_loss: 0.2144 - learning_rate: 1.0000e-03
Epoch 2/100
358/358 - 2s - 6ms/step - accuracy: 0.8725 - loss: 0.3081 - val_accuracy: 0.9469 - val_loss: 0.1446 - learning_rate: 1.0000e-03
Epoch 3/100
358/358 - 2s - 6ms/step - accuracy: 0.9119 - loss: 0.2265 - val_accuracy: 0.9650 - val_loss: 0.0928 - learning_rate: 1.0000e-03
Epoch 4/100
358/358 - 2s - 6ms/step - accuracy: 0.9304 - loss: 0.1836 - val_accuracy: 0.9741 - val_loss: 0.0752 - learning_rate: 1.0000e-03
Epoch 5/100
358/358 - 2s - 6ms/step - accuracy: 0.9339 - loss: 0.1701 - val_accuracy: 0.9759 - val_loss: 0.0654 - learning_rate: 1.0000e-03
Epoch 6/100
358/358 - 2s - 6ms/step - accuracy: 0.9438 - loss: 0.1482 - val_accuracy: 0.9776 - val_loss: 0.0657 - learning_rate: 1.0000e-03
Epoch 7/100
358/358 - 2s - 6ms/step - accuracy: 0.9502 - loss: 0.1292 - val_accuracy: 0.9759 - val_loss: 0.0657 - learning_rate: 1.0000e-03
Epoch 8/100
358/358

In [31]:
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Loss: {loss}, Test Accuracy: {accuracy}")

Test Loss: 0.04830438643693924, Test Accuracy: 0.9857302904129028


In [40]:
# Generate predictions
y_pred = np.argmax(model.predict(X_test), axis=1)

# Convert y_test if it's one-hot encoded
if len(y_test.shape) > 1:
    y_test = np.argmax(y_test, axis=1)

# Compute metrics
from sklearn.metrics import confusion_matrix, classification_report

cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", cm)

report = classification_report(y_test, y_pred)
print("\nClassification Report:\n", report)

[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
Confusion Matrix:
 [[1772   15]
 [  36 1751]]

Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.99      0.99      1787
           1       0.99      0.98      0.99      1787

    accuracy                           0.99      3574
   macro avg       0.99      0.99      0.99      3574
weighted avg       0.99      0.99      0.99      3574



In [33]:
# Save the model and label encoder
model.save("fake_or_real_audio_lstm_model.h5")
import pickle
with open("label_encoder.pkl", "wb") as f:
    pickle.dump(label_encoder, f)

