In [None]:
import os
import warnings

import librosa
import numpy as np

# Audio front-end settings shared by feature extraction and dataset loading.
SR = 16000                        # sample rate requested from librosa.load
N_MELS = 64                       # number of mel bands in each spectrogram
DURATION = 3                      # clip length; multiplied by SR to get SAMPLES
SAMPLES = DURATION * SR           # fixed clip length, in samples
CLASS_MAP = dict(real=0, fake=1)  # sub-directory name -> integer class label

# Feature extraction: log-mel spectrogram of a fixed-length clip
def extract_log_mel(path):
    """Return the log-scaled mel spectrogram of a single audio file.

    The file is loaded as mono audio at ``SR`` and forced to exactly
    ``SAMPLES`` samples with ``librosa.util.fix_length``. A mel spectrogram
    with ``N_MELS`` bands is then converted to decibels relative to the
    clip's own maximum (``ref=np.max``).

    Args:
        path: Path of the audio file to read.

    Returns:
        The array produced by ``librosa.power_to_db`` for this clip.
    """
    waveform, _ = librosa.load(path, sr=SR, mono=True)
    fixed_clip = librosa.util.fix_length(waveform, size=SAMPLES)
    mel_power = librosa.feature.melspectrogram(y=fixed_clip, sr=SR, n_mels=N_MELS)
    return librosa.power_to_db(mel_power, ref=np.max)

# Load one dataset split (one sub-directory per class) into NumPy arrays
def load_dataset(split_dir):
    """Load every ``.wav`` file of one dataset split into feature/label arrays.

    ``split_dir`` is expected to contain one sub-directory per key of
    ``CLASS_MAP``. Each readable file is converted with ``extract_log_mel``
    and labelled with the integer that ``CLASS_MAP`` assigns to its directory.

    Problems are reported with ``warnings.warn`` instead of being silently
    ignored: a missing class directory, a file that fails to load, and a
    split that yields no samples at all. None of them raises, so a partly
    broken dataset still loads on a best-effort basis.

    Args:
        split_dir: Directory of the split (e.g. ``.../training``).

    Returns:
        Tuple ``(X, y)``. ``X`` stacks the per-file spectrograms and has a
        trailing axis of size 1 appended; ``y`` is the integer label array in
        the same order.
    """
    features, labels = [], []
    for label, class_id in CLASS_MAP.items():
        class_dir = os.path.join(split_dir, label)
        if not os.path.isdir(class_dir):
            warnings.warn(f"Class directory not found, skipping: {class_dir}", stacklevel=2)
            continue

        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order (and therefore seeded shuffling) identical across
        # machines and runs. The extension check is case-insensitive so
        # files named "*.WAV" are not dropped.
        wav_names = sorted(
            name for name in os.listdir(class_dir) if name.lower().endswith(".wav")
        )
        for fname in wav_names:
            fpath = os.path.join(class_dir, fname)
            try:
                log_mel = extract_log_mel(fpath)
            except Exception as exc:
                # Skip unreadable/corrupt files, but say so; catching
                # Exception (not a bare except) lets KeyboardInterrupt through.
                warnings.warn(f"Skipping {fpath}: {exc!r}", stacklevel=2)
                continue
            features.append(log_mel)
            labels.append(class_id)

    if not features:
        warnings.warn(f"No .wav files could be loaded from {split_dir}", stacklevel=2)

    X = np.array(features)
    # Explicit integer dtype so an empty split still gives an integer label
    # array (np.array([]) would default to float64).
    y = np.array(labels, dtype=int)
    X = X[..., np.newaxis]  # trailing channel axis expected by Conv2D
    return X, y

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, BatchNormalization, Dropout
from tensorflow.keras.layers import Reshape, Bidirectional, LSTM, Dense, Flatten
import tensorflow as tf
def build_cnn_bilstm(input_shape=(64, 94, 1)):
    """Build and compile a CNN + bidirectional-LSTM binary classifier.

    Two Conv2D/MaxPooling2D/BatchNormalization stages extract local features.
    The resulting feature map is turned into a sequence along axis 2 of the
    input and summarised by a bidirectional LSTM, followed by dropout and a
    single sigmoid unit.

    Args:
        input_shape: Shape of one sample, without the batch axis. Axis 2
            (the second spatial axis) is used as the sequence axis for the
            LSTM; for the arrays returned by ``load_dataset`` this is
            presumably the spectrogram's frame axis.

    Returns:
        A ``Model`` compiled with the Adam optimizer, binary cross-entropy
        loss and the accuracy metric.
    """
    inputs = Input(shape=input_shape)

    # --- CNN feature extractor ---
    x = Conv2D(16, kernel_size=(3, 3), activation='relu', padding='same')(inputs)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = BatchNormalization()(x)

    x = Conv2D(32, kernel_size=(3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = BatchNormalization()(x)

    # The feature map is (axis1, axis2, channels). Reshape alone only regroups
    # the flattened values and would NOT put axis2 first, so the LSTM steps
    # would mix unrelated positions. Move axis2 to the front explicitly, then
    # merge the remaining two axes into the per-step feature vector.
    x = tf.keras.layers.Permute((2, 1, 3))(x)
    x = Reshape((x.shape[1], x.shape[2] * x.shape[3]))(x)

    # --- BiLSTM ---
    x = Bidirectional(LSTM(64))(x)

    # --- Classification head ---
    x = Dropout(0.3)(x)
    outputs = Dense(1, activation='sigmoid')(x)

    model = Model(inputs, outputs)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    return model


In [None]:
if __name__ == "__main__":
    # Seed NumPy and TensorFlow before the model is built and trained.
    np.random.seed(42)
    tf.random.set_seed(42)

    # Root folder; each split lives in its own sub-directory.
    base_path = "Data_Split"
    train_dir = os.path.join(base_path, "training")
    val_dir = os.path.join(base_path, "validation")
    test_dir = os.path.join(base_path, "testing")

    print("Loading training data...")
    X_train, y_train = load_dataset(train_dir)
    print(f"Training set: {X_train.shape}, {y_train.shape}, Class distribution: {np.bincount(y_train)}")

    print("\nLoading validation data...")
    X_val, y_val = load_dataset(val_dir)
    print(f"Validation set: {X_val.shape}, {y_val.shape}, Class distribution: {np.bincount(y_val)}")

    print("\nLoading testing data...")
    X_test, y_test = load_dataset(test_dir)
    print(f"Testing set: {X_test.shape}, {y_test.shape}, Class distribution: {np.bincount(y_test)}")

    # Normalization statistics come from the training set only, and the same
    # scalar mean/std are then applied to every split.
    mean, std = np.mean(X_train), np.std(X_train)
    X_train = (X_train - mean) / std
    X_val = (X_val - mean) / std
    X_test = (X_test - mean) / std
    print(f"\nTrain: {X_train.shape}, Validation: {X_val.shape}, Test: {X_test.shape}")

    # Build the network for the per-sample shape of the loaded data and train it.
    sample_shape = X_train.shape[1:]
    model = build_cnn_bilstm(sample_shape)
    history = model.fit(
        X_train,
        y_train,
        batch_size=32,
        epochs=10,
        shuffle=True,
        validation_data=(X_val, y_val),
    )

    # Final evaluation on the test split.
    test_loss, test_acc = model.evaluate(X_test, y_test)
    print(f"\nTest Accuracy: {test_acc:.4f}")


In [None]:
# Show the architecture summary of `model`, which is created by the training
# cell above (that cell must have been run first).
model.summary()

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, roc_curve, auc
# Plot accuracy and loss
def plot_history(history):
    """Plot training vs. validation accuracy and loss side by side.

    Args:
        history: Object whose ``history`` attribute is a mapping containing
            the keys ``'accuracy'``, ``'val_accuracy'``, ``'loss'`` and
            ``'val_loss'`` (one value per epoch), such as the object returned
            by ``model.fit``.
    """
    # One row per panel:
    # (train key, val key, train label, val label, y-axis label, title)
    panels = (
        ('accuracy', 'val_accuracy', 'Train Acc', 'Val Acc',
         'Accuracy', 'Training vs Validation Accuracy'),
        ('loss', 'val_loss', 'Train Loss', 'Val Loss',
         'Loss', 'Training vs Validation Loss'),
    )
    curves = history.history

    plt.figure(figsize=(12, 5))
    for position, panel in enumerate(panels, start=1):
        train_key, val_key, train_label, val_label, y_label, title = panel
        plt.subplot(1, 2, position)
        plt.plot(curves[train_key], label=train_label)
        plt.plot(curves[val_key], label=val_label)
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.title(title)
        plt.legend()

    plt.tight_layout()
    plt.show()

# Draw the learning curves for `history`, the value returned by model.fit in
# the training cell.
plot_history(history)


In [None]:
# Confusion matrix on the test split.
# Threshold the sigmoid outputs at 0.5 to get hard 0/1 predictions.
y_pred = (model.predict(X_test) > 0.5).astype("int32")
# The model has a single output unit, so predictions come back as one column;
# flatten them for comparison with the 1-D label array.
# labels is pinned to [0, 1] (CLASS_MAP: real=0, fake=1) so the matrix is
# always 2x2 and in the same order as display_labels, even if one class is
# missing from both the labels and the predictions.
cm = confusion_matrix(y_test, y_pred.ravel(), labels=[0, 1])
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['Real', 'Fake'])
disp.plot(cmap='Blues')
plt.title('Confusion Matrix')
plt.show()