In [None]:
import os
import librosa
import numpy as np

# Sampling rate passed as `sr` to librosa.load and the mel-spectrogram call.
SR = 16000
# Number of mel bands requested from librosa.feature.melspectrogram.
N_MELS = 64
# Clip length; multiplied by SR below to get a sample count, so it is in seconds.
DURATION = 3
# Number of samples every clip is forced to (via librosa.util.fix_length) before feature extraction.
SAMPLES = SR * DURATION
# Class sub-directory name -> integer label stored in the target array.
CLASS_MAP = {"real": 0, "fake": 1}
#Feature Extraction
def extract_log_mel(path):
    """Load one audio file and return its log-scaled mel spectrogram.

    The file is decoded as mono at ``SR``, forced to exactly ``SAMPLES``
    samples with ``librosa.util.fix_length``, turned into a mel power
    spectrogram with ``N_MELS`` bands, and converted to dB relative to the
    spectrogram's own maximum (``ref=np.max``).

    Args:
        path: Path of the audio file to read.

    Returns:
        The array produced by ``librosa.power_to_db``.
    """
    waveform, _native_sr = librosa.load(path, sr=SR, mono=True)
    fixed_len = librosa.util.fix_length(waveform, size=SAMPLES)
    mel_power = librosa.feature.melspectrogram(y=fixed_len, sr=SR, n_mels=N_MELS)
    return librosa.power_to_db(mel_power, ref=np.max)

#Load Dataset into Arrays
def load_dataset(split_dir):
    """Build feature and label arrays for one dataset split.

    For every class name in ``CLASS_MAP`` the sub-directory
    ``split_dir/<class name>`` is scanned for ``.wav`` files. Each file is
    converted with ``extract_log_mel`` and paired with the class's integer
    label. Class directories that do not exist are skipped.

    Args:
        split_dir: Directory containing one sub-directory per class.

    Returns:
        A tuple ``(X, y)``. ``X`` is the stacked features with one trailing
        axis of size 1 appended; ``y`` is the integer label array.

    Notes:
        Files that fail feature extraction are skipped, but a warning naming
        the file and the error is emitted instead of dropping them silently.
    """
    import warnings  # local import so this cell does not depend on another cell

    X, y = [], []
    for label, class_id in CLASS_MAP.items():
        class_dir = os.path.join(split_dir, label)
        if not os.path.exists(class_dir):
            continue
        # Sorted: os.listdir order is filesystem-dependent, which would make
        # the sample order (and anything seeded on it) vary between machines.
        files = sorted(f for f in os.listdir(class_dir) if f.endswith(".wav"))
        for fname in files:
            fpath = os.path.join(class_dir, fname)
            try:
                log_mel = extract_log_mel(fpath)
            except Exception as exc:
                # `Exception` rather than a bare except, so KeyboardInterrupt
                # and SystemExit still stop a long load.
                warnings.warn(f"Skipping {fpath}: {exc!r}")
                continue
            X.append(log_mel)
            y.append(class_id)
    X = np.array(X)
    # Explicit integer dtype: an empty list would otherwise become float64,
    # which np.bincount in the calling cell rejects.
    y = np.array(y, dtype=int)
    X = X[..., np.newaxis]
    return X, y

In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report
import librosa
import librosa.display

In [None]:

def build_model(input_shape):
    """Create and compile the CNN binary classifier.

    The network is three Conv2D -> BatchNormalization -> MaxPooling2D ->
    Dropout(0.25) blocks with 32, 64 and 128 filters, followed by a flattened
    256-unit dense head and a single sigmoid output.

    Args:
        input_shape: Shape of one input sample, given to the first Conv2D layer.

    Returns:
        The compiled ``Sequential`` model (Adam at 1e-4, binary cross-entropy,
        accuracy and AUC metrics).
    """
    stack = []
    for block_idx, n_filters in enumerate((32, 64, 128)):
        # Only the first convolution declares the input shape.
        first_layer_kwargs = {'input_shape': input_shape} if block_idx == 0 else {}
        stack.extend([
            layers.Conv2D(n_filters, (3, 3), activation='relu', padding='same',
                          **first_layer_kwargs),
            layers.BatchNormalization(),
            layers.MaxPooling2D((2, 2)),
            layers.Dropout(0.25),
        ])

    # Classification head
    stack.extend([
        layers.Flatten(),
        layers.Dense(256, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.5),
        layers.Dense(1, activation='sigmoid'),
    ])

    model = models.Sequential(stack)

    adam = tf.keras.optimizers.Adam(learning_rate=1e-4)
    tracked_metrics = ['accuracy', tf.keras.metrics.AUC(name='auc')]
    model.compile(optimizer=adam, loss='binary_crossentropy', metrics=tracked_metrics)

    return model


In [None]:
#TRAINING AND EVALUATION

def train_model(X_train, y_train, X_val, y_val, model_path='best_model.keras',
                epochs=5, batch_size=32):
    """Build a model with ``build_model`` and fit it on the training split.

    Three callbacks are attached, all monitoring ``val_loss``: early stopping
    (patience 10, restoring best weights), learning-rate halving on plateau
    (patience 5, floor 1e-6), and a checkpoint that keeps only the best model
    at ``model_path``.

    Args:
        X_train: Training features; ``X_train.shape[1:]`` is used as the
            model's input shape.
        y_train: Training labels.
        X_val: Validation features.
        y_val: Validation labels.
        model_path: File the best checkpoint is written to.
        epochs: Maximum number of training epochs. The default of 5 matches
            the previous hard-coded value.
        batch_size: Mini-batch size. The default of 32 matches the previous
            hard-coded value.

    Returns:
        A tuple ``(model, history)`` with the fitted model and the object
        returned by ``model.fit``.

    Notes:
        With the default ``epochs=5`` the patience windows (10 and 5 epochs
        without improvement) are not normally reachable, so early stopping and
        learning-rate reduction only take effect when ``epochs`` is raised.
    """
    input_shape = X_train.shape[1:]
    model = build_model(input_shape)

    # Print model summary
    model.summary()

    # Callbacks for training
    callbacks_list = [
        callbacks.EarlyStopping(
            monitor='val_loss',
            patience=10,
            restore_best_weights=True
        ),
        callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.5,
            patience=5,
            min_lr=1e-6
        ),
        callbacks.ModelCheckpoint(
            filepath=model_path,
            monitor='val_loss',
            save_best_only=True
        )
    ]

    # Train the model
    history = model.fit(
        X_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(X_val, y_val),
        callbacks=callbacks_list,
        verbose=1,
        shuffle=True
    )

    return model, history

In [None]:
def evaluate_model(model, X_test, y_test):
    """Score the model on the test split and print a summary.

    Probabilities from ``model.predict`` are thresholded at 0.5 to get hard
    labels. Loss, accuracy and AUC come from ``model.evaluate``, whose result
    is unpacked as exactly three values, so the model must be compiled with
    the accuracy and AUC metrics as ``build_model`` does.

    Args:
        model: Fitted model exposing ``predict`` and ``evaluate``.
        X_test: Test features.
        y_test: True test labels.

    Returns:
        A tuple ``(y_pred, y_pred_prob)``: the flattened 0/1 predictions and
        the probabilities exactly as returned by ``model.predict``.
    """
    y_pred_prob = model.predict(X_test)
    y_pred = (y_pred_prob > 0.5).astype(int).flatten()

    # Calculate metrics
    test_loss, test_acc, test_auc = model.evaluate(X_test, y_test, verbose=0)

    # Print results
    print("\n===== Model Evaluation =====")
    print(f"Test Accuracy: {test_acc:.4f}")
    print(f"Test Loss: {test_loss:.4f}")
    # AUC was previously computed and then discarded.
    print(f"Test AUC: {test_auc:.4f}")

    print("\nClassification Report:")
    print(classification_report(y_test, y_pred, target_names=["Real", "Fake"]))

    return y_pred, y_pred_prob



In [None]:
# Seed NumPy and TensorFlow so repeated runs start from the same random state
np.random.seed(42)
tf.random.set_seed(42)

# 1. Load data
base_path = "./data_preprocessed"
train_dir = os.path.join(base_path, "training")
val_dir = os.path.join(base_path, "validation")
test_dir = os.path.join(base_path, "testing")

print("Loading training data...")
X_train, y_train = load_dataset(train_dir)
print(f"Training set: {X_train.shape}, {y_train.shape}, Class distribution: {np.bincount(y_train)}")

print("\nLoading validation data...")
X_val, y_val = load_dataset(val_dir)
print(f"Validation set: {X_val.shape}, {y_val.shape}, Class distribution: {np.bincount(y_val)}")

print("\nLoading testing data...")
X_test, y_test = load_dataset(test_dir)
print(f"Testing set: {X_test.shape}, {y_test.shape}, Class distribution: {np.bincount(y_test)}")


# Normalization statistics come from the training split only, so no
# information from validation/test data leaks into preprocessing.
mean = X_train.mean()
std = X_train.std()

# Standardize every split with those training statistics
X_train = (X_train - mean) / std
X_val = (X_val - mean) / std
X_test = (X_test - mean) / std

In [None]:
# 2. Train model
print("\nTraining model...")
model, history = train_model(
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
)


In [None]:

# 3. Evaluate model
print("\nEvaluating model...")
y_pred, y_pred_prob = evaluate_model(
    model=model,
    X_test=X_test,
    y_test=y_test,
)


In [None]:
import matplotlib.pyplot as plt
import numpy as np

In [None]:
def plot_confusion_matrix(y_true, y_pred, labels):
    """Draw the confusion matrix as a heatmap, save it, and display it.

    Args:
        y_true: True class labels.
        y_pred: Predicted class labels.
        labels: Display names used for both the x and y tick labels, in the
            row/column order of the confusion matrix.

    Side effects:
        Writes ``cnn_confusion_matrix.png`` to the working directory.
    """
    # Use the arguments, not the notebook globals, so the function works for
    # any split that is passed in.
    cm = confusion_matrix(y_true, y_pred)

    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=labels,
                yticklabels=labels,
                ax=ax)
    ax.set_ylabel('True Label')
    ax.set_xlabel('Predicted Label')
    ax.set_title('Confusion Matrix')

    # Save before showing: once plt.show() has run, the current figure may be
    # gone and a later savefig would write an empty image.
    fig.savefig('cnn_confusion_matrix.png')
    plt.show()
    plt.close(fig)

plot_confusion_matrix(y_true=y_test, y_pred=y_pred, labels=["Real", "Fake"])

In [None]:
# 1. Plot training history
def plot_training_history(history):
    """Plot training/validation loss and accuracy side by side, save, and show.

    Args:
        history: Object whose ``history`` attribute is a mapping containing
            the per-epoch series ``'loss'``, ``'val_loss'``, ``'accuracy'``
            and ``'val_accuracy'`` (as returned by ``model.fit``).

    Side effects:
        Writes ``CNN_training_history.png`` to the working directory.
    """
    print("\nPlotting training history...")

    fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(10, 5))

    # Loss curves
    loss_ax.plot(history.history['loss'], label='Training Loss')
    loss_ax.plot(history.history['val_loss'], label='Validation Loss')
    loss_ax.set_title('Model Loss')
    loss_ax.set_xlabel('Epoch')
    loss_ax.set_ylabel('Loss')
    loss_ax.legend()

    # Accuracy curves
    acc_ax.plot(history.history['accuracy'], label='Training Accuracy')
    acc_ax.plot(history.history['val_accuracy'], label='Validation Accuracy')
    acc_ax.set_title('Model Accuracy')
    acc_ax.set_xlabel('Epoch')
    acc_ax.set_ylabel('Accuracy')
    acc_ax.legend()

    fig.tight_layout()
    # Save before showing: once plt.show() has run, the current figure may be
    # gone and a later savefig would write an empty image.
    fig.savefig('CNN_training_history.png')
    plt.show()
    plt.close(fig)  # release the figure's memory

# Render, save and display the loss/accuracy curves for the run above
plot_training_history(history=history)
