In [None]:
# Install required libraries if not installed
!pip install numpy pandas librosa matplotlib seaborn tqdm scikit-learn imbalanced-learn tensorflow resampy

# Import necessary libraries
import numpy as np
import pandas as pd
import os
import librosa
import librosa.display
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import RandomOverSampler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Reshape, MaxPooling2D, Dropout, Conv2D, Flatten
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2  # Added L2 Regularization

# Load dataset paths and labels
def load_dataset(dataset_path):
    """Build a DataFrame of file paths and class labels from a folder tree.

    Every immediate sub-directory of ``dataset_path`` is treated as one class
    and every regular file directly inside it becomes one row whose label is
    the sub-directory name (e.g. ``REAL`` / ``FAKE``).

    Args:
        dataset_path: Root directory containing one sub-directory per class.

    Returns:
        pandas.DataFrame with the columns ``speech`` (file path) and
        ``label`` (sub-directory name), ordered by label and then file name.
    """
    paths, labels = [], []

    # os.listdir() returns entries in arbitrary order; sorting keeps the row
    # order (and therefore any seeded sampling of the frame) reproducible.
    for subdir in sorted(os.listdir(dataset_path)):
        subdir_path = os.path.join(dataset_path, subdir)
        if not os.path.isdir(subdir_path):
            continue  # stray files next to the class folders are not data

        for filename in sorted(os.listdir(subdir_path)):
            file_path = os.path.join(subdir_path, filename)
            # Nested directories are not audio clips; listing them would only
            # produce load errors later on.
            if not os.path.isfile(file_path):
                continue
            paths.append(file_path)
            labels.append(subdir)  # REAL / FAKE

    df = pd.DataFrame({"speech": paths, "label": labels})
    print("✅ Dataset loaded successfully!")
    print(df.head())
    print("\n🔍 Label distribution:")
    print(df['label'].value_counts())

    return df

# Root folder of the dataset; load_dataset() treats each sub-directory name
# as the class label of the files inside it.
# NOTE(review): the path lives under /content/drive, so this presumably needs
# Google Drive to be mounted in Colab before the cell runs — confirm.
dataset_path = "/content/drive/MyDrive/AUDIO" 
df = load_dataset(dataset_path)

# Function to extract MFCC features efficiently
def extract_mfcc(audio_path, max_length=200, sr=16000, n_mfcc=20):
    """Load an audio file and return a fixed-width MFCC matrix.

    The clip is loaded with ``librosa.load`` at the requested sample rate,
    converted to MFCCs and then zero-padded or truncated along the time axis
    so that every clip yields the same number of frames.

    Args:
        audio_path: Path of the audio file to load.
        max_length: Number of time frames (columns) in the returned matrix.
        sr: Sample rate passed to ``librosa.load``.
        n_mfcc: Number of MFCC coefficients (rows) to compute.

    Returns:
        The MFCC array with ``max_length`` columns, or ``None`` when the file
        could not be loaded or processed (the error is printed, not raised,
        so one bad file does not abort a whole extraction loop).
    """
    try:
        audio, sr = librosa.load(audio_path, sr=sr)
        mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)

        # Pad (with zeros, on the right) or trim to the fixed frame count
        n_frames = mfcc.shape[1]
        if n_frames < max_length:
            mfcc = np.pad(mfcc, ((0, 0), (0, max_length - n_frames)), mode='constant')
        else:
            mfcc = mfcc[:, :max_length]

        return mfcc
    except Exception as e:
        # Deliberately best-effort: report the failing file and let the caller skip it.
        print(f" Error processing {audio_path}: {e}")
        return None

# Extract features using a subset of data
subset_df = df.sample(n=min(200, len(df)), random_state=42)  # Use min to avoid sampling errors
features, labels = [], []

for idx, row in tqdm(subset_df.iterrows(), total=len(subset_df), desc="Extracting MFCC"):
    mfcc = extract_mfcc(row['speech'])
    if mfcc is not None:  # extract_mfcc returns None for files it could not process
        features.append(mfcc)
        labels.append(row['label'])

# Fail early with a readable message instead of an obscure shape error below.
if not features:
    raise RuntimeError("No MFCC features could be extracted; check dataset_path and the audio files.")

# Convert to numpy arrays
features = np.array(features)
labels = np.array(labels)

# Encode labels: strings -> integer ids -> one-hot rows
le = LabelEncoder()
label_ids = le.fit_transform(labels)
labels_encoded = to_categorical(label_ids)

# Split into training and testing sets.
# The classes are heavily imbalanced, so a plain random split can leave the
# test set without any sample of the minority class. Stratify on the integer
# labels whenever the split sizes allow it; otherwise fall back to the plain
# random split rather than letting train_test_split raise.
test_fraction = 0.2
n_samples = len(label_ids)
n_classes = len(le.classes_)
n_test = int(np.ceil(test_fraction * n_samples))
can_stratify = (
    np.bincount(label_ids).min() >= 2
    and n_test >= n_classes
    and n_samples - n_test >= n_classes
)

xtrain, xtest, ytrain, ytest = train_test_split(
    features,
    labels_encoded,
    test_size=test_fraction,
    random_state=42,
    stratify=label_ids if can_stratify else None,
)

# Add the trailing channel axis expected by Conv2D: (samples, n_mfcc, frames, 1)
xtrain = xtrain.reshape(xtrain.shape[0], xtrain.shape[1], xtrain.shape[2], 1)
xtest = xtest.reshape(xtest.shape[0], xtest.shape[1], xtest.shape[2], 1)

print(f" Data successfully prepared! Shape: {xtrain.shape}")

# Define an optimized LCNN-like model with L2 regularization
def build_model(input_shape=None, num_classes=2, learning_rate=0.001):
    """Build and compile the small L2-regularised CNN classifier.

    Architecture: two Conv2D(3x3, ReLU) + MaxPooling2D(2x2) stages with 32 and
    64 filters, Dropout(0.3), Flatten, Dense(256, ReLU), Dropout(0.5) and a
    softmax output layer. All conv/dense kernels except the output layer use
    an L2 penalty of 0.001.

    Args:
        input_shape: ``(height, width, channels)`` of one input sample. When
            omitted, it is taken from the module-level ``xtrain`` array, which
            matches the previous zero-argument behaviour.
        num_classes: Number of units in the softmax output layer.
        learning_rate: Learning rate for the Adam optimizer.

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy loss
        and the accuracy metric.
    """
    if input_shape is None:
        # Backward-compatible default: derive the shape from the training data.
        input_shape = (xtrain.shape[1], xtrain.shape[2], 1)

    model = Sequential([
        # An explicit Input layer replaces the `input_shape=` layer argument,
        # which recent Keras versions warn about inside Sequential models.
        tf.keras.Input(shape=input_shape),
        Conv2D(32, (3, 3), activation='relu', kernel_regularizer=l2(0.001)),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(64, (3, 3), activation='relu', kernel_regularizer=l2(0.001)),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.3),

        Flatten(),
        Dense(256, activation='relu', kernel_regularizer=l2(0.001)),
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),  # default 2 classes: REAL and FAKE
    ])

    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

model = build_model()
model.summary()

# Initial evaluation (before fine-tuning).
# The model has not been trained at this point, so this is the baseline of
# randomly initialised weights.
print("\n🔍 Evaluating model before fine-tuning...")
loss, accuracy = model.evaluate(xtest, ytest)
print(f" Initial Test Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}")

# Train the model
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history = model.fit(
    xtrain, ytrain, epochs=15, batch_size=16, validation_data=(xtest, ytest), callbacks=[early_stop]
)

# Snapshot the predictions now, i.e. genuinely BEFORE any fine-tuning step,
# so the before/after comparison at the end compares what its labels claim.
y_pred_before = model.predict(xtest)

# Fine-tuning sample: up to 250 files per class. Classes with fewer files
# contribute everything they have, so the sample is only balanced when every
# class holds at least 250 files.
# NOTE: the sample is drawn from the full `df`, which also contains the files
# behind xtest, so metrics measured on (xtest, ytest) after fine-tuning are
# optimistic.
# Sampling each group explicitly yields the same rows as groupby(...).apply()
# without the pandas deprecation warning about grouping columns.
fine_tune_df = pd.concat(
    [group.sample(n=min(250, len(group)), random_state=42) for _, group in df.groupby('label')]
)

fine_tune_features, fine_tune_labels = [], []

for idx, row in tqdm(fine_tune_df.iterrows(), total=len(fine_tune_df), desc="Extracting Fine-Tune Data"):
    mfcc = extract_mfcc(row['speech'])
    if mfcc is not None:  # skip files that could not be processed
        fine_tune_features.append(mfcc)
        fine_tune_labels.append(row['label'])

# Stack to (samples, n_mfcc, frames) and add the trailing channel axis for Conv2D.
fine_tune_features = np.array(fine_tune_features)[..., np.newaxis]
fine_tune_labels_encoded = to_categorical(le.transform(fine_tune_labels))

# Lower learning rate for fine-tuning (re-compiling keeps the trained weights)
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

print("\n🔍 Fine-tuning the model...")
fine_tune_history = model.fit(fine_tune_features, fine_tune_labels_encoded, epochs=100, batch_size=8, validation_data=(xtest, ytest))

# Evaluate after fine-tuning
print("\n🔍 Evaluating model after fine-tuning...")
loss, accuracy = model.evaluate(xtest, ytest)
print(f" Final Test Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}")

# Verify improvement with predictions: compare the pre-fine-tuning snapshot
# with the predictions of the fine-tuned model on the same test samples.
y_pred_after = model.predict(xtest)

print(f" Accuracy Before Fine-Tuning: {np.mean(np.argmax(y_pred_before, axis=1) == np.argmax(ytest, axis=1)):.4f}")
print(f" Accuracy After Fine-Tuning: {np.mean(np.argmax(y_pred_after, axis=1) == np.argmax(ytest, axis=1)):.4f}")


✅ Dataset loaded successfully!
                                              speech label
0  /content/drive/MyDrive/AUDIO/REAL/biden-origin...  REAL
1  /content/drive/MyDrive/AUDIO/REAL/linus-origin...  REAL
2  /content/drive/MyDrive/AUDIO/REAL/margot-origi...  REAL
3  /content/drive/MyDrive/AUDIO/REAL/musk-origina...  REAL
4  /content/drive/MyDrive/AUDIO/REAL/obama-origin...  REAL

🔍 Label distribution:
label
FAKE    56
REAL     8
Name: count, dtype: int64


Extracting MFCC: 100%|██████████| 64/64 [01:41<00:00,  1.59s/it]

✅ Data successfully prepared! Shape: (51, 20, 200, 1)



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



🔍 Evaluating model before fine-tuning...
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 816ms/step - accuracy: 0.1538 - loss: 11.7094
📊 Initial Test Loss: 11.7094, Test Accuracy: 0.1538
Epoch 1/15
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 548ms/step - accuracy: 0.4540 - loss: 34.9082 - val_accuracy: 0.8462 - val_loss: 29.8704
Epoch 2/15
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 46ms/step - accuracy: 0.8946 - loss: 17.9391 - val_accuracy: 0.8462 - val_loss: 8.5287
Epoch 3/15
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step - accuracy: 0.8696 - loss: 6.2988 - val_accuracy: 0.3846 - val_loss: 1.4176
Epoch 4/15
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - accuracy: 0.5211 - loss: 1.6113 - val_accuracy: 0.8462 - val_loss: 0.8410
Epoch 5/15
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step - accuracy: 0.8509 - loss: 0.8467 - val_accuracy: 0.8462 - val_loss: 0.9259

  fine_tune_df = df.groupby('label', group_keys=False).apply(lambda x: x.sample(n=min(250, len(x)), random_state=42))  # 500 samples total
Extracting Fine-Tune Data: 100%|██████████| 64/64 [01:40<00:00,  1.57s/it]


🔍 Fine-tuning the model...
Epoch 1/100





[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 229ms/step - accuracy: 0.8999 - loss: 0.6275 - val_accuracy: 0.8462 - val_loss: 0.6636
Epoch 2/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.8808 - loss: 0.6449 - val_accuracy: 0.8462 - val_loss: 0.6292
Epoch 3/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.8798 - loss: 0.5863 - val_accuracy: 0.8462 - val_loss: 0.5969
Epoch 4/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.9277 - loss: 0.4663 - val_accuracy: 0.8462 - val_loss: 0.5682
Epoch 5/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.8831 - loss: 0.4850 - val_accuracy: 0.8462 - val_loss: 0.5358
Epoch 6/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.9126 - loss: 0.4487 - val_accuracy: 0.8462 - val_loss: 0.5130
Epoch 7/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[