In [None]:
# Experiment 3: Training and validation of a Random Search hyperparameter-optimized logistic regression classifier on ResNet-50 features, using the Celeb-DF (Version 1) dataset

from google.colab import drive
# Mounts Google Drive; the dataset folder and the saved model files used in
# this notebook are all addressed through paths under this mount point.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
import joblib

# Restores a previously saved model from Google Drive.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load files you created or otherwise trust.
# NOTE(review): `loaded_model` is not referenced anywhere else in the visible
# notebook; confirm whether this cell is still needed.
saved_model_path = '/content/drive/MyDrive/model_filename_original_resnet.pkl'
loaded_model = joblib.load(saved_model_path)

In [None]:
# Importation of Libraries
import os
import cv2
import numpy as np
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import accuracy_score, precision_score, roc_auc_score, recall_score, f1_score, confusion_matrix
from tqdm import tqdm
import joblib 
import seaborn as sns  
import matplotlib.pyplot as plt  
from sklearn.linear_model import LogisticRegression
from scipy.stats import randint

# Defines batch size
# NOTE(review): not referenced by the visible code below; kept in case other cells use it.
batch_size = 32

# Defines the base folder path to utilized dataset
base_path = '/content/drive/MyDrive/Celeb-DFF'

# Defines subfolder paths for real and fake videos
real_path_1 = os.path.join(base_path, 'Celeb-real')
fake_path = os.path.join(base_path, 'Celeb-synthesis')


def _list_mp4_files(folder):
    """Return the full paths of the '.mp4' files in `folder`, sorted by file name.

    os.listdir returns entries in arbitrary order, so the listing is sorted
    to make the seeded train/validation split below reproducible across
    runs and machines.
    """
    return [
        os.path.join(folder, name)
        for name in sorted(os.listdir(folder))
        if name.endswith('.mp4')
    ]


# File paths for real and fake videos
fake_video_files = _list_mp4_files(fake_path)
real_video_files = _list_mp4_files(real_path_1)

# Creates labels ('echt' = real video, 'df' = fake video)
real_labels = ['echt'] * len(real_video_files)
fake_labels = ['df'] * len(fake_video_files)

# Combines file paths and labels
file_paths = real_video_files + fake_video_files
labels = real_labels + fake_labels

# Splits the dataset into training (80%) and validation (20%).
# Stratifying keeps the real/fake ratio the same in both splits.
X_train, X_val, y_train, y_val = train_test_split(
    file_paths,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels,
)

# Loads the pretrained ResNet-50 model with weights from ImageNet
# (include_top=False drops the classification head)
base_model = ResNet50(weights='imagenet', include_top=False)

# Adds a global average pooling layer so each frame yields one feature vector
x = base_model.output
x = GlobalAveragePooling2D()(x)

# Creates the final feature extraction model
feature_extraction_model = Model(inputs=base_model.input, outputs=x)


def load_and_preprocess_video(file_path, label, desired_num_frames=16):
    """Read the leading frames of a video as a fixed-length RGB clip.

    Frames are read sequentially from the start of the video, resized to
    224x224 and converted from OpenCV's BGR channel order to RGB. If the
    video ends before `desired_num_frames` frames were read, the frames
    that were read are repeated cyclically until the clip is full.

    Args:
        file_path: Path of the video file to open.
        label: Label of the video; returned unchanged.
        desired_num_frames: Number of frames in the returned clip.

    Returns:
        A tuple `(frames, label)`. `frames` is an array of shape
        `(desired_num_frames, 224, 224, 3)`, or `None` when no frame
        could be read (including when `desired_num_frames` is not positive).
    """
    cap = cv2.VideoCapture(file_path)
    frames = []

    try:
        # Read until the clip is full or the stream ends. The container's
        # reported frame count is deliberately not used as a bound: it can be
        # missing or wrong, which would discard videos that decode fine.
        for _ in tqdm(range(desired_num_frames), desc=f"Processing {os.path.basename(file_path)}", unit=" frame"):
            ret, frame = cap.read()
            if not ret:
                break

            frame = cv2.resize(frame, (224, 224))
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(frame)
    finally:
        # Always free the capture handle, even if decoding or resizing raised
        cap.release()

    if not frames:
        return None, label

    clip = np.stack(frames)

    # Pad short videos by cycling through the frames that were read
    if len(clip) < desired_num_frames:
        repeats = -(-desired_num_frames // len(clip))  # ceiling division
        clip = np.tile(clip, (repeats, 1, 1, 1))[:desired_num_frames]

    return clip, label

# Employs the feature extraction model to extract features from videos
def _extract_video_features(video_paths, video_labels):
    """Extract ResNet-50 features for every video that can be decoded.

    Args:
        video_paths: Paths of the videos to process.
        video_labels: Labels aligned one-to-one with `video_paths`.

    Returns:
        A tuple `(features, kept_labels)`: one feature array per decoded
        video, and the labels of exactly those videos in the same order.
        Videos that yield no frames are skipped in both lists.
    """
    # Local import so this block does not depend on changes to the import cell
    from tensorflow.keras.applications.resnet50 import preprocess_input

    features, kept_labels = [], []
    for file_path, label in zip(video_paths, video_labels):
        frames, _ = load_and_preprocess_video(file_path, label)
        if frames is None:
            print(f"Skipping unreadable video: {file_path}")
            continue

        # The ImageNet weights expect inputs normalised by preprocess_input.
        # Features therefore differ from runs that fed raw pixel values, so
        # classifiers saved from such runs must be retrained.
        model_input = preprocess_input(np.asarray(frames, dtype=np.float32))
        features.append(feature_extraction_model.predict(model_input, verbose=0))
        kept_labels.append(label)

    return features, kept_labels


# The label lists are replaced by their filtered versions so that they stay
# aligned with the feature matrices when unreadable videos are skipped.
train_features, y_train = _extract_video_features(X_train, y_train)
val_features, y_val = _extract_video_features(X_val, y_val)

if not train_features or not val_features:
    raise ValueError("No features were extracted: no video in the training or validation split could be decoded.")

# Converts the extracted features to numpy arrays, one flattened row per video
X_train_features = np.array(train_features).reshape(len(train_features), -1)
X_val_features = np.array(val_features).reshape(len(val_features), -1)

# Defines a hyperparameter search space for Random Search.
# The space is a list of sub-spaces because not every solver supports every
# penalty; sampling solver and penalty independently would spend most of the
# search budget on combinations that fail to fit.
c_values = [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000]
max_iter_values = [100, 200, 300, 400, 500, 1000]

param_dist = [
    {
        'solver': ['lbfgs', 'newton-cg', 'sag'],
        'penalty': ['l2'],
        'C': c_values,
        'max_iter': max_iter_values,
    },
    {
        'solver': ['liblinear'],
        'penalty': ['l1', 'l2'],
        'C': c_values,
        'max_iter': max_iter_values,
    },
    {
        'solver': ['saga'],
        'penalty': ['l1', 'l2'],
        'C': c_values,
        'max_iter': max_iter_values,
    },
    {
        # elasticnet is only available with saga and requires an l1_ratio
        'solver': ['saga'],
        'penalty': ['elasticnet'],
        'l1_ratio': [0.25, 0.5, 0.75],
        'C': c_values,
        'max_iter': max_iter_values,
    },
]

# Initializes the logistic regression classifier
classifier = LogisticRegression()

# Initializes and runs RandomizedSearchCV
random_search = RandomizedSearchCV(
    classifier,
    param_distributions=param_dist,
    n_iter=50,  # Number of sampled parameter settings; adjust as required
    scoring='accuracy',
    cv=5,  # Number of cross-validation folds; adjust as necessary
    verbose=2,
    n_jobs=-1,
    random_state=42,  # Makes the sampled candidates reproducible
)

# Fits the random search on the training features
random_search.fit(X_train_features, y_train)

# Saves the optimized model to Google Drive directory
joblib.dump(random_search.best_estimator_, '/content/drive/MyDrive/model_randomsearch.pkl')

# Uses the best estimator found by the random search to predict the validation data
best_classifier = random_search.best_estimator_
y_val_pred = best_classifier.predict(X_val_features)

# Calculates accuracy
accuracy = accuracy_score(y_val, y_val_pred)

# Calculates precision ('df', the fake class, is the positive class)
precision = precision_score(y_val, y_val_pred, pos_label='df')

# Calculates ROC AUC.
# Column 1 of predict_proba belongs to classes_[1], i.e. the greater label
# ('echt'), which is also the class roc_auc_score treats as positive for
# binary string labels -- so scores and labels are consistent here.
y_val_proba = best_classifier.predict_proba(X_val_features)[:, 1]
roc_auc = roc_auc_score(y_val, y_val_proba)

# Calculates recall
recall = recall_score(y_val, y_val_pred, pos_label='df')

# Calculates F1-score
f1 = f1_score(y_val, y_val_pred, pos_label='df')

# Creates and visualizes a confusion matrix heatmap.
# The class order is passed explicitly: by default confusion_matrix sorts the
# labels ('df' before 'echt'), which would not match the tick labels below.
class_order = ['echt', 'df']
cm = confusion_matrix(y_val, y_val_pred, labels=class_order)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=class_order, yticklabels=class_order)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix Heatmap')
plt.show()

# Displays metrics
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"ROC AUC: {roc_auc:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-score: {f1:.2f}")


In [None]:


# Accuracy: 0.73
# Precision: 0.71
# ROC AUC: 0.70
# Recall: 0.89
# F1-score: 0.79