In [None]:
# Mount Google Drive (Colab only) at /content/drive so the Drive paths used below resolve
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import joblib

# Load the saved model
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code - only load files you trust.
# NOTE(review): `loaded_model` is not referenced in any of the visible cells - confirm this cell is still needed.
loaded_model = joblib.load('/content/drive/MyDrive/model_filename_original_resnet.pkl')


In [None]:
!pip install scikit-optimize


In [None]:
#libraries
import os

import cv2
import joblib  # Import joblib for model saving
import matplotlib.pyplot as plt  # Import matplotlib for visualization
import numpy as np
import seaborn as sns  # Import seaborn for visualization
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
    roc_auc_score,
)
from sklearn.model_selection import train_test_split
from skopt import BayesSearchCV
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tqdm import tqdm

#batch size
# NOTE(review): batch_size is not referenced in any of the visible cells - confirm it is still needed.
batch_size = 32

#base folder path to your dataset
base_path = '/content/drive/MyDrive/Celeb-DFF'

# subfolder paths for real and fake videos
real_path_1 = os.path.join(base_path, 'Celeb-real')
fake_path = os.path.join(base_path, 'Celeb-synthesis')

#file paths for real and fake videos
# os.listdir returns entries in arbitrary order; sorting the listing keeps the
# seeded split below identical from one run (and one filesystem) to the next.
fake_video_files = [os.path.join(fake_path, file) for file in sorted(os.listdir(fake_path)) if file.endswith('.mp4')]
real_video_files = [os.path.join(real_path_1, file) for file in sorted(os.listdir(real_path_1)) if file.endswith('.mp4')]

#labels: 'echt' for the real videos, 'df' for the fake ones
real_labels = ['echt'] * len(real_video_files)
fake_labels = ['df'] * len(fake_video_files)


file_paths = real_video_files + fake_video_files
labels = real_labels + fake_labels

#dataset is split into training (80%) and validation (20%)
# stratify=labels keeps the real/fake ratio the same in both partitions.
X_train, X_val, y_train, y_val = train_test_split(
    file_paths, labels, test_size=0.2, random_state=42, stratify=labels
)

# ResNet-50 backbone with ImageNet weights and without its classification head
base_model = ResNet50(include_top=False, weights='imagenet')

# Global average pooling applied on top of the backbone output
x = GlobalAveragePooling2D()(base_model.output)

# Feature extractor: backbone input -> pooled backbone output
feature_extraction_model = Model(inputs=base_model.input, outputs=x)

# Function to load and preprocess video frames with tqdm progress bars
def load_and_preprocess_video(file_path, label, desired_num_frames=16):
    """Read the leading frames of a video as a fixed-length stack of RGB images.

    Up to ``desired_num_frames`` frames are read from the start of the video,
    resized to 224x224 and converted from BGR to RGB. When fewer frames could
    be read than requested, the frames that were read are repeated cyclically
    until the stack holds ``desired_num_frames`` entries.

    Args:
        file_path: Path of the video file opened with ``cv2.VideoCapture``.
        label: Label associated with the video; returned unchanged.
        desired_num_frames: Number of frames in the returned stack.

    Returns:
        A ``(frames, label)`` tuple. ``frames`` is an array of shape
        ``(desired_num_frames, 224, 224, 3)``, or ``None`` when no frame could
        be read from the file.
    """
    cap = cv2.VideoCapture(file_path)
    frames = []

    try:
        # The reported frame count is container metadata and may be missing
        # (reported as 0 or negative); in that case fall back to attempting
        # the requested number of reads and stop at the first failed read.
        reported_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if reported_frames > 0:
            max_reads = min(reported_frames, desired_num_frames)
        else:
            max_reads = desired_num_frames
        max_reads = max(max_reads, 0)

        # leave=False removes each finished bar so that processing many videos
        # does not flood the cell output with one bar per file.
        with tqdm(total=max_reads, desc=f"Processing {os.path.basename(file_path)}", unit=" frame", leave=False) as progress:
            for _ in range(max_reads):
                ret, frame = cap.read()
                if not ret:
                    break

                # Resize the frame to match ResNet-50 input size
                frame = cv2.resize(frame, (224, 224))
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # Convert to RGB

                frames.append(frame)
                progress.update(1)
    finally:
        # Release the capture even when resizing/conversion raises.
        cap.release()

    if not frames:
        return None, label

    if len(frames) < desired_num_frames:
        # Repeat the available frames in order until the requested length is reached.
        repeats = -(-desired_num_frames // len(frames))  # ceiling division
        frames = (frames * repeats)[:desired_num_frames]

    # Stack the individual (224, 224, 3) frames along a new leading axis.
    return np.stack(frames), label


def _extract_video_features(video_paths, video_labels):
    """Extract ResNet-50 features for every video that can be read.

    Videos for which ``load_and_preprocess_video`` returns no frames are
    skipped, and their path and label are dropped with them so that the
    returned lists always stay aligned element by element.

    Args:
        video_paths: Paths of the video files to process.
        video_labels: Labels matching ``video_paths`` one to one.

    Returns:
        A ``(features, kept_paths, kept_labels)`` tuple of equally long lists;
        each entry of ``features`` is the feature-extractor output for the
        frames of one video.
    """
    features, kept_paths, kept_labels = [], [], []
    for file_path, label in zip(video_paths, video_labels):
        frames, _ = load_and_preprocess_video(file_path, label)
        if frames is None:
            print(f"Skipping unreadable video: {file_path}")
            continue

        # Keras' ResNet50 expects inputs passed through its own preprocess_input;
        # astype() makes a float copy, so the loaded frames are left untouched.
        model_input = preprocess_input(frames.astype(np.float32))
        # verbose=0: avoid one Keras progress line per video.
        features.append(feature_extraction_model.predict(model_input, verbose=0))
        kept_paths.append(file_path)
        kept_labels.append(label)
    return features, kept_paths, kept_labels


# Paths and labels are rebound to the videos that were actually processed, so
# the labels used for fitting/evaluation line up with the feature rows.
train_features, X_train, y_train = _extract_video_features(X_train, y_train)
val_features, X_val, y_val = _extract_video_features(X_val, y_val)

if not train_features or not val_features:
    raise RuntimeError("No features were extracted; check that the video files are readable.")

# extracted features converted to numpy arrays (one flattened row per video)
X_train_features = np.array(train_features).reshape(len(train_features), -1)
X_val_features = np.array(val_features).reshape(len(val_features), -1)

# parameter space for Bayesian Optimization
param_space = {
    'C': (0.001, 100.0, 'log-uniform'),  # Adjust the range as needed
    'solver': ['sag', 'liblinear'],  # Use 'sag' and 'liblinear' solvers
}

# logistic regression classifier is initialized at this point
classifier = LogisticRegression(penalty='l2', max_iter=1000, random_state=42)

# BayesianSearchCV initialization with the specified parameter space.
# random_state seeds the search itself; without it the sampled candidates
# (and therefore the selected model) differ between runs even though the
# classifier is seeded.
bayesian_search = BayesSearchCV(
    classifier,
    param_space,
    n_iter=50,  # number of parameter settings sampled
    cv=5,  # 5-fold cross-validation per candidate
    verbose=2,
    n_jobs=-1,  # use all available cores
    random_state=42,
)


# Fit the search on the flattened training features and their labels
bayesian_search.fit(X_train_features, y_train)

# Save the optimized model to your Google Drive directory
joblib.dump(bayesian_search.best_estimator_, '/content/drive/MyDrive/baye_optimized_ResNet50_model.pkl')

# Predictions on validation data using the best estimator from Bayesian search
best_model = bayesian_search.best_estimator_
y_val_pred = best_model.predict(X_val_features)

# 'df' is used as the positive class for every class-specific metric below.
# Calculate accuracy
accuracy = accuracy_score(y_val, y_val_pred)

# Calculate precision
precision = precision_score(y_val, y_val_pred, pos_label='df')

# Calculate ROC AUC
# The probability column is looked up through classes_ instead of a hard-coded
# index, and the targets are binarised explicitly, so scores and targets both
# refer to the 'df' class.
positive_column = list(best_model.classes_).index('df')
y_val_proba = best_model.predict_proba(X_val_features)[:, positive_column]
roc_auc = roc_auc_score(np.asarray(y_val) == 'df', y_val_proba)

# Calculate recall
recall = recall_score(y_val, y_val_pred, pos_label='df')

# Calculate F1-score
f1 = f1_score(y_val, y_val_pred, pos_label='df')

# Create and visualize a confusion matrix heatmap
# The same explicit class order drives both the matrix and the tick labels;
# without `labels=` the matrix rows/columns follow sorted label order
# ('df', 'echt') and would not match the ticks.
class_order = ['echt', 'df']
cm = confusion_matrix(y_val, y_val_pred, labels=class_order)
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=class_order, yticklabels=class_order, ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix Heatmap')
plt.show()

# Display metrics
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"ROC AUC: {roc_auc:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-score: {f1:.2f}")


In [None]:
# Metrics noted from a previous run; re-run the evaluation to refresh them.
# Accuracy: 0.73
# Precision: 0.70
# ROC AUC: 0.71
# Recall: 0.91
# F1-score: 0.79