In [None]:
# EXPERIMENT 7

from google.colab import drive

import joblib

# Locations on the mounted Google Drive used by this cell.
DRIVE_MOUNT_POINT = '/content/drive'
MODEL_PATH = '/content/drive/MyDrive/model_randomsearch.pkl'

# Make the Drive contents available under DRIVE_MOUNT_POINT.
drive.mount(DRIVE_MOUNT_POINT)

# Load the optimized model.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load model files that come from a trusted source.
loaded_model = joblib.load(MODEL_PATH)

import os
import cv2
import numpy as np
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.models import Model
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, precision_score, roc_auc_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import train_test_split
from tqdm import tqdm


# Batch size setting (not referenced by the rest of this section).
batch_size = 32

# Root folder of the new dataset and its two class sub-folders.
base_path_new = '/content/drive/MyDrive/Celeb-DF-v2'
real_path_new = os.path.join(base_path_new, 'Celeb-real')
fake_path_new = os.path.join(base_path_new, 'Celeb-synthesis')


def _list_mp4_files(directory):
    """Return the paths of all '.mp4' entries of *directory*, in os.listdir order."""
    return [
        os.path.join(directory, entry)
        for entry in os.listdir(directory)
        if entry.endswith('.mp4')
    ]


fake_video_files_new = _list_mp4_files(fake_path_new)
real_video_files_new = _list_mp4_files(real_path_new)

# One label per video: 'echt' for files from Celeb-real, 'df' for files from
# Celeb-synthesis.
real_labels_new = ['echt' for _ in real_video_files_new]
fake_labels_new = ['df' for _ in fake_video_files_new]

# Real videos first, then fake ones; labels follow the same order.
file_paths_new = [*real_video_files_new, *fake_video_files_new]
labels_new = [*real_labels_new, *fake_labels_new]

# ResNet50 with ImageNet weights and without its top layers, followed by
# global average pooling, serves as the per-frame feature extractor.
base_model = ResNet50(weights='imagenet', include_top=False)
x = GlobalAveragePooling2D()(base_model.output)
feature_extraction_model = Model(inputs=base_model.input, outputs=x)


def load_and_preprocess_video_new(file_path, label, desired_num_frames=16):
    """Read the leading frames of a video as a fixed-length RGB clip.

    At most ``desired_num_frames`` frames are read from the start of the
    video. Each frame is resized to 224x224 and converted from BGR to RGB.
    If fewer frames could be read, the sequence that was read is repeated
    cyclically until the clip holds exactly ``desired_num_frames`` frames.

    Args:
        file_path: Path of the video file to open with OpenCV.
        label: Value returned unchanged as the second tuple element.
        desired_num_frames: Number of frames in the returned clip.

    Returns:
        A tuple ``(frames, label)``. ``frames`` is an array with
        ``desired_num_frames`` entries along its first axis (one 224x224 RGB
        frame each), or ``None`` when no frame could be read.
    """
    cap = cv2.VideoCapture(file_path)
    frames = []

    try:
        # The reported frame count bounds the loop; a video that cannot be
        # opened reports no frames and therefore yields an empty list.
        num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        for _ in tqdm(range(min(num_frames, desired_num_frames)), desc=f"Processing {os.path.basename(file_path)}", unit=" frame"):
            ret, frame = cap.read()
            if not ret:
                break

            frame = cv2.resize(frame, (224, 224))
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    finally:
        # Release the capture handle even if decoding or resizing raised.
        cap.release()

    if not frames:
        return None, label

    clip = np.stack(frames)
    if len(frames) < desired_num_frames:
        # Pad short videos by cycling through the frames that were read:
        # f0, f1, ..., fn-1, f0, f1, ... until the clip is full.
        clip = clip[np.arange(desired_num_frames) % len(frames)]

    return clip, label

# Use the feature extraction model to extract features from videos in the new dataset.
# The label of every successfully processed video is collected alongside its
# features, so a video that yields no frames is dropped from BOTH lists and
# the ground truth stays aligned with the predictions.
new_dataset_features = []
evaluated_labels_new = []

for file_path, label in zip(file_paths_new, labels_new):
    frames, kept_label = load_and_preprocess_video_new(file_path, label)
    if frames is None:
        # No frame could be read from this file; report it instead of
        # silently dropping it.
        print(f"Skipping video without readable frames: {file_path}")
        continue
    new_dataset_features.append(feature_extraction_model.predict(frames))
    evaluated_labels_new.append(kept_label)

if not new_dataset_features:
    raise ValueError("No video in the new dataset yielded any frames; nothing to evaluate.")

# Flatten the per-frame features of each video into one row per video.
X_new_dataset_features = np.array(new_dataset_features).reshape(len(new_dataset_features), -1)

# Predictions on the new dataset using the loaded model
y_new_dataset_pred = loaded_model.predict(X_new_dataset_features)

# Calculate accuracy on the new dataset
accuracy_test = accuracy_score(evaluated_labels_new, y_new_dataset_pred)

# Calculate precision on the new dataset ('df' is the positive class)
precision_test = precision_score(evaluated_labels_new, y_new_dataset_pred, pos_label='df')

# Calculate ROC AUC on the new dataset.
# For binary targets roc_auc_score expects the score of the class with the
# greater label, which is column 1 of predict_proba (estimator.classes_[1]).
y_new_dataset_proba = loaded_model.predict_proba(X_new_dataset_features)[:, 1]
roc_auc_test = roc_auc_score(evaluated_labels_new, y_new_dataset_proba)

# Calculate recall on the new dataset
recall_test = recall_score(evaluated_labels_new, y_new_dataset_pred, pos_label='df')

# Calculate F1-score on the new dataset
f1_test = f1_score(evaluated_labels_new, y_new_dataset_pred, pos_label='df')

# Create and visualize a confusion matrix heatmap for the new dataset.
# The class order is passed explicitly so the matrix rows/columns match the
# tick labels (without `labels=` the classes are sorted, i.e. 'df' first).
class_order = ['echt', 'df']
cm_test = confusion_matrix(evaluated_labels_new, y_new_dataset_pred, labels=class_order)
plt.figure(figsize=(8, 6))
sns.heatmap(cm_test, annot=True, fmt="d", cmap="Blues", xticklabels=class_order, yticklabels=class_order)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix Heatmap for the New Dataset')
plt.show()

# Display results on the new dataset
print(f"Accuracy on the New Dataset: {accuracy_test:.2f}")
print(f"Precision on the New Dataset: {precision_test:.2f}")
print(f"ROC AUC on the New Dataset: {roc_auc_test:.2f}")
print(f"Recall on the New Dataset: {recall_test:.2f}")
print(f"F1-score on the New Dataset: {f1_test:.2f}")
