In [None]:
# Experiment 1

# Mount Google Drive at /content/drive (requires the Colab runtime, which
# provides the google.colab package).
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os

import cv2
import joblib
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, roc_auc_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tqdm import tqdm

# --- Configuration -----------------------------------------------------------
batch_size = 32

base_path = '/content/drive/MyDrive/Celeb-DFF'

real_path_1 = os.path.join(base_path, 'Celeb-real')
fake_path = os.path.join(base_path, 'Celeb-synthesis')

# --- Collect video files -----------------------------------------------------
# os.listdir() returns entries in arbitrary order. Sorting the names makes the
# file list deterministic, which is what lets random_state=42 below actually
# reproduce the same train/validation split on every run.
fake_video_files = [os.path.join(fake_path, file) for file in sorted(os.listdir(fake_path)) if file.endswith('.mp4')]
real_video_files = [os.path.join(real_path_1, file) for file in sorted(os.listdir(real_path_1)) if file.endswith('.mp4')]

# 'echt' marks real videos, 'df' marks synthesized (deepfake) videos.
real_labels = ['echt'] * len(real_video_files)
fake_labels = ['df'] * len(fake_video_files)

file_paths = real_video_files + fake_video_files
labels = real_labels + fake_labels

# --- Train / validation split (70 % / 30 %) ----------------------------------
X_train, X_val, y_train, y_val = train_test_split(file_paths, labels, test_size=0.3, random_state=42)

# --- Feature extractor -------------------------------------------------------
# ImageNet-pretrained ResNet50 without its classification head; global average
# pooling collapses the spatial feature map into one vector per frame.
base_model = ResNet50(weights='imagenet', include_top=False)

x = base_model.output
x = GlobalAveragePooling2D()(x)

feature_extraction_model = Model(inputs=base_model.input, outputs=x)

def load_and_preprocess_video(file_path, label, desired_num_frames=16):
    """Decode the leading frames of a video into a fixed-length RGB stack.

    Frames are read sequentially from the start of the file, resized to
    224x224 and converted from OpenCV's BGR channel order to RGB. When the
    video yields fewer than ``desired_num_frames`` frames, the decoded frames
    are repeated cyclically (in order) until the stack is full.

    Args:
        file_path: Path of the video file to open with ``cv2.VideoCapture``.
        label: Label associated with the video; passed through unchanged.
        desired_num_frames: Length of the returned frame stack.

    Returns:
        A ``(frames, label)`` tuple. ``frames`` is an array of shape
        ``(desired_num_frames, 224, 224, 3)``, or ``None`` when no frame
        could be decoded (e.g. missing, unreadable or empty file).
    """
    cap = cv2.VideoCapture(file_path)
    frames = []

    try:
        # The frame count reported by the container may be missing or
        # inaccurate, so the end of the stream is detected by cap.read()
        # failing rather than by trusting CAP_PROP_FRAME_COUNT.
        for _ in tqdm(range(desired_num_frames), desc=f"Processing {os.path.basename(file_path)}", unit=" frame"):
            ret, frame = cap.read()
            if not ret:
                break

            frame = cv2.resize(frame, (224, 224))
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    finally:
        # Release the decoder handle even if resizing/conversion raises.
        cap.release()

    if not frames:
        return None, label

    frames = np.stack(frames)
    if len(frames) < desired_num_frames:
        # Pad short videos by cycling through the decoded frames in order.
        frames = frames[np.arange(desired_num_frames) % len(frames)]

    return frames, label

def extract_video_features(file_paths, labels):
    """Extract ResNet50 features for every video that can be decoded.

    Args:
        file_paths: Iterable of video file paths.
        labels: Iterable of labels, parallel to ``file_paths``.

    Returns:
        A ``(features, kept_labels)`` tuple of equal-length lists.
        ``features[i]`` is the feature array predicted for one video and
        ``kept_labels[i]`` is that video's label. Videos for which no frame
        could be decoded are dropped from BOTH lists, so features and labels
        always stay aligned.
    """
    features = []
    kept_labels = []
    for file_path, label in zip(file_paths, labels):
        frames, _ = load_and_preprocess_video(file_path, label)
        if frames is None:
            print(f"Skipping unreadable video: {file_path}")
            continue

        # The ImageNet ResNet50 weights expect inputs passed through
        # preprocess_input rather than raw 0-255 RGB pixels. astype() makes a
        # float copy so the decoded frame stack itself is left untouched.
        batch = preprocess_input(frames.astype(np.float32))
        features.append(feature_extraction_model.predict(batch, verbose=0))
        kept_labels.append(label)
    return features, kept_labels


# --- Feature extraction ------------------------------------------------------
# y_train / y_val are left untouched; the *_used lists hold only the labels of
# videos that actually produced features.
train_features, y_train_used = extract_video_features(X_train, y_train)
val_features, y_val_used = extract_video_features(X_val, y_val)

if not train_features or not val_features:
    raise RuntimeError("No readable videos in the training or validation split.")

# Flatten the per-frame features of each video into a single row vector.
X_train_features = np.array(train_features).reshape(len(train_features), -1)
X_val_features = np.array(val_features).reshape(len(val_features), -1)

# --- Train and persist the classifier ----------------------------------------
classifier = LogisticRegression()
classifier.fit(X_train_features, y_train_used)

joblib.dump(classifier, '/content/drive/MyDrive/model_filename_original_resnet.pkl')

# --- Validation metrics ('df' is the positive class) -------------------------
y_val_pred = classifier.predict(X_val_features)

accuracy = accuracy_score(y_val_used, y_val_pred)
precision = precision_score(y_val_used, y_val_pred, pos_label='df')
recall = recall_score(y_val_used, y_val_pred, pos_label='df')
f1 = f1_score(y_val_used, y_val_pred, pos_label='df')

# Column 1 of predict_proba is the probability of classifier.classes_[1], the
# greater label, which is also the class roc_auc_score treats as positive for
# binary targets.
y_val_proba = classifier.predict_proba(X_val_features)[:, 1]
roc_auc = roc_auc_score(y_val_used, y_val_proba)

print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"ROC AUC: {roc_auc:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-score: {f1:.2f}")

# --- Confusion matrix --------------------------------------------------------
# Pass the class order explicitly: by default confusion_matrix sorts the labels
# (['df', 'echt']), which would not match the tick labels drawn on the heatmap.
class_names = ['echt', 'df']
cm = confusion_matrix(y_val_used, y_val_pred, labels=class_names)

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=class_names, yticklabels=class_names, ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix Heatmap')
plt.show()
