In [None]:
import os
import warnings

import h5py
import numpy as np
import tensorflow as tf
from PIL import Image
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import Xception
from tensorflow.keras.models import Model
from tqdm import tqdm

# List the GPU devices TensorFlow can see. The result is not used further in
# the cells visible here.
gpus = tf.config.experimental.list_physical_devices('GPU')

# ---- Configuration -------------------------------------------------------
# Size every frame is resized to before being fed to the backbone.
INPUT_SIZE = (299, 299)
# Per-frame feature length used when reshaping extracted features.
FEATURE_DIM = 2048
# Number of frames a video folder must yield to be kept in the dataset.
NUM_FRAMES = 30
# Root folders holding one sub-folder of frames per video, split by class.
TRAIN_REAL = 'extracted_frames/train/real'
TRAIN_FAKE = 'extracted_frames/train/fake'

# ---- Feature extractor ---------------------------------------------------
# ImageNet-pretrained Xception without its classification head, followed by
# global average pooling so each image maps to a single flat vector.
base_model = Xception(
    weights='imagenet',
    include_top=False,
    input_shape=(INPUT_SIZE[0], INPUT_SIZE[1], 3),
)
x = tf.keras.layers.GlobalAveragePooling2D()(base_model.output)
feature_extractor = Model(inputs=base_model.input, outputs=x)

# The extractor is only used for inference; mark every layer non-trainable.
for layer in feature_extractor.layers:
    layer.trainable = False

In [None]:
def extract_features_from_folder(folder_path):
    """Extract one feature vector per image frame found in ``folder_path``.

    Files whose name ends in ``.jpg``, ``.png`` or ``.jpeg`` (matched
    case-insensitively) are processed in sorted filename order. Each frame is
    converted to RGB, resized to ``INPUT_SIZE``, run through Xception's
    ``preprocess_input`` and then through ``feature_extractor``.

    A frame that cannot be read or processed is skipped, and a
    ``RuntimeWarning`` naming the file and the error is emitted so the
    failure is visible instead of silently shrinking the result.

    Args:
        folder_path: Directory containing the frames of a single video.

    Returns:
        np.ndarray: One row per successfully processed frame, each row being
        the squeezed output of ``feature_extractor.predict`` for that frame.
        When no frame was processed the result is an empty array of shape
        ``(0,)``.
    """
    image_suffixes = ('.jpg', '.png', '.jpeg')
    # Lower-case the name before matching so frames saved as e.g. "0001.JPG"
    # are not silently ignored.
    frame_files = sorted(
        name for name in os.listdir(folder_path)
        if name.lower().endswith(image_suffixes)
    )

    features = []
    for frame_file in frame_files:
        frame_path = os.path.join(folder_path, frame_file)
        try:
            # The context manager releases the file handle even when decoding
            # fails part-way through.
            with Image.open(frame_path) as raw_image:
                image = raw_image.convert("RGB").resize(INPUT_SIZE)

            # Add a leading batch axis: the model expects (batch, H, W, 3).
            image_array = np.array(image).astype('float32')
            image_array = np.expand_dims(image_array, axis=0)

            processed_image = tf.keras.applications.xception.preprocess_input(image_array)
            feature = feature_extractor.predict(processed_image, verbose=0).squeeze()
        except Exception as exc:
            # Best-effort: keep going with the remaining frames, but report
            # the failure so a dropped frame can be traced back to its file.
            warnings.warn(
                f"Skipping frame {frame_path!r}: {exc!r}",
                RuntimeWarning,
                stacklevel=2,
            )
            continue

        features.append(feature)

    return np.array(features)

In [None]:
def load_dataset_features(root_dir, label, num_frames=NUM_FRAMES, feature_dim=FEATURE_DIM):
    """Build per-video feature samples for every video folder under ``root_dir``.

    Each sub-directory of ``root_dir`` is treated as one video. Its frames are
    passed to ``extract_features_from_folder``; the video is kept only when
    exactly ``num_frames`` feature rows come back, in which case they are
    reshaped to ``(num_frames, 1, 1, feature_dim)``.

    Args:
        root_dir: Directory containing one sub-folder of frames per video.
        label: Label appended to ``y`` for every kept video.
        num_frames: Exact number of frames a video must yield to be kept.
        feature_dim: Length of a single frame's feature vector.

    Returns:
        tuple[list, list]: ``(X, y)`` where ``X`` holds one array of shape
        ``(num_frames, 1, 1, feature_dim)`` per kept video and ``y`` holds
        ``label`` once per kept video.

    Warns:
        RuntimeWarning: Once per call, summarising the video folders that were
        skipped because their frame count differed from ``num_frames``.
    """
    X, y = [], []
    skipped = []  # (folder name, frame count) for every rejected video

    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # sample order, and therefore any seeded split made from it, reproducible.
    for video_folder in tqdm(sorted(os.listdir(root_dir))):
        video_path = os.path.join(root_dir, video_folder)
        if not os.path.isdir(video_path):
            continue

        features = extract_features_from_folder(video_path)

        if features.shape[0] != num_frames:
            skipped.append((video_folder, features.shape[0]))
            continue

        X.append(features.reshape(num_frames, 1, 1, feature_dim))
        y.append(label)

    if skipped:
        # One summary warning instead of silently dropping data.
        preview = ", ".join(f"{name} ({count} frames)" for name, count in skipped[:5])
        suffix = ", ..." if len(skipped) > 5 else ""
        warnings.warn(
            f"{len(skipped)} video folder(s) under {root_dir!r} were skipped because "
            f"they did not yield exactly {num_frames} frames: {preview}{suffix}",
            RuntimeWarning,
            stacklevel=2,
        )

    return X, y

In [None]:
# Extract features for both classes: label 0 for the real folder, 1 for fake.
X_real_train, y_real_train = load_dataset_features(TRAIN_REAL, 0)
X_fake_train, y_fake_train = load_dataset_features(TRAIN_FAKE, 1)

# Merge the per-class lists (real samples first, then fake) into arrays.
X_train_combined = np.array([*X_real_train, *X_fake_train])
y_train_combined = np.array([*y_real_train, *y_fake_train])

# Validation placeholders; they are left empty by this cell.
X_real_val, y_real_val = [], []
X_fake_val, y_fake_val = [], []

# Hold out 20% of the samples for validation, stratified on the label and
# seeded so the split is repeatable for a given sample order.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_combined,
    y_train_combined,
    test_size=0.2,
    stratify=y_train_combined,
    random_state=42,
)

In [None]:
# Persist the train/validation split to a single HDF5 file.
save_path = "dataset.h5"

# os.path.dirname() returns "" for a bare filename, and os.makedirs("")
# raises FileNotFoundError, so only create the parent directory when the
# path actually has one.
save_dir = os.path.dirname(save_path)
if save_dir:
    os.makedirs(save_dir, exist_ok=True)

# Mode "w" creates the file, truncating it if it already exists.
with h5py.File(save_path, "w") as f:
    f.create_dataset("X_train", data=X_train)
    f.create_dataset("X_val", data=X_val)
    f.create_dataset("y_train", data=y_train)
    f.create_dataset("y_val", data=y_val)