In [None]:
import numpy as np
import pickle
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor
import tensorflow as tf
from sklearn.decomposition import PCA
import os
import h5py

physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    for device in physical_devices:
        try:
            tf.config.experimental.set_memory_growth(device, True)
            tf.config.set_logical_device_configuration(
                device, [tf.config.LogicalDeviceConfiguration(memory_limit=13000)]
            )
            print("Configured GPU with a memory limit of 13,000 MB.")
        except Exception as e:
            print(f"Error configuring GPU: {e}")
else:
    print("No GPU detected, running on CPU.")

try:
    tf.keras.mixed_precision.set_global_policy('mixed_float16')
    print("Mixed precision enabled for speedup.")
except ValueError:
    print("Mixed precision not supported, running with default precision.")

features_folder = 'drive/MyDrive/final_dataset/features/'
fused_features_path = 'drive/MyDrive/final_dataset/features/fused/fused_features.h5'
os.makedirs(os.path.dirname(fused_features_path), exist_ok=True)

feature_files = {
    'spatial': ['spatial_features_fake.pkl', 'spatial_features_real.pkl'],
    'temporal': ['temporal_features_fake.pkl', 'temporal_features_real.pkl'],
    'landmarks': ['landmarks_fake.pkl', 'landmarks_real.pkl']
}

def load_features(file_path):
    with open(file_path, 'rb') as f:
        data = pickle.load(f)
        if isinstance(data, list):
            data = [np.array(item) for item in data]
            if all(item.shape == data[0].shape for item in data):
                return np.stack(data, axis=0)
            else:
                print(f"Irregular shapes detected in {file_path}, returning as a list of arrays.")
                return data
        return np.array(data)

def align_features(features):
    
    if isinstance(features[0], list):
        features = [item for sublist in features for item in sublist]

    max_shape = np.max([feature.shape for feature in features], axis=0)

    for i, feature in enumerate(features):
        if any(cur_dim > max_dim for cur_dim, max_dim in zip(feature.shape, max_shape)):
            raise ValueError(f"Feature {i} has dimensions {feature.shape} that exceed max_shape {max_shape}.")

    aligned_features = []
    for i, feature in enumerate(features):
    
        if feature.ndim == 1:
            feature = feature.reshape(-1, 1)

        if feature.ndim > 2:
            feature = feature.reshape(feature.shape[0], -1)

        pad_width = [(0, max(0, max_dim - cur_dim)) for cur_dim, max_dim in zip(feature.shape, max_shape)]
        aligned_feature = np.pad(feature, pad_width, mode='constant')
        aligned_features.append(aligned_feature)

        print(f"Feature {i+1}/{len(features)} aligned. Original shape: {feature.shape}, "
              f"Aligned shape: {aligned_feature.shape}")

    return np.stack(aligned_features, axis=0)

def process_features_in_parallel(file_paths):
    features = []

    def process_file(file_path):
        return load_features(file_path)

    with ThreadPoolExecutor() as executor:
        futures = [executor.submit(process_file, path) for path in file_paths]
        for future in tqdm(futures, desc="Loading features", unit="file"):
            features.append(future.result())

    return features

def apply_pca(features, variance_threshold=0.95):
    """
    Applies PCA to reduce dimensionality of the features while retaining the specified variance.

    Args:
        features (numpy.ndarray): The input features to reduce.
        variance_threshold (float): The minimum variance to retain (default: 95%).

    Returns:
        numpy.ndarray: PCA-transformed features.
    """
    original_shape = features.shape
    reshaped_features = features.reshape(features.shape[0], -1)  # Flatten dimensions except the batch size

    print(f"Applying PCA on features with original shape {original_shape}...")
    pca = PCA(n_components=variance_threshold)
    reduced_features = pca.fit_transform(reshaped_features)

    print(f"PCA reduced dimensions from {reshaped_features.shape[1]} to {reduced_features.shape[1]} "
          f"(explained variance: {pca.explained_variance_ratio_.sum():.2f})")
    return reduced_features

print("Processing and fusing features...")
all_fake_features = []
all_real_features = []

for feature_type, files in feature_files.items():
    print(f"Processing {feature_type} features...")

    fake_features = process_features_in_parallel([features_folder + files[0]])[0]
    real_features = process_features_in_parallel([features_folder + files[1]])[0]

    print(f"Aligning {feature_type} features...")
    aligned_fake = align_features(fake_features if isinstance(fake_features, list) else [fake_features])
    aligned_real = align_features(real_features if isinstance(real_features, list) else [real_features])

    print(f"Shapes after alignment: Fake: {np.shape(aligned_fake)}, Real: {np.shape(aligned_real)}")

    all_fake_features.append(aligned_fake)
    all_real_features.append(aligned_real)

print("Fusing all features...")

try:
    fused_fake_features = np.concatenate([f for f in all_fake_features if f.shape[0] == 967], axis=-1)
    fused_real_features = np.concatenate([f for f in all_real_features if f.shape[0] == 967], axis=-1)
except ValueError as e:
    print(f"Error during feature fusion: {e}")
    raise ValueError("Ensure all features have consistent dimensions before fusion.") from e

print("Applying PCA for dimensionality reduction...")
try:
    pca_fake_features = apply_pca(fused_fake_features)
    pca_real_features = apply_pca(fused_real_features)
except Exception as e:
    print(f"Error during PCA: {e}")
    raise RuntimeError("PCA dimensionality reduction failed.") from e

print("Saving PCA-reduced fused features...")
with h5py.File(fused_features_path, 'w') as h5f:
    h5f.create_dataset('pca_fake_features', data=pca_fake_features)
    h5f.create_dataset('pca_real_features', data=pca_real_features)

print(f"PCA-reduced features successfully saved to {fused_features_path}.")


Error configuring GPU: Cannot set memory growth on device when virtual devices configured
Mixed precision enabled for speedup.
Processing and fusing features...
Processing spatial features...


Loading features: 100%|██████████| 1/1 [00:00<00:00, 10.94file/s]


Irregular shapes detected in drive/MyDrive/final_dataset/features/spatial_features_fake.pkl, returning as a list of arrays.


Loading features: 100%|██████████| 1/1 [00:00<00:00,  3.99file/s]

Irregular shapes detected in drive/MyDrive/final_dataset/features/spatial_features_real.pkl, returning as a list of arrays.
Aligning spatial features...
Feature 1/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 2/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 3/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 4/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 5/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 6/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 7/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 8/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 9/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 10/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 11/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280




Feature 622/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 623/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 624/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 625/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 626/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 627/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 628/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 629/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 630/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 631/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 632/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 633/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 634/967 aligned. Original shape: (16, 1280),

Loading features: 100%|██████████| 1/1 [00:00<00:00, 193.73file/s]
Loading features: 100%|██████████| 1/1 [00:00<00:00, 236.58file/s]


Aligning temporal features...
Feature 1/1 aligned. Original shape: (967, 128), Aligned shape: (967, 128)
Feature 1/1 aligned. Original shape: (967, 128), Aligned shape: (967, 128)
Shapes after alignment: Fake: (1, 967, 128), Real: (1, 967, 128)
Processing landmarks features...


Loading features: 100%|██████████| 1/1 [00:00<00:00,  6.03file/s]
Loading features: 100%|██████████| 1/1 [00:00<00:00,  6.27file/s]


Aligning landmarks features...
Feature 1/1 aligned. Original shape: (967, 6528), Aligned shape: (967, 6528)
Feature 1/1 aligned. Original shape: (967, 6528), Aligned shape: (967, 6528)
Shapes after alignment: Fake: (1, 967, 6528), Real: (1, 967, 6528)
Fusing all features...
Applying PCA for dimensionality reduction...
Applying PCA on features with original shape (967, 16, 1280)...
PCA reduced dimensions from 20480 to 4 (explained variance: 0.96)
Applying PCA on features with original shape (967, 48, 1280)...
PCA reduced dimensions from 61440 to 4 (explained variance: 0.96)
Saving PCA-reduced fused features...
PCA-reduced features successfully saved to drive/MyDrive/final_dataset/features/fused/fused_features.h5.


In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import numpy as np
import pickle
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor
import tensorflow as tf
import os
import h5py

physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    for device in physical_devices:
        try:
            tf.config.experimental.set_memory_growth(device, True)
            tf.config.set_logical_device_configuration(
                device, [tf.config.LogicalDeviceConfiguration(memory_limit=13000)]
            )
            print("Configured GPU with a memory limit of 13,000 MB.")
        except Exception as e:
            print(f"Error configuring GPU: {e}")
else:
    print("No GPU detected, running on CPU.")

try:
    tf.keras.mixed_precision.set_global_policy('mixed_float16')
    print("Mixed precision enabled for speedup.")
except ValueError:
    print("Mixed precision not supported, running with default precision.")

features_folder = 'drive/MyDrive/final_dataset/features/'
fused_features_path = 'drive/MyDrive/final_dataset/features/fused/fused_features_2.h5'
os.makedirs(os.path.dirname(fused_features_path), exist_ok=True)

feature_files = {
    'spatial': ['spatial_features_fake.pkl', 'spatial_features_real.pkl'],
    'temporal': ['temporal_features_fake.pkl', 'temporal_features_real.pkl'],
    'landmarks': ['landmarks_fake_1.pkl', 'landmarks_real_1.pkl']
}

def load_features(file_path):
    with open(file_path, 'rb') as f:
        data = pickle.load(f)
        if isinstance(data, list):
            data = [np.array(item) for item in data]
            if all(item.shape == data[0].shape for item in data):
                return np.stack(data, axis=0)
            else:
                print(f"Irregular shapes detected in {file_path}, returning as a list of arrays.")
                return data
        return np.array(data)

def align_features(features):
    if isinstance(features[0], list):
        features = [item for sublist in features for item in sublist]

    max_shape = np.max([feature.shape for feature in features], axis=0)

    for i, feature in enumerate(features):
        if any(cur_dim > max_dim for cur_dim, max_dim in zip(feature.shape, max_shape)):
            raise ValueError(f"Feature {i} has dimensions {feature.shape} that exceed max_shape {max_shape}.")

    aligned_features = []
    for i, feature in enumerate(features):
        if feature.ndim == 1:
            feature = feature.reshape(-1, 1)

        if feature.ndim > 2:
            feature = feature.reshape(feature.shape[0], -1)

        pad_width = [(0, max(0, max_dim - cur_dim)) for cur_dim, max_dim in zip(feature.shape, max_shape)]
        aligned_feature = np.pad(feature, pad_width, mode='constant')
        aligned_features.append(aligned_feature)

        print(f"Feature {i+1}/{len(features)} aligned. Original shape: {feature.shape}, "
              f"Aligned shape: {aligned_feature.shape}")

    return np.stack(aligned_features, axis=0)

def process_features_in_parallel(file_paths):
    features = []

    def process_file(file_path):
        return load_features(file_path)

    with ThreadPoolExecutor() as executor:
        futures = [executor.submit(process_file, path) for path in file_paths]
        for future in tqdm(futures, desc="Loading features", unit="file"):
            features.append(future.result())

    return features

print("Processing and fusing features...")
all_fake_features = []
all_real_features = []

for feature_type, files in feature_files.items():
    print(f"Processing {feature_type} features...")

    fake_features = process_features_in_parallel([features_folder + files[0]])[0]
    real_features = process_features_in_parallel([features_folder + files[1]])[0]

    print(f"Aligning {feature_type} features...")
    aligned_fake = align_features(fake_features if isinstance(fake_features, list) else [fake_features])
    aligned_real = align_features(real_features if isinstance(real_features, list) else [real_features])

    print(f"Shapes after alignment: Fake: {np.shape(aligned_fake)}, Real: {np.shape(aligned_real)}")

    all_fake_features.append(aligned_fake)
    all_real_features.append(aligned_real)

print("Fusing all features...")

try:
    fused_fake_features = np.concatenate([f for f in all_fake_features if f.shape[0] == 967], axis=-1)
    fused_real_features = np.concatenate([f for f in all_real_features if f.shape[0] == 967], axis=-1)
except ValueError as e:
    print(f"Error during feature fusion: {e}")
    raise ValueError("Ensure all features have consistent dimensions before fusion.") from e

print("Saving fused features...")
with h5py.File(fused_features_path, 'w') as h5f:
    h5f.create_dataset('fused_fake_features', data=fused_fake_features)
    h5f.create_dataset('fused_real_features', data=fused_real_features)

print(f"Fused features successfully saved to {fused_features_path}.")


Configured GPU with a memory limit of 13,000 MB.
Mixed precision enabled for speedup.
Processing and fusing features...
Processing spatial features...


Loading features: 100%|██████████| 1/1 [00:04<00:00,  4.19s/file]


Irregular shapes detected in drive/MyDrive/final_dataset/features/spatial_features_fake.pkl, returning as a list of arrays.


Loading features: 100%|██████████| 1/1 [00:11<00:00, 11.95s/file]

Irregular shapes detected in drive/MyDrive/final_dataset/features/spatial_features_real.pkl, returning as a list of arrays.
Aligning spatial features...
Feature 1/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 2/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 3/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 4/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 5/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 6/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 7/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 8/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 9/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 10/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 11/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280




Feature 603/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 604/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 605/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 606/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 607/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 608/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 609/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 610/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 611/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 612/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 613/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 614/967 aligned. Original shape: (16, 1280), Aligned shape: (16, 1280)
Feature 615/967 aligned. Original shape: (16, 1280),

Loading features: 100%|██████████| 1/1 [00:00<00:00,  1.64file/s]
Loading features: 100%|██████████| 1/1 [00:00<00:00,  1.09file/s]


Aligning temporal features...
Feature 1/1 aligned. Original shape: (967, 128), Aligned shape: (967, 128)
Feature 1/1 aligned. Original shape: (967, 128), Aligned shape: (967, 128)
Shapes after alignment: Fake: (1, 967, 128), Real: (1, 967, 128)
Processing landmarks features...


Loading features: 100%|██████████| 1/1 [00:00<00:00,  1.78file/s]
Loading features: 100%|██████████| 1/1 [00:00<00:00,  2.00file/s]


Aligning landmarks features...
Feature 1/1 aligned. Original shape: (967, 0), Aligned shape: (967, 0)
Feature 1/1 aligned. Original shape: (967, 0), Aligned shape: (967, 0)
Shapes after alignment: Fake: (1, 967, 0), Real: (1, 967, 0)
Fusing all features...
Saving fused features...
Fused features successfully saved to drive/MyDrive/final_dataset/features/fused/fused_features_2.h5.
