In [2]:
!pip install tensorflow mediapipe

import os
import numpy as np
import pickle
from tqdm import tqdm
import tensorflow as tf
import mediapipe as mp
import cv2

Collecting mediapipe
  Downloading mediapipe-0.10.20-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (9.7 kB)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.1-py3-none-any.whl.metadata (1.4 kB)
Downloading mediapipe-0.10.20-cp310-cp310-manylinux_2_28_x86_64.whl (35.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.6/35.6 MB[0m [31m57.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading sounddevice-0.5.1-py3-none-any.whl (32 kB)
Installing collected packages: sounddevice, mediapipe
Successfully installed mediapipe-0.10.20 sounddevice-0.5.1


In [8]:
import os
import pickle
from tqdm import tqdm
import cv2
import tensorflow as tf
import mediapipe as mp
from concurrent.futures import ThreadPoolExecutor

# GPU setup with minimal memory growth and precision
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    for device in physical_devices:
        try:
            tf.config.experimental.set_memory_growth(device, True)
            tf.config.set_logical_device_configuration(
                device, [tf.config.LogicalDeviceConfiguration(memory_limit=10000)]
            )
            print("GPU configured with a memory limit of 10000 MB.")
        except Exception as e:
            print(f"Error configuring GPU: {e}")
else:
    print("No GPU detected, running on CPU.")

try:
    tf.keras.mixed_precision.set_global_policy('mixed_float16')
    print("Mixed precision enabled for speedup.")
except ValueError:
    print("Mixed precision not supported, running with default precision.")

# Function to save progress
def save_progress(filepath, data):
    with open(filepath, 'wb') as f:
        pickle.dump(data, f)

# Function to load progress
def load_progress(filepath):
    if os.path.exists(filepath):
        try:
            with open(filepath, 'rb') as f:
                return pickle.load(f)
        except Exception as e:
            print(f"Error loading checkpoint {filepath}: {e}")
    return []

# Function to extract landmarks from a single image
def extract_landmarks_from_image(image_path):
    mp_face_mesh = mp.solutions.face_mesh
    landmarks = [(0, 0, 0)] * 468  # Default empty landmarks in case of failure

    try:
        image = cv2.imread(image_path)
        if image is None:
            print(f"Failed to read image: {image_path}")
            return image_path, landmarks

        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        with mp_face_mesh.FaceMesh(static_image_mode=True, max_num_faces=1, refine_landmarks=True) as face_mesh:
            results = face_mesh.process(image_rgb)
            if results.multi_face_landmarks:
                face_landmarks = results.multi_face_landmarks[0]
                landmarks = [(lm.x, lm.y, lm.z) for lm in face_landmarks.landmark]
    except Exception as e:
        print(f"Error processing image {image_path}: {e}")

    return image_path, landmarks

# Function to process images and save landmarks with checkpoints using multithreading
def extract_landmark_features_with_checkpoint(image_paths, checkpoint_path):
    saved_progress = load_progress(checkpoint_path)
    processed_images = {entry['image_path'] for entry in saved_progress}
    remaining_images = [img for img in image_paths if img not in processed_images]

    print(f"Total images: {len(image_paths)}, Remaining images: {len(remaining_images)}")

    def process_and_save(image_path):
        try:
            image_path, landmarks = extract_landmarks_from_image(image_path)
            saved_progress.append({'image_path': image_path, 'landmarks': landmarks})
        except Exception as e:
            print(f"Error in thread processing image {image_path}: {e}")

    chunk_size = max(1, len(remaining_images) // (8 * 4))  # Optimize chunk size for batching
    with ThreadPoolExecutor(max_workers=16) as executor:  # Increase max_workers to utilize more threads
        for i in tqdm(range(0, len(remaining_images), chunk_size), desc="Extracting Landmark Features"):
            chunk = remaining_images[i:i + chunk_size]
            executor.map(process_and_save, chunk)
            save_progress(checkpoint_path, saved_progress)  # Save after processing each chunk

    return saved_progress

if __name__ == "__main__":
    # Define paths and directories
    base_path = 'drive/MyDrive/SP_cup/features/fake/'
    os.makedirs(base_path, exist_ok=True)

    real_images_path = 'drive/MyDrive/SP_cup/fake/fake-12/'

    real_checkpoint = os.path.join(base_path, 'landmarks_fake11.pkl')

    # Collect image paths efficiently
    def get_image_paths(folder_path):
        return [
            os.path.join(root, file)
            for root, _, files in os.walk(folder_path)
            for file in files if file.lower().endswith(('.jpg', '.jpeg', '.png'))
        ]

    real_image_paths = get_image_paths(real_images_path)

    # Extract landmark features
    print("Processing real images...")
    landmark_features_real = extract_landmark_features_with_checkpoint(real_image_paths, real_checkpoint)

    print("Landmark feature extraction completed.")


Error configuring GPU: Cannot set memory growth on device when virtual devices configured
Mixed precision enabled for speedup.
Processing real images...
Total images: 7000, Remaining images: 6855


Extracting Landmark Features: 100%|██████████| 33/33 [10:49<00:00, 19.68s/it]

Landmark feature extraction completed.





In [16]:
import os
import pickle
from tqdm import tqdm
import cv2
import tensorflow as tf
import mediapipe as mp
from concurrent.futures import ProcessPoolExecutor

# GPU setup with minimal memory growth and precision
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    for device in physical_devices:
        try:
            tf.config.experimental.set_memory_growth(device, True)
            tf.config.set_logical_device_configuration(
                device, [tf.config.LogicalDeviceConfiguration(memory_limit=10000)]
            )
            print("GPU configured with a memory limit of 10000 MB.")
        except Exception as e:
            print(f"Error configuring GPU: {e}")
else:
    print("No GPU detected, running on CPU.")

try:
    tf.keras.mixed_precision.set_global_policy('mixed_float16')
    print("Mixed precision enabled for speedup.")
except ValueError:
    print("Mixed precision not supported, running with default precision.")

# Function to save progress
def save_progress(filepath, data):
    with open(filepath, 'wb') as f:
        pickle.dump(data, f)

# Function to load progress
def load_progress(filepath):
    if os.path.exists(filepath):
        try:
            with open(filepath, 'rb') as f:
                return pickle.load(f)
        except Exception as e:
            print(f"Error loading checkpoint {filepath}: {e}")
    return []

# Function to extract landmarks from a single image
def extract_landmarks_from_image(image_path):
    """Extract facial landmarks using MediaPipe from a given image."""
    mp_face_mesh = mp.solutions.face_mesh
    landmarks = [(0, 0, 0)] * 468  # Default empty landmarks in case of failure

    try:
        image = cv2.imread(image_path)
        if image is None:
            print(f"Failed to read image: {image_path}")
            return image_path, landmarks

        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        with mp_face_mesh.FaceMesh(static_image_mode=True, max_num_faces=1, refine_landmarks=True) as face_mesh:
            results = face_mesh.process(image_rgb)
            if results.multi_face_landmarks:
                face_landmarks = results.multi_face_landmarks[0]
                landmarks = [(lm.x, lm.y, lm.z) for lm in face_landmarks.landmark]
    except Exception as e:
        print(f"Error processing image {image_path}: {e}")

    return image_path, landmarks

# Wrapper function for multiprocessing
def process_image(image_path):
    return extract_landmarks_from_image(image_path)

# Function to process images and save landmarks with checkpoints using multiprocessing
def extract_landmark_features_with_checkpoint(image_paths, checkpoint_path):
    saved_progress = load_progress(checkpoint_path)
    processed_images = {entry['image_path'] for entry in saved_progress}
    remaining_images = [img for img in image_paths if img not in processed_images]

    print(f"Total images: {len(image_paths)}, Remaining images: {len(remaining_images)}")

    chunk_size = max(1, len(remaining_images) // (8 * 4))  # Optimize chunk size for batching
    with ProcessPoolExecutor(max_workers=8) as executor:  # Use process-based parallelism
        for i in tqdm(range(0, len(remaining_images), chunk_size), desc="Extracting Landmark Features"):
            chunk = remaining_images[i:i + chunk_size]
            results = list(executor.map(process_image, chunk))

            # Append results and save progress
            saved_progress.extend({'image_path': img_path, 'landmarks': lm} for img_path, lm in results)
            save_progress(checkpoint_path, saved_progress)  # Save after processing each chunk

    return saved_progress

if __name__ == "__main__":
    # Define paths and directories
    base_path = 'drive/MyDrive/SP_cup/features/fake/'
    os.makedirs(base_path, exist_ok=True)

    real_images_path = 'drive/MyDrive/SP_cup/fake/fake-17/'

    real_checkpoint = os.path.join(base_path, 'landmarks_fake16.pkl')

    # Collect image paths efficiently
    def get_image_paths(folder_path):
        return [
            os.path.join(root, file)
            for root, _, files in os.walk(folder_path)
            for file in files if file.lower().endswith(('.jpg', '.jpeg', '.png'))
        ]

    real_image_paths = get_image_paths(real_images_path)

    # Extract landmark features
    print("Processing real images...")
    landmark_features_real = extract_landmark_features_with_checkpoint(real_image_paths, real_checkpoint)

    print("Landmark feature extraction completed.")


Error configuring GPU: Cannot set memory growth on device when virtual devices configured
Mixed precision enabled for speedup.
Processing real images...
Total images: 4714, Remaining images: 4714


Extracting Landmark Features: 100%|██████████| 33/33 [05:22<00:00,  9.78s/it]


Landmark feature extraction completed.
