In [1]:
# 1. Install OpenAI CLIP (safe)
!pip install git+https://github.com/openai/CLIP.git

# 2. Install Facenet-PyTorch WITHOUT dependencies
# This prevents it from uninstalling your GPU-enabled PyTorch
!pip install facenet-pytorch --no-deps

# 3. Verify imports immediately to catch errors early
import torch
import clip
from facenet_pytorch import MTCNN

print(f"✅ PyTorch Version: {torch.__version__}")
print(f"✅ CUDA Available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"   GPU: {torch.cuda.get_device_name(0)}")
else:
    print("❌ WARNING: You are running on CPU! Enable 'GPU T4 x2' in Accelerator settings.")

Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-kem9syot
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-kem9syot
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Preparing metadata (setup.py) ... done
Collecting ftfy (from clip==1.0)
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Downloading ftfy-6.3.1-py3-none-any.whl (44 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 44.8/44.8 kB 1.3 MB/s eta 0:00:00
Building wheels for collected packages: clip
  Building wheel for clip (setup.py) ... done
  Created wheel for clip: filename=clip-1.0-py3-none-any.whl size=1369490 sha256=97b54d9e9f037021b3954b3ff3a30390d6b2edc4b26c1d02e81f3be05321337c
  Stored in directory: /tmp/pip-ephem-wheel-cache-4bh1us9x/wheels/35/3e/df/3d24c

In [2]:
# !pip install "pillow<10.0.0"


In [3]:
# pip install torch torchvision


In [4]:
import torch
from torch.utils.data import Dataset, DataLoader
import clip
from PIL import Image
import cv2
import numpy as np
import os
from facenet_pytorch import MTCNN
from tqdm import tqdm
import glob

print("done")

done


In [5]:
# --- Run configuration ---
# Kaggle input directory that holds the FFHQ face images.
DATASET_ROOT = "/kaggle/input/flickrfaceshq-dataset-ffhq"
# Where the extracted feature tensor is written.
OUTPUT_PATH = "/kaggle/working/ffhq_features_augmented.pt"
# Number of dataset items requested per DataLoader batch.
BATCH_SIZE = 64

# Use the GPU whenever PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

print(f"Running on: {DEVICE}")


Running on: cuda


In [6]:
import os
import glob

# Step 1: make sure the dataset directory is actually mounted.
print(f"Checking ROOT path: {DATASET_ROOT}")

if not os.path.exists(DATASET_ROOT):
    print("❌ Path does NOT exist. Please verify the dataset is attached.")
else:
    print("✅ Path exists.")
    # Only preview the first 10 directory entries to keep the output short.
    print(f"Contents of root: {os.listdir(DATASET_ROOT)[:10]}")

# Step 2: count PNG and JPG files anywhere below the root (recursive glob).
print("\nSearching for images...")
pngs = glob.glob(os.path.join(DATASET_ROOT, "**/*.png"), recursive=True)
jpgs = glob.glob(os.path.join(DATASET_ROOT, "**/*.jpg"), recursive=True)
total_images = len(pngs) + len(jpgs)

print(f"Found {len(pngs)} PNGs")
print(f"Found {len(jpgs)} JPGs")
print(f"Total Images: {total_images}")

if total_images > 0:
    print("\n✅ Images found! The path is correct.")
    # Show a PNG when any exist, otherwise the first JPG.
    print(f"Sample image path: {(pngs or jpgs)[0]}")
else:
    print("\n⚠️ PROBLEM FOUND: No images found.")
    print("Try changing DATASET_ROOT to one of the subfolders printed in step 1.")

Checking ROOT path: /kaggle/input/flickrfaceshq-dataset-ffhq
✅ Path exists.
Contents of root: ['27147.png', '52235.png', '32352.png', '41695.png', '21130.png', '36145.png', '22897.png', '18966.png', '22069.png', '47434.png']

Searching for images...
Found 52001 PNGs
Found 0 JPGs
Total Images: 52001

✅ Images found! The path is correct.
Sample image path: /kaggle/input/flickrfaceshq-dataset-ffhq/27147.png


In [7]:
# --- HELPER: CLAHE  ---
def apply_clahe(image_pil):
    """Return a contrast-equalized copy of an RGB PIL image.

    CLAHE (Contrast Limited Adaptive Histogram Equalization) is applied to the
    lightness channel only, in LAB colour space; the two chroma channels are
    passed through unchanged. The result is converted back to RGB and wrapped
    in a new PIL image.
    """
    # PIL supplies RGB pixels; go through OpenCV's BGR layout to reach LAB.
    bgr_pixels = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)
    lightness, chroma_a, chroma_b = cv2.split(
        cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2LAB)
    )

    # Equalize lightness with a clip limit of 2.0 over an 8x8 tile grid.
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    equalized_lab = cv2.merge((equalizer.apply(lightness), chroma_a, chroma_b))

    # Convert straight to RGB channel order, which is what PIL expects.
    rgb_pixels = cv2.cvtColor(equalized_lab, cv2.COLOR_LAB2RGB)
    return Image.fromarray(rgb_pixels)

In [8]:
import random
from torchvision import transforms
from PIL import Image, ImageEnhance
import io

# --- NEW: Augmentation Helper (Section E of Thesis) ---
def apply_thesis_augmentations(img_pil, rng=None, out_size=(336, 336)):
    """Randomly degrade a face crop (colour jitter, rescaling, JPEG artifacts).

    Three independent stages each fire with probability 0.5, in this order:

    1. Brightness and contrast jitter, each by a factor drawn uniformly from
       [0.8, 1.2] (kept mild so the face stays recognizable).
       NOTE(review): the earlier description of this step (Section E of the
       thesis) also listed saturation, but this function applies no saturation
       jitter. Confirm which is intended before adding it, because doing so
       would change the extracted features.
    2. Bilinear downscale to 50-90% of the current size, followed by a bicubic
       resize to ``out_size``.
    3. JPEG round-trip at a quality drawn from 50..95 (inclusive).

    Args:
        img_pil: Input PIL image.
        rng: Optional source of randomness exposing ``random()``,
            ``uniform()`` and ``randint()`` (e.g. ``random.Random(seed)``).
            Defaults to the global ``random`` module, as before.
        out_size: ``(width, height)`` the image is resized to after the
            downscale in stage 2. Defaults to ``(336, 336)``.

    Returns:
        The augmented PIL image. When no stage fires, the input object itself
        is returned unchanged.
    """
    if rng is None:
        rng = random  # fall back to the module-level generator

    # 1. Random colour jitter (brightness, then contrast).
    if rng.random() < 0.5:
        img_pil = ImageEnhance.Brightness(img_pil).enhance(rng.uniform(0.8, 1.2))
        img_pil = ImageEnhance.Contrast(img_pil).enhance(rng.uniform(0.8, 1.2))

    # 2. Random resizing (downscale, then resize to out_size) to simulate the
    #    blur / interpolation artifacts of rescaled images.
    if rng.random() < 0.5:
        width, height = img_pil.size
        scale = rng.uniform(0.5, 0.9)
        # Clamp to at least 1px so tiny inputs cannot yield a zero-sized resize.
        shrunk_size = (max(1, int(width * scale)), max(1, int(height * scale)))
        img_pil = img_pil.resize(shrunk_size, Image.BILINEAR)
        img_pil = img_pil.resize(out_size, Image.BICUBIC)

    # 3. JPEG compression round-trip.
    if rng.random() < 0.5:
        quality = rng.randint(50, 95)
        with io.BytesIO() as buffer:
            # JPEG cannot store alpha/palette modes, so encode from RGB.
            img_pil.convert("RGB").save(buffer, "JPEG", quality=quality)
            buffer.seek(0)
            # Image.open() is lazy; convert() forces the decode now so the
            # returned image no longer depends on the in-memory buffer.
            img_pil = Image.open(buffer).convert("RGB")

    return img_pil

# --- UPDATED DATASET CLASS ---
class ThesisDataset(Dataset):
    """Face crops prepared for CLIP feature extraction.

    Every ``*.png`` / ``*.jpg`` found recursively under ``root_dir`` is one
    item. Fetching an item detects the largest face with MTCNN, crops it,
    resizes the crop to 336x336, applies CLAHE and runs the CLIP preprocess.

    Each item is either:
      * a tensor stacking the clean view and, when ``augment`` is true, one
        randomly augmented view (clean first, augmented second), or
      * ``None`` when no face was detected or the image could not be
        processed. Callers must filter these out (e.g. in a ``collate_fn``).
    """

    def __init__(self, root_dir, clip_preprocess, augment=True):
        """Index the images and build the face detector.

        Args:
            root_dir: Directory searched recursively for PNG and JPG files.
            clip_preprocess: Callable turning a PIL image into a tensor
                (the preprocess returned by ``clip.load``).
            augment: When true, each item also contains an augmented view.
        """
        found = glob.glob(os.path.join(root_dir, "**/*.png"), recursive=True) + \
                glob.glob(os.path.join(root_dir, "**/*.jpg"), recursive=True)
        # glob returns files in filesystem order, which is arbitrary; sorting
        # makes the index -> file mapping (and thus the order of extracted
        # features) reproducible across runs.
        self.image_paths = sorted(found)
        self.preprocess = clip_preprocess
        self.mtcnn = MTCNN(keep_all=False, select_largest=True, device=DEVICE)
        self.augment = augment

    def __len__(self):
        """Number of indexed image files (including ones that may be skipped)."""
        return len(self.image_paths)

    @staticmethod
    def _clamp_box(box, width, height):
        """Truncate a detector box to ints and clip it to the image bounds.

        Args:
            box: Sequence ``(x1, y1, x2, y2)`` of numeric coordinates.
            width: Image width in pixels.
            height: Image height in pixels.

        Returns:
            ``(left, top, right, bottom)`` as ints inside the image, or
            ``None`` when the clipped box has no area.
        """
        left, top, right, bottom = (int(v) for v in box[:4])
        left, top = max(0, left), max(0, top)
        right, bottom = min(width, right), min(height, bottom)
        if right <= left or bottom <= top:
            return None
        return (left, top, right, bottom)

    def __getitem__(self, idx):
        """Return the stacked view tensor(s) for image ``idx``, or ``None``."""
        path = self.image_paths[idx]
        try:
            # Close the file handle deterministically; convert() returns an
            # independent, fully loaded image.
            with Image.open(path) as handle:
                img = handle.convert("RGB")

            # 1. Detect & Crop
            boxes, _ = self.mtcnn.detect(img)
            if boxes is None:
                return None

            # Detector boxes may extend past the image edge; keep the crop
            # within the actual image area.
            box = self._clamp_box(boxes[0], *img.size)
            if box is None:
                return None
            face_img = img.crop(box).resize((336, 336), Image.BICUBIC)

            # 2. Apply CLAHE (Standard Preprocessing)
            face_clean = apply_clahe(face_img)

            # 3. Prepare Tensors
            # View A: the clean real face.
            tensors = [self.preprocess(face_clean)]

            # View B: the augmented real face (for robustness).
            if self.augment:
                face_aug = apply_thesis_augmentations(face_clean)
                tensors.append(self.preprocess(face_aug))

            # Shape is (n_views, ...) with n_views == 2 when augmenting, else 1.
            return torch.stack(tensors)

        except Exception as e:
            # Best-effort: one bad image must not abort a long extraction run,
            # but the failure is surfaced instead of being silently dropped.
            import warnings
            warnings.warn(f"Skipping {path}: {type(e).__name__}: {e}")
            return None

In [9]:
# --- MAIN EXTRACTION LOOP ---
def extract_features():
    """Encode every usable face under DATASET_ROOT with CLIP and save the result.

    Loads CLIP ViT-L/14@336px, runs ``ThesisDataset`` over ``DATASET_ROOT``,
    L2-normalizes the image embeddings and writes them to ``OUTPUT_PATH`` as a
    single 2-D tensor via ``torch.save``.

    Row layout of the saved tensor: images appear in dataset order, and the
    views of one image are adjacent (clean view first, then the augmented
    view), because each item is flattened from ``(n_views, C, H, W)``.

    Images for which the dataset returns ``None`` are skipped; the number of
    skipped images is printed once extraction finishes.
    """
    # 1. Load CLIP Model
    print("Loading CLIP ViT-L/14@336px...")
    model, preprocess = clip.load("ViT-L/14@336px", device=DEVICE)
    # eval() only switches the model to inference behaviour; it does not
    # freeze weights. Gradients are disabled by torch.no_grad() below.
    model.eval()

    # 2. Prepare Dataset
    dataset = ThesisDataset(DATASET_ROOT, preprocess)

    def collate_fn(batch):
        """Drop None items (skipped images); return None for an empty batch."""
        kept = [item for item in batch if item is not None]
        return torch.stack(kept) if kept else None

    dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0, collate_fn=collate_fn)

    print(f"Found {len(dataset)} images. Starting extraction...")

    all_features = []
    n_encoded_images = 0  # source images that produced at least one embedding

    # 3. Extraction Loop
    with torch.no_grad():
        for batch in tqdm(dataloader):
            if batch is None:
                continue

            # Fold the per-image view axis into the batch axis for the encoder.
            b, n_augs, c, h, w = batch.shape
            n_encoded_images += b
            batch = batch.reshape(b * n_augs, c, h, w).to(DEVICE)

            features = model.encode_image(batch)
            # Unit-normalize each embedding.
            features = features / features.norm(dim=-1, keepdim=True)

            all_features.append(features.cpu())

    # Make silently dropped images visible to the reader.
    n_skipped = len(dataset) - n_encoded_images
    if n_skipped:
        print(f"⚠️ Skipped {n_skipped} of {len(dataset)} images (no face detected or image could not be processed).")

    # 4. Save Final Tensor
    if all_features:
        final_tensor = torch.cat(all_features, dim=0)
        print(f"Saving features shape: {final_tensor.shape}")
        torch.save(final_tensor, OUTPUT_PATH)
        print(f"✅ Saved to {OUTPUT_PATH}")
    else:
        print("❌ No features extracted. Check dataset path.")

if __name__ == "__main__":
    extract_features()

Loading CLIP ViT-L/14@336px...


100%|████████████████████████████████████████| 891M/891M [00:05<00:00, 184MiB/s]


Found 52001 images. Starting extraction...


100%|██████████| 813/813 [1:59:28<00:00,  8.82s/it]


Saving features shape: torch.Size([103982, 768])
✅ Saved to /kaggle/working/ffhq_features_augmented.pt
