### FaceNet Backdoor Analysis

In [None]:
import torchprint("Torch version:", torch.__version__)!pip install -q --force-reinstall pillow==10.3.0 torchvision==0.17.1 facenet-pytorch==2.5.3

In [None]:
# Mount Drive (Colab)
# Exposes Google Drive under /content/drive; the archive and checkpoint paths
# configured in this notebook both live beneath that mount point.
from google.colab import drive
drive.mount("/content/drive")

In [None]:
# Paths (defaults mirror original TODO notebook on Drive)
# Archive of test images; extracted into /content/ by the unzip cell.
zip_file = "/content/drive/MyDrive/Assignment_2_files_updated/CelebA_test_images.zip"
# Directory that is searched for *.jpg files when loading the face subset.
image_folder = "/content/images"
# Checkpoint loaded into the classifier before any evaluation.
weights_path = "/content/drive/MyDrive/Assignment_2_files_updated/model_weights_poisoned_partC_facenet2.tar"
# Fraction of the loaded images that receive the trigger patch.
poison_fraction = 0.05
# Class index the backdoor is expected to force predictions towards.
target_label = 0
# Side length (in pixels) of the square trigger patch.
trigger_size = 20
# Number of Adam steps and learning rate for the white-box trigger search.
opt_steps = 100
opt_lr = 0.03
# Batch size used by the evaluation DataLoaders.
batch_size = 32
# Number of images sampled from image_folder.
sample_size = 300
# Seed for the NumPy generators that pick the image subset and poisoned indices.
seed = 71

In [None]:
# Extract the image archive into /content/ (-o: overwrite without prompting,
# -q: quiet). NOTE(review): presumably the archive contains an `images/`
# directory so the files end up in `image_folder` — confirm.
!unzip -oq $zip_file -d /content/

In [None]:
# Imports and device
import math
import random
from pathlib import Path
from typing import List

import numpy as np
import torch
import torch.nn as nn
from facenet_pytorch import InceptionResnetV1
from PIL import Image
from torch.utils.data import DataLoader, Dataset, TensorDataset
from torchvision import transforms

# Use the GPU when CUDA is available, otherwise fall back to the CPU. The
# metric and optimisation helpers read this module-level name directly.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Load face subset
def load_face_folder(folder: str, n: int, seed: int) -> List[torch.Tensor]:
    """Load a reproducible random subset of the ``.jpg`` images in *folder*.

    Args:
        folder: Directory that directly contains the ``.jpg`` files.
        n: Maximum number of images to load; fewer are returned when the
            folder holds fewer files.
        seed: Seed for the NumPy generator that shuffles the file list.

    Returns:
        One tensor per selected image, each resized to 160x160 and converted
        with ``ToTensor``.

    Raises:
        FileNotFoundError: If *folder* contains no ``.jpg`` files.
    """
    folder = Path(folder)
    # Sort before shuffling so the selection depends only on the seed, not on
    # the order in which the filesystem happens to list the files.
    paths = sorted(folder.glob("*.jpg"))
    if not paths:
        raise FileNotFoundError(f"No .jpg files found in {folder}")
    rng = np.random.default_rng(seed)
    rng.shuffle(paths)
    transform = transforms.Compose([transforms.Resize((160, 160)), transforms.ToTensor()])
    images = []
    for p in paths[:n]:
        # The context manager closes the file handle deterministically, and
        # convert("RGB") guarantees three channels even for grayscale or
        # alpha-carrying files, so the tensors can be stacked and fed to the
        # model without shape mismatches.
        with Image.open(p) as img:
            images.append(transform(img.convert("RGB")))
    return images

In [None]:
# Trigger utilities
def corner_trigger(trigger_size: int, channels: int = 3, value: float = 0.85) -> torch.Tensor:
    """Build a constant-valued square trigger patch.

    Args:
        trigger_size: Height and width of the patch.
        channels: Number of channels in the patch.
        value: Fill value written to every element.

    Returns:
        A tensor of shape ``(channels, trigger_size, trigger_size)`` in the
        default floating-point dtype, with every element equal to *value*.
    """
    patch_shape = (channels, trigger_size, trigger_size)
    # Pin the dtype explicitly: without it an integer fill value would make
    # torch.full produce an integer tensor.
    return torch.full(patch_shape, value, dtype=torch.get_default_dtype())

def apply_trigger(images: torch.Tensor, trigger_patch: torch.Tensor) -> torch.Tensor:
    """Return a copy of *images* with the patch pasted into the bottom-right corner.

    Args:
        images: Batch indexed as ``[batch, channel, row, column]``.
        trigger_patch: Patch whose last two dimensions are its height and
            width; it must be broadcastable to the selected corner region.

    Returns:
        A new tensor; *images* itself is left untouched. The patch is written
        in place into the clone, so gradients flow back to *trigger_patch*
        when it requires them.
    """
    patched = images.clone()
    h, w = trigger_patch.shape[-2:]
    if h == 0 or w == 0:
        # A zero-sized patch has nothing to paste. Without this guard the
        # slice ``-0:`` would select the whole axis instead of an empty
        # region and the assignment would fail on a shape mismatch.
        return patched
    patched[:, :, -h:, -w:] = trigger_patch.to(images.device)
    return patched

In [None]:
# Poisoned dataset wrapper
class PoisonedFaceDataset(Dataset):
    """Dataset view that stamps a trigger patch onto a random subset of images.

    A fixed set of indices is drawn once at construction time. Items at those
    indices are returned with the trigger applied and labelled with
    ``target_label``; all other items are returned unchanged with the
    sentinel label ``-1``.
    """

    def __init__(self, base_tensors: List[torch.Tensor], trigger_patch: torch.Tensor, target_label: int, poison_fraction: float, seed: int = 71):
        """Select which samples will be poisoned.

        Args:
            base_tensors: Clean image tensors, one per sample.
            trigger_patch: Patch passed to ``apply_trigger`` for poisoned items.
            target_label: Label reported for poisoned items.
            poison_fraction: Share of samples to poison, in ``(0, 1]``. At
                least one sample is poisoned whenever the dataset is non-empty.
            seed: Seed for the NumPy generator that picks the poisoned indices.

        Raises:
            ValueError: If *poison_fraction* is outside ``(0, 1]``.
        """
        if not 0 < poison_fraction <= 1:
            raise ValueError("poison_fraction must be in (0,1].")
        self.base_tensors = base_tensors
        self.trigger_patch = trigger_patch
        self.target_label = target_label
        total = len(base_tensors)
        if total == 0:
            # Nothing to sample from: asking the generator for one index out
            # of zero would raise, so an empty dataset gets no poisoned items.
            self.poison_indices = set()
            return
        rng = np.random.default_rng(seed)
        poison_count = max(1, int(total * poison_fraction))
        # A set gives O(1) membership checks in __getitem__.
        self.poison_indices = set(rng.choice(total, size=poison_count, replace=False).tolist())

    def __len__(self):
        """Return the number of underlying samples."""
        return len(self.base_tensors)

    def __getitem__(self, idx: int):
        """Return ``(image, label)`` for sample *idx*, poisoned if selected."""
        img = self.base_tensors[idx]
        if idx in self.poison_indices:
            # apply_trigger works on batches, so add and remove a batch axis.
            img = apply_trigger(img.unsqueeze(0), self.trigger_patch).squeeze(0)
            label = self.target_label
        else:
            label = -1
        return img, label

In [None]:
# Metrics
@torch.no_grad()
def attack_success_rate(model: nn.Module, loader: DataLoader, target_label: int) -> float:
    """Return the percentage of samples the model assigns to *target_label*.

    The model is switched to eval mode as a side effect. Labels yielded by
    *loader* are ignored; only the predictions are compared against
    *target_label*.

    Args:
        model: Classifier whose output is reduced with ``argmax(1)``.
        loader: Iterable yielding ``(images, labels)`` batches.
        target_label: Class index that counts as a successful attack.

    Returns:
        Success rate in percent (0-100), or ``0.0`` when *loader* yields no
        samples.
    """
    model.eval()
    total = 0
    success = 0
    for images, _ in loader:
        images = images.to(device)
        preds = model(images).argmax(1)
        total += preds.size(0)
        success += (preds == target_label).sum().item()
    if total == 0:
        # An empty loader has no successes; avoid dividing by zero.
        return 0.0
    return 100.0 * success / total

In [None]:
# White-box trigger optimization
def optimize_trigger_for_asr(model: nn.Module, base_tensors: List[torch.Tensor], target_label: int, trigger_size: int, steps: int, lr: float, batch_size: int) -> torch.Tensor:
    """Optimise a corner patch so that patched images are classified as *target_label*.

    A randomly initialised patch is pasted onto every image and updated with
    Adam to minimise the cross-entropy towards *target_label*. Only the patch
    is optimised; the model weights and their ``.grad`` fields are untouched.

    Args:
        model: Classifier under attack. It is switched to eval mode.
        base_tensors: Clean image tensors, all of the same shape.
        target_label: Class index the patch should force.
        trigger_size: Height and width of the square patch.
        steps: Number of optimisation steps.
        lr: Adam learning rate.
        batch_size: Batch size for the periodic ASR evaluation only; each
            optimisation step runs the whole image set through the model in
            a single forward pass.

    Returns:
        The optimised patch, detached, with shape
        ``(1, 3, trigger_size, trigger_size)`` on the module-level ``device``.

    NOTE(review): the patch values are unconstrained (initialised from a
    normal distribution and never clamped), so they may leave the value range
    of the input images — confirm this is intended.
    """
    # Set eval mode before the first forward pass. Previously this only
    # happened as a side effect of the first ASR evaluation, i.e. after one
    # forward pass had already run in whatever mode the caller left the
    # model in.
    model.eval()
    # The clean images never change between steps, so stack them and move
    # them to the device once instead of re-batching on every iteration.
    clean_images = torch.stack(base_tensors).to(device)
    labels = torch.full((len(clean_images),), target_label, dtype=torch.long, device=device)
    trigger = torch.randn((1, 3, trigger_size, trigger_size), device=device, requires_grad=True)
    optimizer = torch.optim.Adam([trigger], lr=lr)
    criterion = nn.CrossEntropyLoss()
    log_every = max(1, steps // 5)
    for step in range(steps):
        all_images = apply_trigger(clean_images, trigger)
        loss = criterion(model(all_images), labels)
        # Differentiate with respect to the trigger only, so no gradients are
        # accumulated on (or allocated for) the model parameters.
        (grad,) = torch.autograd.grad(loss, [trigger])
        trigger.grad = grad
        optimizer.step()
        if step % log_every == 0:
            # Evaluated on the images built before this step's update, which
            # is the same state the reported loss refers to.
            temp_loader = DataLoader(TensorDataset(all_images.detach(), labels), batch_size=batch_size)
            asr = attack_success_rate(model, temp_loader, target_label)
            print(f"[white-box] step={step} loss={loss.item():.4f} asr={asr:.2f}%")
    return trigger.detach()

In [None]:
# Sample complexity helper
def detection_sample_complexity(gap: float, delta: float = 0.05) -> float:
    """Return the sample count ``ceil(ln(2 / delta) / (2 * gap**2))``.

    Args:
        gap: Difference between the two rates being distinguished, as a
            fraction (not a percentage).
        delta: Allowed failure probability, strictly between 0 and 1.

    Returns:
        The sample count as an ``int``, or ``math.inf`` when *gap* is zero,
        negative, or so small that its square underflows to zero.

    Raises:
        ValueError: If *delta* is not in ``(0, 1)`` or *gap* is NaN.
    """
    if not 0 < delta < 1:
        raise ValueError("delta must be in (0,1).")
    if math.isnan(gap):
        raise ValueError("gap must not be NaN.")
    denominator = 2 * gap * gap
    # A vanishing gap can never be resolved; this also covers gaps whose
    # square underflows, which would otherwise divide by zero.
    if gap <= 0 or denominator == 0:
        return math.inf
    return math.ceil(math.log(2 / delta) / denominator)

In [None]:
# Run experiment with defaults (matches original TODO paths)
# Load a seeded random subset of the extracted images.
tensors = load_face_folder(image_folder, n=sample_size, seed=seed)
# Build the classifier with a 1000-class head and wrap it in DataParallel
# before loading the checkpoint.
# NOTE(review): presumably the checkpoint keys carry the `module.` prefix that
# DataParallel expects — confirm.
model = InceptionResnetV1(pretrained="vggface2", classify=True, num_classes=1000)
model = torch.nn.DataParallel(model).to(device)
# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted source.
ckp = torch.load(weights_path, map_location=device)
# Accept either a wrapper dict with a "state_dict" entry or a bare state dict.
state_dict = ckp.get("state_dict", ckp)
model.load_state_dict(state_dict)
model.eval()
# --- Black-box setting: fixed constant-valued corner patch ---
base_trigger = corner_trigger(trigger_size, channels=3)
poisoned_bb = PoisonedFaceDataset(tensors, trigger_patch=base_trigger, target_label=target_label, poison_fraction=poison_fraction, seed=seed)
# The labels only satisfy TensorDataset; attack_success_rate ignores them.
labels_bb = torch.full((len(poisoned_bb),), target_label, dtype=torch.long)
# NOTE(review): only `poison_fraction` of the images carry the trigger, yet the
# loader contains every image, so clean images count towards the reported
# ASR — confirm this is the intended metric.
bb_loader = DataLoader(TensorDataset(torch.stack([poisoned_bb[i][0] for i in range(len(poisoned_bb))]), labels_bb), batch_size=batch_size, shuffle=False)
bb_asr = attack_success_rate(model, bb_loader, target_label)
print(f"Black-box ASR (fixed trigger, {poison_fraction*100:.1f}% poison): {bb_asr:.2f}%")
# --- White-box setting: patch optimised against the model ---
# The optimised patch is moved back to the CPU before it is stored in the dataset.
white_trigger = optimize_trigger_for_asr(model, tensors, target_label=target_label, trigger_size=trigger_size, steps=opt_steps, lr=opt_lr, batch_size=batch_size).cpu()
# Same seed as above, so the same indices are poisoned in both settings.
poisoned_wb = PoisonedFaceDataset(tensors, trigger_patch=white_trigger, target_label=target_label, poison_fraction=poison_fraction, seed=seed)
labels_wb = torch.full((len(poisoned_wb),), target_label, dtype=torch.long)
wb_loader = DataLoader(TensorDataset(torch.stack([poisoned_wb[i][0] for i in range(len(poisoned_wb))]), labels_wb), batch_size=batch_size, shuffle=False)
wb_asr = attack_success_rate(model, wb_loader, target_label)
print(f"White-box ASR (optimized trigger, {poison_fraction*100:.1f}% poison): {wb_asr:.2f}%")
# Both rates are percentages; convert to fractions before taking the gap.
gap = abs(wb_asr / 100.0 - bb_asr / 100.0)
print(f"Samples to tell optimized vs. fixed trigger (delta=0.05): {detection_sample_complexity(gap)}")