## ResNet-50 Black-Box Feature Inversion Attack

This script implements the black-box feature inversion attack on ResNet-50 as described in 
the research paper "Inverting Features with Diffusion Priors".

The implementation includes three baseline models for comparison:
1. DO (Direct Output): Direct reconstruction without LDM
2. DB (Decoder-Based): Integrated LDM decoder  
3. DMB (Diffusion-based Model with Black-box): U-Net + Frozen LDM Decoder


In [22]:
!pip install lpips

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [26]:
# imports and config 
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as T
import torchvision.datasets as datasets
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader, random_split
from diffusers import AutoencoderKL
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
from pathlib import Path
import os
import shutil 
import random
from typing import Optional, Union, List, Tuple, Dict, Any
import lpips
from PIL import Image

# Set random seeds for reproducibility
torch.manual_seed(42)
np.random.seed(42)
random.seed(42)

# Configuration based on paper's experimental settings
CONFIG = {
    'seed': 42,
    'device': 'cuda' if torch.cuda.is_available() else 'cpu',
    'batch_size': 128,  # Paper uses batch size 128
    'epochs': 96,       # Paper uses 96 epochs
    'lr': 0.1,          # Paper uses initial learning rate 0.1
    'beta1': 0.9,       # Paper uses Adam optimizer with beta=(0.9, 0.999)
    'beta2': 0.999,
    'lambda_s': 1.0,    # Paper uses λ_s = 1 for equations 11, 12, 13
    'training_samples': 4096,  # Paper uses 4096 training images
    'testing_samples': 1024,   # Paper uses 1024 testing images
    'image_size': 224,  # ResNet-50 standard input size
    'latent_size': 64,  # LDM latent size (512/8 = 64 for 8x downsampling)
    'latent_channels': 4,  # LDM latent channels
    'results_dir': 'results_resnet50_blackbox',
    'checkpoint_dir': 'checkpoints_resnet50_blackbox'
}

# Create directories
os.makedirs(CONFIG['results_dir'], exist_ok=True)
os.makedirs(CONFIG['checkpoint_dir'], exist_ok=True)

print(f"Using device: {CONFIG['device']}")
print(f"Configuration: {CONFIG}")


Using device: cpu
Configuration: {'seed': 42, 'device': 'cpu', 'batch_size': 128, 'epochs': 96, 'lr': 0.1, 'beta1': 0.9, 'beta2': 0.999, 'lambda_s': 1.0, 'training_samples': 4096, 'testing_samples': 1024, 'image_size': 224, 'latent_size': 64, 'latent_channels': 4, 'results_dir': 'results_resnet50_blackbox', 'checkpoint_dir': 'checkpoints_resnet50_blackbox'}


In [4]:
class ResNet50Wrapper(nn.Module):
    """
    Wrapper for ResNet-50 model to extract features
    Based on the paper's target model F₁(.)
    """
    def __init__(self, pretrained=True):
        super().__init__()
        # Load pre-trained ResNet-50
        self.model = models.resnet50(pretrained=pretrained)
        
        # Remove the final classification layer to get features
        self.model = nn.Sequential(*list(self.model.children())[:-1])
        
        # ResNet-50 features: [B, 2048, 1, 1] -> [B, 2048]
        self.feature_dim = 2048
        
    def forward(self, x):
        # Extract features from the last layer before classification
        features = self.model(x)
        # Flatten features: [B, 2048, 1, 1] -> [B, 2048]
        features = features.view(features.size(0), -1)
        return features

In [5]:
class UNetInversion(nn.Module):
    """
    U-Net component of the inversion DNN F_u(.)
    Takes ResNet-50 features as input and generates latent variables for LDM
    """
    def __init__(self, input_dim=2048, latent_channels=4, latent_size=64):
        super().__init__()
        self.input_dim = input_dim
        self.latent_channels = latent_channels
        self.latent_size = latent_size
        
        # U-Net architecture for feature to latent mapping
        # Encoder
        self.enc1 = nn.Sequential(
            nn.Linear(self.input_dim, 2048),
            nn.LayerNorm(2048),
            nn.GELU(),
            nn.Dropout(0.1)
        )
        
        self.enc2 = nn.Sequential(
            nn.Linear(2048, 1024),
            nn.LayerNorm(1024),
            nn.GELU(),
            nn.Dropout(0.1)
        )
        
        self.enc3 = nn.Sequential(
            nn.Linear(1024, 512),
            nn.LayerNorm(512),
            nn.GELU(),
            nn.Dropout(0.1)
        )
        
        # Bottleneck
        self.bottleneck = nn.Sequential(
            nn.Linear(512, 512),
            nn.LayerNorm(512),
            nn.GELU(),
            nn.Dropout(0.1)
        )
        
        # Decoder
        self.dec3 = nn.Sequential(
            nn.Linear(512, 1024),
            nn.LayerNorm(1024),
            nn.GELU(),
            nn.Dropout(0.1)
        )
        
        self.dec2 = nn.Sequential(
            nn.Linear(1024, 2048),
            nn.LayerNorm(2048),
            nn.GELU(),
            nn.Dropout(0.1)
        )
        
        self.dec1 = nn.Sequential(
            nn.Linear(2048, latent_channels * latent_size * latent_size),
            nn.Tanh()  # Output in [-1, 1] range for LDM
        )
        
    def forward(self, x):
        # Encoder path
        e1 = self.enc1(x)
        e2 = self.enc2(e1)
        e3 = self.enc3(e2)
        
        # Bottleneck
        bottleneck = self.bottleneck(e3)
        
        # Decoder path with skip connections
        d3 = self.dec3(bottleneck + e3)
        d2 = self.dec2(d3 + e2)
        d1 = self.dec1(d2 + e1)
        
        # Reshape to latent format [B, C, H, W]
        latent = d1.view(-1, self.latent_channels, self.latent_size, self.latent_size)
        
        return latent


In [None]:
# Updated InversionDNN class - Replace the existing cell
class InversionDNN(nn.Module):
    """
    Complete inversion DNN F_θ^inv(.) as described in the paper
    Consists of U-Net F_u(.) and LDM decoder D(.)
    FIXED: Added resizing to match input image dimensions
    """
    def __init__(self, input_dim=2048, latent_channels=4, latent_size=64, target_size=224):
        super().__init__()
        self.target_size = target_size
        self.unet = UNetInversion(input_dim, latent_channels, latent_size)
        
        # Load pre-trained LDM components
        # Using Stable Diffusion's VAE decoder
        self.ldm_decoder = AutoencoderKL.from_pretrained(
            "runwayml/stable-diffusion-v1-5",
            subfolder="vae"
        ).decoder
        
        # Freeze LDM decoder parameters
        for param in self.ldm_decoder.parameters():
            param.requires_grad = False
            
        self.ldm_decoder.eval()
        
    def forward(self, features):
        # U-Net generates latent variables
        latent = self.unet(features)
        
        # Scale latent to match LDM's expected range
        latent = latent * 0.18215  # LDM scaling factor
        
        # LDM decoder reconstructs the image (outputs 512x512)
        with torch.no_grad():
            reconstructed = self.ldm_decoder(latent)
        
        # Resize to match target image size using interpolation
        if reconstructed.shape[-1] != self.target_size:
            reconstructed = F.interpolate(
                reconstructed, 
                size=(self.target_size, self.target_size), 
                mode='bilinear', 
                align_corners=False
            )
        
        return reconstructed

### Direct Optimization

In [7]:
class DOInversionDNN(nn.Module):
    """
    DO (Direct Output) variant
    Directly reconstructs user input x without relying on LDM
    """
    def __init__(self, input_dim=2048, output_channels=3, output_size=224):
        super().__init__()
        self.input_dim = input_dim
        self.output_channels = output_channels
        self.output_size = output_size
        
        # Direct reconstruction network
        self.network = nn.Sequential(
            nn.Linear(input_dim, 4096),
            nn.LayerNorm(4096),
            nn.GELU(),
            nn.Dropout(0.1),
            
            nn.Linear(4096, 8192),
            nn.LayerNorm(8192),
            nn.GELU(),
            nn.Dropout(0.1),
            
            nn.Linear(8192, 16384),
            nn.LayerNorm(16384),
            nn.GELU(),
            nn.Dropout(0.1),
            
            nn.Linear(16384, output_channels * output_size * output_size),
            nn.Tanh()  # Output in [-1, 1] range
        )
        
    def forward(self, features):
        output = self.network(features)
        return output.view(-1, self.output_channels, self.output_size, self.output_size)


### Decoder Based

In [None]:
# Updated DBInversionDNN class - Replace the existing cell
class DBInversionDNN(nn.Module):
    """
    DB (Decoder-Based) variant
    Integrates LDM decoder into the inversion DNN
    FIXED: Added resizing to match input image dimensions
    """
    def __init__(self, input_dim=2048, latent_channels=4, latent_size=64, target_size=224):
        super().__init__()
        self.target_size = target_size
        self.unet = UNetInversion(input_dim, latent_channels, latent_size)
        
        # Integrated LDM decoder (trainable)
        self.ldm_decoder = AutoencoderKL.from_pretrained(
            "runwayml/stable-diffusion-v1-5",
            subfolder="vae"
        ).decoder
        
        # Make LDM decoder trainable for DB variant
        for param in self.ldm_decoder.parameters():
            param.requires_grad = True
            
    def forward(self, features):
        latent = self.unet(features)
        latent = latent * 0.18215
        reconstructed = self.ldm_decoder(latent)
        
        # Resize to match target image size using interpolation
        if reconstructed.shape[-1] != self.target_size:
            reconstructed = F.interpolate(
                reconstructed, 
                size=(self.target_size, self.target_size), 
                mode='bilinear', 
                align_corners=False
            )
        
        return reconstructed

### Defining the dataset

In [9]:
class BlackBoxDataset(Dataset):
    """
    Dataset for black-box feature inversion
    Creates pairs of (input_image, resnet_features)
    """
    def __init__(self, dataset, resnet_model, transform=None, num_samples=None):
        self.dataset = dataset
        self.resnet_model = resnet_model
        self.transform = transform
        self.num_samples = num_samples if num_samples else len(dataset)
        
        # Limit dataset size
        self.indices = list(range(min(self.num_samples, len(dataset))))
        
    def __len__(self):
        return len(self.indices)
    
    def __getitem__(self, idx):
        actual_idx = self.indices[idx]
        
        if isinstance(self.dataset, datasets.ImageFolder):
            image, _ = self.dataset[actual_idx]
        else:
            image = self.dataset[actual_idx]
            
        # Apply transforms
        if self.transform:
            image = self.transform(image)
            
        # Extract ResNet-50 features
        with torch.no_grad():
            features = self.resnet_model(image.unsqueeze(0))
            features = features.squeeze(0)  # Remove batch dimension
            
        return image, features


### Utility functions

In [10]:
def total_variation_loss(x):
    """
    Total Variation loss for smoothness
    TV = Σ |x[i,j] - x[i,j-1]| + |x[i,j] - x[i-1,j]|
    """
    batch_size = x.size(0)
    h_x = x.size(2)
    w_x = x.size(3)
    
    count_h = h_x * w_x
    count_w = h_x * w_x
    
    h_tv = torch.pow((x[:, :, 1:, :] - x[:, :, :h_x-1, :]), 2).sum()
    w_tv = torch.pow((x[:, :, :, 1:] - x[:, :, :, :w_x-1]), 2).sum()
    
    return 2 * (h_tv / count_h + w_tv / count_w) / batch_size

def reconstruction_loss(pred, target):
    """L1 reconstruction loss"""
    return F.l1_loss(pred, target)

### Training Loop 

In [11]:
def train_inversion_dnn(model, train_loader, val_loader, config):
    """
    Train the inversion DNN according to paper's specifications
    """
    device = config['device']
    model = model.to(device)
    
    # Optimizer: Adam with paper's hyperparameters
    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=config['lr'],
        betas=(config['beta1'], config['beta2'])
    )
    
    # Loss functions
    recon_criterion = reconstruction_loss
    tv_criterion = total_variation_loss
    
    # Training loop
    best_val_loss = float('inf')
    train_losses = []
    val_losses = []
    
    for epoch in range(config['epochs']):
        # Training phase
        model.train()
        train_loss = 0.0
        
        for batch_idx, (images, features) in enumerate(tqdm(train_loader, desc=f'Epoch {epoch+1}/{config["epochs"]}')):
            images = images.to(device)
            features = features.to(device)
            
            # Forward pass
            reconstructed = model(features)
            
            # Compute loss according to equation 11
            recon_loss = recon_criterion(reconstructed, images)
            tv_loss = tv_criterion(reconstructed)
            
            total_loss = recon_loss + config['lambda_s'] * tv_loss
            
            # Backward pass
            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()
            
            train_loss += total_loss.item()
            
        avg_train_loss = train_loss / len(train_loader)
        train_losses.append(avg_train_loss)
        
        # Validation phase
        model.eval()
        val_loss = 0.0
        
        with torch.no_grad():
            for images, features in val_loader:
                images = images.to(device)
                features = features.to(device)
                
                reconstructed = model(features)
                
                recon_loss = recon_criterion(reconstructed, images)
                tv_loss = tv_criterion(reconstructed)
                
                total_loss = recon_loss + config['lambda_s'] * tv_loss
                val_loss += total_loss.item()
                
        avg_val_loss = val_loss / len(val_loader)
        val_losses.append(avg_val_loss)
        
        print(f'Epoch {epoch+1}: Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}')
        
        # Save best model
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'train_loss': avg_train_loss,
                'val_loss': avg_val_loss,
            }, f"{config['checkpoint_dir']}/best_model.pth")
            
        # Save checkpoint every 10 epochs
        if (epoch + 1) % 10 == 0:
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'train_loss': avg_train_loss,
                'val_loss': avg_val_loss,
            }, f"{config['checkpoint_dir']}/checkpoint_epoch_{epoch+1}.pth")
    
    # Plot training curves
    plt.figure(figsize=(12, 4))
    
    plt.subplot(1, 2, 1)
    plt.plot(train_losses, label='Training Loss')
    plt.plot(val_losses, label='Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Training and Validation Loss')
    plt.legend()
    plt.grid(True)
    
    plt.tight_layout()
    plt.savefig(f"{config['results_dir']}/training_curves.png", dpi=300, bbox_inches='tight')
    plt.show()
    
    return model, train_losses, val_losses


### Evaluation and Visualization

In [12]:
def evaluate_model(model, test_loader, config):
    """
    Evaluate the trained inversion DNN
    """
    device = config['device']
    model.eval()
    
    # Metrics
    recon_losses = []
    tv_losses = []
    lpips_scores = []
    
    # LPIPS for perceptual similarity
    lpips_fn = lpips.LPIPS(net='alex').to(device)
    
    with torch.no_grad():
        for images, features in tqdm(test_loader, desc='Evaluating'):
            images = images.to(device)
            features = features.to(device)
            
            reconstructed = model(features)
            
            # Reconstruction loss
            recon_loss = reconstruction_loss(reconstructed, images)
            recon_losses.append(recon_loss.item())
            
            # Total variation loss
            tv_loss = total_variation_loss(reconstructed)
            tv_losses.append(tv_loss.item())
            
            # LPIPS score
            lpips_score = lpips_fn(reconstructed, images).mean()
            lpips_scores.append(lpips_score.item())
    
    # Calculate average metrics
    avg_recon_loss = np.mean(recon_losses)
    avg_tv_loss = np.mean(tv_losses)
    avg_lpips = np.mean(lpips_scores)
    
    print(f"Evaluation Results:")
    print(f"Average Reconstruction Loss: {avg_recon_loss:.4f}")
    print(f"Average Total Variation Loss: {avg_tv_loss:.4f}")
    print(f"Average LPIPS Score: {avg_lpips:.4f}")
    
    return {
        'recon_loss': avg_recon_loss,
        'tv_loss': avg_tv_loss,
        'lpips': avg_lpips
    }

def visualize_results(model, test_loader, config, num_samples=8):
    """
    Visualize reconstruction results
    """
    device = config['device']
    model.eval()
    
    # Get a batch of samples
    images, features = next(iter(test_loader))
    images = images[:num_samples].to(device)
    features = features[:num_samples].to(device)
    
    with torch.no_grad():
        reconstructed = model(features)
    
    # Convert to numpy for visualization
    images_np = images.cpu().numpy()
    reconstructed_np = reconstructed.cpu().numpy()
    
    # Denormalize images (assuming ImageNet normalization)
    mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3, 1, 1)
    std = np.array([0.229, 0.224, 0.225]).reshape(1, 3, 1, 1)
    
    images_np = images_np * std + mean
    reconstructed_np = reconstructed_np * std + mean
    
    # Clip to [0, 1]
    images_np = np.clip(images_np, 0, 1)
    reconstructed_np = np.clip(reconstructed_np, 0, 1)
    
    # Create visualization
    fig, axes = plt.subplots(2, num_samples, figsize=(2*num_samples, 4))
    
    for i in range(num_samples):
        # Original image
        axes[0, i].imshow(np.transpose(images_np[i], (1, 2, 0)))
        axes[0, i].set_title(f'Original {i+1}')
        axes[0, i].axis('off')
        
        # Reconstructed image
        axes[1, i].imshow(np.transpose(reconstructed_np[i], (1, 2, 0)))
        axes[1, i].set_title(f'Reconstructed {i+1}')
        axes[1, i].axis('off')
    
    plt.tight_layout()
    plt.savefig(f"{config['results_dir']}/reconstruction_results.png", dpi=300, bbox_inches='tight')
    plt.show()


In [23]:
!wget http://cs231n.stanford.edu/tiny-imagenet-200.zip
!unzip -q tiny-imagenet-200.zip

--2025-08-24 08:26:33--  http://cs231n.stanford.edu/tiny-imagenet-200.zip
Resolving cs231n.stanford.edu (cs231n.stanford.edu)... 171.64.64.64
Connecting to cs231n.stanford.edu (cs231n.stanford.edu)|171.64.64.64|:80... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://cs231n.stanford.edu/tiny-imagenet-200.zip [following]
--2025-08-24 08:26:33--  https://cs231n.stanford.edu/tiny-imagenet-200.zip
Connecting to cs231n.stanford.edu (cs231n.stanford.edu)|171.64.64.64|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 248100043 (237M) [application/zip]
Saving to: ‘tiny-imagenet-200.zip’


2025-08-24 08:28:18 (2.26 MB/s) - ‘tiny-imagenet-200.zip’ saved [248100043/248100043]



In [28]:
def create_tinyimagenet_subset(src_dir="tiny-imagenet-200/train",
                               subset_dir="tiny_subset",
                               num_train=4096,
                               num_test=1024,
                               seed=42):
    random.seed(seed)

    # Collect all (image_path, class_id)
    class_folders = [d for d in os.listdir(src_dir)
                     if os.path.isdir(os.path.join(src_dir, d))]
    all_images = []
    for cls in class_folders:
        img_dir = os.path.join(src_dir, cls, "images")
        for img in os.listdir(img_dir):
            if img.endswith(".JPEG"):
                all_images.append((os.path.join(img_dir, img), cls))

    if len(all_images) < (num_train + num_test):
        raise RuntimeError(f"Not enough images in Tiny-ImageNet: found {len(all_images)}")

    random.shuffle(all_images)
    train_imgs = all_images[:num_train]
    test_imgs  = all_images[num_train:num_train+num_test]

    # Create split dirs
    for split in ("train", "test"):
        os.makedirs(os.path.join(subset_dir, split), exist_ok=True)

    # Copy while preserving class folders
    for path, cls in train_imgs:
        dst = os.path.join(subset_dir, "train", cls)
        os.makedirs(dst, exist_ok=True)
        shutil.copy(path, dst)

    for path, cls in test_imgs:
        dst = os.path.join(subset_dir, "test", cls)
        os.makedirs(dst, exist_ok=True)
        shutil.copy(path, dst)

    print(f"[subset] {num_train} train, {num_test} test.")

In [None]:
import os
import torch
from torch.utils.data import DataLoader, random_split
import torchvision.transforms as T
from torchvision import datasets

def main():
    print("Initializing ResNet-50 Black-Box Inversion Attack...")

    # Target model (ResNet-50)
    print("Loading ResNet-50 model...")
    resnet_model = ResNet50Wrapper(pretrained=True)
    resnet_model.eval()

    # Transforms
    transform = T.Compose([
        T.Resize((CONFIG['image_size'], CONFIG['image_size'])),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Ensure Tiny-ImageNet is present
    if not os.path.exists("tiny-imagenet-200"):
        os.system("wget http://cs231n.stanford.edu/tiny-imagenet-200.zip")
        os.system("unzip -q tiny-imagenet-200.zip")

    # Build subset with preserved class structure
    create_tinyimagenet_subset(
        src_dir="tiny-imagenet-200/train",
        subset_dir="tiny_subset",
        num_train=CONFIG['training_samples'],
        num_test=CONFIG['testing_samples']
    )

    # ImageFolder datasets
    train_data = datasets.ImageFolder(root="tiny_subset/train", transform=transform)
    test_data  = datasets.ImageFolder(root="tiny_subset/test",  transform=transform)

    # Wrap with your black-box dataset (no extra transform needed here)
    train_dataset = BlackBoxDataset(train_data, resnet_model)
    test_dataset  = BlackBoxDataset(test_data,  resnet_model)

    # Split train into train/val (90/10)
    train_size = int(0.9 * len(train_dataset))
    val_size   = len(train_dataset) - train_size
    train_dataset, val_dataset = random_split(train_dataset, [train_size, val_size])

    # DataLoaders
    num_workers = min(4, os.cpu_count() or 1)
    pin_mem = torch.cuda.is_available()
    train_loader = DataLoader(train_dataset, batch_size=CONFIG['batch_size'], shuffle=True,
                              num_workers=num_workers, pin_memory=pin_mem)
    val_loader   = DataLoader(val_dataset,   batch_size=CONFIG['batch_size'], shuffle=False,
                              num_workers=num_workers, pin_memory=pin_mem)
    test_loader  = DataLoader(test_dataset,  batch_size=CONFIG['batch_size'], shuffle=False,
                              num_workers=num_workers, pin_memory=pin_mem)

    print(f"Training samples:  {len(train_dataset)}")
    print(f"Validation samples:{len(val_dataset)}")
    print(f"Testing samples:   {len(test_dataset)}")

    # UPDATED: Inversion models with target_size parameter
    print("Initializing inversion DNN models...")
    models = {
        'DMB': InversionDNN(2048, target_size=CONFIG['image_size']),  # FIXED: Added target_size
        'DO' : DOInversionDNN(2048),
        'DB' : DBInversionDNN(2048, target_size=CONFIG['image_size']),  # FIXED: Added target_size
    }

    results = {}

    for name, model in models.items():
        print("\n" + "="*50)
        print(f"Training {name} model...")
        print("="*50)

        trained_model, train_losses, val_losses = train_inversion_dnn(
            model, train_loader, val_loader, CONFIG
        )
        metrics = evaluate_model(trained_model, test_loader, CONFIG)
        visualize_results(trained_model, test_loader, CONFIG)

        results[name] = {
            'model': trained_model,
            'train_losses': train_losses,
            'val_losses': val_losses,
            'metrics': metrics
        }

        # NOTE: use CONFIG (not config)
        os.makedirs(CONFIG['checkpoint_dir'], exist_ok=True)
        torch.save(trained_model.state_dict(),
                   f"{CONFIG['checkpoint_dir']}/{name}_final.pth")

    print("\n" + "="*50)
    print("FINAL RESULTS COMPARISON")
    print("="*50)
    for name, result in results.items():
        print(f"\n{name}:")
        print(f"  Reconstruction Loss: {result['metrics']['recon_loss']:.4f}")
        print(f"  Total Variation Loss: {result['metrics']['tv_loss']:.4f}")
        print(f"  LPIPS Score:         {result['metrics']['lpips']:.4f}")

    print(f"\nResults saved to:     {CONFIG['results_dir']}")
    print(f"Checkpoints saved to: {CONFIG['checkpoint_dir']}")

In [38]:
main()

Initializing ResNet-50 Black-Box Inversion Attack...
Loading ResNet-50 model...
Subset created: 4096 train, 1024 test images
Training samples:  3686
Validation samples:410
Testing samples:   1024
Initializing inversion DNN models...

Training DMB model...


  return F.l1_loss(pred, target)
Epoch 1/96:   0%|          | 0/29 [06:15<?, ?it/s]


RuntimeError: The size of tensor a (512) must match the size of tensor b (224) at non-singleton dimension 3