In [None]:
# =============================================================================
# Cell 1: Setup Environment, Mount Drive, Define Paths
# =============================================================================
import os
import sys
import torch
import gc
import copy
import glob
import random
import json
from collections import defaultdict
import traceback

print("--- Environment Setup ---")

# Set CUDA Launch Blocking (Optional but Recommended for Debugging GPU errors)
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
print("CUDA_LAUNCH_BLOCKING set to 1.")

# Check if in Colab
IN_COLAB = 'google.colab' in sys.modules

# Install necessary libraries
print("Installing required libraries...")
# Using default Colab torch should be fine unless specific version needed
# !pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

# Install other libraries, including torch-pruning
# Updated transformers version might be needed if internal structures change
!pip install -q --upgrade transformers datasets accelerate evaluate timm Pillow safetensors pycocotools thop torch-pruning tqdm
print("Libraries installation attempt finished.")

# Import the third-party stack only after the install step above has run, and
# fail loudly (with a pointer to the pip logs) if anything is missing.
try:
    import torch
    import torch.nn as nn
    import numpy as np
    from transformers import (
        AutoImageProcessor, AutoModelForObjectDetection, AutoConfig, Trainer, TrainingArguments
    )
    # DeformableDetrMLP is intentionally not imported: the head-recreation
    # fallback in Cell 2 builds its heads from standard torch.nn modules.
    import torchvision
    from tqdm.notebook import tqdm
    from PIL import Image
    from torch.utils.data import Dataset, DataLoader
    import torch_pruning as tp
    from thop import profile
    from pycocotools.coco import COCO
    from pycocotools.cocoeval import COCOeval

    print("Core libraries imported successfully.")
except ImportError as e:
    print(f"ERROR: Failed to import libraries: {e}")
    print("Please check the pip install logs above.")
    # Bare raise re-raises the active exception with its traceback untouched.
    raise

# Mount Google Drive when running in Colab and pick the base path that every
# model/dataset/output path below is built from. Outside Colab the current
# working directory is used instead.
if IN_COLAB:
    from google.colab import drive
    # Test for the directory the notebook actually reads from. The bare mount
    # point '/content/drive' may exist without Drive being attached, in which
    # case every later path lookup would fail.
    if not os.path.isdir('/content/drive/MyDrive'):
        try:
            drive.mount('/content/drive')
            print("Google Drive mounted.")
        except Exception as e_mount:
            print(f"Error mounting drive: {e_mount}")
            # Nothing below can work without the drive; propagate unchanged.
            raise
    else:
        print("Google Drive already mounted.")
    base_drive_path = "/content/drive/MyDrive/"
else:
    base_drive_path = "./" # Adjust if running locally

# --- Configuration ---
# Every tunable of the notebook lives in this section. Later cells read these
# names directly, so they are all plain module-level variables.
print("\n--- Configuration ---")

# Pruning & fine-tuning parameters
GLOBAL_PRUNING_RATIO = 0.1    # passed to the pruner in Cell 4 as pruning_ratio
DO_FINE_TUNING = False
FINE_TUNE_EPOCHS = 5
FINE_TUNE_LR = 1e-5
FINE_TUNE_BATCH_SIZE = 2      # kept small for Colab memory
TRAIN_VAL_SPLIT_RATIO = 0.9   # 90% training / 10% validation

# Dataset parameters (must match the KITTI subset in use)
NUM_KITTI_CLASSES = 3         # Car, Pedestrian, Cyclist
# One extra slot is reserved for a background/no-object output.
# NOTE(review): the recorded load log for this checkpoint reports a 3-row
# class head being re-initialised to 4 rows because of this +1. Confirm the
# extra slot is really wanted before trusting un-fine-tuned predictions.
NUM_OUTPUTS_REQUIRED = NUM_KITTI_CLASSES + 1

# !!! Important: adjust these locations to match your own Drive layout !!!
# Directory holding the fine-tuned checkpoint
model_dir = os.path.join(base_drive_path, "deformable-detr-finetuned-kitti")
# Output directory for this run
output_dir = os.path.join(base_drive_path, "kitti_torch_pruning_output_v1")
# KITTI subset root and its image / annotation folders
dataset_base_dir = os.path.join(base_drive_path, "kitti_subset")
images_dir = os.path.join(dataset_base_dir, "images")
annotation_dir = os.path.join(dataset_base_dir, "annotations")
# COCO-format VALIDATION annotations; the file sits inside annotation_dir
coco_annotation_file = os.path.join(annotation_dir, "instances_val2017.json")
# --- End Configuration ---

# Make sure the run's output directory exists, then echo the resolved paths.
os.makedirs(output_dir, exist_ok=True)
print(f"Model directory: {model_dir}")
print(f"Dataset directory: {dataset_base_dir}")
print(f"COCO Annotation file: {coco_annotation_file}")
print(f"Output directory: {output_dir}")

# Device setup: prefer the GPU, fall back to the CPU with a warning.
if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f"Using device: {device}")
else:
    device = torch.device("cpu")
    print(f"Using device: {device}")
    print("WARNING: CUDA not available, running on CPU. This will be very slow.")

# Helper Functions (if needed by multiple cells, define here)
def get_module_by_name(model: nn.Module, name: str) -> nn.Module:
    """Resolve a dotted path such as ``"layer1.0.conv1"`` inside ``model``.

    Each path component is first tried as an attribute of the current object;
    if no such attribute exists it is tried as an integer index (so negative
    indices such as ``"-1"`` work on indexable containers).

    Args:
        model: Root module the path is relative to.
        name: Dot-separated path. An empty string addresses ``model`` itself,
            the same convention ``nn.Module.get_submodule`` uses.

    Returns:
        The object found at the end of the path.

    Raises:
        AttributeError: If a component is neither an attribute nor a valid
            index of its parent.
    """
    if not name:
        return model

    current = model
    for part in name.split('.'):
        if hasattr(current, part):
            current = getattr(current, part)
            continue
        try:
            # Not an attribute: treat it as a positional index into a container.
            current = current[int(part)]
        except (ValueError, IndexError, KeyError, TypeError) as exc:
            raise AttributeError(
                f"Module part '{part}' not found in name '{name}'. Parent type: {type(current)}"
            ) from exc
    return current

def set_module_by_name(model: nn.Module, name: str, new_module: nn.Module):
    """Replace the submodule addressed by the dotted path ``name``.

    The parent of the target is looked up first (through
    ``model.get_submodule`` and, if that raises ``AttributeError``, through
    ``get_module_by_name``). The last path component is then assigned either
    as an attribute of the parent or, when the parent has no such attribute,
    as an integer index into it.

    Args:
        model: Root module the path is relative to.
        name: Dot-separated path of the module to replace.
        new_module: Module to install at that path.

    Raises:
        AttributeError: If the last component can be set neither as an
            attribute nor as an index of the parent.
    """
    *parent_parts, leaf_name = name.split('.')
    parent_name = '.'.join(parent_parts)

    if parent_name:
        try:
            parent_module = model.get_submodule(parent_name)
        except AttributeError:
            # Fallback for nested Sequentials/ModuleLists if get_submodule fails
            parent_module = get_module_by_name(model, parent_name)
    else:
        # A single-component path targets a direct child of the root.
        parent_module = model

    if hasattr(parent_module, leaf_name):
        setattr(parent_module, leaf_name, new_module)
        return

    try:
        # No attribute of that name: replace an item of a ModuleList/Sequential.
        parent_module[int(leaf_name)] = new_module
    except (ValueError, IndexError, TypeError):
        raise AttributeError(f"Could not set attribute or index '{leaf_name}' in parent module '{parent_name}' of type {type(parent_module)}")

--- Environment Setup ---
CUDA_LAUNCH_BLOCKING set to 1.
Installing required libraries...
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.4/491.4 kB[0m [31m28.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.1/64.1 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m14.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.6/193.6 kB[0m [31m23.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.5/143.5 kB[0m [31m17.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13

In [None]:
# Standalone Drive mount. The setup cell already mounts Drive when it detects
# Colab and otherwise works from the local directory, so this cell must not
# crash on a machine where the google.colab package does not exist.
try:
    from google.colab import drive
except ImportError:
    print("google.colab is not available; skipping Google Drive mount.")
else:
    drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# =============================================================================
# Cell 2: Load Model, Resize Head, Replace BN (GPU Execution)
# =============================================================================
print("\n--- Loading Model, Resizing Head, Replacing BN ---")

# Ensure variables from Cell 1 are accessible
assert 'model_dir' in locals(), "Cell 1 must be run first to define paths."
assert 'device' in locals(), "Cell 1 must be run first to define device."
assert 'NUM_OUTPUTS_REQUIRED' in locals(), "Cell 1 must define NUM_OUTPUTS_REQUIRED."
# Compare the device *type*, not its string form: Cell 4 rebinds `device` to the
# model's concrete device ("cuda:0"), and a string comparison against "cuda"
# would then make this cell fail when it is re-run.
assert torch.device(device).type == 'cuda', "This cell expects CUDA device for final model placement."

# Results of this cell; the architecture values below are defaults that are
# overwritten from the checkpoint config when it can be loaded.
model = None
image_processor = None
config = None
hidden_dim = 256
decoder_layers = 6
num_queries = 300
original_params = -1 # Will be calculated at the end

# Load the fine-tuned checkpoint, make sure its prediction heads have
# NUM_OUTPUTS_REQUIRED outputs, swap every frozen batch-norm layer for a
# standard nn.BatchNorm2d carrying the same statistics, and move the result to
# `device`. Any failure is reported, leaves `model` as None, and is re-raised.
try:
    # Load processor
    image_processor = AutoImageProcessor.from_pretrained(model_dir)
    print(f"Image processor loaded from {model_dir}")

    # Generic label maps sized for the requested number of outputs; used by
    # both the normal and the fallback config path below.
    id2label = {i: f"LABEL_{i}" for i in range(NUM_OUTPUTS_REQUIRED)}
    label2id = {v: k for k, v in id2label.items()}

    # Load config
    try:
        config = AutoConfig.from_pretrained(model_dir)
        num_queries = getattr(config, 'num_queries', 300)
        hidden_dim = getattr(config, 'd_model', 256)
        decoder_layers = getattr(config, 'decoder_layers', 6)
        print(f"Loaded config: num_queries={num_queries}, hidden_dim={hidden_dim}, decoder_layers={decoder_layers}")

        # Surface a label-count change before it happens: the model is loaded
        # with ignore_mismatched_sizes=True, so a head whose shape differs from
        # the checkpoint is created fresh instead of being loaded.
        checkpoint_num_labels = getattr(config, 'num_labels', None)
        if checkpoint_num_labels is not None and checkpoint_num_labels != NUM_OUTPUTS_REQUIRED:
            print(
                f"WARNING: checkpoint config declares {checkpoint_num_labels} labels but "
                f"{NUM_OUTPUTS_REQUIRED} outputs are requested; the classification head "
                "will be re-initialised instead of loaded from the checkpoint."
            )

        config.id2label = id2label
        config.label2id = label2id
        config.num_labels = NUM_OUTPUTS_REQUIRED
        print(f"Updated config for {NUM_OUTPUTS_REQUIRED} outputs (incl. background).")
    except Exception as e_conf:
        # Best effort: fall back to the public base config when the checkpoint's
        # own config cannot be read.
        print(f"WARNING: Could not load/parse config: {e_conf}. Using defaults.")
        config = AutoConfig.from_pretrained(
            "SenseTime/deformable-detr",
            num_labels=NUM_OUTPUTS_REQUIRED,
            id2label=id2label,
            label2id=label2id,
        )
        num_queries = getattr(config, 'num_queries', num_queries)
        hidden_dim = getattr(config, 'd_model', hidden_dim)
        decoder_layers = getattr(config, 'decoder_layers', decoder_layers)
        print(f"Using potentially default config values: num_queries={num_queries}, hidden_dim={hidden_dim}, decoder_layers={decoder_layers}")

    # Load model structure (on CPU initially)
    print("Loading model structure (ignore mismatched sizes)...")
    model = AutoModelForObjectDetection.from_pretrained(
        model_dir,
        config=config,
        ignore_mismatched_sizes=True
    )
    print("Model structure loaded (on CPU initially).")

    # --- Resize Head AFTER Loading (on CPU) ---
    print("Checking and potentially resizing model heads (on CPU)...")
    try:
        # Find the nn.Linear that produces the class logits, whichever of the
        # supported container layouts class_embed uses.
        final_class_layer = None
        class_embed = getattr(model, 'class_embed', None)
        if isinstance(class_embed, nn.ModuleList) and len(class_embed) > 0:
            last_head = class_embed[-1]
            if isinstance(last_head, nn.Linear):
                final_class_layer = last_head
            elif isinstance(getattr(last_head, 'layers', None), nn.Sequential):
                inner_layers = last_head.layers
                if len(inner_layers) > 0 and isinstance(inner_layers[-1], nn.Linear):
                    final_class_layer = inner_layers[-1]
        elif isinstance(class_embed, nn.Sequential):
            if len(class_embed) > 0 and isinstance(class_embed[-1], nn.Linear):
                final_class_layer = class_embed[-1]
        elif isinstance(class_embed, nn.Linear):
            final_class_layer = class_embed

        if isinstance(final_class_layer, nn.Linear):
            current_cls_outputs = final_class_layer.out_features
            print(f"  Detected {current_cls_outputs} outputs in loaded classification head.")
        else:
            current_cls_outputs = -1
            print(f"  Could not reliably detect output features of class_embed (Type: {type(class_embed)}). Assuming resize needed.")

        if current_cls_outputs != NUM_OUTPUTS_REQUIRED:
            print(f"  Head mismatch/uncertainty: Recreating heads for {decoder_layers} layers with {NUM_OUTPUTS_REQUIRED} outputs (CPU).")

            def _make_bbox_head():
                """Build a fresh 3-layer MLP mapping hidden_dim features to 4 box values."""
                return nn.Sequential(
                    nn.Linear(hidden_dim, hidden_dim), nn.ReLU(),
                    nn.Linear(hidden_dim, hidden_dim), nn.ReLU(),
                    nn.Linear(hidden_dim, 4),
                )

            if hasattr(config, 'decoder_layers'):
                # One independent head per decoder layer.
                model.class_embed = nn.ModuleList(
                    [nn.Linear(hidden_dim, NUM_OUTPUTS_REQUIRED) for _ in range(config.decoder_layers)]
                )
                model.bbox_embed = nn.ModuleList(
                    [_make_bbox_head() for _ in range(config.decoder_layers)]
                )
            else:
                # No layer count available: a single shared head.
                model.class_embed = nn.Linear(hidden_dim, NUM_OUTPUTS_REQUIRED)
                model.bbox_embed = _make_bbox_head()

            # Query embedding check/resize. The embedding is a child of the
            # nested `model.model` in this architecture, so look there first
            # and only then on the wrapper itself.
            query_owner = None
            for candidate in (getattr(model, 'model', None), model):
                if isinstance(getattr(candidate, 'query_position_embeddings', None), nn.Embedding):
                    query_owner = candidate
                    break

            if query_owner is None:
                # NOTE(review): nothing reads an embedding created here unless
                # the forward pass looks it up on the wrapper; confirm it is needed.
                print("  Creating query embeds.")
                model.query_position_embeddings = nn.Embedding(num_queries, hidden_dim)
            else:
                query_embeds = query_owner.query_position_embeddings
                if query_embeds.num_embeddings != num_queries:
                    print("  Resizing query embeds.")
                    # Keep the existing embedding width so the consumer of the
                    # embedding still receives vectors of the size it expects.
                    query_owner.query_position_embeddings = nn.Embedding(num_queries, query_embeds.embedding_dim)
                else:
                    print("  Query embeds size OK.")
            print("  Heads recreated/resized on CPU.")
        else:
            print("  Loaded model heads appear to have the correct number of outputs.")
    except Exception as e_head:
        print(f"  Error during head check/resize: {e_head}")
        traceback.print_exc()
        raise RuntimeError("Failed head check/resize.") from e_head

    # --- Replace FrozenBatchNorm2d (on CPU) ---
    print("\nReplacing FrozenBatchNorm2d layers (on CPU)...")
    FROZEN_BN_TYPE = None
    try: # Find the FrozenBN class
        from transformers.models.deformable_detr.modeling_deformable_detr import DeformableDetrFrozenBatchNorm2d
        FROZEN_BN_TYPE = DeformableDetrFrozenBatchNorm2d
        print("  Found DeformableDetrFrozenBatchNorm2d class.")
    except ImportError:
        print("  DeformableDetrFrozenBatchNorm2d not found.") # Add fallback if needed

    if FROZEN_BN_TYPE:
        replacement_count = 0
        error_count = 0
        # Snapshot the module tree first: modules are replaced while iterating.
        module_list = list(model.named_modules())
        print(f"  Iterating through {len(module_list)} modules...")
        from tqdm import tqdm as regular_tqdm # Use standard tqdm
        for name, module in regular_tqdm(module_list, desc="Replacing FrozenBN (CPU)", leave=False):
            if not isinstance(module, FROZEN_BN_TYPE):
                continue
            try:
                if getattr(module, 'weight', None) is None:
                    print(f"    WARNING: Skipping {name} - no weight attr.")
                    error_count += 1
                    continue
                num_features = module.weight.shape[0]
                # --- Use eps=1e-5 fix ---
                new_bn = nn.BatchNorm2d(num_features, eps=1e-5, affine=True, track_running_stats=True)
                # Copy the affine parameters and running statistics whenever
                # the frozen layer has them and the shapes agree.
                for tensor_name in ('weight', 'bias', 'running_mean', 'running_var'):
                    source = getattr(module, tensor_name, None)
                    target = getattr(new_bn, tensor_name, None)
                    if source is not None and target is not None and target.shape == source.shape:
                        target.data.copy_(source.data)
                tracked = getattr(module, 'num_batches_tracked', None)
                if tracked is not None and hasattr(new_bn, 'num_batches_tracked'):
                    new_bn.num_batches_tracked.data.copy_(tracked.data)
                set_module_by_name(model, name, new_bn)
                replacement_count += 1
            except Exception as e_replace:
                print(f"    ERROR replacing {name}: {e_replace}")
                traceback.print_exc()
                error_count += 1
        print(f"  Finished FrozenBN Replacement. Replaced: {replacement_count}, Errors: {error_count}")
        if error_count > 0:
            raise RuntimeError("Errors occurred during BatchNorm replacement.")
    else:
        print("  No FrozenBatchNorm class identified for replacement.")

    # --- Move final model (with std BN) to target device (GPU) ---
    print(f"\nMoving final prepared model to: {device}")
    model.to(device)
    model.eval()
    print("Model is ready on device.")

    # --- Calculate original params *after* prep and move to GPU ---
    # This runs at cell (module) level, so the assignment is already visible
    # to later cells; no explicit globals() write is needed.
    original_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Model parameters after prep (trainable): {original_params:,}")

except Exception as e_load_prep:
    print(f"ERROR during model loading/preparation: {e_load_prep}")
    traceback.print_exc()
    # Make the failure visible to the prerequisite checks of later cells.
    model = None
    raise

# Clean up memory
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.



--- Loading Model, Resizing Head, Replacing BN ---
Image processor loaded from /content/drive/MyDrive/deformable-detr-finetuned-kitti
Loaded config: num_queries=300, hidden_dim=256, decoder_layers=6
Updated config for 4 outputs (incl. background).
Loading model structure (ignore mismatched sizes)...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Some weights of DeformableDetrForObjectDetection were not initialized from the model checkpoint at /content/drive/MyDrive/deformable-detr-finetuned-kitti and are newly initialized because the shapes did not match:
- class_embed.0.bias: found shape torch.Size([3]) in the checkpoint and torch.Size([4]) in the model instantiated
- class_embed.0.weight: found shape torch.Size([3, 256]) in the checkpoint and torch.Size([4, 256]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model structure loaded (on CPU initially).
Checking and potentially resizing model heads (on CPU)...
  Detected 4 outputs in loaded classification head.
  Loaded model heads appear to have the correct number of outputs.

Replacing FrozenBatchNorm2d layers (on CPU)...
  Found DeformableDetrFrozenBatchNorm2d class.
  Iterating through 425 modules...




  Finished FrozenBN Replacement. Replaced: 53, Errors: 0

Moving final prepared model to: cuda
Model is ready on device.
Model parameters after prep (trainable): 39,878,026


In [None]:
# =============================================================================
# Cell 3: Calculate Original Metrics (GPU Execution)
# =============================================================================
print("\n--- Calculating Original Model Metrics ---\n")

# Ensure model and param count from Cell 2 are available
assert 'model' in locals() and model is not None, "Cell 2 must be run first."
assert 'original_params' in locals() and original_params > 0, "Cell 2 must set original_params > 0."
# Check the device type rather than the literal "cuda:0" so that a model
# placed on any CUDA index satisfies the check the message describes.
assert next(model.parameters()).device.type == 'cuda', "Model expected on CUDA device." # Check device

# Results of this cell: GFLOPs of the unpruned model (-1.0 marks a failed
# measurement) and the dummy image batch, which Cell 4 reuses.
original_gflops = 0.0
dummy_input = None

# Measure the forward-pass cost of the prepared (unpruned) model with thop on
# a single dummy image. The result is stored in `original_gflops`
# (-1.0 when the measurement failed) and `dummy_input` is kept for Cell 4.
if model is not None:
    print(f"Using Original Trainable Parameters (Post-BN Replace): {original_params:,}")

    # Calculate GFLOPs using thop
    bs = 1
    try:
        # Determine input size from the image processor, defaulting to 800x1333.
        img_h, img_w = 800, 1333
        size_dict = getattr(image_processor, 'size', None)
        if isinstance(size_dict, dict):
            if 'shortest_edge' in size_dict:
                shortest = size_dict['shortest_edge']
                # Prefer the limit stored next to shortest_edge; fall back to a
                # `max_size` attribute and finally to 1333.
                max_size = size_dict.get('longest_edge') or getattr(image_processor, 'max_size', None) or 1333
                aspect_ratio = 1333 / 800
                # The 800/1333 pair keeps the exact defaults set above; any
                # other shortest edge is scaled with the same aspect ratio.
                if not (shortest == 800 and max_size == 1333):
                    img_h = shortest
                    img_w = int(shortest * aspect_ratio)
                if img_w > max_size:
                    img_w = max_size
                    img_h = int(max_size / aspect_ratio)
            elif 'height' in size_dict and 'width' in size_dict:
                img_h = size_dict['height']
                img_w = size_dict['width']
            img_h = max(img_h, 32)
            img_w = max(img_w, 32)
        print(f"Using dummy input size H={img_h}, W={img_w}")

        # --- Create dummy input on GPU ---
        dummy_input = torch.randn(bs, 3, img_h, img_w, device=device)
        print(f"Using dummy input shape for GFLOPs: {dummy_input.shape} on {device}")

        # Profile on GPU
        print(f"Calculating GFLOPs on {device} (thop)...")
        profiled_model = None
        try:
            # Ensure model is on GPU (should be already)
            model.to(device)
            # thop.profile registers bookkeeping buffers and forward hooks on
            # the module it is given. Profile a throwaway copy so the live
            # model, which Cell 4 deep-copies and prunes, is never modified,
            # even if profiling raises half-way through.
            profiled_model = copy.deepcopy(model)
            flops, _ = profile(profiled_model, inputs=(dummy_input,), verbose=False)
            original_gflops = flops / 1e9
            print(f"GFLOPs calculated on {device}: {original_gflops:.2f} GFLOPs")
        except Exception as e_prof_gpu:
            print(f"  Thop profile on GPU failed ({e_prof_gpu}), trying on CPU...")
            # Drop the GPU copy before building the CPU one.
            profiled_model = None
            # --- Fallback to CPU profiling ---
            try:
                model_cpu_copy = copy.deepcopy(model).cpu()
                dummy_input_cpu = dummy_input.cpu()
                flops, _ = profile(model_cpu_copy, inputs=(dummy_input_cpu,), verbose=False)
                original_gflops = flops / 1e9
                print(f"GFLOPs calculated on CPU: {original_gflops:.2f} GFLOPs")
                del model_cpu_copy, dummy_input_cpu
                gc.collect()
            except Exception as e_prof_cpu:
                print(f"  Thop profile on CPU also failed: {e_prof_cpu}")
                original_gflops = -1.0 # Indicate failure
            # --- End Fallback ---
        finally:
            del profiled_model

    except Exception as e_metrics:
        print(f"Error during original metrics calculation: {e_metrics}")
        traceback.print_exc()
        original_gflops = -1.0
else:
    print("Model not loaded, cannot calculate original metrics.")
    original_gflops = -1.0

# dummy_input is deliberately kept alive: Cell 4 reuses it as example input.
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()


--- Calculating Original Model Metrics ---

Using Original Trainable Parameters (Post-BN Replace): 39,878,026
Using dummy input size H=800, W=1333
Using dummy input shape for GFLOPs: torch.Size([1, 3, 800, 1333]) on cuda
Calculating GFLOPs on cuda (thop)...
GFLOPs calculated on cuda: 204.87 GFLOPs


In [None]:
# =============================================================================
# Cell 4: Pruning with torch-pruning (GPU Execution)
# =============================================================================
# Deep-copies the prepared model, collects the modules that must keep their
# shape, and runs one torch-pruning step on the copy. The result is left in
# `model_pruned` (None when pruning was skipped or failed).
print("\n--- Pruning Model with torch-pruning (GPU Execution) ---\n")

model_pruned = None # Initialize
original_params = -1 # Initialize, will be calculated below

# --- Prerequisite Checks ---
if 'model' not in locals() or model is None:
    print("Original model ('model') not available. Skipping pruning.")
elif not torch.cuda.is_available():
    # Not expected after Cell 2 succeeded, but kept as a guard.
    print("CUDA not available. Skipping GPU pruning.")
else:
    try:
        # --- Calculate original params directly from model object ---
        print("Calculating original params from model object...")
        assert 'model' in locals() and model is not None, "Model not loaded from Cell 2"
        # NOTE(review): this rebinds the notebook-wide `device` to the model's
        # concrete device (e.g. "cuda:0" instead of "cuda").
        device = next(model.parameters()).device
        print(f"  Model is on device: {device}")
        original_params = sum(
            param.numel() for param in model.parameters() if param.requires_grad
        )
        assert original_params > 0, "Could not calculate positive original_params from model"
        print(f"Using original trainable params (Post-BN Replace): {original_params:,}")
        # --- End Param Calculation ---

        # 1. Create a deep copy directly on GPU
        print(f"\nCreating deepcopy on device ({device})...")
        model.to(device)  # redundant with the lookup above, but harmless
        model_pruned = copy.deepcopy(model)
        model_pruned.eval()
        print("Deepcopy created on target device.")

        # --- BN Replacement is done in Cell 2 now ---
        print("\nSkipping BN Replacement (should be done in Cell 2).")

        # --- Model Structure Debugging: print the first levels of the tree ---
        print("\n--- Debugging Model Structure ---")
        print(f"Model pruned is on device: {next(model_pruned.parameters()).device}")
        print("Top-level modules in model_pruned:")
        for name, module in model_pruned.named_children():
            print(f"  - {name} ({type(module).__name__})")
        if hasattr(model_pruned, 'model'):
            nested_model_debug = model_pruned.model
            print("\nModules in model_pruned.model (nested):")
            for name, module in nested_model_debug.named_children():
                print(f"  - model.{name} ({type(module).__name__})")
            if hasattr(nested_model_debug, 'backbone'):
                print("\nModules in model_pruned.model.backbone:")
                for name, module in nested_model_debug.backbone.named_children():
                    print(f"  - model.backbone.{name} ({type(module).__name__})")
        print("--- End Debugging ---")

        # 2. Define Dummy Input on GPU (reuse the one from Cell 3 when present)
        if 'dummy_input' not in locals() or dummy_input is None:
            print("\nDummy input not found, recreating on GPU...")
            bs = 1
            img_h, img_w = 800, 1333
            try: # Simplified size logic
                if hasattr(image_processor, 'size') and isinstance(image_processor.size, dict):
                    size_dict = image_processor.size
                    if 'shortest_edge' in size_dict:
                        shortest = size_dict['shortest_edge']
                        max_size = getattr(image_processor, 'max_size', 1333)
                        img_h = shortest
                        img_w = int(shortest * (1333/800))
                        img_w = min(img_w, max_size)
                    elif 'height' in size_dict and 'width' in size_dict:
                        img_h = size_dict['height']
                        img_w = size_dict['width']
                img_h = max(img_h, 32)
                img_w = max(img_w, 32)
                print(f"Determined dummy input size: H={img_h}, W={img_w}")
            except Exception as e_size:
                print(f"Warning: Size error: {e_size}. Using default.")
                img_h, img_w = 800, 1333
            dummy_input = torch.randn(bs, 3, img_h, img_w, device=device) # Create on GPU device
            print(f"Recreated GPU dummy input: {dummy_input.shape} on {dummy_input.device}")
        elif dummy_input.device != device:
            print(f"\nMoving existing dummy input to {device}")
            dummy_input = dummy_input.to(device)
        else:
            print(f"\nUsing existing dummy input: {dummy_input.shape} on {dummy_input.device}")

        # 3. Define Ignored Layers: modules the pruner must leave untouched
        ignored_layers_modules = []
        print("\nIdentifying layers to ignore...")
        if not hasattr(model_pruned, 'model') or not isinstance(model_pruned.model, nn.Module):
            raise AttributeError("Nested 'model' module not found in model_pruned")

        nested_model = model_pruned.model # backbone, encoder and decoder live here
        print(f"  Accessing nested model of type: {type(nested_model).__name__}")

        def add_module_and_submodules(module_instance, name_prefix=""):
            """Append a module and all of its descendants to the ignore list once each.

            ``name_prefix`` is accepted as a label at the call sites; the body
            does not read it.
            """
            if not isinstance(module_instance, nn.Module):
                return
            # modules() yields the module itself first, then every descendant.
            for candidate in module_instance.modules():
                if candidate not in ignored_layers_modules:
                    ignored_layers_modules.append(candidate)

        # A. Ignore backbone's initial conv and bn
        if hasattr(nested_model, 'backbone') and hasattr(nested_model.backbone, 'conv_encoder') and hasattr(nested_model.backbone.conv_encoder, 'model'):
            timm_model = nested_model.backbone.conv_encoder.model
            print("  Checking backbone conv1/bn1...")
            if isinstance(getattr(timm_model, 'conv1', None), nn.Conv2d):
                ignored_layers_modules.append(timm_model.conv1)
                print(f"    Added model.backbone.conv1")
            # Standard BN types are expected here after the replacement in Cell 2
            if isinstance(getattr(timm_model, 'bn1', None), (nn.BatchNorm2d, nn.SyncBatchNorm)):
                ignored_layers_modules.append(timm_model.bn1)
                print(f"    Added model.backbone.bn1")

            # B. Ignore backbone's downsample layers
            print("  Checking backbone downsample blocks...")
            for layer_name in ['layer1', 'layer2', 'layer3', 'layer4']:
                if not hasattr(timm_model, layer_name):
                    continue
                res_layer = getattr(timm_model, layer_name)
                if not hasattr(res_layer, '__iter__'):
                    continue
                for block_idx, block in enumerate(res_layer):
                    if getattr(block, 'downsample', None) is not None:
                        print(f"    Adding downsample block: model.backbone.{layer_name}.{block_idx}.downsample")
                        add_module_and_submodules(block.downsample, f"ds_{layer_name}_{block_idx}")

        # C. Ignore heads (siblings of nested_model on the wrapper)
        print("  Checking heads, level_embed, input_proj...")
        if hasattr(model_pruned, 'class_embed'):
            print("    Adding class_embed and submodules")
            add_module_and_submodules(model_pruned.class_embed, "class_embed")
        if hasattr(model_pruned, 'bbox_embed'):
            print("    Adding bbox_embed and submodules")
            add_module_and_submodules(model_pruned.bbox_embed, "bbox_embed")

        # D. Ignore level_embed (inside nested_model) when it is an nn.Embedding
        if isinstance(getattr(nested_model, 'level_embed', None), nn.Embedding):
            ignored_layers_modules.append(nested_model.level_embed)
            print("    Added model.level_embed")

        # E. Ignore input projection (inside nested_model)
        if hasattr(nested_model, 'input_proj'):
            print("    Adding model.input_proj and submodules")
            add_module_and_submodules(nested_model.input_proj, "input_proj")

        # F. No need to check for FrozenBN here
        print("  Skipping check for FrozenBN (should be replaced in Cell 2).")

        # De-duplicate and never ignore the two root containers themselves.
        ignored_layers = list(set(
            m for m in ignored_layers_modules
            if isinstance(m, nn.Module)
            and m is not model_pruned
            and ('nested_model' not in locals() or m is not nested_model)
        ))
        print(f"\nIdentified {len(ignored_layers)} unique nn.Module instances to ignore.")
        print("--- Ignored Modules ---")
        from collections import Counter
        type_counts = Counter(type(m).__name__ for m in ignored_layers)
        print(f"Ignored layer type counts: {type_counts}")
        print("-----------------------")

        # 4. Define Importance and Pruner (on GPU)
        print(f"\nSetting up Pruner on GPU with layer channel sparsity target: {GLOBAL_PRUNING_RATIO}")
        importance = tp.importance.MagnitudeImportance(p=1)

        pruner = tp.pruner.MagnitudePruner(
            model_pruned,                        # GPU copy with standard BN layers
            example_inputs=dummy_input,          # GPU tensor passed as example input
            importance=importance,
            pruning_ratio=GLOBAL_PRUNING_RATIO,  # ratio configured in Cell 1
            ignored_layers=ignored_layers,
            root_module_types=[nn.Conv2d],
            round_to=8,
        )

        # 5. Apply Pruning (on GPU)
        print("Applying pruner.step() on GPU...")
        pruner.step()
        print("torch-pruning step finished on GPU.")
        model_pruned.eval()

        print(f"Pruned model ready on {next(model_pruned.parameters()).device}.")

    except Exception as e_prune:
        # Best effort: report the failure and leave model_pruned unset.
        print(f"ERROR during torch-pruning: {e_prune}")
        traceback.print_exc()
        model_pruned = None

    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()


--- Pruning Model with torch-pruning (GPU Execution) ---

Calculating original params from model object...
  Model is on device: cuda:0
Using original trainable params (Post-BN Replace): 39,878,026

Creating deepcopy on device (cuda:0)...
Deepcopy created on target device.

Skipping BN Replacement (should be done in Cell 2).

--- Debugging Model Structure ---
Model pruned is on device: cuda:0
Top-level modules in model_pruned:
  - model (DeformableDetrModel)
  - class_embed (ModuleList)
  - bbox_embed (ModuleList)

Modules in model_pruned.model (nested):
  - model.backbone (DeformableDetrConvModel)
  - model.input_proj (ModuleList)
  - model.query_position_embeddings (Embedding)
  - model.encoder (DeformableDetrEncoder)
  - model.decoder (DeformableDetrDecoder)
  - model.reference_points (Linear)

Modules in model_pruned.model.backbone:
  - model.backbone.conv_encoder (DeformableDetrConvEncoder)
  - model.backbone.position_embedding (DeformableDetrSinePositionEmbedding)
--- End Debugg

 Torch-Pruning will prune the last non-singleton dimension of these parameters. If you wish to change this behavior, please provide an unwrapped_parameters argument.


Applying pruner.step() on GPU...
torch-pruning step finished on GPU.
Pruned model ready on cuda:0.


In [None]:
# =============================================================================
# Cell 5: Calculate Pruned Model Metrics
# =============================================================================
print("\n--- Calculating Pruned Model Metrics ---")

# Sentinels: both stay negative when the corresponding metric could not be computed,
# so later cells can tell "not measured" apart from a real value.
pruned_params = -1
pruned_gflops = -1.0

if 'model_pruned' in locals() and model_pruned is not None:
    try:
        # Model and profiling input must live on the same device.
        model_pruned.to(device)
        model_pruned.eval()

        # --- Parameter count (trainable parameters only) ---
        pruned_params = sum(p.numel() for p in model_pruned.parameters() if p.requires_grad)
        print(f"Pruned Trainable Parameters: {pruned_params:,}")
        if 'original_params' in locals() and original_params > 0:
            reduction = (original_params - pruned_params) / original_params * 100
            print(f"Parameter Reduction: {reduction:.2f}%")
        else:
            print("Cannot calculate reduction (original_params unavailable).")

        # --- Profiling input: reuse the one from an earlier cell, else rebuild it ---
        if 'dummy_input' not in locals() or dummy_input is None:
            print("Dummy input not found from Cell 3, recreating...")
            bs = 1; height, width = 800, 1333
            dummy_input = torch.randn(bs, 3, height, width, device=device)
            print(f"Recreated dummy input shape: {dummy_input.shape}")
        elif dummy_input.device != device:
            dummy_input = dummy_input.to(device)

        # --- GFLOPs via thop: try the current device first, then fall back to CPU ---
        print("Calculating pruned GFLOPs...")
        try:
            flops, params_thop = profile(model_pruned, inputs=(dummy_input,), verbose=False)
            pruned_gflops = flops / 1e9
            print(f"Pruned GFLOPs calculated on {device}: {pruned_gflops:.2f} GFLOPs")
        except Exception as e_prof_gpu:
            print(f"  Thop profile on GPU failed ({e_prof_gpu}), trying on CPU...")
            # Pre-bind so the cleanup below works even if the deepcopy itself fails.
            model_pruned_cpu = None
            dummy_input_cpu = None
            try:
                model_pruned_cpu = copy.deepcopy(model_pruned).cpu()
                dummy_input_cpu = dummy_input.cpu()
                flops, _ = profile(model_pruned_cpu, inputs=(dummy_input_cpu,), verbose=False)
                pruned_gflops = flops / 1e9
                print(f"Pruned GFLOPs calculated on CPU: {pruned_gflops:.2f} GFLOPs")
            except Exception as e_prof_cpu:
                print(f"  Thop profile on CPU also failed: {e_prof_cpu}")
                pruned_gflops = -1.0
            finally:
                # Release the CPU copy on success and on failure alike; the
                # original only freed it when profiling succeeded.
                del model_pruned_cpu, dummy_input_cpu
                gc.collect()

        # --- GFLOPs reduction: always report either the number or why it is missing ---
        if 'original_gflops' not in locals() or original_gflops <= 0:
            print("Cannot calculate GFLOPs reduction (original_gflops unavailable).")
        elif pruned_gflops < 0:
            print("Cannot calculate GFLOPs reduction (pruned GFLOPs unavailable).")
        else:
            gflops_reduction = (original_gflops - pruned_gflops) / original_gflops * 100
            print(f"GFLOPs Reduction: {gflops_reduction:.2f}%")

    except Exception as e_metrics:
        # Best-effort cell: report the failure and leave the sentinels in place.
        print(f"Error calculating pruned metrics: {e_metrics}")
        traceback.print_exc()
else:
    print("Pruned model not available, cannot calculate metrics.")


--- Calculating Pruned Model Metrics ---
Pruned Trainable Parameters: 36,535,818
Parameter Reduction: 8.38%
Calculating pruned GFLOPs...
Pruned GFLOPs calculated on cuda:0: 191.19 GFLOPs
GFLOPs Reduction: 6.68%


In [None]:
# =============================================================================
# Cell 6: Save Pruned Model Structure
# =============================================================================
print("\n--- Saving Pruned Model Structure ---")

# Stays None when there is no pruned model to save.
pruned_model_saved_path = None
if 'model_pruned' not in locals() or model_pruned is None:
    print("Pruned model not available, skipping save.")
else:
    try:
        # NOTE(review): this .safetensors path is only logged; nothing in this cell
        # writes that file. The actual artefacts go to `output_dir` via
        # save_pretrained() below.
        save_filename = f"ddetr_torchpruned_ratio{GLOBAL_PRUNING_RATIO:.1f}_structure.safetensors"
        pruned_model_saved_path = os.path.join(output_dir, save_filename)
        print(f"Attempting to save pruned structure to: {pruned_model_saved_path}")

        # Hugging Face serialisation: the model is written into the output directory,
        # which is the location intended for later reloads.
        model_pruned.save_pretrained(output_dir)
        print(f"Pruned model saved using save_pretrained to: {output_dir}")

        # A raw state-dict dump through safetensors.torch.save_file was considered
        # as an optional extra and is intentionally not performed here.
    except Exception as e_save:
        print(f"Error saving pruned model: {e_save}")
        traceback.print_exc()


--- Saving Pruned Model Structure ---
Attempting to save pruned structure to: /content/drive/MyDrive/kitti_torch_pruning_output_v1/ddetr_torchpruned_ratio0.1_structure.safetensors
Pruned model saved using save_pretrained to: /content/drive/MyDrive/kitti_torch_pruning_output_v1


In [None]:
# =============================================================================
# Cell 7: Prepare KITTI Dataset for Fine-tuning / Evaluation
# =============================================================================
# Pre-flight for Cell 7: reset the objects this cell is expected to produce and
# verify that the dataset locations exist before any dataset is built.
print("\n--- Preparing KITTI Dataset ---")

# DefaultDataCollator is imported but not used in the visible part of this cell;
# batching is done with a custom collator defined further down.
from transformers import DefaultDataCollator
import traceback # Ensure traceback is imported

# Start from a clean slate so a failed run never leaves stale objects from an
# earlier execution of this cell.
train_dataloader = None
val_dataloader = None
kitti_dataset_full_coco_fmt = None
coco_gt = None

# --- File existence checks ---
# Missing images are fatal: nothing below can work without them.
if not os.path.isdir(images_dir):
    print(f"ERROR: Image directory not found: {images_dir}")
    raise FileNotFoundError(f"Image directory not found: {images_dir}")
# A missing COCO file only produces a warning here.
if not os.path.exists(coco_annotation_file):
     print(f"WARNING: COCO annotation file not found: {coco_annotation_file}. mAP evaluation will not work.")
# Missing KITTI label files make fine-tuning impossible, so the global switch is
# turned off; the rest of the cell reads DO_FINE_TUNING to decide what to build.
if not os.path.isdir(annotation_dir):
     print(f"WARNING: KITTI annotation directory not found: {annotation_dir}.")
     if DO_FINE_TUNING: # Only disable if planning to fine-tune
         print("Disabling fine-tuning because KITTI annotations are missing.")
         DO_FINE_TUNING = False
# --- End checks ---


# --- Define Dataset Class (KittiObjectDetectionDataset) ---
class KittiObjectDetectionDataset(Dataset):
    """Map-style dataset pairing images with KITTI-format label files.

    For every image ``<stem>.<ext>`` the labels are read from
    ``<annotation_dir>/<stem>.txt``, converted to COCO-style annotations
    (``[x, y, width, height]`` boxes clipped to the image) and handed to
    ``image_processor`` together with the image.

    Args:
        image_paths: Iterable of image file paths; paths that do not exist
            on disk are dropped at construction time.
        annotation_dir: Directory containing one KITTI ``.txt`` file per image.
        image_processor: Callable invoked as
            ``image_processor(images=..., annotations=..., return_tensors="pt")``;
            presumably a Hugging Face image processor - its result is indexed
            with ``"pixel_values"`` and ``"labels"``.
        label2id: Mapping of category name to integer id. Names are matched
            case-insensitively; non-string keys are ignored.
    """

    def __init__(self, image_paths, annotation_dir, image_processor, label2id):
        self.image_paths = [p for p in image_paths if os.path.exists(p)]
        self.annotation_dir = annotation_dir
        self.image_processor = image_processor
        # Lower-case the names so they can be compared with the lower-cased
        # category read from the label files.
        self.label2id = {k.lower(): v for k, v in label2id.items() if isinstance(k, str)}
        self.id2label = {v: k for k, v in self.label2id.items()}
        print(f"Dataset Initialized. Found {len(self.image_paths)} images. Category map: {self.label2id}")
        if not self.label2id: print("WARNING: label2id map seems incorrect or empty.")
        if not os.path.isdir(self.annotation_dir): print(f"WARNING: Annotation directory missing: {self.annotation_dir}")

    def __len__(self):
        """Return the number of images that were found on disk."""
        return len(self.image_paths)

    @staticmethod
    def _empty_labels():
        """Return a target dict describing an image without any boxes."""
        return {'boxes': torch.empty((0, 4)), 'class_labels': torch.empty((0,), dtype=torch.long)}

    def _read_kitti_annotations(self, ann_file_path, image_id, img_w, img_h):
        """Parse one KITTI label file into a list of COCO-style annotation dicts.

        Fields 4..7 of each whitespace-separated line are read as
        ``xmin, ymin, xmax, ymax``. Boxes are clipped to the image and
        discarded when the clipped box is empty. Lines with an unknown
        category or unparsable numbers are skipped. A missing file yields
        an empty list.
        """
        annotations = []
        if not os.path.exists(ann_file_path):
            return annotations
        with open(ann_file_path, 'r', encoding='utf-8') as f:
            for line in f:
                parts = line.split()
                if not parts:
                    continue
                cat_name = parts[0].lower()
                if cat_name not in self.label2id:
                    continue
                try:
                    xmin, ymin, xmax, ymax = [float(x) for x in parts[4:8]]
                except (ValueError, IndexError):
                    # Too few fields or a non-numeric value: ignore this line.
                    continue
                xmin_c, ymin_c = max(0., xmin), max(0., ymin)
                xmax_c, ymax_c = min(float(img_w - 1), xmax), min(float(img_h - 1), ymax)
                if xmin_c < xmax_c and ymin_c < ymax_c:
                    box_w, box_h = xmax_c - xmin_c, ymax_c - ymin_c
                    annotations.append({
                        "image_id": image_id, "category_id": self.label2id[cat_name],
                        "bbox": [xmin_c, ymin_c, box_w, box_h], "area": box_w * box_h, "iscrowd": 0,
                    })
        return annotations

    def __getitem__(self, idx):
        """Load and preprocess sample ``idx``.

        Returns:
            ``{"pixel_values": tensor, "labels": dict}`` on success, or
            ``None`` when the sample cannot be processed (the error is
            printed). Callers must be prepared to filter out ``None`` items.

        Raises:
            IndexError: If ``idx`` is outside the list of image paths.
        """
        img_path = self.image_paths[idx]
        try:
            # The context manager closes the underlying file as soon as the
            # RGB copy has been made.
            with Image.open(img_path) as raw_image:
                image = raw_image.convert("RGB")
            img_w, img_h = image.size

            ann_file_name = os.path.splitext(os.path.basename(img_path))[0] + ".txt"
            ann_file_path = os.path.join(self.annotation_dir, ann_file_name)
            annotations_coco_fmt = {
                "image_id": idx,
                "annotations": self._read_kitti_annotations(ann_file_path, idx, img_w, img_h),
            }

            # --- Preprocess using image_processor ---
            try:
                encoding = self.image_processor(images=image, annotations=annotations_coco_fmt, return_tensors="pt")
            except Exception as e_proc:
                print(f"\nERROR during image_processor call for {img_path}: {e_proc}")
                # Fall back to the image alone so the sample can still be used
                # (with an empty target) instead of being dropped.
                try:
                    print("  Trying image processing without annotations...")
                    encoding = self.image_processor(images=image, return_tensors="pt")
                    encoding['labels'] = [self._empty_labels()]
                    print("  Image processed, using dummy labels.")
                except Exception as e_img_proc:
                    print(f"  ERROR processing even just the image: {e_img_proc}")
                    return None

            # The processor output carries a leading batch axis of size one.
            pixel_values = encoding["pixel_values"].squeeze(0)
            processed_labels = encoding.get("labels")
            if isinstance(processed_labels, list) and len(processed_labels) > 0:
                labels = processed_labels[0]
            else:
                labels = self._empty_labels()
            return {"pixel_values": pixel_values, "labels": labels}
        except Exception as e:
            print(f"\nERROR processing sample index {idx} ({img_path}): {e}")
            traceback.print_exc()
            return None
# --- End Dataset Class ---
# --- End Dataset Class ---


# --- Build train/validation datasets and dataloaders (only when fine-tuning) ---
if DO_FINE_TUNING:
    print("\nCreating training and validation datasets...")
    # Collect PNG/JPG/JPEG files (any letter case) from the whole image tree.
    all_image_files = sorted(
        glob.glob(os.path.join(images_dir, "**", "*.[pP][nN][gG]"), recursive=True)
        + glob.glob(os.path.join(images_dir, "**", "*.[jJ][pP][gG]"), recursive=True)
        + glob.glob(os.path.join(images_dir, "**", "*.[jJ][pP][eE][gG]"), recursive=True)
    )
    if not all_image_files:
        print(f"ERROR: No images found in {images_dir}. Cannot fine-tune.")
        DO_FINE_TUNING = False
    else:
        print(f"Found {len(all_image_files)} total images.")
        # Fixed seed keeps the train/val split reproducible between runs.
        random.seed(42)
        random.shuffle(all_image_files)
        split_idx = int(len(all_image_files) * TRAIN_VAL_SPLIT_RATIO)
        train_image_files = all_image_files[:split_idx]
        val_image_files = all_image_files[split_idx:]
        if not val_image_files and train_image_files:
            print("Warning: No validation files after split, moving one from train.")
            val_image_files.append(train_image_files.pop())
        if not train_image_files or not val_image_files:
            print("Error: Could not create non-empty train/val splits. Disabling fine-tuning.")
            DO_FINE_TUNING = False
        else:
            print(f"Using {len(train_image_files)} train and {len(val_image_files)} validation images.")
            try:
                # --- Category map: prefer the loaded config, else a KITTI default ---
                kitti_label2id_from_config = getattr(config, 'label2id', None)
                if isinstance(kitti_label2id_from_config, dict) and len(kitti_label2id_from_config) == NUM_KITTI_CLASSES:
                    kitti_label2id = kitti_label2id_from_config
                    print("Using label2id map from loaded config.")
                else:
                    print(f"Warning: Config label2id map missing/invalid/wrong size. Creating default map.")
                    kitti_cats = ['Car', 'Pedestrian', 'Cyclist']
                    assert NUM_KITTI_CLASSES == len(kitti_cats), "Class num mismatch"
                    kitti_label2id = {name: i for i, name in enumerate(kitti_cats)}
                print(f"Using label2id map for dataset: {kitti_label2id}")
                assert 'image_processor' in locals() and image_processor is not None, "Image processor not loaded."

                train_dataset = KittiObjectDetectionDataset(train_image_files, annotation_dir, image_processor, kitti_label2id)
                val_dataset = KittiObjectDetectionDataset(val_image_files, annotation_dir, image_processor, kitti_label2id)

                def custom_object_detection_collator(batch):
                    """Pad the images of a batch and attach the per-image label dicts.

                    Samples that are ``None`` (failed to load) are dropped. Returns
                    ``None`` when nothing is left or when padding fails, so the
                    consumer of the dataloader has to skip ``None`` batches.
                    """
                    batch = [item for item in batch if item is not None]
                    if not batch:
                        return None
                    pixel_values = [item["pixel_values"] for item in batch]
                    labels = [item["labels"] for item in batch]
                    try:
                        # The list of image tensors is handed to pad() as-is.
                        batch_encoding = image_processor.pad(pixel_values, return_tensors="pt")
                    except Exception as e_pad:
                        print(f"Error during image_processor.pad: {e_pad}")
                        shapes = [pv.shape for pv in pixel_values]
                        print(f"  Shapes of tensors passed to pad: {shapes}")
                        return None
                    # Labels stay a plain list of dicts, one entry per image.
                    batch_encoding['labels'] = labels
                    return batch_encoding

                print("Using custom object detection collator.")
                print("Creating dataloaders (num_workers=0)...")
                train_dataloader = DataLoader(
                    train_dataset,
                    collate_fn=custom_object_detection_collator,
                    batch_size=FINE_TUNE_BATCH_SIZE,
                    shuffle=True,
                    num_workers=0,
                )
                val_dataloader = DataLoader(
                    val_dataset,
                    collate_fn=custom_object_detection_collator,
                    batch_size=FINE_TUNE_BATCH_SIZE * 2,
                    shuffle=False,
                    num_workers=0,
                )
                print("Dataloaders created.")

            except Exception as e_load:
                print(f"ERROR creating Datasets/Dataloaders: {e_load}")
                traceback.print_exc()
                DO_FINE_TUNING = False

# --- COCO ground truth used for mAP evaluation ---
if os.path.exists(coco_annotation_file):
    try:
        print(f"\nLoading COCO ground truth for mAP evaluation from: {coco_annotation_file}")
        coco_gt = COCO(coco_annotation_file)
        print("COCO GT loaded.")
    except Exception as e_coco:
        print(f"ERROR loading COCO annotations file '{coco_annotation_file}': {e_coco}")
        coco_gt = None
else:
    print(f"\nCOCO annotation file for evaluation not found at: {coco_annotation_file}")
    coco_gt = None
# --- End COCO load ---

if not DO_FINE_TUNING:
    print("\nFine-tuning disabled due to errors or configuration.")


--- Preparing KITTI Dataset ---

Loading COCO ground truth for mAP evaluation from: /content/drive/MyDrive/kitti_subset/annotations/instances_val2017.json
loading annotations into memory...
Done (t=0.35s)
creating index...
index created!
COCO GT loaded.

Fine-tuning disabled due to errors or configuration.


In [None]:
# =============================================================================
# Cell A: Pruning, Metrics, and Saving Loop (No Fine-tuning)
# =============================================================================
import torch
import torch.nn as nn
import copy
import gc
import os
import torch_pruning as tp
from thop import profile
from tqdm.notebook import tqdm
import traceback # Make sure traceback is imported

# Cell A driver: for each target ratio, prune a fresh deep copy of the base model,
# measure parameters/GFLOPs, save the result under output_dir and record everything
# in `pruning_results`. No fine-tuning happens in this cell.
print("\n--- Starting Pruning & Saving Loop (No Fine-tuning) ---")

# --- Configuration for the loop ---
target_ratios = [0.1, 0.2, 0.3, 0.4, 0.5] # Example ratios to test
# Maps ratio -> dict with 'params', 'param_reduc%', 'gflops', 'gflop_reduc%',
# 'save_path'; on failure 'params'/'gflops' are set to -1 and 'error' holds the message.
pruning_results = {} # Dictionary to store metrics and save paths

# --- Ensure baseline metrics and base model exist ---
# These names must already be bound by earlier cells (named in each message).
assert 'model' in locals() and model is not None, "Run Cell 2 first"
assert 'original_params' in locals() and original_params > 0, "Run Cell 2 first"
assert 'original_gflops' in locals() and original_gflops > 0, "Run Cell 3 first"
assert 'dummy_input' in locals() and dummy_input is not None, "Run Cell 3 first"
assert 'image_processor' in locals() and image_processor is not None, "Run Cell 2/7 first"
assert 'output_dir' in locals(), "Run Cell 1 first"
device = next(model.parameters()).device # Get device from model
print(f"Base Model Parameters: {original_params:,}")
print(f"Base Model GFLOPs: {original_gflops:.2f}")
print(f"Using device: {device}")

# --- Loop through each target pruning ratio ---
for ratio in target_ratios:
    print(f"\n--- Processing Ratio: {ratio:.2f} ---")
    pruning_results[ratio] = {} # Initialize dict for this ratio
    model_pruned_copy = None # Ensure clean state

    try:
        # 1. Copy the original prepared model (already on GPU with std BN)
        # Every ratio starts from the unpruned base model, so ratios are independent.
        print("  Creating deepcopy of base model...")
        model_pruned_copy = copy.deepcopy(model)
        model_pruned_copy.eval()
        print(f"  Copy created on {next(model_pruned_copy.parameters()).device}")

        # 2. Define Ignored Layers (on the copy)
        # The module objects must come from the copy, not from `model`, because the
        # pruner is given the copy.
        ignored_layers_modules = []
        print("  Identifying layers to ignore...")
        # --- Use corrected logic accessing nested 'model' ---
        if not hasattr(model_pruned_copy, 'model') or not isinstance(model_pruned_copy.model, nn.Module): raise AttributeError("Nested 'model' module not found")
        nested_model = model_pruned_copy.model
        # Helper: append a module and all of its descendants to ignored_layers_modules,
        # skipping entries already present. Redefined each iteration so it closes over
        # this iteration's list.
        def add_module_and_submodules(module_instance): # Simplified helper
             if module_instance is None or not isinstance(module_instance, nn.Module): return
             if module_instance not in ignored_layers_modules: ignored_layers_modules.append(module_instance)
             for submodule in module_instance.modules():
                 if submodule not in ignored_layers_modules and submodule is not module_instance: ignored_layers_modules.append(submodule)
        # Add layers to ignore
        # Backbone: the stem conv/bn and every residual block's downsample branch.
        if hasattr(nested_model, 'backbone') and hasattr(nested_model.backbone, 'conv_encoder') and hasattr(nested_model.backbone.conv_encoder, 'model'):
            timm_model = nested_model.backbone.conv_encoder.model
            if hasattr(timm_model, 'conv1'): ignored_layers_modules.append(timm_model.conv1)
            if hasattr(timm_model, 'bn1'): ignored_layers_modules.append(timm_model.bn1)
            for layer_name in ['layer1', 'layer2', 'layer3', 'layer4']:
                if hasattr(timm_model, layer_name):
                    res_layer = getattr(timm_model, layer_name)
                    if hasattr(res_layer, '__iter__'):
                        for block in res_layer:
                            if hasattr(block, 'downsample') and block.downsample is not None: add_module_and_submodules(block.downsample)
        # Heads live on the outer model; level_embed / input_proj on the nested one.
        if hasattr(model_pruned_copy, 'class_embed'): add_module_and_submodules(model_pruned_copy.class_embed)
        if hasattr(model_pruned_copy, 'bbox_embed'): add_module_and_submodules(model_pruned_copy.bbox_embed)
        if hasattr(nested_model, 'level_embed'): ignored_layers_modules.append(nested_model.level_embed)
        if hasattr(nested_model, 'input_proj'): add_module_and_submodules(nested_model.input_proj)
        # De-duplicate and make sure neither the root model nor its nested container is
        # ignored as a whole.
        ignored_layers = list(set(m for m in ignored_layers_modules if isinstance(m, nn.Module) and m is not model_pruned_copy and m is not nested_model))
        print(f"  Identified {len(ignored_layers)} unique modules to ignore.")
        # --- End ignored layer identification ---

        # 3. Define Pruner
        print(f"  Setting up Pruner for ratio {ratio:.2f}...")
        importance = tp.importance.MagnitudeImportance(p=1)
        # NOTE(review): relies on the model object exposing a `.device` attribute
        # (plain nn.Module does not) - presumably provided by the Hugging Face base
        # class; confirm.
        if dummy_input.device != model_pruned_copy.device: dummy_input = dummy_input.to(model_pruned_copy.device)
        pruner = tp.pruner.MagnitudePruner( model_pruned_copy, example_inputs=dummy_input, importance=importance, pruning_ratio=ratio, ignored_layers=ignored_layers, root_module_types=[nn.Conv2d], round_to=8)

        # 4. Apply Pruning
        print("  Applying pruner.step()...")
        pruner.step()
        print("  Pruning complete.")
        model_pruned_copy.eval()

        # 5. Calculate Pruned Metrics (like Cell 5)
        print("  Calculating pruned metrics...")
        # Trainable parameters only, so the count is comparable with original_params
        # as long as that was computed the same way.
        pruned_params = sum(p.numel() for p in model_pruned_copy.parameters() if p.requires_grad)
        pruning_results[ratio]['params'] = pruned_params
        pruning_results[ratio]['param_reduc%'] = (1 - pruned_params/original_params)*100 if original_params > 0 else 0
        # A profiling failure is not fatal: GFLOPs are recorded as -1.0 and the
        # model is still saved below.
        try:
            flops, _ = profile(model_pruned_copy, inputs=(dummy_input,), verbose=False)
            pruned_gflops = flops / 1e9
            pruning_results[ratio]['gflops'] = pruned_gflops
            pruning_results[ratio]['gflop_reduc%'] = (1 - pruned_gflops/original_gflops)*100 if original_gflops > 0 else 0
            print(f"    Params: {pruned_params:,} (Reduction: {pruning_results[ratio]['param_reduc%']:.2f}%)")
            print(f"    GFLOPs: {pruned_gflops:.2f} (Reduction: {pruning_results[ratio]['gflop_reduc%']:.2f}%)")
        except Exception as e_prof:
            print(f"    Error calculating GFLOPs: {e_prof}")
            pruning_results[ratio]['gflops'] = -1.0
            pruning_results[ratio]['gflop_reduc%'] = 0.0

        # 6. Save Pruned Model Structure (like Cell 6)
        # One sub-directory per ratio, holding both the model and the image processor.
        pruned_model_save_dir = os.path.join(output_dir, f"pruned_ratio_{ratio:.2f}")
        print(f"  Saving pruned structure to: {pruned_model_save_dir}")
        os.makedirs(pruned_model_save_dir, exist_ok=True)
        model_pruned_copy.save_pretrained(pruned_model_save_dir)
        if image_processor: image_processor.save_pretrained(pruned_model_save_dir)
        pruning_results[ratio]['save_path'] = pruned_model_save_dir # Store path for later loading

    except Exception as e_ratio:
        # A failing ratio must not stop the sweep; mark it and continue.
        print(f"ERROR processing ratio {ratio:.2f}: {e_ratio}")
        traceback.print_exc()
        # Store error indication
        pruning_results[ratio]['params'] = -1; pruning_results[ratio]['gflops'] = -1; pruning_results[ratio]['error'] = str(e_ratio)

    finally:
        # --- Cleanup for next iteration ---
        # Drop every reference to the pruned copy before the next deepcopy so that
        # two full models are not kept in GPU memory at once.
        print("  Cleaning up memory for next ratio...")
        if 'model_pruned_copy' in locals() and model_pruned_copy is not None: del model_pruned_copy
        if 'pruner' in locals(): del pruner
        if 'ignored_layers' in locals(): del ignored_layers
        if 'importance' in locals(): del importance
        if 'nested_model' in locals(): del nested_model
        gc.collect()
        if torch.cuda.is_available(): torch.cuda.empty_cache()

print("\n--- Pruning & Saving Loop Finished ---")
# Store results globally if needed by next cells
globals()['pruning_results'] = pruning_results


--- Starting Pruning & Saving Loop (No Fine-tuning) ---
Base Model Parameters: 39,878,026
Base Model GFLOPs: 204.87
Using device: cuda:0

--- Processing Ratio: 0.10 ---
  Creating deepcopy of base model...
  Copy created on cuda:0
  Identifying layers to ignore...
  Identified 35 unique modules to ignore.
  Setting up Pruner for ratio 0.10...


 Torch-Pruning will prune the last non-singleton dimension of these parameters. If you wish to change this behavior, please provide an unwrapped_parameters argument.


  Applying pruner.step()...
  Pruning complete.
  Calculating pruned metrics...
    Params: 36,535,818 (Reduction: 8.38%)
    GFLOPs: 191.19 (Reduction: 6.68%)
  Saving pruned structure to: /content/drive/MyDrive/kitti_torch_pruning_output_v1/pruned_ratio_0.10
  Cleaning up memory for next ratio...

--- Processing Ratio: 0.20 ---
  Creating deepcopy of base model...
  Copy created on cuda:0
  Identifying layers to ignore...
  Identified 35 unique modules to ignore.
  Setting up Pruner for ratio 0.20...
  Applying pruner.step()...
  Pruning complete.
  Calculating pruned metrics...
    Params: 33,827,306 (Reduction: 15.17%)
    GFLOPs: 180.16 (Reduction: 12.06%)
  Saving pruned structure to: /content/drive/MyDrive/kitti_torch_pruning_output_v1/pruned_ratio_0.20
  Cleaning up memory for next ratio...

--- Processing Ratio: 0.30 ---
  Creating deepcopy of base model...
  Copy created on cuda:0
  Identifying layers to ignore...
  Identified 35 unique modules to ignore.
  Setting up Pruner 

In [None]:
# =============================================================================
# Cell B: Evaluation Loop (Correct Loading and BN Replace)
# =============================================================================
import torch
import torch.nn as nn # Needed for BatchNorm2d
import os
import gc
from transformers import AutoModelForObjectDetection, AutoImageProcessor, AutoConfig
# --- Import Safetensors if needed ---
try:
    from safetensors.torch import load_file as load_safetensors
    SAFE_TENSORS_AVAILABLE = True
except ImportError: print("Warning: safetensors library not found."); SAFE_TENSORS_AVAILABLE = False
# --- End Import ---
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from tqdm.notebook import tqdm
import traceback
# No torch_pruning import needed here anymore

# --- Check that the helper from Cell 1 is available ---
# Nothing is imported here: this only verifies the name is already defined.
assert 'set_module_by_name' in globals(), "Helper function set_module_by_name not defined (run Cell 1)"
# --- End helper check ---

print("\n--- Starting Evaluation Loop (Correct Loading and BN Replace) ---")

# --- Ensure previous results and COCO GT are available ---
# NOTE: these asserts only check that each name is bound. A name bound to None
# still passes; Cell 7 sets coco_gt = None when the annotation file is missing
# or fails to load.
assert 'pruning_results' in locals(), "Pruning results dictionary not found. Run Cell A first."
assert 'coco_gt' in locals(), "coco_gt object from Cell 7 is needed."
assert 'image_processor' in locals(), "image_processor object needed."
assert 'images_dir' in locals(), "images_dir path needed."
# Reuse the device chosen by an earlier cell; otherwise prefer CUDA when available.
if 'device' not in locals(): device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# --- Function to Replace FrozenBN ---
# (Intended to mirror the FrozenBN replacement that is expected to run in Cell 2; keep the two in sync - TODO confirm.)
def replace_frozen_bn(model_to_modify):
    """Replace every ``DeformableDetrFrozenBatchNorm2d`` in a model with ``nn.BatchNorm2d``.

    The model is patched in place via ``set_module_by_name``. For each frozen layer a
    ``BatchNorm2d`` with the same number of features is created on the frozen layer's
    device, the weight / bias / running_mean / running_var tensors are copied over when
    their shapes match, and the new layer inherits the frozen layer's train/eval flag.

    Args:
        model_to_modify: Root ``nn.Module`` whose sub-modules are scanned.

    Returns:
        A ``(model_to_modify, replacement_count)`` tuple. ``replacement_count`` is 0 when
        the frozen-BN class cannot be imported or no such layer is present.
    """
    try:
        from transformers.models.deformable_detr.modeling_deformable_detr import DeformableDetrFrozenBatchNorm2d
    except ImportError:
        print("WARNING: DeformableDetrFrozenBatchNorm2d not found during replacement.")
        return model_to_modify, 0

    replacement_count = 0
    error_count = 0
    module_list = list(model_to_modify.named_modules())
    print(f"    Replacing FrozenBN in {len(module_list)} modules...")

    # Collect the names up front: the module tree is mutated while we replace.
    modules_to_replace = [
        name for name, module in module_list
        if isinstance(module, DeformableDetrFrozenBatchNorm2d)
    ]

    for name in modules_to_replace:
        try:
            module = model_to_modify.get_submodule(name)
            if not isinstance(module, DeformableDetrFrozenBatchNorm2d):
                continue

            frozen_weight = getattr(module, 'weight', None)
            if frozen_weight is None:
                print(f"      Skipping {name} - no weight attr.")
                error_count += 1
                continue

            # Take the device from the weight tensor itself. Looking it up through
            # module.parameters() yields nothing for a layer that keeps its tensors as
            # buffers, which made the replacement always land on the CPU.
            new_bn = nn.BatchNorm2d(
                frozen_weight.shape[0], eps=1e-5, affine=True, track_running_stats=True
            ).to(frozen_weight.device)

            # Copy the affine parameters and statistics before swapping the module in,
            # so the replacement starts from the values held by the frozen layer.
            for tensor_name in ('weight', 'bias', 'running_mean', 'running_var'):
                source = getattr(module, tensor_name, None)
                target = getattr(new_bn, tensor_name, None)
                if source is not None and target is not None and target.shape == source.shape:
                    target.data.copy_(source.data)

            source_tracked = getattr(module, 'num_batches_tracked', None)
            if source_tracked is not None and hasattr(new_bn, 'num_batches_tracked'):
                new_bn.num_batches_tracked.data.copy_(source_tracked.data)

            # A freshly built BatchNorm2d is in training mode; mirror the replaced layer so
            # an eval-mode model does not start updating running statistics.
            new_bn.train(module.training)

            set_module_by_name(model_to_modify, name, new_bn)
            replacement_count += 1
        except Exception as e_replace:
            # Best effort: report the failure and carry on with the remaining layers.
            print(f"      ERROR replacing {name}: {e_replace}")
            traceback.print_exc()
            error_count += 1

    print(f"    Finished replacement. Replaced: {replacement_count}, Errors: {error_count}")
    if error_count > 0:
        print("    WARNING: Errors occurred during BN replacement.")
    return model_to_modify, replacement_count
# --- End Replace Function ---


if coco_gt is None:
    print("COCO ground truth not loaded (coco_gt is None). Cannot calculate COCO mAP.")
    # Without ground truth no score can be computed: give every ratio that has no score
    # yet the -1.0 sentinel, leaving any existing values untouched.
    for ratio in pruning_results:
        ratio_entry = pruning_results[ratio]
        ratio_entry.setdefault('mAP', -1.0)
        ratio_entry.setdefault('mAP50', -1.0)
else:
    # --- Define CocoEvalDataset (as before) ---
    class CocoEvalDataset(Dataset):
        """Yield ``(PIL image, target dict)`` pairs for the image IDs of a COCO ground-truth object.

        ``target`` holds the image ID plus the image width and height. Items whose file is
        unknown, missing on disk, or unreadable are returned as ``None``.
        """

        def __init__(self, coco_gt_obj, img_dir):
            self.coco = coco_gt_obj
            self.img_ids = coco_gt_obj.getImgIds()
            self.img_dir = img_dir
            self.img_info = coco_gt_obj.loadImgs(self.img_ids)

            # Only entries that carry a file name can be mapped to a path.
            self.id_to_path = {}
            for info in self.img_info:
                if 'file_name' in info:
                    self.id_to_path[info['id']] = os.path.join(img_dir, info['file_name'])
            print(f"CocoEvalDataset: Mapped {len(self.id_to_path)} image IDs to paths.")

            # Report, but do not fail on, IDs without a usable file.
            missing = []
            for known_id in self.img_ids:
                if known_id not in self.id_to_path or not os.path.exists(self.id_to_path[known_id]):
                    missing.append(known_id)
            if missing:
                print(f"  Warning: Missing files for {len(missing)} IDs (e.g., {missing[:5]})")

        def __len__(self):
            return len(self.img_ids)

        def __getitem__(self, idx):
            img_id = self.img_ids[idx]
            path = self.id_to_path.get(img_id)
            if not path or not os.path.exists(path):
                return None
            try:
                img = Image.open(path).convert("RGB")
                target = {"image_id": img_id, "width": img.width, "height": img.height}
                return img, target
            except Exception as e:
                print(f"Err loading {path}: {e}")
                return None
    # --- End Dataset Def ---

    sorted_ratios = sorted(pruning_results.keys())
    for ratio in sorted_ratios:
        print(f"\n--- Evaluating Ratio: {ratio:.2f} ---")
        eval_model = None
        pruning_results[ratio]['mAP'] = -1.0; pruning_results[ratio]['mAP50'] = -1.0

        if pruning_results[ratio].get('params', -1) == -1 or 'save_path' not in pruning_results[ratio]: print("  Skipping evaluation due to previous error or missing save path."); continue
        saved_model_path = pruning_results[ratio]['save_path']
        if not os.path.isdir(saved_model_path): print(f"  Saved model path not found: {saved_model_path}. Skipping."); continue

        try:
            # 1. Load pruned model structure and weights directly
            print(f"  Loading pruned model using from_pretrained: {saved_model_path}")
            # Set low_cpu_mem_usage=True if memory becomes an issue during loading large models
            eval_model = AutoModelForObjectDetection.from_pretrained(
                saved_model_path,
                # low_cpu_mem_usage=True, # Optional
                # ignore_mismatched_sizes=False # Should not be needed now
            )
            print("  Model loaded initially (may have FrozenBN).")

            # 2. Replace FrozenBN layers *after* loading the model
            print("  Replacing any FrozenBN layers in loaded model...")
            # Move to CPU for replacement safety, then back to GPU
            eval_model.cpu()
            eval_model, bn_replaced_count = replace_frozen_bn(eval_model)
            eval_model.to(device) # Move back to evaluation device
            eval_model.eval()
            print("  BN replacement attempted, model ready on device.")

            # 3. Prepare Evaluation DataLoader
            # ... (dataloader setup as before) ...
            print("  Preparing evaluation dataloader...")
            eval_dataset = CocoEvalDataset(coco_gt_obj=coco_gt, img_dir=images_dir)
            def eval_collate_fn(batch): batch = [item for item in batch if item is not None]; return batch[0] if batch else None
            eval_loader = DataLoader(eval_dataset, batch_size=1, shuffle=False, num_workers=0, collate_fn=eval_collate_fn)
            print(f"  DataLoader ready with {len(eval_dataset)} images.")


            # 4. Run Inference
            # ... (inference loop as before) ...
            coco_results_eval = []; processed_eval_count = 0
            print("  Running inference...")
            with torch.no_grad():
                for batch_data in tqdm(eval_loader, desc=f"  Evaluating Ratio {ratio:.2f}", leave=False):
                    if batch_data is None: continue
                    image, target = batch_data; image_id = target['image_id']
                    try:
                        original_size = (target['height'], target['width']); inputs = image_processor(images=image, return_tensors="pt").to(device)
                        outputs = eval_model(**inputs)
                        target_sizes = torch.tensor([original_size], device=device)
                        results_det = image_processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.1)[0]
                        boxes, scores, labels = results_det["boxes"].cpu().tolist(), results_det["scores"].cpu().tolist(), results_det["labels"].cpu().tolist()
                        for box, score, label in zip(boxes, scores, labels):
                            x_min, y_min, x_max, y_max = box; coco_bbox = [x_min, y_min, x_max - x_min, y_max - y_min]
                            coco_results_eval.append({"image_id": image_id, "category_id": label, "bbox": coco_bbox, "score": score})
                        processed_eval_count += 1
                    except Exception as e_infer: print(f"    Infer Err (img {image_id}): {e_infer}"); traceback.print_exc(); continue
            print(f"  Processed {processed_eval_count} images for evaluation.")


            # 5. Run COCOeval API
            # ... (COCOeval logic as before) ...
            if coco_results_eval:
                 print("  Running COCO evaluation API...")
                 try:
                     coco_dt = coco_gt.loadRes(coco_results_eval); coco_eval = COCOeval(coco_gt, coco_dt, iouType='bbox')
                     coco_eval.evaluate(); coco_eval.accumulate(); coco_eval.summarize()
                     pruning_results[ratio]['mAP'] = coco_eval.stats[0]; pruning_results[ratio]['mAP50'] = coco_eval.stats[1]
                     print(f"    mAP: {pruning_results[ratio]['mAP']:.4f}, mAP50: {pruning_results[ratio]['mAP50']:.4f}")
                 except Exception as e_coco_api: print(f"    ERROR during COCOeval: {e_coco_api}"); traceback.print_exc()
            else: print("    No evaluation results generated.")

        except Exception as e_eval_ratio:
            print(f"  ERROR during evaluation prep/run for ratio {ratio:.2f}: {e_eval_ratio}")
            traceback.print_exc()

        finally:
            # Cleanup
            print("  Cleaning up evaluation objects...")
            if 'eval_model' in locals(): del eval_model
            if 'eval_loader' in locals(): del eval_loader
            if 'eval_dataset' in locals(): del eval_dataset
            if 'coco_dt' in locals(): del coco_dt
            if 'coco_eval' in locals(): del coco_eval
            gc.collect();
            if torch.cuda.is_available(): torch.cuda.empty_cache()

print("\n--- Evaluation Loop Finished ---")
globals()['pruning_results'] = pruning_results


--- Starting Evaluation Loop (Correct Loading and BN Replace) ---
Using device: cuda:0

--- Evaluating Ratio: 0.10 ---
  Loading pruned model using from_pretrained: /content/drive/MyDrive/kitti_torch_pruning_output_v1/pruned_ratio_0.10
  ERROR during evaluation prep/run for ratio 0.10: Error(s) in loading state_dict for DeformableDetrFrozenBatchNorm2d:
	size mismatch for bias: copying a param with shape torch.Size([56]) from checkpoint, the shape in current model is torch.Size([64]).
  Cleaning up evaluation objects...


Traceback (most recent call last):
  File "<ipython-input-10-685423b58c8b>", line 115, in <cell line: 0>
    eval_model = AutoModelForObjectDetection.from_pretrained(
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/models/auto/auto_factory.py", line 571, in from_pretrained
    return model_class.from_pretrained(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py", line 279, in _wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py", line 4399, in from_pretrained
    ) = cls._load_pretrained_model(
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py", line 4833, in _load_pretrained_model
    disk_offload_index, cpu_offload_index = _load_state_dict_into_meta_model(
                       


--- Evaluating Ratio: 0.20 ---
  Loading pruned model using from_pretrained: /content/drive/MyDrive/kitti_torch_pruning_output_v1/pruned_ratio_0.20
  ERROR during evaluation prep/run for ratio 0.20: Error(s) in loading state_dict for DeformableDetrFrozenBatchNorm2d:
	size mismatch for bias: copying a param with shape torch.Size([48]) from checkpoint, the shape in current model is torch.Size([64]).
  Cleaning up evaluation objects...


Traceback (most recent call last):
  File "<ipython-input-10-685423b58c8b>", line 115, in <cell line: 0>
    eval_model = AutoModelForObjectDetection.from_pretrained(
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/models/auto/auto_factory.py", line 571, in from_pretrained
    return model_class.from_pretrained(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py", line 279, in _wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py", line 4399, in from_pretrained
    ) = cls._load_pretrained_model(
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py", line 4833, in _load_pretrained_model
    disk_offload_index, cpu_offload_index = _load_state_dict_into_meta_model(
                       


--- Evaluating Ratio: 0.30 ---
  Loading pruned model using from_pretrained: /content/drive/MyDrive/kitti_torch_pruning_output_v1/pruned_ratio_0.30
  ERROR during evaluation prep/run for ratio 0.30: Error(s) in loading state_dict for DeformableDetrFrozenBatchNorm2d:
	size mismatch for bias: copying a param with shape torch.Size([40]) from checkpoint, the shape in current model is torch.Size([64]).
  Cleaning up evaluation objects...


Traceback (most recent call last):
  File "<ipython-input-10-685423b58c8b>", line 115, in <cell line: 0>
    eval_model = AutoModelForObjectDetection.from_pretrained(
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/models/auto/auto_factory.py", line 571, in from_pretrained
    return model_class.from_pretrained(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py", line 279, in _wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py", line 4399, in from_pretrained
    ) = cls._load_pretrained_model(
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py", line 4833, in _load_pretrained_model
    disk_offload_index, cpu_offload_index = _load_state_dict_into_meta_model(
                       


--- Evaluating Ratio: 0.40 ---
  Loading pruned model using from_pretrained: /content/drive/MyDrive/kitti_torch_pruning_output_v1/pruned_ratio_0.40
  ERROR during evaluation prep/run for ratio 0.40: Error(s) in loading state_dict for DeformableDetrFrozenBatchNorm2d:
	size mismatch for bias: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([64]).
  Cleaning up evaluation objects...


Traceback (most recent call last):
  File "<ipython-input-10-685423b58c8b>", line 115, in <cell line: 0>
    eval_model = AutoModelForObjectDetection.from_pretrained(
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/models/auto/auto_factory.py", line 571, in from_pretrained
    return model_class.from_pretrained(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py", line 279, in _wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py", line 4399, in from_pretrained
    ) = cls._load_pretrained_model(
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py", line 4833, in _load_pretrained_model
    disk_offload_index, cpu_offload_index = _load_state_dict_into_meta_model(
                       


--- Evaluating Ratio: 0.50 ---
  Loading pruned model using from_pretrained: /content/drive/MyDrive/kitti_torch_pruning_output_v1/pruned_ratio_0.50
  ERROR during evaluation prep/run for ratio 0.50: Error(s) in loading state_dict for DeformableDetrFrozenBatchNorm2d:
	size mismatch for bias: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([64]).
  Cleaning up evaluation objects...

--- Evaluation Loop Finished ---


Traceback (most recent call last):
  File "<ipython-input-10-685423b58c8b>", line 115, in <cell line: 0>
    eval_model = AutoModelForObjectDetection.from_pretrained(
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/models/auto/auto_factory.py", line 571, in from_pretrained
    return model_class.from_pretrained(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py", line 279, in _wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py", line 4399, in from_pretrained
    ) = cls._load_pretrained_model(
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py", line 4833, in _load_pretrained_model
    disk_offload_index, cpu_offload_index = _load_state_dict_into_meta_model(
                       

In [None]:
# =============================================================================
# Cell C: Final Summary
# =============================================================================
print("\n--- Summary of Pruning Results (No Fine-tuning) ---")

# Ensure results dictionary exists
assert 'pruning_results' in locals(), "Run Cell A and Cell B first."
assert 'original_params' in locals(), "Original params not found."
assert 'original_gflops' in locals(), "Original GFLOPs not found."

print(f"Baseline Parameters: {original_params:,}")
print(f"Baseline GFLOPs: {original_gflops:.2f}")

# Size the separator rule from the header itself so it always spans the full table width
# (a fixed 85-character rule was shorter than the 98-character rows).
summary_header = f"{'Ratio':<7} | {'Pruned Params':<15} | {'Param Reduc':<12} | {'Pruned GFLOPs':<15} | {'GFLOPs Reduc':<13} | {'mAP':<9} | {'mAP50':<9}"
summary_rule = "-" * len(summary_header)
print(summary_rule)
print(summary_header)
print(summary_rule)

sorted_ratios = sorted(pruning_results.keys())
for ratio in sorted_ratios:
    res = pruning_results[ratio]
    if res.get('params', -1) == -1:
        # params == -1 (or absent) marks a ratio whose pruning/metric step failed.
        status = res.get('error', 'Pruning/Metric Error')
        print(f"{ratio:<7.2f} | {'ERROR':<15} | {'ERROR':<12} | {'ERROR':<15} | {'ERROR':<13} | {'ERROR':<9} | {'ERROR':<9}")
        print(f"  Error msg: {status}")
    else:
        # Negative values are sentinels for "not computed": shown as "Error" / "N/A".
        gflops_ok = res.get('gflops', -1.0) >= 0
        param_reduc_str = f"{res.get('param_reduc%', 0.0):.2f}%"
        gflop_str = f"{res.get('gflops', -1.0):.2f}" if gflops_ok else "Error"
        gflop_reduc_str = f"{res.get('gflop_reduc%', 0.0):.2f}%" if gflops_ok else "Error"
        map_str = f"{res.get('mAP', -1.0):.4f}" if res.get('mAP', -1.0) >= 0 else "N/A"
        map50_str = f"{res.get('mAP50', -1.0):.4f}" if res.get('mAP50', -1.0) >= 0 else "N/A"

        print(f"{ratio:<7.2f} | {res.get('params', 0):<15,} | {param_reduc_str:<12} | {gflop_str:<15} | {gflop_reduc_str:<13} | {map_str:<9} | {map50_str:<9}")

print(summary_rule)
print("\nSaved model paths:")
for ratio in sorted_ratios:
    print(f"  Ratio {ratio:.2f}: {pruning_results[ratio].get('save_path', 'N/A or Error')}")

print("\n--- End of Summary ---")

In [None]:
# =============================================================================
# Cell 8: Fine-tuning Loop
# =============================================================================
print("\n--- Fine-tuning Pruned Model ---")

final_model_saved_path = None  # Set to the final save directory once the model has been saved

if not DO_FINE_TUNING:
    print("Skipping fine-tuning (disabled).")
elif 'model_pruned' not in locals() or model_pruned is None:
    print("Skipping fine-tuning: Pruned model is not available.")
elif globals().get('train_dataloader') is None or globals().get('val_dataloader') is None:
    # Looked up by name so that re-running this cell after its own cleanup (which removes
    # the dataloaders) reports the situation instead of raising NameError.
    print("Skipping fine-tuning: Dataloaders not available.")
elif not any(p.requires_grad for p in model_pruned.parameters()):
    # Must be checked before the optimizer is built: AdamW rejects an empty parameter
    # list, so a warning placed after its construction could never be printed.
    print("WARNING: No trainable parameters found in the model!")
    print("Skipping fine-tuning: nothing to optimize.")
else:
    print(f"Starting fine-tuning for {FINE_TUNE_EPOCHS} epochs...")
    model_pruned.to(device)  # Ensure model is on the training device

    # Optimize only the parameters that require gradients (some layers may be frozen).
    trainable_params = [p for p in model_pruned.parameters() if p.requires_grad]
    optimizer = torch.optim.AdamW(trainable_params, lr=FINE_TUNE_LR, weight_decay=1e-4)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)  # Example scheduler

    print(f"Optimizer: AdamW, LR={FINE_TUNE_LR}, Scheduler: StepLR")

    # Training loop
    for epoch in range(FINE_TUNE_EPOCHS):
        print(f"\n--- Epoch {epoch+1}/{FINE_TUNE_EPOCHS} ---")
        model_pruned.train()  # validation below switches to eval mode, so re-enable every epoch
        model_pruned.to(device)
        total_train_loss = 0
        processed_batches = 0

        progress_bar_train = tqdm(train_dataloader, desc=f"Epoch {epoch+1} Training", leave=False)
        for batch in progress_bar_train:
            if batch is None:
                continue  # Skip bad batches

            try:
                pixel_values = batch["pixel_values"].to(device)
                labels = [{k: v.to(device) for k, v in t.items()} for t in batch["labels"]]

                # NOTE(review): pixel_mask is not forwarded. If the collate function pads images
                # to a common size, passing batch["pixel_mask"] would let the model ignore the
                # padding - confirm against the dataloader before changing.
                outputs = model_pruned(pixel_values=pixel_values, pixel_mask=None, labels=labels)
                loss = outputs.loss

                if not torch.isfinite(loss):
                    print(f"WARNING: NaN/Inf loss detected ({loss.item()}). Skipping batch.")
                    optimizer.zero_grad()  # Clear potential bad grads
                    continue

                # Backward pass and optimization
                optimizer.zero_grad()
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model_pruned.parameters(), max_norm=0.1)
                optimizer.step()

                total_train_loss += loss.item()
                processed_batches += 1
                progress_bar_train.set_postfix({'loss': f"{loss.item():.4f}", 'avg_loss': f"{total_train_loss/processed_batches:.4f}"})

            except Exception as e_train:
                # Best effort: report the failing batch and keep training on the rest.
                print(f"\nERROR during training batch: {e_train}")
                traceback.print_exc()
                continue

        # Reported as 0 when no batch could be processed at all.
        avg_train_loss = total_train_loss / processed_batches if processed_batches > 0 else 0
        print(f"Epoch {epoch+1} Average Training Loss: {avg_train_loss:.4f}")

        # --- Validation ---
        model_pruned.eval()
        total_val_loss = 0
        val_batches = 0
        print("Running validation...")
        progress_bar_val = tqdm(val_dataloader, desc=f"Epoch {epoch+1} Validation", leave=False)
        with torch.no_grad():
            for batch in progress_bar_val:
                if batch is None:
                    continue
                try:
                    pixel_values = batch["pixel_values"].to(device)
                    labels = [{k: v.to(device) for k, v in t.items()} for t in batch["labels"]]

                    outputs = model_pruned(pixel_values=pixel_values, pixel_mask=None, labels=labels)
                    loss = outputs.loss

                    # Non-finite losses are left out of the average.
                    if torch.isfinite(loss):
                        total_val_loss += loss.item()
                        val_batches += 1
                except Exception as e_val:
                    print(f"\nERROR during validation batch: {e_val}")
                    continue

        avg_val_loss = total_val_loss / val_batches if val_batches > 0 else 0
        print(f"Epoch {epoch+1} Average Validation Loss: {avg_val_loss:.4f}")

        # Step the scheduler
        lr_scheduler.step()
        print(f"Epoch {epoch+1} completed. Current LR: {optimizer.param_groups[0]['lr']:.2e}")

        # --- Save Checkpoint ---
        # Not read inside this cell; kept in case a later cell inspects it.
        checkpoint_saved_this_epoch = False
        try:
            # Prefer saving full model state using save_pretrained in the checkpoint dir
            ckpt_dir = os.path.join(output_dir, f"checkpoint-epoch-{epoch+1}")
            print(f"  Saving checkpoint to: {ckpt_dir}")
            model_pruned.save_pretrained(ckpt_dir)
            # Also save processor config for easy reloading
            if image_processor:
                image_processor.save_pretrained(ckpt_dir)
            print(f"  Checkpoint saved successfully.")
            checkpoint_saved_this_epoch = True
        except Exception as e_ckpt:
            print(f"  ERROR saving checkpoint: {e_ckpt}")
            traceback.print_exc()

    print("\n--- Fine-tuning finished ---")

    # --- Save Final Model ---
    try:
        final_save_dir = os.path.join(output_dir, "final_model")
        print(f"Saving final model to: {final_save_dir}")
        model_pruned.save_pretrained(final_save_dir)
        if image_processor:
            image_processor.save_pretrained(final_save_dir)
        # Update the path variable for the summary cell
        final_model_saved_path = final_save_dir
        print("Final model saved successfully.")
    except Exception as e_final_save:
        print(f"Error saving final model: {e_final_save}")
        traceback.print_exc()

# Cleanup dataloaders to free memory. pop() rather than `del`, so the cell can be re-run
# (or run when an earlier cell never created these names) without raising NameError.
for _stale_name in ("train_dataloader", "val_dataloader", "train_dataset", "val_dataset"):
    globals().pop(_stale_name, None)
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()
print("Cleaned up dataloaders.")


--- Fine-tuning Pruned Model ---
Starting fine-tuning for 5 epochs...
Optimizer: AdamW, LR=1e-05, Scheduler: StepLR

--- Epoch 1/5 ---


Epoch 1 Training:   0%|          | 0/879 [00:00<?, ?it/s]

Epoch 1 Average Training Loss: 9.1055
Running validation...


Epoch 1 Validation:   0%|          | 0/49 [00:00<?, ?it/s]

Epoch 1 Average Validation Loss: 0.0059
Epoch 1 completed. Current LR: 1.00e-05
  Saving checkpoint to: /content/drive/MyDrive/kitti_torch_pruning_output_v1/checkpoint-epoch-1
  Checkpoint saved successfully.

--- Epoch 2/5 ---


Epoch 2 Training:   0%|          | 0/879 [00:00<?, ?it/s]

Epoch 2 Average Training Loss: 0.0011
Running validation...


Epoch 2 Validation:   0%|          | 0/49 [00:00<?, ?it/s]

Epoch 2 Average Validation Loss: 0.0007
Epoch 2 completed. Current LR: 1.00e-05
  Saving checkpoint to: /content/drive/MyDrive/kitti_torch_pruning_output_v1/checkpoint-epoch-2
  Checkpoint saved successfully.

--- Epoch 3/5 ---


Epoch 3 Training:   0%|          | 0/879 [00:00<?, ?it/s]

Epoch 3 Average Training Loss: 0.0003
Running validation...


Epoch 3 Validation:   0%|          | 0/49 [00:00<?, ?it/s]

Epoch 3 Average Validation Loss: 0.0003
Epoch 3 completed. Current LR: 1.00e-05
  Saving checkpoint to: /content/drive/MyDrive/kitti_torch_pruning_output_v1/checkpoint-epoch-3
  Checkpoint saved successfully.

--- Epoch 4/5 ---


Epoch 4 Training:   0%|          | 0/879 [00:00<?, ?it/s]

Epoch 4 Average Training Loss: 0.0001
Running validation...


Epoch 4 Validation:   0%|          | 0/49 [00:00<?, ?it/s]

Epoch 4 Average Validation Loss: 0.0001
Epoch 4 completed. Current LR: 1.00e-05
  Saving checkpoint to: /content/drive/MyDrive/kitti_torch_pruning_output_v1/checkpoint-epoch-4
  Checkpoint saved successfully.

--- Epoch 5/5 ---


Epoch 5 Training:   0%|          | 0/879 [00:00<?, ?it/s]

Epoch 5 Average Training Loss: 0.0001
Running validation...


Epoch 5 Validation:   0%|          | 0/49 [00:00<?, ?it/s]

Epoch 5 Average Validation Loss: 0.0001
Epoch 5 completed. Current LR: 1.00e-06
  Saving checkpoint to: /content/drive/MyDrive/kitti_torch_pruning_output_v1/checkpoint-epoch-5
  Checkpoint saved successfully.

--- Fine-tuning finished ---
Saving final model to: /content/drive/MyDrive/kitti_torch_pruning_output_v1/final_model
Final model saved successfully.
Cleaned up dataloaders.


In [None]:
# =============================================================================
# Cell 9: Evaluation (mAP after Fine-tuning)
# =============================================================================
print("\n--- Evaluating mAP After Fine-tuning (Requires COCO annotations) ---\n")

# Ensure necessary variables exist.
# Both membership tests are spelled out: `'model_pruned' or '...' in locals()` is always
# truthy because the non-empty string literal short-circuits the `or`, so it never fails.
assert 'model_pruned' in locals() or 'final_model_saved_path' in locals(), "Fine-tuning cell must run first or model path needed."
assert 'coco_gt' in locals(), "COCO GT object needed for evaluation (loaded in Cell 7)."
assert 'image_processor' in locals(), "Image processor needed."

# --- Decide which model to evaluate ---
# Sources are tried in priority order; eval_model stays None when none of them applies
# or when loading fails.
eval_model = None
eval_model_source = None  # For logging

# Option 1: Use the model currently in memory if fine-tuning just finished
if 'model_pruned' in locals() and model_pruned is not None and 'DO_FINE_TUNING' in locals() and DO_FINE_TUNING:
    print("Evaluating the model currently in memory (fine-tuned).")
    eval_model = model_pruned  # Use the model directly
    eval_model_source = "memory (fine-tuned)"
# Option 2: Load the final saved model (if fine-tuning was done and saved)
elif 'final_model_saved_path' in locals() and final_model_saved_path and os.path.isdir(final_model_saved_path):
    print(f"Loading final saved model from: {final_model_saved_path}")
    eval_model_source = final_model_saved_path
    try:
        # Re-load the processor from the same directory if it is not in memory
        if 'image_processor' not in locals() or image_processor is None:
            image_processor = AutoImageProcessor.from_pretrained(final_model_saved_path)
        eval_model = AutoModelForObjectDetection.from_pretrained(final_model_saved_path)  # Load config automatically
        print("Loaded final model successfully.")
    except Exception as e_load_final:
        print(f"ERROR loading final model: {e_load_final}")
        traceback.print_exc()
        eval_model = None
# Option 3: Load the pruned structure (before fine-tuning, less common for eval)
elif 'output_dir' in locals() and os.path.isdir(output_dir) and os.path.exists(os.path.join(output_dir, "model.safetensors")):
    # A model.safetensors directly in output_dir is taken as a save_pretrained output
    # (likely the pruned structure).
    pruned_structure_path = output_dir
    print(f"Loading pruned structure (before fine-tuning) from: {pruned_structure_path}")
    eval_model_source = pruned_structure_path + " (pruned only)"
    try:
        if 'image_processor' not in locals() or image_processor is None:
            image_processor = AutoImageProcessor.from_pretrained(pruned_structure_path)
        eval_model = AutoModelForObjectDetection.from_pretrained(pruned_structure_path)
        print("Loaded pruned (pre-FT) model successfully.")
    except Exception as e_load_pruned:
        print(f"ERROR loading pruned model structure: {e_load_pruned}")
        traceback.print_exc()
        eval_model = None
else:
    print("No fine-tuned or pruned model available in memory or standard save locations.")
    eval_model_source = "None"

# --- Proceed with Evaluation if Model and GT are available ---
# -1.0 is the "not computed" sentinel for both scores.
map_after = -1.0
map_50_after = -1.0

if eval_model is None:
    print(f"Model not available for evaluation (Source attempted: {eval_model_source}).")
# Check if coco_gt object was successfully loaded in Cell 7
elif coco_gt is None:
    print("COCO ground truth annotations not loaded (coco_gt is None). Cannot calculate COCO mAP.")
    # Even if GT is missing, we might want to run inference loop just to check for errors
    # Set a flag or handle this case based on desired behavior
else:
    print(f"Preparing for mAP evaluation using model from: {eval_model_source}")
    # Ensure model is on the correct device and in eval mode
    eval_model.to(device)
    eval_model.eval()

    # --- Prepare DataLoader for COCO evaluation dataset ---
    # This requires a dataset that yields (image, target_dict_with_image_id)
    # We need to load images based on the image IDs present in coco_gt

    # Get all image IDs present in the ground truth annotations
    img_ids = coco_gt.getImgIds()
    print(f"Found {len(img_ids)} image IDs in COCO ground truth.")

    # Create a simple dataset that loads images based on COCO image IDs
    class CocoEvalDataset(Dataset):
        """Dataset that serves evaluation images keyed by COCO image ID.

        Each item is a ``(PIL image, target)`` pair where ``target`` carries the
        COCO ``image_id`` plus the original ``width``/``height``. Items whose file
        cannot be resolved or opened are returned as ``None`` so the collate
        function can drop them.
        """

        def __init__(self, coco_gt_obj, img_dir, transform=None):
            """Index the ground-truth images and report those missing on disk.

            Args:
                coco_gt_obj: object exposing ``getImgIds()`` and ``loadImgs(ids)``.
                img_dir: directory that the records' ``file_name`` values are relative to.
                transform: optional callable applied to the RGB image in ``__getitem__``.
            """
            self.coco = coco_gt_obj
            self.img_dir = img_dir
            self.transform = transform
            self.img_ids = coco_gt_obj.getImgIds()
            self.img_info = coco_gt_obj.loadImgs(self.img_ids)

            # image_id -> absolute file path, for every record that names a file
            self.id_to_path = {}
            for record in self.img_info:
                if 'file_name' in record:
                    self.id_to_path[record['id']] = os.path.join(img_dir, record['file_name'])
            print(f"  Mapped {len(self.id_to_path)} image IDs to file paths.")

            # Missing images are only reported here; img_ids is left unfiltered,
            # so __getitem__ has to skip them again at access time.
            missing_files = []
            for candidate_id in self.img_ids:
                candidate_path = self.id_to_path.get(candidate_id)
                if candidate_path is None or not os.path.exists(candidate_path):
                    missing_files.append(candidate_id)
            if missing_files:
                print(f"  WARNING: Could not find image files for {len(missing_files)} image IDs (e.g., {missing_files[:5]})")

        def __len__(self):
            """Number of ground-truth image IDs (including ones without a file)."""
            return len(self.img_ids)

        def __getitem__(self, idx):
            """Return ``(image, target)`` for position ``idx``, or ``None`` if unusable."""
            img_id = self.img_ids[idx]

            try:
                img_path = self.id_to_path[img_id]
            except KeyError:
                print(f"Skipping img_id {img_id} - path not found.")
                return None

            if not os.path.exists(img_path):
                print(f"Skipping img_id {img_id} - file not found at {img_path}")
                return None

            try:
                image = Image.open(img_path).convert("RGB")
                # Only the ID and original size are recorded here; the annotations
                # themselves are taken from coco_gt during evaluation.
                target = {"image_id": img_id, "width": image.width, "height": image.height}
                if self.transform:
                    image = self.transform(image)
            except Exception as e:
                print(f"Error loading/processing image {img_path} for id {img_id}: {e}")
                return None

            return image, target

    # Create the evaluation dataset instance from the loaded ground truth.
    # 'images_dir' is the image folder configured in Cell 1. No transform is passed:
    # the raw PIL image is handed to image_processor inside the inference loop.
    eval_dataset = CocoEvalDataset(coco_gt_obj=coco_gt, img_dir=images_dir)

    # Define a collate function for evaluation (handles None)
    def eval_collate_fn(batch):
        """Collate a batch of at most one usable sample for the evaluation loop.

        Samples that the dataset returned as ``None`` are discarded. Returns
        ``None`` when nothing usable remains, otherwise the single
        ``(image, target)`` tuple itself (not wrapped in a list).
        """
        usable = list(filter(lambda sample: sample is not None, batch))
        if len(usable) == 0:
            return None
        # The dataset yields tuples rather than dicts and the loop post-processes one
        # image at a time, so anything other than batch size 1 is unsupported here.
        assert len(usable) == 1, "Evaluation loop assumes batch size 1 for now"
        return usable[0]

    # Create DataLoader
    # eval_collate_fn asserts a single sample per batch, so keep this at 1.
    eval_batch_size = 1 # Recommended for simplicity with post_process_object_detection
    eval_loader = DataLoader(eval_dataset,
                             batch_size=eval_batch_size,
                             shuffle=False,
                             num_workers=0, # Use 0 for simplicity, especially if issues arise
                             collate_fn=eval_collate_fn)
    print(f"Created evaluation DataLoader with batch size {eval_batch_size}.")

    # --- Run Inference and Collect Results ---
    # coco_results_after accumulates one dict per detection in the COCO results format
    # (image_id, category_id, bbox as [x, y, w, h], score).
    coco_results_after = []
    processed_eval_count = 0
    print("\nRunning inference for evaluation...")

    with torch.no_grad():
        for batch_data in tqdm(eval_loader, desc="Evaluating mAP"):
            # The collate function returns None when the sample could not be loaded.
            if batch_data is None: continue # Skip bad batches

            image, target = batch_data # Unpack tuple for bs=1
            image_id = target['image_id'] # Get original image ID from COCO GT info

            try:
                # Preprocess image using the loaded processor
                # The original (height, width) is kept so boxes can be mapped back to it below
                original_size = (target['height'], target['width'])
                inputs = image_processor(images=image, return_tensors="pt").to(device)

                # Model inference
                outputs = eval_model(**inputs)

                # Post-process results
                # Use the original image size, not the potentially resized/padded tensor size
                target_sizes = torch.tensor([original_size], device=device)
                results = image_processor.post_process_object_detection(
                    outputs,
                    target_sizes=target_sizes,
                    threshold=0.1 # Confidence threshold for detections
                )[0] # Get results for the first (only) image in the batch

                # Format results for COCOeval
                boxes = results["boxes"].cpu().tolist()
                scores = results["scores"].cpu().tolist()
                labels = results["labels"].cpu().tolist()

                for box, score, label in zip(boxes, scores, labels):
                    # Box is xyxy format from post_process
                    x_min, y_min, x_max, y_max = box
                    # Convert to COCO's xywh format
                    coco_bbox = [x_min, y_min, x_max - x_min, y_max - y_min]
                    coco_results_after.append({
                        "image_id": image_id,
                        # The model's label index is shifted by +1 before being written as category_id.
                        # NOTE(review): this presumes the ground-truth category IDs are 1-based and in the
                        # same order as the model's label indices - confirm against the categories in coco_gt.
                        "category_id": label+1,
                        "bbox": coco_bbox,
                        "score": score,
                    })
                # Counts images that completed inference, including those with zero detections.
                processed_eval_count += 1

            except Exception as e_infer:
                 # Best effort: a failure on one image is logged and the loop moves on.
                 print(f"\nError during inference/postprocessing for image_id {image_id}: {e_infer}")
                 traceback.print_exc()
                 continue # Skip this image

    print(f"\nProcessed {processed_eval_count} images for evaluation.")

    # --- Run COCO Evaluation ---
    # Skipped entirely when inference produced no detections.
    if not coco_results_after:
        print("No evaluation results generated to run COCO eval.")
    else:
        print("Running COCO evaluation API...")
        try:
            # Load results into COCO API
            coco_dt = coco_gt.loadRes(coco_results_after)

            # Run evaluation
            coco_eval = COCOeval(coco_gt, coco_dt, iouType='bbox')
            # Configure evaluation parameters if needed (e.g., specific IoU thresholds, area ranges)
            # coco_eval.params.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True)
            # coco_eval.params.areaRng = [[0 ** 2, 1e5 ** 2], [0 ** 2, 32 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]]
            # coco_eval.params.areaRngLbl = ['all', 'small', 'medium', 'large']

            coco_eval.evaluate()    # Compute per-image evaluations
            coco_eval.accumulate()  # Accumulate results over all images
            coco_eval.summarize()   # Print summary metrics

            # Extract specific metrics
            # These overwrite the -1.0 sentinels set before the evaluation branch.
            map_after = coco_eval.stats[0]  # mAP @ IoU=0.50:0.95 area=all maxDets=100
            map_50_after = coco_eval.stats[1] # mAP @ IoU=0.50 area=all maxDets=100

            print(f"\n--- mAP Results ---")
            print(f"mAP @ IoU=0.50:0.95 (AP): {map_after:.4f}")
            print(f"mAP @ IoU=0.50 (AP50):     {map_50_after:.4f}")

        except Exception as e_eval:
            # On failure the sentinels keep their negative values, so later cells can
            # tell that no valid mAP was produced.
            print(f"ERROR during COCO evaluation API execution: {e_eval}")
            traceback.print_exc()

# Release the objects that only this evaluation cell needed, then reclaim memory.
print("\nCleaning up evaluation objects...")
deleted_vars = []

if 'eval_model' in locals() and eval_model is not None:
    # eval_model may just be another name for model_pruned; in that case keep it alive.
    if 'model_pruned' in locals() and eval_model is model_pruned:
        print("  Skipping deletion of eval_model (same as model_pruned).")
    else:
        del eval_model
        deleted_vars += ['eval_model']

if 'eval_loader' in locals() and eval_loader is not None:
    del eval_loader
    deleted_vars += ['eval_loader']

if 'eval_dataset' in locals() and eval_dataset is not None:
    del eval_dataset
    deleted_vars += ['eval_dataset']

# coco_gt is intentionally left in place: the final summary cell still inspects it.
print(f"Deleted variables: {deleted_vars}")

gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()


--- Evaluating mAP After Fine-tuning (Requires COCO annotations) ---

Evaluating the model currently in memory (fine-tuned).
COCO ground truth annotations not loaded (coco_gt is None). Cannot calculate COCO mAP.

Cleaning up evaluation objects...
  Skipping deletion of eval_model (same as model_pruned).
Deleted variables: []


In [None]:
# =============================================================================
# Cell 10: Final Summary
# =============================================================================
# Prints the end-of-run report: parameter/GFLOP reduction, saved artefacts and mAP.
# Any metric may be absent if the cell that computes it was skipped or failed, so every
# optional value is checked for existence before it is read.
print("\n--- Final Summary ---")
print(f"Pruning Ratio Target (per layer): {GLOBAL_PRUNING_RATIO:.2f}")

# --- Use calculated metrics if available ---
# Resolve availability once so the value lines and the reduction line cannot disagree.
have_original_params = 'original_params' in locals() and original_params > 0
have_pruned_params = 'pruned_params' in locals() and pruned_params >= 0
have_original_gflops = 'original_gflops' in locals() and original_gflops > 0
have_pruned_gflops = 'pruned_gflops' in locals() and pruned_gflops >= 0

print("\nParameters:")
if have_original_params:
    print(f"  Original (Post Head Resize): {original_params:,}")
else:
    print("  Original: (Not calculated/available)")
if have_pruned_params:
    print(f"  Pruned (Torch-Pruning):      {pruned_params:,}")
else:
    print("  Pruned:   (Not calculated/available)")

if have_original_params and have_pruned_params:
    reduction = (original_params - pruned_params) / original_params * 100
    print(f"  Reduction:                   {reduction:.2f}%")

print("\nGFLOPs:")
if have_original_gflops:
    print(f"  Original: {original_gflops:.2f}")
else:
    print("  Original: (Not calculated/available)")
if have_pruned_gflops:
    print(f"  Pruned:   {pruned_gflops:.2f}")
else:
    print("  Pruned:   (Not calculated/available)")

if have_original_gflops and have_pruned_gflops:
    gflops_reduction = (original_gflops - pruned_gflops) / original_gflops * 100
    print(f"  Reduction: {gflops_reduction:.2f}%")

# --- Report Saved File Locations ---
print(f"\nSaved Files in: {output_dir}")
pruned_structure_dir = output_dir # save_pretrained saves directly into the output dir
final_model_dir = os.path.join(output_dir, "final_model") # Default save_pretrained dir

# A model.safetensors file in the directory is taken as evidence that save_pretrained ran.
if os.path.isdir(pruned_structure_dir) and os.path.exists(os.path.join(pruned_structure_dir, "model.safetensors")):
    print(f"  Pruned Structure saved via save_pretrained in: {pruned_structure_dir}")
elif 'pruned_model_saved_path' in locals() and pruned_model_saved_path and os.path.exists(pruned_model_saved_path): # Fallback check for direct state dict save
    print(f"  Pruned Structure state_dict: {os.path.basename(pruned_model_saved_path)} ({os.path.getsize(pruned_model_saved_path)/(1024*1024):.2f} MB)")
else:
    print("  Pruned Structure: Not saved or path not found.")

if DO_FINE_TUNING and os.path.isdir(final_model_dir) and os.path.exists(os.path.join(final_model_dir, "model.safetensors")):
    print(f"  Final Fine-tuned saved via save_pretrained in: {final_model_dir}")
elif DO_FINE_TUNING:
    print("  Final Fine-tuned: Not saved or path not found.")
else:
    print("  Final Fine-tuned: Fine-tuning skipped.")

# --- Report mAP ---
print("\nmAP Evaluation Results:")
if 'map_after' in locals() and map_after >= 0: # Negative value is the "not evaluated" sentinel
    print(f"  mAP @ IoU=0.50:0.95 (AP): {map_after:.4f}")
    print(f"  mAP @ IoU=0.50 (AP50):     {map_50_after:.4f}")
else:
    print("  mAP evaluation not performed or failed.")
    # This branch runs precisely when earlier cells may not have executed, so the COCO
    # variables are looked up defensively instead of risking a NameError here.
    annotation_path = locals().get('coco_annotation_file')
    if not annotation_path or not os.path.exists(annotation_path):
        print("  (COCO annotation file was missing)")
    elif locals().get('coco_gt') is None:
        print("  (COCO GT object failed to load)")


print("\n--- End of Notebook ---")


--- Final Summary ---
Pruning Ratio Target (per layer): 0.10

Parameters:
  Original (Post Head Resize): 39,878,026
  Pruned (Torch-Pruning):      36,535,818
  Reduction:                   8.38%

GFLOPs:
  Original: 204.87
  Pruned:   191.19
  Reduction: 6.68%

Saved Files in: /content/drive/MyDrive/kitti_torch_pruning_output_v1
  Pruned Structure saved via save_pretrained in: /content/drive/MyDrive/kitti_torch_pruning_output_v1
  Final Fine-tuned saved via save_pretrained in: /content/drive/MyDrive/kitti_torch_pruning_output_v1/final_model

mAP Evaluation Results:
  mAP evaluation not performed or failed.
  (COCO annotation file was missing)

--- End of Notebook ---


Shashank's changes

In [None]:
# =============================================================================
# Cell 1: Setup Environment, Mount Drive, Define Paths (MODIFIED)
# =============================================================================
import os
import sys
import torch
import gc
import copy
import glob
import random
import json
from collections import defaultdict, Counter
import traceback
import time # <--- Added for inference time measurement

print("--- Environment Setup ---")

# Set CUDA Launch Blocking (Optional but Recommended for Debugging GPU errors)
# NOTE(review): synchronous kernel launches slow GPU execution; since `time` is imported for
# inference-time measurement, consider unsetting this when collecting timings.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
print("CUDA_LAUNCH_BLOCKING set to 1.")

# Check if in Colab
IN_COLAB = 'google.colab' in sys.modules

# Install necessary libraries
print("Installing required libraries...")
# Note: tqdm is upgraded in the same pip command as the other packages (it is the last entry).
!pip install -q --upgrade transformers datasets accelerate evaluate timm Pillow safetensors pycocotools thop torch-pruning tqdm
print("Libraries installation attempt finished.")

# Import key libraries (do this after install)
try:
    import torch
    import torch.nn as nn
    import numpy as np
    from transformers import (
        AutoImageProcessor, AutoModelForObjectDetection, AutoConfig
    )
    import torchvision
    from tqdm.notebook import tqdm as tqdm_notebook # For notebook progress bars
    from tqdm import tqdm as tqdm_cli # For regular loops if needed
    from PIL import Image
    from torch.utils.data import Dataset, DataLoader
    import torch_pruning as tp
    from thop import profile
    from pycocotools.coco import COCO
    from pycocotools.cocoeval import COCOeval

    print("Core libraries imported successfully.")
except ImportError as e:
    print(f"ERROR: Failed to import libraries: {e}")
    print("Please check the pip install logs above.")
    raise e

# Pick the base path for all data/model folders: the mounted Drive on Colab,
# the current directory otherwise.
if not IN_COLAB:
    base_drive_path = "./" # Adjust if running locally
else:
    from google.colab import drive
    if os.path.exists('/content/drive'):
        print("Google Drive already mounted.")
    else:
        try:
            drive.mount('/content/drive')
            print("Google Drive mounted.")
        except Exception as e_mount:
            # Nothing downstream can work without the Drive, so report and abort.
            print(f"Error mounting drive: {e_mount}")
            raise e_mount
    base_drive_path = "/content/drive/MyDrive/"

# --- Configuration (MODIFIED) ---
print("\n--- Configuration ---")
# !!! Important: Adjust these paths to your actual Drive locations !!!
# Every path is built relative to base_drive_path. The folder names must stay relative:
# os.path.join discards base_drive_path when a later component is absolute, which would
# silently ignore the local (non-Colab) base directory.
model_dir = os.path.join(base_drive_path, "deformable-detr-finetuned-kitti-round2") # DIR where fine-tuned model was saved
dataset_base_dir = os.path.join(base_drive_path, "kitti_subset") # Base DIR for KITTI subset
images_dir = os.path.join(dataset_base_dir, "images") # Specific image folder
# --- Make sure this points to your COCO format VALIDATION json ---
coco_annotation_file = os.path.join(dataset_base_dir, "annotations", "instances_val2017.json") # <== COCO format annotations for mAP
# --- Output directory ---
output_dir = os.path.join(base_drive_path, "kitti_torch_pruning_results_v2") # Output directory for this run

# Pruning Params
TARGET_PRUNING_RATIOS = [0.1, 0.2, 0.3, 0.4, 0.5] # Ratios to test (relative to original)

# Dataset Params (ensure these match your KITTI subset)
NUM_KITTI_CLASSES = 3 # Car, Pedestrian, Cyclist
# One classification output per class, with no extra background/no-object slot added here;
# Cell 2 asserts that this value is exactly 3.
NUM_OUTPUTS_REQUIRED = NUM_KITTI_CLASSES

# Evaluation params
EVAL_BATCH_SIZE = 1 # Adjust based on GPU memory for evaluation inference speed
CONFIDENCE_THRESHOLD = 0.1 # Threshold for keeping detections during post-processing

# --- End Configuration ---

# Create output directory
os.makedirs(output_dir, exist_ok=True)
print(f"Model directory: {model_dir}")
print(f"Dataset directory: {dataset_base_dir}")
print(f"COCO Annotation file: {coco_annotation_file}")
print(f"Output directory: {output_dir}")
print(f"Target Pruning Ratios: {TARGET_PRUNING_RATIOS}")

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
if not torch.cuda.is_available():
    print("WARNING: CUDA not available, running on CPU. This will be very slow.")

# Helper Functions (from original Cell 1)
def get_module_by_name(model: nn.Module, name: str) -> nn.Module:
    """Resolve a dotted path such as ``"backbone.layers.0"`` starting from ``model``.

    Each path component is first tried as an attribute of the current object and,
    failing that, as an integer index into it.

    Raises:
        AttributeError: if a component is neither an attribute nor a usable index.
    """
    current = model
    for part in name.split('.'):
        if hasattr(current, part):
            current = getattr(current, part)
            continue
        try:
            current = current[int(part)]
        except (ValueError, IndexError, TypeError):
            raise AttributeError(f"Module part '{part}' not found in name '{name}'. Parent type: {type(current)}")
    return current

def set_module_by_name(model: nn.Module, name: str, new_module: nn.Module):
    """Replace the sub-module addressed by the dotted path ``name`` with ``new_module``.

    The parent is located with ``model.get_submodule`` (falling back to
    ``get_module_by_name`` if that raises ``AttributeError``); the last path
    component is then assigned as an attribute or, failing that, as an integer index.

    Raises:
        AttributeError: if the last component is neither an existing attribute
            nor a usable index of the parent.
    """
    parent_name, _, leaf_name = name.rpartition('.')

    if not parent_name:
        # A top-level name: the model itself is the parent.
        parent_module = model
    else:
        try:
            parent_module = model.get_submodule(parent_name)
        except AttributeError:
            parent_module = get_module_by_name(model, parent_name) # Fallback

    if hasattr(parent_module, leaf_name):
        setattr(parent_module, leaf_name, new_module)
        return

    try:
        parent_module[int(leaf_name)] = new_module
    except (ValueError, IndexError, TypeError):
        raise AttributeError(f"Could not set attribute or index '{leaf_name}' in parent module '{parent_name}' of type {type(parent_module)}")

print("\nSetup Complete.")

--- Environment Setup ---
CUDA_LAUNCH_BLOCKING set to 1.
Installing required libraries...
Libraries installation attempt finished.
Core libraries imported successfully.
Google Drive already mounted.

--- Configuration ---
Model directory: /content/drive/MyDrive/deformable-detr-finetuned-kitti-round2
Dataset directory: /content/drive/MyDrive/kitti_subset
COCO Annotation file: /content/drive/MyDrive/kitti_subset/annotations/instances_val2017.json
Output directory: /content/drive/MyDrive/kitti_torch_pruning_results_v2
Target Pruning Ratios: [0.1, 0.2, 0.3, 0.4, 0.5]
Using device: cuda

Setup Complete.


In [None]:
# =============================================================================
# Cell 2: Load & Prepare Base Model (Revised for 3 Classes & Correct Loading)
# =============================================================================
import torch
import torch.nn as nn
from transformers import AutoConfig, AutoModelForObjectDetection, AutoImageProcessor
import traceback
import gc

# --- Verify prerequisites defined in Cell 1 (nothing is imported here) ---
# These asserts fail fast with a readable message if Cell 1 was not run first.
assert 'set_module_by_name' in globals(), "Helper function set_module_by_name not defined (run Cell 1)"
# The rest of this cell hard-codes a 3-class label mapping, hence the exact value check.
assert 'NUM_OUTPUTS_REQUIRED' in locals() and NUM_OUTPUTS_REQUIRED == 3, "NUM_OUTPUTS_REQUIRED should be set to 3 in Cell 1"
assert 'model_dir' in locals(), "model_dir not defined in Cell 1"
assert 'device' in locals(), "device not defined in Cell 1"

print("\n--- Loading & Preparing Base Model (Targeting 3 Classes) ---")

# Results of this cell; they stay None if loading fails.
prepared_base_model = None
image_processor = None
config = None
# Default values, might be overridden by loaded config
hidden_dim = 256
decoder_layers = 6
num_queries = 300

# --- Function to Replace FrozenBN (Essential for pruning/fine-tuning later) ---
def replace_frozen_bn(model_to_modify):
    """Swap every DeformableDetrFrozenBatchNorm2d in the model for an ``nn.BatchNorm2d``.

    The weight, bias and running statistics of each frozen layer are copied into
    the new layer. The swap is performed with the model on CPU, after which the
    model is moved back to the device of its first parameter.

    Args:
        model_to_modify: the model to patch in place.

    Returns:
        ``(model_to_modify, replacement_count)``. The count is 0 when the frozen
        BN class cannot be imported or no such layer is present.
    """
    try:
        # Try importing the specific FrozenBN class
        from transformers.models.deformable_detr.modeling_deformable_detr import DeformableDetrFrozenBatchNorm2d
    except ImportError:
        print("  WARNING: DeformableDetrFrozenBatchNorm2d class not found. Cannot replace BN layers.")
        return model_to_modify, 0 # Return unmodified model if class not found
    FROZEN_BN_TYPE = DeformableDetrFrozenBatchNorm2d
    print("  Found DeformableDetrFrozenBatchNorm2d class.")

    # Collect the names first so the module tree is not mutated while it is being walked.
    all_modules = list(model_to_modify.named_modules())
    print(f"  Checking {len(all_modules)} modules for FrozenBN replacement...")
    modules_to_replace = [mod_name for mod_name, mod in all_modules if isinstance(mod, FROZEN_BN_TYPE)]

    if len(modules_to_replace) == 0:
        print("  No FrozenBN layers found to replace.")
        return model_to_modify, 0
    print(f"  Found {len(modules_to_replace)} FrozenBN layers to replace.")

    # Perform replacement on CPU for safety, then move back.
    # A parameter-less model falls back to the device of a fresh CPU tensor.
    first_param = next(iter(model_to_modify.parameters()), None)
    original_device = (torch.tensor(0) if first_param is None else first_param).device
    model_to_modify.cpu()
    print(f"    Moved model to CPU for BN replacement.")

    replacement_count = 0
    error_count = 0
    copied_tensors = ('weight', 'bias', 'running_mean', 'running_var')

    from tqdm import tqdm as tqdm_cli # Use standard tqdm for this internal loop
    for name in tqdm_cli(modules_to_replace, desc="  Replacing FrozenBN", leave=False):
        try:
            module = model_to_modify.get_submodule(name)
            if not isinstance(module, FROZEN_BN_TYPE):
                continue # Should not happen now

            # The channel count is read off the frozen layer's weight.
            frozen_weight = getattr(module, 'weight', None)
            if frozen_weight is None:
                # This might happen if the layer was somehow incomplete
                print(f"    WARNING: Skipping {name} - no weight attribute found to determine num_features.")
                error_count += 1
                continue
            num_features = frozen_weight.shape[0]

            # Create standard BN layer on CPU
            new_bn = nn.BatchNorm2d(num_features, eps=1e-5, affine=True, track_running_stats=True)

            # Copy parameters and statistics *before* the layer is swapped in.
            for tensor_name in copied_tensors:
                source = getattr(module, tensor_name, None)
                if source is not None:
                    getattr(new_bn, tensor_name).data.copy_(source.data)
            # num_batches_tracked might not always exist or be relevant, copy if present
            tracked = getattr(module, 'num_batches_tracked', None)
            if tracked is not None and hasattr(new_bn, 'num_batches_tracked'):
                new_bn.num_batches_tracked.data.copy_(tracked.data)

            # Replace the module using the helper function
            set_module_by_name(model_to_modify, name, new_bn)
            replacement_count += 1
        except Exception as e_replace:
            # Best effort: a failing layer is reported and counted, the others still get replaced.
            print(f"    ERROR replacing {name}: {e_replace}")
            traceback.print_exc()
            error_count += 1

    # Move back to original device
    model_to_modify.to(original_device)
    print(f"    Moved model back to {original_device}.")
    print(f"  Finished BN replacement. Replaced: {replacement_count}, Errors: {error_count}")
    if error_count > 0:
        print("  WARNING: Errors occurred during BN replacement. This might affect performance.")
    return model_to_modify, replacement_count
# --- End Replace Function ---

# Load the processor, a 3-label config and the fine-tuned weights, verify the head size,
# replace the frozen BN layers and move the result to the target device.
# Any failure is reported and re-raised so that later cells do not run on a missing model.
try:
    # 1. Load processor
    image_processor = AutoImageProcessor.from_pretrained(model_dir)
    print(f"Image processor loaded from {model_dir}")

    # 2. Load config and IMMEDIATELY ensure it has the correct number of labels (3)
    print(f"Loading config from {model_dir} and ensuring num_labels is {NUM_OUTPUTS_REQUIRED}...")
    # Define the correct 3-class label mapping expected by the KITTI fine-tuned model
    # IMPORTANT: Verify this matches the classes and order used during YOUR fine-tuning
    id2label = {0: 'Car', 1: 'Pedestrian', 2: 'Cyclist'}
    label2id = {v: k for k, v in id2label.items()}

    # Keyword arguments override the corresponding values stored in the saved config.
    config = AutoConfig.from_pretrained(
        model_dir,
        num_labels=NUM_OUTPUTS_REQUIRED, # Explicitly set to 3
        id2label=id2label,               # Set correct mapping
        label2id=label2id                # Set correct mapping
    )

    # Update other parameters if needed (usually loaded correctly from config.json)
    # The getattr defaults mirror the defaults assigned at the top of this cell.
    num_queries = getattr(config, 'num_queries', 300)
    hidden_dim = getattr(config, 'd_model', 256)
    decoder_layers = getattr(config, 'decoder_layers', 6)
    print(f"Using config: num_labels={config.num_labels}, id2label={config.id2label}")
    print(f"              num_queries={num_queries}, hidden_dim={hidden_dim}, decoder_layers={decoder_layers}")


    # 3. Load model structure and weights using the CORRECTED config
    #    Use ignore_mismatched_sizes=False first. It should work now.
    print(f"Loading model weights from {model_dir}...")
    _model = AutoModelForObjectDetection.from_pretrained(
        model_dir,
        config=config,                  # Pass the corrected config
        ignore_mismatched_sizes=False   # <<< TRY THIS FIRST! Should match now.
    )
    print("Model weights loaded.")
    # If the above fails with size mismatch, something is still wrong with the checkpoint file
    # or the assumed id2label mapping. Only use ignore_mismatched_sizes=True as a last resort
    # and investigate why the checkpoint weights don't match the 3-label config.

    # 4. Verify Head Size (Optional but recommended sanity check)
    print("Verifying loaded model head size...")
    try:
        final_class_layer = None
        # Find the last linear layer in the classification head. Handled layouts:
        #   - class_embed is a ModuleList whose last entry is a Linear
        #   - class_embed is a ModuleList whose last entry has a Sequential `.layers` ending in a Linear
        #   - class_embed is itself a Linear
        if hasattr(_model, 'class_embed'):
            if isinstance(_model.class_embed, nn.ModuleList) and len(_model.class_embed) > 0:
                 last_mod_in_list = _model.class_embed[-1]
                 if isinstance(last_mod_in_list, nn.Linear): final_class_layer = last_mod_in_list
                 elif hasattr(last_mod_in_list, 'layers') and isinstance(last_mod_in_list.layers, nn.Sequential):
                     if len(last_mod_in_list.layers) > 0 and isinstance(last_mod_in_list.layers[-1], nn.Linear): final_class_layer = last_mod_in_list.layers[-1]
            elif isinstance(_model.class_embed, nn.Linear): final_class_layer = _model.class_embed
            # Add more checks if your model structure is different

        if final_class_layer is not None:
            current_cls_outputs = final_class_layer.out_features
            print(f"  Detected {current_cls_outputs} outputs in the final classification layer.")
            if current_cls_outputs != NUM_OUTPUTS_REQUIRED:
                 # This should NOT happen if ignore_mismatched_sizes=False worked
                 print(f"  ERROR: Loaded model head size ({current_cls_outputs}) does not match required ({NUM_OUTPUTS_REQUIRED})!")
                 raise ValueError("Head size mismatch after loading with corrected config.")
            else:
                 print(f"  Head size matches required size ({NUM_OUTPUTS_REQUIRED}). Fine-tuned weights presumed loaded.")
        else:
            # An unrecognised head layout is only a warning; preparation continues.
            print("  WARNING: Could not reliably determine output size of the classification head.")

    except Exception as e_head_check:
        # Includes the ValueError raised above: a wrong head size aborts preparation.
        print(f"  Error during head size verification: {e_head_check}")
        raise e_head_check

    # 5. Replace FrozenBatchNorm2d layers (necessary for pruning)
    print("\nReplacing FrozenBatchNorm2d layers with standard BatchNorm2d...")
    _model, _ = replace_frozen_bn(_model) # Use the function defined at the start of this cell

    # 6. Move final prepared model to target device
    print(f"\nMoving final prepared base model to: {device}")
    _model.to(device)
    _model.eval()
    prepared_base_model = _model # Assign to the final variable name
    print("Prepared base model is ready on device.")

    # 7. Calculate and print parameters for the prepared model
    # Only parameters with requires_grad=True are counted.
    base_model_params = sum(p.numel() for p in prepared_base_model.parameters() if p.requires_grad)
    print(f"Prepared Base Model Parameters (trainable): {base_model_params:,}")

except Exception as e_load_prep:
    print(f"!!! ERROR during model loading/preparation: {e_load_prep} !!!")
    traceback.print_exc()
    prepared_base_model = None # Ensure it's None on error
    raise e_load_prep # Re-raise the exception to stop execution

# Cleanup intermediate model variable if it exists and differs
# (on success _model and prepared_base_model are the same object, so nothing is deleted)
if '_model' in locals() and prepared_base_model is not _model:
    del _model
gc.collect()
if torch.cuda.is_available(): torch.cuda.empty_cache()

print("\n--- Base Model Preparation Complete ---")

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.



--- Loading & Preparing Base Model (Targeting 3 Classes) ---
Image processor loaded from /content/drive/MyDrive/deformable-detr-finetuned-kitti-round2
Loading config from /content/drive/MyDrive/deformable-detr-finetuned-kitti-round2 and ensuring num_labels is 3...
Using config: num_labels=3, id2label={0: 'Car', 1: 'Pedestrian', 2: 'Cyclist'}
              num_queries=300, hidden_dim=256, decoder_layers=6
Loading model weights from /content/drive/MyDrive/deformable-detr-finetuned-kitti-round2...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Model weights loaded.
Verifying loaded model head size...
  Detected 3 outputs in the final classification layer.
  Head size matches required size (3). Fine-tuned weights presumed loaded.

Replacing FrozenBatchNorm2d layers with standard BatchNorm2d...
  Found DeformableDetrFrozenBatchNorm2d class.
  Checking 425 modules for FrozenBN replacement...
  Found 53 FrozenBN layers to replace.
    Moved model to CPU for BN replacement.


                                                            

    Moved model back to cpu.
  Finished BN replacement. Replaced: 53, Errors: 0

Moving final prepared base model to: cuda




Prepared base model is ready on device.
Prepared Base Model Parameters (trainable): 39,877,769

--- Base Model Preparation Complete ---


In [None]:
# =============================================================================
# Cell 3: Load COCO GT & Define Eval Dataset/Loader (MODIFIED)
# =============================================================================
print("\n--- Loading COCO GT and Preparing Evaluation Dataloader ---")

# Both stay None unless the steps below succeed; later code checks them before use.
coco_gt = eval_loader = None

# --- Load COCO GT data ---
if os.path.exists(coco_annotation_file):
    try:
        print(f"Loading COCO ground truth for mAP evaluation from: {coco_annotation_file}")
        coco_gt = COCO(coco_annotation_file)
        print("COCO GT loaded successfully.")
    except Exception as e_coco:
        # An unreadable annotation file is reported; coco_gt is reset so mAP is skipped.
        print(f"ERROR loading COCO annotations file '{coco_annotation_file}': {e_coco}")
        traceback.print_exc()
        coco_gt = None
else:
    print(f"ERROR: COCO annotation file for evaluation not found at: {coco_annotation_file}")
    print("Cannot calculate mAP.")

# --- Define CocoEvalDataset ---
class CocoEvalDataset(Dataset):
    """Evaluation dataset yielding ``(RGB PIL image, target dict)`` pairs.

    Image ids and file names are read from a COCO-style object exposing
    ``getImgIds()`` and ``loadImgs(ids)``. Only ids whose image file exists
    under ``img_dir`` when the dataset is constructed are kept.
    """

    def __init__(self, coco_gt_obj, img_dir):
        """Index the annotated images and drop ids that have no file on disk.

        Args:
            coco_gt_obj: object providing ``getImgIds()`` and ``loadImgs(ids)``.
            img_dir: directory that each record's ``file_name`` is joined onto.
        """
        annotated_ids = coco_gt_obj.getImgIds()
        self.coco = coco_gt_obj
        self.img_ids = annotated_ids
        self.img_dir = img_dir
        self.img_info = coco_gt_obj.loadImgs(annotated_ids)

        # image_id -> file path; records without a 'file_name' key are skipped.
        self.id_to_path = {}
        for record in self.img_info:
            if 'file_name' in record:
                self.id_to_path[record['id']] = os.path.join(img_dir, record['file_name'])
        print(f"  CocoEvalDataset: Mapped {len(self.id_to_path)} image IDs to file paths.")

        # Keep only the ids that map to a path which exists right now.
        usable_ids = [
            candidate_id
            for candidate_id in annotated_ids
            if self.id_to_path.get(candidate_id)
            and os.path.exists(self.id_to_path[candidate_id])
        ]
        missing_count = len(annotated_ids) - len(usable_ids)
        if missing_count > 0:
            print(f"  WARNING: Could not find image files for {missing_count} image IDs.")
        self.img_ids = usable_ids
        print(f"  Using {len(self.img_ids)} valid image IDs for evaluation.")

    def __len__(self):
        """Return the number of image ids that passed the existence check."""
        return len(self.img_ids)

    def __getitem__(self, idx):
        """Load the image at position ``idx``.

        Returns:
            ``(image, target)`` where ``target`` holds ``image_id``, ``width``
            and ``height`` of the loaded image, or ``None`` if loading raised
            (the error is printed rather than propagated).
        """
        img_id = self.img_ids[idx]
        img_path = self.id_to_path[img_id]  # present for every id kept by __init__
        sample = None
        try:
            rgb_image = Image.open(img_path).convert("RGB")
            # Original size is needed later to rescale predicted boxes.
            size_info = {"image_id": img_id, "width": rgb_image.width, "height": rgb_image.height}
            sample = (rgb_image, size_info)
        except Exception as load_error:
            print(f"Error loading/processing image {img_path} for id {img_id}: {load_error}")
        return sample

# --- Create Evaluation DataLoader if coco_gt loaded ---
if coco_gt:
    # The evaluation loops need the processor loaded earlier in the notebook.
    assert locals().get('image_processor') is not None, "Image processor not loaded in Cell 2."

    eval_dataset = CocoEvalDataset(coco_gt, images_dir)

    # Define a collate function for evaluation (handles None)
    def eval_collate_fn(batch):
        """Collate ``(image, target)`` samples into two parallel lists.

        ``None`` samples (the dataset returns ``None`` when an image fails to
        load) are dropped. No padding or tensor conversion happens here: the
        evaluation loops hand the image list to ``image_processor`` themselves.

        Args:
            batch: list of ``(image, target)`` pairs and/or ``None`` entries.

        Returns:
            ``(images, targets)`` as two lists in input order, or ``None`` when
            no usable sample remains (callers skip such batches).
        """
        samples = [item for item in batch if item is not None]
        if not samples:
            return None
        images = [sample[0] for sample in samples]
        targets = [sample[1] for sample in samples]
        return images, targets

    # Order is kept fixed (no shuffling); pinned memory is only requested when
    # the target device is CUDA.
    eval_loader = DataLoader(
        eval_dataset,
        batch_size=EVAL_BATCH_SIZE,  # Use config batch size
        shuffle=False,
        num_workers=2,  # Can increase if not causing issues
        collate_fn=eval_collate_fn,
        pin_memory=(device.type == 'cuda'),
    )
    print(f"Created evaluation DataLoader with {len(eval_dataset)} images and batch size {EVAL_BATCH_SIZE}.")
else:
    print("Evaluation DataLoader not created as COCO GT is missing.")

gc.collect()


--- Loading COCO GT and Preparing Evaluation Dataloader ---
Loading COCO ground truth for mAP evaluation from: /content/drive/MyDrive/kitti_subset/annotations/instances_val2017.json
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
COCO GT loaded successfully.
  CocoEvalDataset: Mapped 200 image IDs to file paths.
  Using 200 valid image IDs for evaluation.
Created evaluation DataLoader with 200 images and batch size 1.


0

In [None]:
# =============================================================================
# Cell 4: Calculate Baseline Metrics (MODIFIED)
# =============================================================================
# Imported here so this cell does not rely on Cell 5 (which imports the same
# names) having been executed first.
import time
from tqdm.notebook import tqdm as tqdm_notebook

print("\n--- Calculating Baseline Metrics ---")

# Ensure base model is available
assert 'prepared_base_model' in locals() and prepared_base_model is not None, "Run Cell 2 first"
assert 'image_processor' in locals() and image_processor is not None, "Run Cell 2 first"

# Collected metrics: 'params', 'gflops', 'mAP', 'mAP50', 'avg_inference_ms'.
# A value of -1.0 means "could not be computed".
baseline_results = {}

# 1. Parameters (trainable only)
base_model_params = sum(p.numel() for p in prepared_base_model.parameters() if p.requires_grad)
baseline_results['params'] = base_model_params
print(f"Baseline Parameters: {base_model_params:,}")

# 2. GFLOPs
print("Calculating baseline GFLOPs...")
base_model_gflops = -1.0
dummy_input = None
try:
    # Determine input size from the image processor, falling back to 800x1333.
    bs = 1
    img_h, img_w = 800, 1333  # Default/common size
    if hasattr(image_processor, 'size') and isinstance(image_processor.size, dict):
        size_dict = image_processor.size
        if 'shortest_edge' in size_dict:
            shortest = size_dict['shortest_edge']
            max_size = getattr(image_processor, 'max_size', 1333)
            aspect_ratio = 1333 / 800
            if shortest == 800 and max_size == 1333:
                img_h, img_w = 800, 1333
            else:
                img_h = shortest
                img_w = int(shortest * aspect_ratio)
            # Clamp the long side and shrink the short side proportionally.
            if img_w > max_size:
                img_w = max_size
                img_h = int(max_size / aspect_ratio)
        elif 'height' in size_dict and 'width' in size_dict:
            img_h = size_dict['height']
            img_w = size_dict['width']
        img_h = max(img_h, 32)  # Ensure min size
        img_w = max(img_w, 32)
    print(f"  Using dummy input size H={img_h}, W={img_w} for GFLOPs calculation")
    dummy_input = torch.randn(bs, 3, img_h, img_w, device=device)

    # Profile on the model's device
    flops, params_thop = profile(prepared_base_model, inputs=(dummy_input,), verbose=False)
    base_model_gflops = flops / 1e9
    print(f"Baseline GFLOPs: {base_model_gflops:.2f}")
    baseline_results['gflops'] = base_model_gflops
except Exception as e_gflops:
    print(f"  Error calculating GFLOPs: {e_gflops}")
    baseline_results['gflops'] = -1.0
    # dummy_input keeps whatever value it had (None or the created tensor);
    # the pruning cell recreates it when it is None.


# 3. COCO mAP and Inference Time
print("\nCalculating baseline mAP and Average Inference Time...")
baseline_results['mAP'] = -1.0
baseline_results['mAP50'] = -1.0
baseline_results['avg_inference_ms'] = -1.0

if coco_gt is None or eval_loader is None:
    print("  Skipping mAP/Inference Time calculation (COCO GT or Eval Loader missing).")
else:
    coco_results_baseline = []
    total_inference_time_ms = 0
    processed_images_count = 0
    inference_batches = 0

    # CUDA kernels run asynchronously; synchronise around the timed region so
    # the wall-clock measurement covers the whole forward pass.
    sync_cuda_for_timing = device.type == 'cuda' and torch.cuda.is_available()

    prepared_base_model.eval()  # Ensure eval mode
    with torch.no_grad():
        for batch_data in tqdm_notebook(eval_loader, desc="Baseline Evaluation"):
            if batch_data is None:
                continue  # every image in this batch failed to load
            images, targets = batch_data  # Unpack images and targets

            try:
                # --- Batch Preprocessing ---
                inputs = image_processor(images=images, return_tensors="pt").to(device)
                original_sizes = [(t['height'], t['width']) for t in targets]
                target_sizes = torch.tensor(original_sizes, device=device)
                image_ids = [t['image_id'] for t in targets]

                # --- Timed Inference ---
                if sync_cuda_for_timing:
                    torch.cuda.synchronize()
                start_time = time.perf_counter()
                outputs = prepared_base_model(**inputs)
                if sync_cuda_for_timing:
                    torch.cuda.synchronize()
                end_time = time.perf_counter()
                total_inference_time_ms += (end_time - start_time) * 1000
                inference_batches += 1
                # --- End Timed Inference ---

                # --- Post-processing ---
                results_list = image_processor.post_process_object_detection(
                    outputs,
                    target_sizes=target_sizes,
                    threshold=CONFIDENCE_THRESHOLD
                )

                # --- Format for COCO ---
                for i, results in enumerate(results_list):
                    image_id = image_ids[i]
                    boxes = results["boxes"].cpu().tolist()
                    scores = results["scores"].cpu().tolist()
                    labels = results["labels"].cpu().tolist()
                    for box, score, label in zip(boxes, scores, labels):
                        # Corner format (x_min, y_min, x_max, y_max) -> COCO (x, y, w, h).
                        x_min, y_min, x_max, y_max = box
                        coco_bbox = [x_min, y_min, x_max - x_min, y_max - y_min]
                        coco_results_baseline.append({
                            "image_id": image_id,
                            # NOTE(review): assumes annotation category ids are model label + 1 -- confirm.
                            "category_id": label+1,
                            "bbox": coco_bbox,
                            "score": score,
                        })
                    processed_images_count += 1

            except Exception as e_infer:
                print(f"\nError during baseline inference/postprocessing: {e_infer}")
                traceback.print_exc()
                continue  # Skip batch on error

    print(f"\nBaseline: Processed {processed_images_count} images over {inference_batches} batches.")

    # Calculate Average Inference Time (per batch)
    if inference_batches > 0:
        baseline_results['avg_inference_ms'] = total_inference_time_ms / inference_batches
        print(f"Baseline Average Batch Inference Time: {baseline_results['avg_inference_ms']:.2f} ms")

    # Run COCOeval API
    if not coco_results_baseline:
        print("No baseline evaluation results generated.")
    else:
        print("Running COCO evaluation API for baseline...")
        try:
            coco_dt = coco_gt.loadRes(coco_results_baseline)
            coco_eval = COCOeval(coco_gt, coco_dt, iouType='bbox')
            coco_eval.evaluate(); coco_eval.accumulate(); coco_eval.summarize()
            # stats[0] / stats[1] are the first two summary lines (AP@[.50:.95], AP@.50).
            baseline_results['mAP'] = coco_eval.stats[0]
            baseline_results['mAP50'] = coco_eval.stats[1]
            print(f"Baseline mAP: {baseline_results['mAP']:.4f}, mAP50: {baseline_results['mAP50']:.4f}")
            # Cleanup eval objects
            del coco_dt, coco_eval
        except Exception as e_coco_api:
            print(f"  ERROR during baseline COCOeval: {e_coco_api}")
            traceback.print_exc()

# Store dummy_input globally if needed for pruning step
if 'dummy_input' in locals() and dummy_input is not None:
     globals()['dummy_input'] = dummy_input
gc.collect()
if torch.cuda.is_available(): torch.cuda.empty_cache()
print("\nBaseline Metric Calculation Complete.")


--- Calculating Baseline Metrics ---
Baseline Parameters: 39,877,769
Calculating baseline GFLOPs...
  Using dummy input size H=800, W=1333 for GFLOPs calculation
Baseline GFLOPs: 204.87

Calculating baseline mAP and Average Inference Time...


Baseline Evaluation:   0%|          | 0/200 [00:00<?, ?it/s]


Baseline: Processed 200 images over 200 batches.
Baseline Average Batch Inference Time: 196.28 ms
Running COCO evaluation API for baseline...
Loading and preparing results...
DONE (t=0.02s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.56s).
Accumulating evaluation results...
DONE (t=0.10s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.117
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.264
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.095
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.066
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.128
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.178
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.088
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.270
 Aver

In [None]:
# =============================================================================
# Cell 5: Pruning, Metrics Calculation (incl. GFLOPs), Evaluation, and Saving Loop (CORRECTED)
# =============================================================================
import torch
import torch.nn as nn
import copy
import gc
import os
import torch_pruning as tp
from thop import profile # Keep profile import here
from tqdm.notebook import tqdm as tqdm_notebook
# from tqdm import tqdm as tqdm_cli # Only if needed
import traceback
import time

print("\n--- Starting Pruning, Metrics Calculation, Evaluation & Saving Loop ---")

# --- Assertions and Setup ---
# Everything below must already exist from earlier cells. coco_gt and
# eval_loader may be None, in which case mAP/timing is skipped per ratio.
assert locals().get('prepared_base_model') is not None, "Run Cell 2 first"
assert 'baseline_results' in locals(), "Run Cell 4 first to get baseline metrics"
assert 'TARGET_PRUNING_RATIOS' in locals(), "TARGET_PRUNING_RATIOS not defined in Cell 1"
assert locals().get('image_processor') is not None, "Image processor needed"
assert 'coco_gt' in locals(), "COCO GT object needed" # Allow None, but mAP will be skipped
assert 'eval_loader' in locals(), "Eval loader needed" # Allow None, but mAP will be skipped
assert 'output_dir' in locals(), "Output directory needed"
assert 'CONFIDENCE_THRESHOLD' in locals(), "Confidence threshold needed"

# The model's own device is authoritative for all tensors created below.
device = next(prepared_base_model.parameters()).device
base_model_params = baseline_results.get('params', 0)
base_model_gflops = baseline_results.get('gflops', -1.0)
print(f"Using Baseline - Params: {base_model_params:,}, GFLOPs: {base_model_gflops:.2f}")

# --- Dummy Input Setup ---
# Reuse the tensor left by the baseline cell when there is one; otherwise
# derive the size from the image processor (fallback 800x1333).
if locals().get('dummy_input') is None:
    print("Recreating dummy input...")
    bs = 1
    img_h, img_w = 800, 1333
    if hasattr(image_processor, 'size') and isinstance(image_processor.size, dict):
        size_dict = image_processor.size
        if 'shortest_edge' in size_dict:
            shortest = size_dict['shortest_edge']
            max_size = getattr(image_processor, 'max_size', 1333)
            aspect_ratio = 1333 / 800
            if shortest == 800 and max_size == 1333:
                img_h, img_w = 800, 1333
            else:
                img_h = shortest
                img_w = int(shortest * aspect_ratio)
            # Clamp the long side, shrinking the short side proportionally.
            if img_w > max_size:
                img_w = max_size
                img_h = int(max_size / aspect_ratio)
        elif 'height' in size_dict and 'width' in size_dict:
            img_h = size_dict['height']
            img_w = size_dict['width']
        # Never go below 32 pixels on either side.
        img_h = max(img_h, 32)
        img_w = max(img_w, 32)
    dummy_input = torch.randn(bs, 3, img_h, img_w, device=device)
    print(f"  Created dummy input shape: {dummy_input.shape} on {dummy_input.device}")
elif dummy_input.device != device:
    dummy_input = dummy_input.to(device)
    print(f"Moved existing dummy input to {dummy_input.device}")
# --- End Dummy Input Setup ---

# ratio -> dict of metrics, filled by the loop below.
pruning_results = {}

# --- Loop through each target pruning ratio ---
# For every ratio: deep-copy the prepared base model, prune the copy with
# torch-pruning, measure params/GFLOPs, evaluate mAP and inference time on
# eval_loader, save the pruned weights, and record everything in
# pruning_results[ratio]. A failure in one ratio is logged and recorded as an
# error entry; the loop then continues with the next ratio.

# CUDA kernels run asynchronously; synchronise around the timed forward pass
# so the wall-clock measurement covers the whole computation.
sync_cuda_for_timing = device.type == 'cuda' and torch.cuda.is_available()

for ratio in TARGET_PRUNING_RATIOS:
    print(f"\n===== Processing Ratio: {ratio:.2f} =====")
    # --- Initialize variables for this iteration scope ---
    pruning_results[ratio] = {}
    model_pruned_copy = None
    pruner = None
    importance = None
    ignored_layers = None
    nested_model = None
    coco_results_pruned = None
    coco_dt_pruned = None
    coco_eval_pruned = None
    # --- End Initialization ---

    try:
        # --- 1. Deep Copy ---
        print("  1. Creating deepcopy...")
        model_pruned_copy = copy.deepcopy(prepared_base_model)
        model_pruned_copy.eval()
        print(f"     Copy created on {next(model_pruned_copy.parameters()).device}")

        # --- 2. Identify Ignored Layers ---
        print("  2. Identifying layers to ignore...")
        ignored_layers_modules = []
        if not hasattr(model_pruned_copy, 'model') or not isinstance(model_pruned_copy.model, nn.Module): raise AttributeError("Nested 'model' module not found")
        nested_model = model_pruned_copy.model
        def add_module_and_submodules(module_instance): # Simplified helper
             # Appends the module and every descendant module once each.
             if module_instance is None or not isinstance(module_instance, nn.Module): return
             if module_instance not in ignored_layers_modules: ignored_layers_modules.append(module_instance)
             for submodule in module_instance.modules():
                 if submodule not in ignored_layers_modules and submodule is not module_instance: ignored_layers_modules.append(submodule)
        # Backbone stem and residual downsample paths are excluded from pruning.
        if hasattr(nested_model, 'backbone'):
            if hasattr(nested_model.backbone, 'conv_encoder') and hasattr(nested_model.backbone.conv_encoder, 'model'):
                 timm_model = nested_model.backbone.conv_encoder.model
                 if hasattr(timm_model, 'conv1'): ignored_layers_modules.append(timm_model.conv1)
                 if hasattr(timm_model, 'bn1') and isinstance(timm_model.bn1, nn.BatchNorm2d): ignored_layers_modules.append(timm_model.bn1) # Check standard BN
                 for layer_name in ['layer1', 'layer2', 'layer3', 'layer4']:
                     if hasattr(timm_model, layer_name):
                         res_layer = getattr(timm_model, layer_name)
                         if hasattr(res_layer, '__iter__'):
                             for block in res_layer:
                                 if hasattr(block, 'downsample') and block.downsample is not None: add_module_and_submodules(block.downsample)
        # Prediction heads, projections and embeddings are excluded as well.
        if hasattr(model_pruned_copy, 'class_embed'): add_module_and_submodules(model_pruned_copy.class_embed)
        if hasattr(model_pruned_copy, 'bbox_embed'): add_module_and_submodules(model_pruned_copy.bbox_embed)
        if hasattr(nested_model, 'level_embed'): ignored_layers_modules.append(nested_model.level_embed)
        if hasattr(nested_model, 'input_proj'): add_module_and_submodules(nested_model.input_proj)
        if hasattr(nested_model, 'query_position_embeddings'): ignored_layers_modules.append(nested_model.query_position_embeddings)
        if hasattr(nested_model, 'reference_points'): ignored_layers_modules.append(nested_model.reference_points)

        # De-duplicate and drop non-modules (e.g. bare parameters) and the root containers.
        ignored_layers = list(set(m for m in ignored_layers_modules if isinstance(m, nn.Module) and m is not model_pruned_copy and m is not nested_model))
        print(f"     Identified {len(ignored_layers)} unique modules to ignore.")

        # --- 3. Define Pruner ---
        print(f"  3. Setting up Pruner for ratio {ratio:.2f}...")
        importance = tp.importance.MagnitudeImportance(p=1)
        if dummy_input.device != model_pruned_copy.device: dummy_input = dummy_input.to(model_pruned_copy.device)
        pruner = tp.pruner.MagnitudePruner(model_pruned_copy, example_inputs=dummy_input, importance=importance, pruning_ratio=ratio, ignored_layers=ignored_layers, root_module_types=[nn.Conv2d], round_to=8)
        print(f"     Pruner initialized.")

        # --- 4. Apply Pruning ---
        print("  4. Applying pruner.step()...")
        pruning_start_time = time.time()
        pruner.step()
        pruning_end_time = time.time()
        print(f"     Pruning step completed in {pruning_end_time - pruning_start_time:.2f} seconds.")
        model_pruned_copy.eval()

        # --- 5. Calculate Physical Metrics (Params and GFLOPs - INTEGRATED) ---
        print("  5. Calculating pruned physical metrics...")
        # Params
        pruned_params = sum(p.numel() for p in model_pruned_copy.parameters() if p.requires_grad)
        pruning_results[ratio]['params'] = pruned_params
        pruning_results[ratio]['param_reduc%'] = (1 - pruned_params / base_model_params) * 100 if base_model_params > 0 else 0
        print(f"     Pruned Params: {pruned_params:,} (Reduction: {pruning_results[ratio]['param_reduc%']:.2f}%)")

        # GFLOPs (Calculate HERE)
        pruning_results[ratio]['gflops'] = -1.0 # Initialize for this ratio
        pruning_results[ratio]['gflop_reduc%'] = 0.0
        try:
             print("     Calculating GFLOPs...")
             # Use the model_pruned_copy which is in memory with the correct structure
             # Ensure dummy_input is on the same device as the model
             if dummy_input.device != model_pruned_copy.device:
                 dummy_input = dummy_input.to(model_pruned_copy.device)

             flops, params_thop = profile(model_pruned_copy, inputs=(dummy_input,), verbose=False)
             pruned_gflops = flops / 1e9
             pruning_results[ratio]['gflops'] = pruned_gflops
             # Calculate reduction using baseline GFLOPs loaded at the start
             pruning_results[ratio]['gflop_reduc%'] = (1 - pruned_gflops / base_model_gflops) * 100 if base_model_gflops > 0 else 0
             print(f"     Pruned GFLOPs: {pruned_gflops:.2f} (Reduction: {pruning_results[ratio]['gflop_reduc%']:.2f}%)")
        except Exception as e_gflops_pruned:
             print(f"     Error calculating pruned GFLOPs: {e_gflops_pruned}")
             # traceback.print_exc() # Uncomment for detailed debug if needed

        # --- 6. Calculate Performance Metrics (mAP, Inference Time) ---
        print("  6. Calculating pruned performance metrics...")
        pruning_results[ratio]['mAP'] = -1.0; pruning_results[ratio]['mAP50'] = -1.0; pruning_results[ratio]['avg_inference_ms'] = -1.0

        if coco_gt is None or eval_loader is None:
            print("     Skipping mAP/Inference Time (COCO GT or Eval Loader missing).")
        else:
            coco_results_pruned = [] # Re-initialize list for this ratio
            total_inference_time_ms_pruned = 0; processed_images_count_pruned = 0; inference_batches_pruned = 0
            model_pruned_copy.eval()
            with torch.no_grad():
                for batch_data in tqdm_notebook(eval_loader, desc=f"Evaluating Ratio {ratio:.2f}", leave=False):
                    if batch_data is None: continue
                    images, targets = batch_data # Unpack batch
                    # Bound before the try so the error message below never
                    # hits an unbound name or reports ids of an earlier batch.
                    image_ids = None
                    try:
                        # Ensure images is a list for the processor
                        if not isinstance(images, list): images = [images]
                        inputs = image_processor(images=images, return_tensors="pt").to(device)
                        original_sizes = [(t['height'], t['width']) for t in targets]
                        target_sizes = torch.tensor(original_sizes, device=device)
                        image_ids = [t['image_id'] for t in targets]

                        if sync_cuda_for_timing:
                            torch.cuda.synchronize()
                        start_time = time.perf_counter()
                        outputs = model_pruned_copy(**inputs)
                        if sync_cuda_for_timing:
                            torch.cuda.synchronize()
                        end_time = time.perf_counter()
                        total_inference_time_ms_pruned += (end_time - start_time) * 1000
                        # One forward pass == one batch, so the average below is
                        # per batch and comparable to the baseline's per-batch figure.
                        inference_batches_pruned += 1

                        results_list = image_processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=CONFIDENCE_THRESHOLD)

                        for i, results in enumerate(results_list):
                            image_id = image_ids[i]
                            boxes = results["boxes"].cpu().tolist(); scores = results["scores"].cpu().tolist(); labels = results["labels"].cpu().tolist()
                            for box, score, label in zip(boxes, scores, labels):
                                # Corner format -> COCO (x, y, w, h).
                                x_min, y_min, x_max, y_max = box; coco_bbox = [x_min, y_min, x_max - x_min, y_max - y_min]
                                # NOTE(review): assumes annotation category ids are model label + 1 -- confirm.
                                coco_results_pruned.append({"image_id": image_id, "category_id": label+1, "bbox": coco_bbox, "score": score})
                            processed_images_count_pruned += 1 # Count images processed
                    except Exception as e_infer_pruned:
                        print(f"\nError during pruned inference/postprocessing (img_ids: {image_ids}): {e_infer_pruned}")
                        traceback.print_exc() # Print traceback for debug
                        continue # Skip to next batch

            print(f"     Processed {processed_images_count_pruned} images over {inference_batches_pruned} batches.")
            if inference_batches_pruned > 0:
                 # Calculate per-batch average inference time
                 pruning_results[ratio]['avg_inference_ms'] = total_inference_time_ms_pruned / inference_batches_pruned
                 print(f"     Pruned Avg Batch Inference Time: {pruning_results[ratio]['avg_inference_ms']:.2f} ms")

            if not coco_results_pruned: print("     No pruned evaluation results generated.")
            else:
                print("     Running COCO evaluation API...")
                try:
                    coco_dt_pruned = coco_gt.loadRes(coco_results_pruned)
                    coco_eval_pruned = COCOeval(coco_gt, coco_dt_pruned, iouType='bbox')
                    coco_eval_pruned.evaluate(); coco_eval_pruned.accumulate(); coco_eval_pruned.summarize()
                    pruning_results[ratio]['mAP'] = coco_eval_pruned.stats[0]
                    pruning_results[ratio]['mAP50'] = coco_eval_pruned.stats[1]
                    print(f"     Pruned mAP: {pruning_results[ratio]['mAP']:.4f}, mAP50: {pruning_results[ratio]['mAP50']:.4f}")
                except Exception as e_coco_api_pruned:
                    print(f"     ERROR during pruned COCOeval: {e_coco_api_pruned}");
                    traceback.print_exc()

        # --- 7. Save Pruned Model ---
        try:
            pruned_model_save_dir = os.path.join(output_dir, f"pruned_ratio_{ratio:.2f}")
            print(f"  7. Saving pruned structure to: {pruned_model_save_dir}")
            os.makedirs(pruned_model_save_dir, exist_ok=True)
            # Use save_pretrained, which saves config and weights
            # The config will still be the *original* config, but the weights will be pruned.
            # NOTE(review): reloading via from_pretrained may therefore hit tensor
            # shape mismatches against the unpruned config -- confirm before relying on it.
            model_pruned_copy.save_pretrained(pruned_model_save_dir)
            if image_processor: image_processor.save_pretrained(pruned_model_save_dir)
            pruning_results[ratio]['save_path'] = pruned_model_save_dir
            print(f"     Saved successfully.")
        except Exception as e_save:
            print(f"     Error saving pruned model for ratio {ratio:.2f}: {e_save}")
            traceback.print_exc() # Print traceback for save errors
            pruning_results[ratio]['save_path'] = "Error"

    # --- Error Handling for the whole iteration ---
    except Exception as e_ratio:
        print(f"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
        print(f"!! ERROR processing ratio {ratio:.2f}: {e_ratio}")
        print(f"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
        traceback.print_exc()
        # Replace any partial metrics with a uniform error record.
        pruning_results[ratio] = {'error': str(e_ratio), 'params': -1, 'gflops': -1.0, 'mAP': -1.0, 'mAP50': -1.0, 'avg_inference_ms': -1.0, 'param_reduc%': 0.0, 'gflop_reduc%': 0.0, 'save_path': 'ERROR'}


    # --- Explicit Cleanup within the loop ---
    finally:
        print(f"--- Cleaning up after ratio {ratio:.2f} ---")
        # Drop every large per-ratio object before the next deepcopy so the
        # collector and the CUDA cache can actually release the memory.
        if 'model_pruned_copy' in locals() and model_pruned_copy is not None: del model_pruned_copy
        if 'pruner' in locals() and pruner is not None: del pruner
        if 'coco_results_pruned' in locals() and coco_results_pruned is not None: del coco_results_pruned
        if 'coco_dt_pruned' in locals() and coco_dt_pruned is not None: del coco_dt_pruned
        if 'coco_eval_pruned' in locals() and coco_eval_pruned is not None: del coco_eval_pruned
        if 'ignored_layers' in locals() and ignored_layers is not None: del ignored_layers
        if 'importance' in locals() and importance is not None: del importance
        if 'nested_model' in locals() and nested_model is not None: del nested_model
        if 'images' in locals(): del images
        if 'targets' in locals(): del targets
        if 'inputs' in locals(): del inputs
        if 'outputs' in locals(): del outputs
        if 'results_list' in locals(): del results_list
        if 'batch_data' in locals(): del batch_data

        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        print(f"--- End Cleanup for ratio {ratio:.2f} ---")


print("\n===== Pruning, Metrics, & Saving Loop Finished =====")
globals()['pruning_results'] = pruning_results
globals()['baseline_results'] = baseline_results # Ensure baseline is global for Cell 6


--- Starting Pruning, Metrics Calculation, Evaluation & Saving Loop ---
Using Baseline - Params: 39,877,769, GFLOPs: 204.87

===== Processing Ratio: 0.10 =====
  1. Creating deepcopy...
     Copy created on cuda:0
  2. Identifying layers to ignore...
     Identified 37 unique modules to ignore.
  3. Setting up Pruner for ratio 0.10...


 Torch-Pruning will prune the last non-singleton dimension of these parameters. If you wish to change this behavior, please provide an unwrapped_parameters argument.


     Pruner initialized.
  4. Applying pruner.step()...
     Pruning step completed in 0.34 seconds.
  5. Calculating pruned physical metrics...
     Pruned Params: 36,535,561 (Reduction: 8.38%)
     Calculating GFLOPs...
     Pruned GFLOPs: 191.19 (Reduction: 6.68%)
  6. Calculating pruned performance metrics...


Evaluating Ratio 0.10:   0%|          | 0/200 [00:00<?, ?it/s]

     Processed 200 images over 200 batches.
     Pruned Avg Batch Inference Time: 194.61 ms
     Running COCO evaluation API...
Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.37s).
Accumulating evaluation results...
DONE (t=0.06s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.092
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.225
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.058
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.039
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.099
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.150
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.060
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.194
 Average Recall     

Evaluating Ratio 0.20:   0%|          | 0/200 [00:00<?, ?it/s]

     Processed 200 images over 200 batches.
     Pruned Avg Batch Inference Time: 195.88 ms
     Running COCO evaluation API...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.12s).
Accumulating evaluation results...
DONE (t=0.03s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.024
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.054
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.013
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.003
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.026
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.037
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.026
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.040
 Average Recall     

Evaluating Ratio 0.30:   0%|          | 0/200 [00:00<?, ?it/s]

     Processed 200 images over 200 batches.
     Pruned Avg Batch Inference Time: 193.88 ms
     Running COCO evaluation API...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.05s).
Accumulating evaluation results...
DONE (t=0.03s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.000
 Average Recall     

Evaluating Ratio 0.40:   0%|          | 0/200 [00:00<?, ?it/s]

     Processed 200 images over 200 batches.
     Pruned Avg Batch Inference Time: 190.35 ms
     Running COCO evaluation API...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.13s).
Accumulating evaluation results...
DONE (t=0.04s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.000
 Average Recall     

Evaluating Ratio 0.50:   0%|          | 0/200 [00:00<?, ?it/s]

     Processed 200 images over 200 batches.
     Pruned Avg Batch Inference Time: 188.51 ms
     Running COCO evaluation API...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.21s).
Accumulating evaluation results...
DONE (t=0.06s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.000
 Average Recall     

In [None]:
# =============================================================================
# Cell 5.5: Calculate GFLOPs for Saved Pruned Models (NEW)
# =============================================================================
import torch
import gc
import os
from transformers import AutoModelForObjectDetection # Need this for loading
from thop import profile
from tqdm.notebook import tqdm as tqdm_notebook

# `traceback` is used by the error handler below; import it here so this cell
# does not depend on an earlier cell having imported it.
import traceback

print("\n--- Calculating GFLOPs for Saved Pruned Models ---")

# This cell only consumes state produced by earlier cells; fail fast if absent.
assert 'pruning_results' in locals(), "Pruning results from Cell 5 needed."
assert 'baseline_results' in locals(), "Baseline results needed."
assert 'dummy_input' in locals() and dummy_input is not None, "Dummy input needed."
assert 'device' in locals(), "Device needed."

# -1.0 is the "not available" sentinel; without a positive baseline the
# reduction percentage cannot be computed and is reported as 0.0 below.
base_gflops = baseline_results.get('gflops', -1.0)
if base_gflops <= 0:
    print("WARNING: Baseline GFLOPs not available, cannot calculate reduction.")

for ratio in tqdm_notebook(sorted(pruning_results.keys()), desc="Calculating GFLOPs"):
    ratio_entry = pruning_results[ratio]

    # Ratios that already failed, or that never saved a model, have nothing to profile.
    if 'error' in ratio_entry or 'save_path' not in ratio_entry:
        print(f"Skipping GFLOPs for ratio {ratio:.2f} due to previous error or missing path.")
        continue

    save_dir = ratio_entry['save_path']
    if not os.path.isdir(save_dir):
        print(f"Save directory not found for ratio {ratio:.2f}: {save_dir}. Skipping GFLOPs.")
        continue

    print(f"\nProcessing Ratio: {ratio:.2f}")
    loaded_model_for_flops = None  # Ensure clean state for the `finally` clause

    try:
        # Load the saved pruned model.
        # NOTE(review): in the captured run this call fails for every ratio with
        # "size mismatch ... DeformableDetrFrozenBatchNorm2d" - the checkpoint
        # tensors are smaller than the architecture being instantiated.
        # Presumably the saved config still describes the unpruned layer widths;
        # confirm how Cell 5 saves the models (e.g. whole-module torch.save or
        # torch-pruning's state_dict helpers) before relying on this path.
        print(f"  Loading model from: {save_dir}")
        # Use low_cpu_mem_usage if loading large models causes CPU OOM
        loaded_model_for_flops = AutoModelForObjectDetection.from_pretrained(
            save_dir,
            # low_cpu_mem_usage=True # Optional
        )
        loaded_model_for_flops.to(device)  # Move to the profiling device
        loaded_model_for_flops.eval()
        print(f"  Model loaded to {device}.")

        # Calculate GFLOPs
        print("  Calculating GFLOPs...")
        # Ensure dummy input is on the same device as the model
        if dummy_input.device != loaded_model_for_flops.device:
            dummy_input = dummy_input.to(loaded_model_for_flops.device)

        # NOTE(review): thop documents the first return value of profile() as
        # MACs; it is reported as "GFLOPs" here - confirm the baseline figure
        # was measured the same way so the reduction stays comparable.
        flops, _ = profile(loaded_model_for_flops, inputs=(dummy_input,), verbose=False)
        pruned_gflops = flops / 1e9

        # Update results dictionary
        ratio_entry['gflops'] = pruned_gflops
        ratio_entry['gflop_reduc%'] = (1 - pruned_gflops / base_gflops) * 100 if base_gflops > 0 else 0.0
        print(f"  GFLOPs: {pruned_gflops:.2f} (Reduction: {ratio_entry['gflop_reduc%']:.2f}%)")

    except Exception as e_flops:
        print(f"  Error calculating GFLOPs for ratio {ratio:.2f}: {e_flops}")
        traceback.print_exc()
        # Only fill in the sentinels when nothing was recorded before: a failed
        # re-measurement must not wipe out a GFLOPs value that an earlier cell
        # already stored for this ratio.
        ratio_entry.setdefault('gflops', -1.0)
        ratio_entry.setdefault('gflop_reduc%', 0.0)

    finally:
        # Cleanup for this iteration: drop the model reference, then release
        # Python garbage and cached CUDA memory before the next load.
        print("  Cleaning up loaded model...")
        if loaded_model_for_flops is not None:
            del loaded_model_for_flops
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        print("  Cleanup complete.")

print("\n--- GFLOPs Calculation Finished ---")
# Update global results (the dict was mutated in place; this rebinding keeps
# the original cell's explicit hand-off to later cells).
globals()['pruning_results'] = pruning_results


--- Calculating GFLOPs for Saved Pruned Models ---


Calculating GFLOPs:   0%|          | 0/5 [00:00<?, ?it/s]


Processing Ratio: 0.10
  Loading model from: /content/drive/MyDrive/kitti_torch_pruning_results_v2/pruned_ratio_0.10
  Error calculating GFLOPs for ratio 0.10: Error(s) in loading state_dict for DeformableDetrFrozenBatchNorm2d:
	size mismatch for bias: copying a param with shape torch.Size([56]) from checkpoint, the shape in current model is torch.Size([64]).
  Cleaning up loaded model...


Traceback (most recent call last):
  File "<ipython-input-6-b851656bcfae>", line 39, in <cell line: 0>
    loaded_model_for_flops = AutoModelForObjectDetection.from_pretrained(
                             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/models/auto/auto_factory.py", line 571, in from_pretrained
    return model_class.from_pretrained(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py", line 279, in _wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py", line 4399, in from_pretrained
    ) = cls._load_pretrained_model(
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py", line 4833, in _load_pretrained_model
    disk_offload_index, cpu_offload_index = _load_state_dict_into_meta_model(
 

  Cleanup complete.

Processing Ratio: 0.20
  Loading model from: /content/drive/MyDrive/kitti_torch_pruning_results_v2/pruned_ratio_0.20
  Error calculating GFLOPs for ratio 0.20: Error(s) in loading state_dict for DeformableDetrFrozenBatchNorm2d:
	size mismatch for bias: copying a param with shape torch.Size([48]) from checkpoint, the shape in current model is torch.Size([64]).
  Cleaning up loaded model...


Traceback (most recent call last):
  File "<ipython-input-6-b851656bcfae>", line 39, in <cell line: 0>
    loaded_model_for_flops = AutoModelForObjectDetection.from_pretrained(
                             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/models/auto/auto_factory.py", line 571, in from_pretrained
    return model_class.from_pretrained(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py", line 279, in _wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py", line 4399, in from_pretrained
    ) = cls._load_pretrained_model(
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py", line 4833, in _load_pretrained_model
    disk_offload_index, cpu_offload_index = _load_state_dict_into_meta_model(
 

  Cleanup complete.

Processing Ratio: 0.30
  Loading model from: /content/drive/MyDrive/kitti_torch_pruning_results_v2/pruned_ratio_0.30
  Error calculating GFLOPs for ratio 0.30: Error(s) in loading state_dict for DeformableDetrFrozenBatchNorm2d:
	size mismatch for bias: copying a param with shape torch.Size([40]) from checkpoint, the shape in current model is torch.Size([64]).
  Cleaning up loaded model...


Traceback (most recent call last):
  File "<ipython-input-6-b851656bcfae>", line 39, in <cell line: 0>
    loaded_model_for_flops = AutoModelForObjectDetection.from_pretrained(
                             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/models/auto/auto_factory.py", line 571, in from_pretrained
    return model_class.from_pretrained(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py", line 279, in _wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py", line 4399, in from_pretrained
    ) = cls._load_pretrained_model(
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py", line 4833, in _load_pretrained_model
    disk_offload_index, cpu_offload_index = _load_state_dict_into_meta_model(
 

  Cleanup complete.

Processing Ratio: 0.40
  Loading model from: /content/drive/MyDrive/kitti_torch_pruning_results_v2/pruned_ratio_0.40
  Error calculating GFLOPs for ratio 0.40: Error(s) in loading state_dict for DeformableDetrFrozenBatchNorm2d:
	size mismatch for bias: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([64]).
  Cleaning up loaded model...


Traceback (most recent call last):
  File "<ipython-input-6-b851656bcfae>", line 39, in <cell line: 0>
    loaded_model_for_flops = AutoModelForObjectDetection.from_pretrained(
                             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/models/auto/auto_factory.py", line 571, in from_pretrained
    return model_class.from_pretrained(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py", line 279, in _wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py", line 4399, in from_pretrained
    ) = cls._load_pretrained_model(
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py", line 4833, in _load_pretrained_model
    disk_offload_index, cpu_offload_index = _load_state_dict_into_meta_model(
 

  Cleanup complete.

Processing Ratio: 0.50
  Loading model from: /content/drive/MyDrive/kitti_torch_pruning_results_v2/pruned_ratio_0.50
  Error calculating GFLOPs for ratio 0.50: Error(s) in loading state_dict for DeformableDetrFrozenBatchNorm2d:
	size mismatch for bias: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([64]).
  Cleaning up loaded model...
  Cleanup complete.

--- GFLOPs Calculation Finished ---


Traceback (most recent call last):
  File "<ipython-input-6-b851656bcfae>", line 39, in <cell line: 0>
    loaded_model_for_flops = AutoModelForObjectDetection.from_pretrained(
                             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/models/auto/auto_factory.py", line 571, in from_pretrained
    return model_class.from_pretrained(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py", line 279, in _wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py", line 4399, in from_pretrained
    ) = cls._load_pretrained_model(
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py", line 4833, in _load_pretrained_model
    disk_offload_index, cpu_offload_index = _load_state_dict_into_meta_model(
 

In [None]:
# =============================================================================
# Cell 6: Final Summary and Save Results (MODIFIED)
# =============================================================================
import json       # <-- Added import
import pandas as pd
import os
import traceback  # <-- Added import for robust error handling

print("\n--- Final Summary and Saving Results ---")

# Ensure results dictionaries exist
assert 'baseline_results' in locals(), "Run Cell 4 first for baseline metrics."
assert 'pruning_results' in locals(), "Run Cell 5/5.5 first for pruning loop results."
assert 'output_dir' in locals(), "Output directory must be defined in Cell 1."

# --- Define Output Filenames ---
json_filename = "pruning_results_summary.json"
csv_filename = "pruning_results_summary.csv"
json_filepath = os.path.join(output_dir, json_filename)
csv_filepath = os.path.join(output_dir, csv_filename)

# --- Combine Results for JSON Saving ---
# Create a single dictionary holding both baseline and pruned results
all_results_to_save = {
    "baseline": baseline_results,
    "pruning_ratios": pruning_results  # This already contains results keyed by ratio
}

# --- Save to JSON ---
print(f"\nSaving detailed results to JSON: {json_filepath}")
try:
    # Serialize completely BEFORE opening the file: if any value is not JSON
    # serializable, the error is raised here and an existing summary file is
    # left untouched instead of being truncated to a partial document.
    # indent/sort_keys are for readability of the saved file.
    json_text = json.dumps(all_results_to_save, indent=4, sort_keys=True)
    with open(json_filepath, 'w') as f:
        f.write(json_text)
    print("  JSON results saved successfully.")
except Exception as e_json:
    # Best effort: report the problem and continue so the CSV/table summary
    # further down in this cell is still produced.
    print(f"  ERROR saving results to JSON: {e_json}")
    traceback.print_exc()

# --- Prepare Data for CSV and Print Summary ---
def _fmt_metric(results, key, spec, missing="N/A"):
    """Return ``results[key]`` formatted with ``spec``, or ``missing``.

    ``missing`` is returned when the key is absent, its value is ``None``, or
    the value is negative (this notebook stores -1 / -1.0 for metrics that
    could not be computed). Formatting only happens for real values, so a
    placeholder string is never pushed through a numeric format spec.
    """
    value = results.get(key)
    if value is None or value < 0:
        return missing
    return format(value, spec)


# Metric columns of the summary table, in output order.
_METRIC_COLUMNS = (
    'Params', 'Param Reduc %',
    'GFLOPs', 'GFLOP Reduc %',
    'mAP', 'mAP50',
    'Avg Inf Time (ms)',
)


def _placeholder_row(ratio, marker, results):
    """Build a table row whose metric cells all contain ``marker``.

    The save path is still taken from ``results`` when present, otherwise it
    is ``marker`` as well.
    """
    row = {'Ratio': f"{ratio:.2f}"}
    row.update(dict.fromkeys(_METRIC_COLUMNS, marker))
    row['Save Path'] = results.get('save_path', marker)
    return row


print("\n--- Baseline Model Metrics (for reference) ---")
# Missing metrics print as "N/A"; previously a missing 'params' raised
# ValueError because the ',' format spec was applied to the string 'N/A'.
print(f"Parameters: {_fmt_metric(baseline_results, 'params', ',')}")
print(f"GFLOPs: {_fmt_metric(baseline_results, 'gflops', '.2f')}")
print(f"mAP: {_fmt_metric(baseline_results, 'mAP', '.4f')}")
print(f"mAP50: {_fmt_metric(baseline_results, 'mAP50', '.4f')}")
print(f"Avg Inference Time (ms/batch): {_fmt_metric(baseline_results, 'avg_inference_ms', '.2f')}")
print("-" * 90)

print("\n--- Pruned Model Metrics Summary ---")

# Baseline goes first so every pruned row can be compared against it.
data_for_df = [{
    'Ratio': "Baseline",
    'Params': _fmt_metric(baseline_results, 'params', ','),
    'Param Reduc %': "0.00",
    'GFLOPs': _fmt_metric(baseline_results, 'gflops', '.2f'),
    'GFLOP Reduc %': "0.00",
    'mAP': _fmt_metric(baseline_results, 'mAP', '.4f'),
    'mAP50': _fmt_metric(baseline_results, 'mAP50', '.4f'),
    'Avg Inf Time (ms)': _fmt_metric(baseline_results, 'avg_inference_ms', '.2f'),
    'Save Path': "N/A (Baseline)"
}]

# Add results for each pruning ratio
sorted_ratios = sorted(pruning_results.keys())
for ratio in sorted_ratios:
    res = pruning_results[ratio]
    if 'error' in res:
        # The pruning loop recorded an explicit failure for this ratio.
        data_for_df.append(_placeholder_row(ratio, "ERROR", res))
    elif res.get('params', -1) == -1 and res.get('gflops', -1.0) == -1.0:
        # Neither params nor GFLOPs were recorded: the loop skipped this ratio
        # without storing an explicit error key.
        data_for_df.append(_placeholder_row(ratio, "SKIPPED", res))
    else:
        # Format valid results. GFLOPs may still carry the -1.0 sentinel when
        # only the GFLOPs step failed; both GFLOPs cells then read "Error".
        gflops_text = _fmt_metric(res, 'gflops', '.2f', missing="Error")
        if gflops_text == "Error":
            gflop_reduc_text = "Error"
        else:
            gflop_reduc_text = format(res.get('gflop_reduc%', 0.0), '.2f')

        data_for_df.append({
            'Ratio': f"{ratio:.2f}",
            'Params': _fmt_metric(res, 'params', ','),
            'Param Reduc %': format(res.get('param_reduc%', 0.0), '.2f'),
            'GFLOPs': gflops_text,
            'GFLOP Reduc %': gflop_reduc_text,
            'mAP': _fmt_metric(res, 'mAP', '.4f'),
            'mAP50': _fmt_metric(res, 'mAP50', '.4f'),
            'Avg Inf Time (ms)': _fmt_metric(res, 'avg_inference_ms', '.2f'),
            'Save Path': res.get('save_path', 'N/A')
        })

# --- Build the summary table ---
# 'Ratio' is promoted to the index so it is written as the leading CSV column
# and labels each row when the table is printed.
df = pd.DataFrame(data_for_df).set_index('Ratio')

# --- Write the table to CSV ---
print(f"\nSaving results summary table to CSV: {csv_filepath}")
try:
    # The index ('Ratio') is written by default.
    df.to_csv(csv_filepath)
except Exception as csv_error:
    # Best effort: report and carry on so the table is still printed below.
    print(f"  ERROR saving results to CSV: {csv_error}")
    traceback.print_exc()
else:
    print("  CSV results saved successfully.")

# --- Print the summary table ---
print("\n--- Summary Table ---")
if df.empty:
    print("No pruning results to display.")
else:
    # Metric columns to show; the save path is listed separately further down.
    wanted_columns = (
        'Params', 'Param Reduc %', 'GFLOPs', 'GFLOP Reduc %',
        'mAP', 'mAP50', 'Avg Inf Time (ms)',
    )
    # Keep only the columns that are actually present in the table.
    display_columns = [name for name in wanted_columns if name in df.columns]
    if not display_columns:
        print("No valid metric columns found to display in table.")
    else:
        print(df[display_columns])

# --- Print where each model was saved ---
print("\n--- Saved Model Locations ---")
if df.empty or 'Save Path' not in df.columns:
    print("No models were saved or results DataFrame is empty.")
else:
    # One line per table row, labelled by its 'Ratio' index value.
    for ratio_idx, save_path in df['Save Path'].items():
        print(f"Ratio {ratio_idx}: {save_path}")

print("\n--- End of Summary ---")


--- Final Summary and Saving Results ---

Saving detailed results to JSON: /content/drive/MyDrive/kitti_torch_pruning_results_v2/pruning_results_summary.json
  JSON results saved successfully.

--- Baseline Model Metrics (for reference) ---
Parameters: 39,877,769
GFLOPs: 204.87
mAP: 0.1168
mAP50: 0.2639
Avg Inference Time (ms/batch): 196.28
------------------------------------------------------------------------------------------

--- Pruned Model Metrics Summary ---

Saving results summary table to CSV: /content/drive/MyDrive/kitti_torch_pruning_results_v2/pruning_results_summary.csv
  CSV results saved successfully.

--- Summary Table ---
              Params Param Reduc %  GFLOPs GFLOP Reduc %     mAP   mAP50  \
Ratio                                                                      
Baseline  39,877,769          0.00  204.87          0.00  0.1168  0.2639   
0.10      36,535,561          8.38  191.19          6.68  0.0915  0.2250   
0.20      33,827,049         15.17  180.16    