In [None]:
!pip install torch torchvision torchaudio
!pip install pytorch-lightning
!pip install psutil

Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)
Collecting nvidia-curand-cu12==10.3.2.106 (from torch)
  Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)
Collectin

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision.models.detection import retinanet_resnet50_fpn
from torch.utils.data import DataLoader
from pytorch_lightning import Trainer, LightningModule
from pytorch_lightning.callbacks import ModelCheckpoint
import time
import psutil
import os

In [None]:
# Step 1: Prepare Dataset
from torchvision.datasets import VOCDetection

# Root folder that holds the Pascal VOC 2012 archive and its extracted contents.
DATA_ROOT = '/content/dataset'


def collate_detection_batch(batch):
    """Regroup a list of (image, target) samples into (images, targets) tuples.

    Each field is gathered into its own tuple; nothing is stacked into a
    single tensor.
    """
    return tuple(zip(*batch))


transform = transforms.Compose([
    transforms.ToTensor(),
])

train_dataset = VOCDetection(root=DATA_ROOT, year='2012', image_set='train', download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True, collate_fn=collate_detection_batch)

# Both splits come from the same trainval archive, which the 'train' dataset
# above has already downloaded and extracted; asking for it again would only
# re-extract the whole tarball.
test_dataset = VOCDetection(root=DATA_ROOT, year='2012', image_set='val', download=False, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=2, shuffle=False, collate_fn=collate_detection_batch)

Downloading http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar to /content/dataset/VOCtrainval_11-May-2012.tar


100%|██████████| 1999639040/1999639040 [01:53<00:00, 17569280.90it/s]


Extracting /content/dataset/VOCtrainval_11-May-2012.tar to /content/dataset
Using downloaded and verified file: /content/dataset/VOCtrainval_11-May-2012.tar
Extracting /content/dataset/VOCtrainval_11-May-2012.tar to /content/dataset


In [None]:
# Create a mapping from class names to indices
# The twenty object categories, in the order that defines their integer labels.
class_names = [
    "aeroplane",
    "bicycle",
    "bird",
    "boat",
    "bottle",
    "bus",
    "car",
    "cat",
    "chair",
    "cow",
    "diningtable",
    "dog",
    "horse",
    "motorbike",
    "person",
    "pottedplant",
    "sheep",
    "sofa",
    "train",
    "tvmonitor",
]

# Forward lookup (name -> position in class_names) and its inverse.
class_to_idx = dict(zip(class_names, range(len(class_names))))
idx_to_class = dict(enumerate(class_names))

In [None]:
# Function to preprocess targets
def preprocess_targets(targets, device):
    """Convert VOC-style annotation dicts into detection target dicts.

    Args:
        targets: Iterable of dicts. Each one is read at
            ``target['annotation']['object']``, which may be a single object
            dict or a list of them. Every object dict must provide ``'name'``
            and a ``'bndbox'`` dict with ``xmin``/``ymin``/``xmax``/``ymax``
            values that ``float()`` can parse.
        device: Device on which the resulting tensors are created.

    Returns:
        A list with one dict per input target, holding ``'boxes'`` (float32,
        shape ``(N, 4)`` in xmin, ymin, xmax, ymax order) and ``'labels'``
        (int64, shape ``(N,)``). A target without objects yields ``N == 0``.

    Raises:
        KeyError: If an object's name is not a key of ``class_to_idx``.
    """
    corner_keys = ('xmin', 'ymin', 'xmax', 'ymax')
    processed_targets = []
    for target in targets:
        # A missing 'object' entry is treated the same as an empty list.
        objects = target['annotation'].get('object', [])
        if not isinstance(objects, list):
            objects = [objects]

        boxes = [[float(obj['bndbox'][key]) for key in corner_keys] for obj in objects]
        labels = [class_to_idx[obj['name']] for obj in objects]

        processed_targets.append({
            # The explicit reshape keeps the (N, 4) layout even when N == 0;
            # torch.tensor([]) on its own would be one-dimensional.
            'boxes': torch.tensor(boxes, dtype=torch.float32, device=device).reshape(len(boxes), 4),
            'labels': torch.tensor(labels, dtype=torch.int64, device=device),
        })

    return processed_targets

In [None]:
# Step 2: Define the RetinaNet Model
class RetinaNetModel(LightningModule):
    """Lightning wrapper around torchvision's ``retinanet_resnet50_fpn``.

    Args:
        learning_rate: Step size handed to the Adam optimizer.

    NOTE(review): the recorded run of this notebook saved a checkpoint named
    ``model-epoch=00-train_loss=nan.ckpt``, i.e. the logged loss was NaN.
    A smaller ``learning_rate`` may be needed - confirm before relying on
    the trained weights.
    """

    def __init__(self, learning_rate=0.001):
        super().__init__()
        # `weights="DEFAULT"` is the current spelling of the deprecated
        # `pretrained=True` flag.
        self.model = retinanet_resnet50_fpn(weights="DEFAULT")
        self.model.train()
        self.learning_rate = learning_rate

    def forward(self, images, targets=None):
        """Call the wrapped model, passing ``targets`` only in training mode."""
        if self.training:
            return self.model(images, targets)
        else:
            return self.model(images)

    def training_step(self, batch, batch_idx):
        """Compute and log the summed loss for one ``(images, targets)`` batch.

        ``targets`` are raw annotation dicts; they are converted by
        ``preprocess_targets`` onto the device of the first image.
        """
        images, targets = batch
        device = images[0].device
        targets = preprocess_targets(targets, device)
        loss_dict = self.model(images, targets)
        losses = sum(loss_dict.values())
        # The batch is a tuple of tuples, so Lightning cannot reliably infer
        # its size; state it explicitly for correct epoch-level averaging.
        self.log(
            'train_loss',
            losses,
            on_step=True,
            on_epoch=True,
            prog_bar=True,
            logger=True,
            batch_size=len(images),
        )
        return losses

    def configure_optimizers(self):
        """Return an Adam optimizer over the wrapped model's parameters."""
        optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
        return optimizer

In [None]:
# Folder that receives the training checkpoints; created ahead of time so
# later cells can list and write into it.
checkpoint_dir = '/content/models'
os.makedirs(name=checkpoint_dir, exist_ok=True)
directory_banner = f"Checkpoint directory: {checkpoint_dir}"
print(directory_banner)

Checkpoint directory: /content/models


In [None]:
# Step 3: Train the Model
model = RetinaNetModel()

# Keep only the checkpoint with the lowest logged 'train_loss'.
# NOTE(review): the recorded run produced a file named
# 'model-epoch=00-train_loss=nan.ckpt'; ranking by a NaN metric is not
# meaningful, so the training divergence should be investigated.
checkpoint_callback = ModelCheckpoint(
    monitor='train_loss',
    dirpath=checkpoint_dir,
    filename='model-{epoch:02d}-{train_loss:.2f}',
    save_top_k=1,
    mode='min'
)
print(f"Checkpoint callback set to save in: {checkpoint_callback.dirpath}")

trainer = Trainer(
    max_epochs=5,
    # One device on either accelerator; `None` (used previously for the CPU
    # case) is outside the documented types for `devices`.
    devices=1,
    accelerator='gpu' if torch.cuda.is_available() else 'cpu',
    # Spelling recommended by Lightning in place of the legacy `precision=16`.
    precision='16-mixed',
    callbacks=[checkpoint_callback]
)

# perf_counter is a monotonic clock, so the measured interval is not affected
# by system clock adjustments during the long training run.
start_time = time.perf_counter()
trainer.fit(model, train_loader)
end_time = time.perf_counter()

training_time = end_time - start_time

# Force a checkpoint save after training
print("Forcing checkpoint save...")
trainer.save_checkpoint(os.path.join(checkpoint_dir, "final_model.ckpt"))

# Verify checkpoint creation
print(f"Checkpoint files: {os.listdir(checkpoint_dir)}")

Downloading: "https://download.pytorch.org/models/retinanet_resnet50_fpn_coco-eeacb38b.pth" to /root/.cache/torch/hub/checkpoints/retinanet_resnet50_fpn_coco-eeacb38b.pth
100%|██████████| 130M/130M [00:00<00:00, 215MB/s]
/usr/local/lib/python3.10/dist-packages/lightning_fabric/connector.py:571: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
INFO:pytorch_lightning.utilities.rank_zero:Using 16bit Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Checkpoint callback set to save in: /content/models


INFO:pytorch_lightning.utilities.rank_zero:You are using a CUDA device ('NVIDIA L4') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name  | Type      | Params | Mode 
--------------------------------------------
0 | model | RetinaNet | 34.0 M | train
--------------------------------------------
33.8 M    Trainable params
222 K     Non-trainable params
34.0 M    Total params
136.060   Total estimated model params size (MB)
/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. C

Training: |          | 0/? [00:00<?, ?it/s]

  return F.conv2d(input, weight, bias, self.stride,
/usr/local/lib/python3.10/dist-packages/pytorch_lightning/utilities/data.py:78: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 3. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.
INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=5` reached.


Forcing checkpoint save...
Checkpoint files: ['final_model.ckpt', 'model-epoch=00-train_loss=nan.ckpt']


In [None]:
# Step 4: Calculate Misclassifications
def calculate_misclassifications(outputs, targets):
    """Count the (output, target) pairs whose top prediction is wrong.

    A pair counts as a misclassification when the output has no 'boxes'
    entry or an empty one, or when the first predicted label differs from
    the ``class_to_idx`` index of the first annotated object's name. Names
    missing from ``class_to_idx`` map to -1, as does an empty label list.

    Returns:
        The number of misclassified pairs as an int.
    """

    def _is_misclassified(prediction, annotation):
        # No detections at all is treated as a miss.
        has_boxes = 'boxes' in prediction and len(prediction['boxes']) > 0
        if not has_boxes:
            return True

        # The first label is taken as the model's top prediction.
        if len(prediction['labels']) > 0:
            top_label = prediction['labels'][0].item()
        else:
            top_label = -1

        # Only the first annotated object is used as ground truth.
        annotated = annotation['annotation']['object']
        first_object = annotated[0] if isinstance(annotated, list) else annotated
        expected_label = class_to_idx.get(first_object['name'], -1)

        return top_label != expected_label

    return sum(1 for prediction, annotation in zip(outputs, targets) if _is_misclassified(prediction, annotation))

In [None]:
def evaluate_model(model, test_loader, max_batches=10, verbose=True):
    """Run single-image inference on the first batches of a loader.

    Args:
        model: Callable module. It is called with a 4-D image tensor and must
            return a sequence whose first element is a dict with 'boxes',
            'labels' and 'scores'.
        test_loader: Iterable yielding ``(images, targets)`` batches.
        max_batches: Number of batches to evaluate before stopping.
        verbose: When True (the default), print the timing and every
            predicted box for each image.

    Returns:
        ``(misclassification_rate, avg_inference_time)``: the percentage of
        images counted by ``calculate_misclassifications`` and the mean
        seconds per image. Both are NaN when no image was evaluated.
    """
    model.eval()

    # Inputs are moved to wherever the model's weights live, so the call
    # works regardless of which device the model was left on.
    first_param = next(iter(model.parameters()), None)
    device = first_param.device if first_param is not None else None

    misclassifications = 0
    total = 0
    inference_times = []

    with torch.no_grad():
        for batch_idx, (images, targets) in enumerate(test_loader):
            if batch_idx >= max_batches:
                break

            for img, target in zip(images, targets):
                if device is not None:
                    img = img.to(device)

                start_time = time.perf_counter()
                output = model(img.unsqueeze(0))
                if device is not None and device.type == 'cuda':
                    # CUDA kernels run asynchronously; wait for them so the
                    # measured interval covers the whole forward pass.
                    torch.cuda.synchronize(device)
                inference_time = time.perf_counter() - start_time
                inference_times.append(inference_time)

                misclassifications += calculate_misclassifications(output, [target])
                total += 1

                if not verbose:
                    continue

                # Print predictions
                print(f"Batch {batch_idx+1}, Image {total}:")
                print(f"  Inference Time: {inference_time:.6f} seconds")
                print(f"  Predictions:")
                detections = output[0]
                for j, box in enumerate(detections['boxes']):
                    label = detections['labels'][j].item()
                    score = detections['scores'][j].item()
                    # Labels without an entry in idx_to_class are shown by
                    # number instead of aborting the evaluation.
                    label_name = idx_to_class.get(label, f"unknown({label})")
                    print(f"   - Label: {label_name}, Score: {score:.4f}, Box: {box.tolist()}")

    if total == 0:
        # Nothing was evaluated (empty loader or max_batches <= 0).
        return float('nan'), float('nan')

    avg_inference_time = sum(inference_times) / len(inference_times)
    misclassification_rate = (misclassifications / total) * 100

    return misclassification_rate, avg_inference_time

# Evaluate the first ten validation batches, then report both summary metrics.
misclassification_rate, avg_inference_time = evaluate_model(
    model,
    test_loader,
    max_batches=10,
)

for summary_line in (
    f"Misclassification Rate: {misclassification_rate:.2f}%",
    f"Average Inference Time per Prediction: {avg_inference_time:.6f} seconds",
):
    print(summary_line)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
   - Label: chair, Score: 0.0693, Box: [29.889009475708008, 14.241961479187012, 45.313262939453125, 38.42023849487305]
   - Label: train, Score: 0.0683, Box: [175.24803161621094, 0.0, 425.13201904296875, 79.50635528564453]
   - Label: train, Score: 0.0683, Box: [0.0, 0.0, 418.7443542480469, 158.46058654785156]
   - Label: train, Score: 0.0683, Box: [0.0, 0.0, 245.0194549560547, 79.50635528564453]
   - Label: train, Score: 0.0683, Box: [85.19174194335938, 0.0, 335.07574462890625, 79.50635528564453]
   - Label: chair, Score: 0.0680, Box: [164.9734344482422, 14.241961479187012, 180.39768981933594, 38.42023849487305]
   - Label: cat, Score: 0.0674, Box: [471.9058532714844, 347.8917541503906, 500.0, 375.0]
   - Label: cat, Score: 0.0674, Box: [355.360595703125, 264.6920166015625, 500.0, 375.0]
   - Label: cat, Score: 0.0674, Box: [491.3301086425781, 361.44586181640625, 500.0, 375.0]
   - Label: cat, Score: 0.0674, Box: [200.40

In [None]:
# Step 6: Measure Resource Requirements and Storage Consumption
import os

# Resident set size of the current (kernel) process, converted to MB.
memory_usage = psutil.Process(os.getpid()).memory_info().rss / 1024 ** 2  # in MB

# Combined size of every regular file in the checkpoint directory, in MB.
# This counts all checkpoints stored there, not just a single model file.
checkpoint_paths = [os.path.join(checkpoint_dir, f) for f in os.listdir(checkpoint_dir)]
model_size = sum(os.path.getsize(path) for path in checkpoint_paths if os.path.isfile(path)) / 1024 ** 2  # in MB

print(f"Training Time: {training_time} seconds")
print(f"Misclassification Rate: {misclassification_rate}%")
# avg_inference_time is a mean over individually timed images, not batches.
print(f"Average Inference Time per Image: {avg_inference_time} seconds")
print(f"Memory Usage: {memory_usage} MB")
print(f"Model Storage Consumption: {model_size} MB")

Training Time: 3353.562888622284 seconds
Misclassification Rate: 90.0%
Average Inference Time per Batch: 1.1770779490470886 seconds
Memory Usage: 6291.5390625 MB
Model Storage Consumption: 776.2984046936035 MB
