In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install pandas
!pip install pycocotools
!pip install opencv-python
!pip install albumentations
!pip install torchmetrics



In [None]:
import zipfile
import os

zip_path = '/content/drive/MyDrive/datasci/drive-download-20250522T234314Z-1-001.zip'
extract_path = '/content/drive/MyDrive/datasci'

# Create the extract directory if it doesn't exist
os.makedirs(extract_path, exist_ok=True)

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

print(f"Successfully unzipped {zip_path} to {extract_path}")


Successfully unzipped /content/drive/MyDrive/datasci/drive-download-20250522T234314Z-1-001.zip to /content/drive/MyDrive/datasci


In [None]:
import torchvision
from torchvision.models.detection import retinanet_resnet50_fpn_v2
import torch

model = retinanet_resnet50_fpn_v2(weights=None,weights_backbone="DEFAULT", num_classes=2)
model.load_state_dict(torch.load('/content/drive/MyDrive/datasci/coco/retina_final.pth'))
model.train()
model.to('cuda')

RetinaNet(
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      

In [None]:
import torch
from torchvision.datasets import CocoDetection
from torchvision import transforms as T
import os

class CocoDetectionRetinaNet(CocoDetection):
    def __init__(self, root, annFile, transform=None):
        super(CocoDetectionRetinaNet, self).__init__(root, annFile)
        self.transform = transform
    def __getitem__(self, idx):
        img, ann = super().__getitem__(idx)
        boxes = []
        labels = []

        for obj in ann:
            if 'iscrowd' in obj and obj['iscrowd']:
                continue
            bbox = obj['bbox']
            x1 = bbox[0]
            y1 = bbox[1]
            x2 = bbox[0] + bbox[2]
            y2 = bbox[1] + bbox[3]

            boxes.append([x1, y1, x2, y2])
            labels.append(obj['category_id'])

        if len(boxes) == 0:
            boxes = torch.zeros((0, 4), dtype=torch.float32)
            labels = torch.zeros((0,), dtype=torch.int64)
        else:
            boxes = torch.tensor(boxes, dtype=torch.float32)
            labels = torch.tensor(labels, dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels
        }

        if self.transform is not None:
            img = self.transform(img)

        return img, target


In [None]:
!pip install pycocotools



In [None]:
from torchvision.datasets import CocoDetection
from torchvision.transforms import ToTensor

In [None]:
train_ds = CocoDetectionRetinaNet(root='/content/drive/MyDrive/datasci/coco/train', annFile='/content/drive/MyDrive/datasci/coco/annotations/clean_instances_train.json',transform=ToTensor())
test_ds = CocoDetectionRetinaNet(root='/content/drive/MyDrive/datasci/coco/test',annFile='/content/drive/MyDrive/datasci/coco/annotations/clean_instances_test.json', transform=ToTensor())
valid_ds = CocoDetectionRetinaNet(root='/content/drive/MyDrive/datasci/coco/valid', annFile='/content/drive/MyDrive/datasci/coco/annotations/clean_instances_valid.json',transform=ToTensor())

loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


In [None]:
from torch.utils.data import DataLoader

In [None]:
train_dal = DataLoader(train_ds,
                      batch_size=12,
                      shuffle=True,
                      collate_fn=lambda batch: tuple(zip(*batch)))

test_dal = DataLoader(test_ds,
                     batch_size=12,
                     shuffle=False,
                     collate_fn=lambda batch: tuple(zip(*batch)))

valid_dal = DataLoader(valid_ds,
                    batch_size=12,
                    shuffle=False,
                    collate_fn=lambda batch: tuple(zip(*batch)))

In [None]:
from tqdm import tqdm

In [None]:
from torchvision.models.detection import retinanet_resnet50_fpn
import torch

optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
num_epochs = 10
curr_loss = 1.1766
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for images, targets in tqdm(train_dal, desc=f"Epoch {epoch+1}/{num_epochs}", leave=True):
        images = [img.to('cuda') for img in images]
        targets = [{k: v.to('cuda') for k, v in t.items()} for t in targets]

        if any(t['boxes'].numel() == 0 for t in targets):
            continue

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        running_loss += losses.item()
    for images, targets in tqdm(test_dal, desc=f"Epoch {epoch+1}/{num_epochs}", leave=True):
        images = [img.to('cuda') for img in images]
        targets = [{k: v.to('cuda') for k, v in t.items()} for t in targets]

        if any(t['boxes'].numel() == 0 for t in targets):
            continue

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        running_loss += losses.item()
    print(f"Epoch {epoch+1}, Loss: {running_loss:.4f}")
    if running_loss < curr_loss:
        print(f"Loss decreased from {curr_loss} to {running_loss}")
        curr_loss = running_loss
        torch.save(model.state_dict(), "/content/drive/MyDrive/datasci/coco/retina_finall.pth")

Epoch 1/10: 100%|██████████| 139/139 [05:02<00:00,  2.18s/it]
Epoch 1/10: 100%|██████████| 3/3 [00:06<00:00,  2.16s/it]


Epoch 1, Loss: 86.3861


Epoch 2/10: 100%|██████████| 139/139 [05:12<00:00,  2.25s/it]
Epoch 2/10: 100%|██████████| 3/3 [00:06<00:00,  2.18s/it]


Epoch 2, Loss: 73.8164


Epoch 3/10: 100%|██████████| 139/139 [05:12<00:00,  2.25s/it]
Epoch 3/10: 100%|██████████| 3/3 [00:06<00:00,  2.15s/it]


Epoch 3, Loss: 67.0479


Epoch 4/10: 100%|██████████| 139/139 [05:11<00:00,  2.24s/it]
Epoch 4/10: 100%|██████████| 3/3 [00:06<00:00,  2.15s/it]


Epoch 4, Loss: 60.8330


Epoch 5/10: 100%|██████████| 139/139 [05:13<00:00,  2.26s/it]
Epoch 5/10: 100%|██████████| 3/3 [00:06<00:00,  2.19s/it]


Epoch 5, Loss: 55.6632


Epoch 6/10: 100%|██████████| 139/139 [05:15<00:00,  2.27s/it]
Epoch 6/10: 100%|██████████| 3/3 [00:06<00:00,  2.21s/it]


Epoch 6, Loss: 51.3137


Epoch 7/10: 100%|██████████| 139/139 [05:15<00:00,  2.27s/it]
Epoch 7/10: 100%|██████████| 3/3 [00:06<00:00,  2.21s/it]


Epoch 7, Loss: 47.4622


Epoch 8/10: 100%|██████████| 139/139 [05:14<00:00,  2.26s/it]
Epoch 8/10: 100%|██████████| 3/3 [00:06<00:00,  2.16s/it]


Epoch 8, Loss: 41.5214


Epoch 9/10: 100%|██████████| 139/139 [05:11<00:00,  2.24s/it]
Epoch 9/10: 100%|██████████| 3/3 [00:06<00:00,  2.18s/it]


Epoch 9, Loss: 37.9865


Epoch 10/10: 100%|██████████| 139/139 [05:10<00:00,  2.24s/it]
Epoch 10/10: 100%|██████████| 3/3 [00:06<00:00,  2.16s/it]

Epoch 10, Loss: 35.4660





In [None]:
import torch
from torchmetrics.detection.mean_ap import MeanAveragePrecision

metric = MeanAveragePrecision(iou_type="bbox", iou_thresholds=[0.5])

model.eval()
metric.reset()

with torch.no_grad():
    for images, targets in valid_dal:
        images = [img.to('cuda') for img in images]
        targets = [{k: v.to('cuda') for k, v in t.items()} for t in targets]

        outputs = model(images)
        preds = []
        for out in outputs:
            preds.append({
                'boxes': out['boxes'].cpu(),
                'scores': out['scores'].cpu(),
                'labels': out['labels'].cpu(),
            })

        gts = []
        for t in targets:
            gts.append({
                'boxes': t['boxes'].cpu(),
                'labels': t['labels'].cpu(),
            })

        metric.update(preds, gts)
results = metric.compute()
print("mAP@0.5:", results["map_50"].item())
print("mAP@0.5-0.95:", results["map"].item())

 mAP@0.5: 0.8078113198280334
 mAP@0.5-0.95: 0.8078113198280334
