In [None]:
import os

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms

In [None]:
# Colab-only helper: this import is available inside a Google Colab runtime.
from google.colab import files

# Prompt for a file upload and keep the result in `uploaded`.
# The next cell moves a file named kaggle.json, so that is the file to pick here.
uploaded = files.upload()

KeyboardInterrupt: 

In [None]:
# Install the Kaggle API token where the kaggle CLI looks for it.
# Requires kaggle.json to be present in the current working directory.
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
# Restrict the token file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json

mv: cannot stat 'kaggle.json': No such file or directory
chmod: cannot access '/root/.kaggle/kaggle.json': No such file or directory


In [None]:
# Download the crawford/cat-dataset archive into /content (about 4 GB per the
# recorded output), then extract it quietly into /content/cat-dataset.
!kaggle datasets download -d crawford/cat-dataset -p /content
!unzip -q /content/cat-dataset.zip -d /content/cat-dataset

Dataset URL: https://www.kaggle.com/datasets/crawford/cat-dataset
License(s): CC0-1.0
Downloading cat-dataset.zip to /content
100% 4.03G/4.04G [01:02<00:00, 90.4MB/s]
100% 4.04G/4.04G [01:02<00:00, 69.8MB/s]


In [None]:
class ResNet50FPN(nn.Module):
    """ResNet-50 backbone with a two-level FPN-style neck and dense heads.

    The backbone is torchvision's ResNet-50 with its last two children (the
    global average pool and the fully connected layer) removed. The outputs
    of backbone children 6 and 7 (1024 and 2048 channels respectively) are
    each projected to 256 channels with a 1x1 convolution. The coarser
    projection is upsampled to the finer one's resolution and added to it,
    and both prediction heads run on that merged map.

    Args:
        num_classes: Number of output channels of the classification head.
    """

    # Positions of the last two residual stages inside ``self.backbone``.
    _C4_INDEX = 6
    _C5_INDEX = 7

    def __init__(self, num_classes=2):
        super(ResNet50FPN, self).__init__()
        # ``pretrained=True`` is deprecated in recent torchvision releases.
        # Prefer the ``weights=`` API when it exists, pinned to the same
        # ImageNet checkpoint that ``pretrained=True`` used to load.
        if hasattr(models, 'ResNet50_Weights'):
            resnet = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
        else:
            resnet = models.resnet50(pretrained=True)
        self.backbone = nn.Sequential(*list(resnet.children())[:-2])

        # FPN lateral 1x1 convolutions: bring both stages to 256 channels.
        self.conv1 = nn.Conv2d(in_channels=1024, out_channels=256, kernel_size=1)
        self.conv2 = nn.Conv2d(in_channels=2048, out_channels=256, kernel_size=1)

        # Dense prediction heads; 3x3 with padding=1 keeps the spatial size.
        self.cls_head = nn.Conv2d(in_channels=256, out_channels=num_classes, kernel_size=3, padding=1)
        self.reg_head = nn.Conv2d(in_channels=256, out_channels=4, kernel_size=3, padding=1)

    def forward(self, x):
        """Run the backbone, merge the two pyramid levels and apply the heads.

        Args:
            x: Input batch of shape (N, 3, H, W).

        Returns:
            Tuple ``(cls_output, reg_output)`` with ``num_classes`` and 4
            channels respectively, both at the spatial resolution of the
            feature map produced by backbone child 6.
        """
        c4 = None
        c5 = None
        for i, layer in enumerate(self.backbone):
            x = layer(x)
            if i == self._C4_INDEX:
                c4 = x
            elif i == self._C5_INDEX:
                c5 = x

        p4 = self.conv1(c4)
        p5 = self.conv2(c5)

        # Top-down pathway: without this merge p5 would be computed and then
        # discarded, and conv2 would never receive a gradient.
        p4 = p4 + F.interpolate(p5, size=p4.shape[-2:], mode='nearest')

        cls_output = self.cls_head(p4)
        reg_output = self.reg_head(p4)

        return cls_output, reg_output

In [None]:
class CatDataset(Dataset):
    """Cat images with point annotations, encoded as dense grid targets.

    Every file ending in ``.jpg`` inside ``root_dir`` is paired with an
    annotation file of the same name plus ``.cat``. The first line of that
    file is parsed as whitespace-separated integers; the first value is
    skipped (presumably the point count -- confirm against the dataset
    description) and the rest are read as alternating x/y coordinates.

    Args:
        root_dir: Directory holding the images and their annotation files.
        transform: Optional callable applied to the RGB PIL image.
        grid_size: Number of target cells per side.
        stride: Pixel size of one target cell in the transformed image.
    """

    def __init__(self, root_dir, transform=None, grid_size=14, stride=16):
        self.root_dir = root_dir
        # Sorted so the index -> file mapping does not depend on the
        # arbitrary order returned by os.listdir.
        self.image_files = sorted(f for f in os.listdir(root_dir) if f.endswith('.jpg'))
        self.transform = transform
        self.annotation_suffix = '.jpg.cat'
        self.grid_size = grid_size
        self.stride = stride

    def __len__(self):
        """Return the number of images found in ``root_dir``."""
        return len(self.image_files)

    @staticmethod
    def _spatial_size(image):
        """Return ``(width, height)`` of a tensor/array or PIL-like image, else None."""
        shape = getattr(image, 'shape', None)
        if shape is not None and len(shape) >= 2:
            # Tensors and arrays are laid out (..., H, W).
            return int(shape[-1]), int(shape[-2])
        size = getattr(image, 'size', None)
        if isinstance(size, tuple) and len(size) == 2:
            return int(size[0]), int(size[1])
        return None

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            Tuple ``(image, cls_target, reg_target)``: the (optionally
            transformed) image, a ``(1, grid_size, grid_size)`` float tensor
            with 1 in every cell containing a point, and a
            ``(4, grid_size, grid_size)`` float tensor holding, for those
            cells, the x offset, the y offset, and two channels set to 1.0.

        Raises:
            IndexError: If ``idx`` is out of range.
            FileNotFoundError: If the image or its annotation file is missing.
        """
        img_name = self.image_files[idx]
        img_path = os.path.join(self.root_dir, img_name)

        image = Image.open(img_path).convert('RGB')
        orig_w, orig_h = image.size
        if self.transform:
            image = self.transform(image)

        # Swap only the trailing extension; str.replace would also rewrite
        # any earlier '.jpg' inside the file name.
        annotation_name = img_name[:-len('.jpg')] + self.annotation_suffix
        annotation_path = os.path.join(self.root_dir, annotation_name)
        with open(annotation_path, 'r') as f:
            values = [int(v) for v in f.readline().split()][1:]
        points = np.asarray(values, dtype=np.float32).reshape(-1, 2)

        # Annotations are in original-image pixels, while the target grid
        # describes the transformed image. Rescale so the points land in the
        # right cells. Only a pure resize is accounted for: crops or flips in
        # `transform` would need matching handling of the points.
        new_size = self._spatial_size(image)
        if new_size is not None and orig_w > 0 and orig_h > 0:
            points[:, 0] *= new_size[0] / orig_w
            points[:, 1] *= new_size[1] / orig_h

        grid = self.grid_size
        stride = self.stride
        last = grid - 1

        cls_target = np.zeros((grid, grid), dtype=np.float32)
        reg_target = np.zeros((4, grid, grid), dtype=np.float32)
        for px, py in points:
            # Clamp on both sides so negative coordinates cannot wrap around
            # through negative indexing.
            x_idx = min(max(int(px // stride), 0), last)
            y_idx = min(max(int(py // stride), 0), last)
            cls_target[y_idx, x_idx] = 1
            reg_target[0, y_idx, x_idx] = px % stride
            reg_target[1, y_idx, x_idx] = py % stride
            reg_target[2, y_idx, x_idx] = 1.0  # confidence score
            reg_target[3, y_idx, x_idx] = 1.0  # dummy value

        return image, torch.tensor(cls_target).unsqueeze(0), torch.tensor(reg_target)

In [None]:
# Sanity check: build the network with two classification channels and print
# its module tree. The training cell below creates its own model instance
# (with num_classes=1), so this one is used for inspection only.
model = ResNet50FPN(num_classes=2)
print(model)

ResNet50FPN(
  (backbone): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          (0): Conv2d

In [None]:
# Preprocessing: fixed 224x224 input, then the ImageNet mean/std normalisation
# that the pretrained ResNet-50 weights were trained with.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Training configuration.
image_root_dir = '/content/cat-dataset/CAT_00'
batch_size = 16
num_epochs = 2
log_every = 10  # print the running batch loss every this many steps

# Seed so that shuffling and head initialisation are repeatable across runs.
torch.manual_seed(42)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Wrap the dataset in a DataLoader to get shuffled mini-batches; iterating the
# bare Dataset yields one unshuffled, unbatched sample at a time.
train_dataset = CatDataset(image_root_dir, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

model = ResNet50FPN(num_classes=1).to(device)
optimizer = torch.optim.Adam(model.parameters())

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    num_batches = 0
    for images, cls_targets, reg_targets in train_loader:
        images = images.to(device)
        cls_targets = cls_targets.to(device)
        reg_targets = reg_targets.to(device)

        optimizer.zero_grad()
        # Batches already carry the leading N dimension, so outputs
        # (N, 1, 14, 14) / (N, 4, 14, 14) line up with the targets directly.
        cls_output, reg_output = model(images)

        # calculating losses
        cls_loss = F.binary_cross_entropy_with_logits(cls_output, cls_targets)
        reg_loss = F.mse_loss(reg_output, reg_targets)
        total_loss = cls_loss + reg_loss

        total_loss.backward()
        optimizer.step()

        running_loss += total_loss.item()
        num_batches += 1
        if num_batches % log_every == 0:
            print(f'Epoch {epoch+1}/{num_epochs}, Loss: {total_loss.item()}')

    # One summary line per epoch; guard against an empty dataset.
    print(f'Epoch {epoch+1}/{num_epochs}, Mean loss: {running_loss / max(num_batches, 1)}')

Epoch 1/2, Loss: 1.6834444999694824
Epoch 1/2, Loss: 1.544158935546875
Epoch 1/2, Loss: 1.304704189300537
Epoch 1/2, Loss: 0.8815590143203735
Epoch 1/2, Loss: 1.3183056116104126
Epoch 1/2, Loss: 1.9968407154083252
Epoch 1/2, Loss: 0.7983160614967346
Epoch 1/2, Loss: 1.2074443101882935
Epoch 1/2, Loss: 1.059779405593872
Epoch 1/2, Loss: 1.7838151454925537
Epoch 1/2, Loss: 1.4019266366958618
Epoch 1/2, Loss: 2.0159668922424316
Epoch 1/2, Loss: 1.3609296083450317
Epoch 1/2, Loss: 1.8484386205673218
Epoch 1/2, Loss: 1.60092294216156
Epoch 1/2, Loss: 0.6609963178634644
Epoch 1/2, Loss: 0.36430999636650085
Epoch 1/2, Loss: 1.8668653964996338
Epoch 1/2, Loss: 2.8026299476623535
Epoch 1/2, Loss: 2.8057408332824707
Epoch 1/2, Loss: 1.2628791332244873
Epoch 1/2, Loss: 0.5693655014038086
Epoch 1/2, Loss: 0.9784088730812073
Epoch 1/2, Loss: 1.7586456537246704
Epoch 1/2, Loss: 1.6193962097167969
Epoch 1/2, Loss: 0.7733280658721924
Epoch 1/2, Loss: 1.1087172031402588
Epoch 1/2, Loss: 0.9353039264678