In [None]:
import torch
from torch import nn

class Classifier(nn.Module):
    """Two-branch MLP head producing class scores and per-class box regressions.

    The input is flattened from dimension 1 onwards and fed independently to
    a classification branch and a bounding-box regression branch.

    Args:
        input_channel: Number of features per sample after flattening.
        num_classes: Number of classes; the regression branch emits
            ``num_classes * 4`` values per sample.
        hidden_dim: Width of every hidden linear layer (default 50).
        dropout: Dropout probability used after each hidden linear layer
            (default 0.5).

    The layer order (and therefore every ``state_dict`` key) is kept stable
    so previously saved weights keep loading.
    """

    def __init__(self, input_channel, num_classes, hidden_dim=50, dropout=0.5):
        super().__init__()
        # Classification branch ("Conf 1").
        self.classification = nn.Sequential(
            nn.Linear(input_channel, hidden_dim),
            nn.Dropout(p=dropout),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.Dropout(p=dropout),
            # NOTE(review): there is no ReLU between this dropout and the next
            # linear layer, unlike the regression branch. It is left as-is
            # because inserting a module here would shift the Sequential
            # indices and invalidate existing checkpoints - confirm intent.
            nn.Linear(hidden_dim, hidden_dim),
            nn.Dropout(p=dropout),
            nn.ReLU(),
            nn.Linear(hidden_dim, num_classes),
        )

        # Regression branch (bounding boxes): four values per class.
        self.regression = nn.Sequential(
            nn.Linear(input_channel, hidden_dim),
            nn.Dropout(p=dropout),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.Dropout(p=dropout),
            nn.ReLU(),
            nn.Linear(hidden_dim, num_classes * 4),
        )

    def forward(self, x):
        """Return ``(scores, bbox_coord)`` for a batch of features.

        Args:
            x: Tensor whose dimensions after the first are flattened into
                ``input_channel`` features.

        Returns:
            Tuple of the classification branch output (last dimension
            ``num_classes``) and the regression branch output (last
            dimension ``num_classes * 4``).
        """
        x = x.flatten(start_dim=1)
        scores = self.classification(x)
        bbox_coord = self.regression(x)
        return scores, bbox_coord

In [None]:
from torch.utils.data import Dataset
from pycocotools.coco import COCO
from PIL import Image
import os
import torchvision.transforms as T

class CocoDataset(Dataset):
    """COCO-format detection dataset yielding ``(image_tensor, target_dict)``.

    Args:
        annotation_file: Path to the COCO annotation JSON, passed to ``COCO``.
        images_file: Directory containing the image files named in the
            annotation file.
        show_bbox: Stored as ``self.showbbox``; not used by this class.
    """

    def __init__(self, annotation_file, images_file, show_bbox=False):
        self.showbbox = show_bbox
        self.coco = COCO(annotation_file)
        self.images_file = images_file
        # Sorted so that an index always maps to the same image id.
        self.ids = list(sorted(self.coco.imgs.keys()))

    def __len__(self):
        """Number of images listed in the annotation file."""
        return len(self.ids)

    def __getitem__(self, index):
        """Load one image and its annotations.

        Returns:
            A tuple ``(image, target)``. ``image`` is the result of
            ``T.ToTensor()`` applied to the RGB-converted image. ``target``
            is a dict with:

            * ``boxes``: float32 ``(N, 4)`` in ``[x_min, y_min, x_max, y_max]``
            * ``labels``: int64 ``(N,)`` COCO ``category_id`` values
            * ``image_id``: tensor holding the COCO image id
            * ``area``: float32 ``(N,)``
            * ``iscrowd``: int64 ``(N,)`` taken from the annotation
              (0 when the key is absent)

            Annotations whose box has non-positive width or height are
            dropped, so ``N`` can be smaller than the raw annotation count.
        """
        id_image = self.ids[index]

        img_name = self.coco.loadImgs(id_image)[0]["file_name"]
        # Force three channels: some files are grayscale or palette images.
        img = Image.open(os.path.join(self.images_file, img_name)).convert("RGB")

        annotations_id = self.coco.getAnnIds(imgIds=id_image)
        annotations = self.coco.loadAnns(annotations_id)

        boxes = []
        areas = []
        labels = []
        crowd_flags = []
        for annotation in annotations:
            # COCO stores boxes as [x_min, y_min, width, height].
            x_min, y_min, width, height = annotation['bbox']
            if width <= 0 or height <= 0:
                # Degenerate boxes are rejected by torchvision detection
                # models, so skip them instead of failing mid-epoch.
                continue
            boxes.append([x_min, y_min, x_min + width, y_min + height])
            areas.append(annotation['area'])
            labels.append(annotation['category_id'])
            crowd_flags.append(annotation.get('iscrowd', 0))

        # reshape keeps the (0, 4) shape when an image has no usable boxes.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(labels, dtype=torch.long)
        id_image = torch.tensor([id_image])
        areas = torch.as_tensor(areas, dtype=torch.float32)
        iscrowd = torch.as_tensor(crowd_flags, dtype=torch.int64)

        Annotations = {
            "boxes": boxes,
            "labels": labels,
            "image_id": id_image,
            "area": areas,
            "iscrowd": iscrowd
        }

        return T.ToTensor()(img), Annotations

In [None]:
import torch

annotation_file='drive/MyDrive/coco/annotations/instances_val2017.json'
images_file='drive/MyDrive/coco/val2017'

# Parse the annotation JSON once and share it between the three splits.
# Each split is only an index view (Subset) over the same images, so separate
# CocoDataset instances would just repeat the load and triple memory use.
coco_dataset = CocoDataset(annotation_file, images_file)

# Fixed seed so the random split is the same on every run.
torch.manual_seed(1)
indices = torch.randperm(len(coco_dataset)).tolist()

# Disjoint slices of one shuffled permutation:
#   train      -> indices[0:300]
#   validation -> indices[500:600]
#   test       -> indices[600:700]
# (indices[300:500] are intentionally left unused, as before.)
dataset = torch.utils.data.Subset(coco_dataset, indices[:300])
dataset_validation = torch.utils.data.Subset(coco_dataset, indices[500:600])
dataset_test = torch.utils.data.Subset(coco_dataset, indices[600:700])

def collate_fn(batch):
    """Transpose a batch of samples into per-field tuples.

    A batch ``[(img0, tgt0), (img1, tgt1)]`` becomes
    ``((img0, img1), (tgt0, tgt1))``. Nothing is stacked, so the samples
    in a batch do not need matching shapes.
    """
    per_field = zip(*batch)
    return tuple(per_field)

# Settings common to all three loaders: two worker processes, and the custom
# collate function that keeps samples as tuples instead of stacking them.
_shared_loader_kwargs = dict(num_workers=2, collate_fn=collate_fn)

# Training batches: two images each, reshuffled every epoch.
data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=2, shuffle=True, **_shared_loader_kwargs
)

# Validation batches: same batch size and shuffling as training.
data_loader_validation = torch.utils.data.DataLoader(
    dataset_validation, batch_size=2, shuffle=True, **_shared_loader_kwargs
)

# Test batches: one image at a time, in a fixed order.
data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=1, shuffle=False, **_shared_loader_kwargs
)

loading annotations into memory...
Done (t=1.66s)
creating index...
index created!
loading annotations into memory...
Done (t=0.83s)
creating index...
index created!
loading annotations into memory...
Done (t=0.74s)
creating index...
index created!


In [None]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
def get_model(num_classes):
    """Build a pretrained Faster R-CNN (ResNet-50 FPN) with a custom box head.

    The stock box predictor of the ROI heads is replaced by ``Classifier``,
    sized from the input feature count of the original predictor's
    ``cls_score`` layer.

    Args:
        num_classes: Number of output classes for the new predictor.

    Returns:
        The detection model with ``roi_heads.box_predictor`` swapped out.
    """
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    roi_heads = detector.roi_heads
    # Read the feature width before the predictor is replaced.
    predictor_in_features = roi_heads.box_predictor.cls_score.in_features
    # Alternative head kept for reference: FastRCNNPredictor(in_features, num_classes)
    roi_heads.box_predictor = Classifier(predictor_in_features, num_classes)
    return detector

In [None]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

num_classes = 91

# nn.Module.to moves the parameters in place and returns the same module.
model = get_model(num_classes).to(device)

# Only parameters that still require gradients are handed to the optimizer.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(
    params,
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005,
)

# Multiply the learning rate by 0.1 every 3 scheduler steps.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth


  0%|          | 0.00/160M [00:00<?, ?B/s]

In [None]:
'''
function for validation loss
https://stackoverflow.com/questions/71288513/how-can-i-determine-validation-loss-for-faster-rcnn-pytorch
'''
from typing import Tuple, List, Dict, Optional
import torch
from torch import Tensor
from collections import OrderedDict
from torchvision.models.detection.roi_heads import fastrcnn_loss
from torchvision.models.detection.rpn import concat_box_prediction_layers
def eval_forward(model, images, targets):
    """Compute the ROI-head losses of a Faster R-CNN model in eval mode.

    torchvision detection models only return losses in training mode, so
    this replays the forward pass step by step (transform, backbone, RPN
    proposal generation, ROI heads) with the model in eval mode and evaluates
    ``fastrcnn_loss`` on the sampled proposals.

    Adapted from
    https://stackoverflow.com/questions/71288513/how-can-i-determine-validation-loss-for-faster-rcnn-pytorch

    Args:
        model: A torchvision Faster R-CNN style model exposing ``transform``,
            ``backbone``, ``rpn`` and ``roi_heads``.
        images: List of image tensors.
        targets: List of target dicts containing at least ``"boxes"``;
            must not be ``None``.

    Returns:
        Dict with ``"loss_classifier"`` and ``"loss_box_reg"``. RPN losses
        are not computed because they are not part of the return value.

    Raises:
        AssertionError: If ``targets`` is ``None``.
        ValueError: If a target contains a box with non-positive width or
            height.

    Side effects:
        The model is left in eval mode. The whole pass runs under
        ``torch.no_grad()``, so the returned losses carry no autograd graph.
    """
    assert targets is not None
    model.eval()

    try:
        with torch.no_grad():
            images, targets = model.transform(images, targets)

            for target_idx, target in enumerate(targets):
                boxes = target["boxes"]
                degenerate_boxes = boxes[:, 2:] <= boxes[:, :2]
                if degenerate_boxes.any():
                    # Report the first degenerate box.
                    bb_idx = torch.where(degenerate_boxes.any(dim=1))[0][0]
                    degen_bb = boxes[bb_idx].tolist()
                    raise ValueError(
                        "All bounding boxes should have positive height and width."
                        f" Found invalid box {degen_bb} for target at index {target_idx}."
                    )

            features = model.backbone(images.tensors)
            if isinstance(features, torch.Tensor):
                features = OrderedDict([("0", features)])

            # Flip only the RPN flag, presumably so proposal filtering uses
            # its training-time settings (TODO confirm). It is reset in the
            # ``finally`` clause below.
            model.rpn.training = True

            # --- inlined RPN forward: proposals only ---
            features_rpn = list(features.values())
            objectness, pred_bbox_deltas = model.rpn.head(features_rpn)
            anchors = model.rpn.anchor_generator(images, features_rpn)

            num_images = len(anchors)
            num_anchors_per_level = [
                level[0].shape[0] * level[0].shape[1] * level[0].shape[2]
                for level in objectness
            ]
            objectness, pred_bbox_deltas = concat_box_prediction_layers(objectness, pred_bbox_deltas)
            # Apply the predicted deltas to the anchors to get decoded proposals.
            proposals = model.rpn.box_coder.decode(pred_bbox_deltas.detach(), anchors)
            proposals = proposals.view(num_images, -1, 4)
            proposals, _ = model.rpn.filter_proposals(
                proposals, objectness, images.image_sizes, num_anchors_per_level
            )

            # --- inlined ROI-heads forward: losses only ---
            image_shapes = images.image_sizes
            proposals, _, labels, regression_targets = model.roi_heads.select_training_samples(
                proposals, targets
            )
            box_features = model.roi_heads.box_roi_pool(features, proposals, image_shapes)
            box_features = model.roi_heads.box_head(box_features)
            class_logits, box_regression = model.roi_heads.box_predictor(box_features)

            loss_classifier, loss_box_reg = fastrcnn_loss(
                class_logits, box_regression, labels, regression_targets
            )
            return {"loss_classifier": loss_classifier, "loss_box_reg": loss_box_reg}
    finally:
        # Keep the RPN consistent with the rest of the model, which is in
        # eval mode, so a later inference call does not see a training RPN.
        model.rpn.training = False

In [None]:
num_epochs = 5

for epoch in range(num_epochs):
    model.train()
    print(f"Epoch {epoch}")
    for i, (images, targets) in enumerate(data_loader):
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # In training mode the model returns a dictionary of losses.
        loss_dict = model(images, targets)
        # Only the box-head losses are optimised; the other entries of
        # loss_dict are deliberately left out of the objective.
        loss = loss_dict['loss_classifier'] + loss_dict['loss_box_reg']

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print(f"iteration: {i}\tloss: {loss}")

    # Validation loss: averaged over the held-out validation loader. Reusing
    # `images`/`targets` here would score the last *training* batch instead.
    validation_loss_sum = 0.0
    validation_batches = 0
    with torch.no_grad():
        for val_images, val_targets in data_loader_validation:
            val_images = [image.to(device) for image in val_images]
            val_targets = [{k: v.to(device) for k, v in t.items()} for t in val_targets]
            loss_validation_dict = eval_forward(model, val_images, val_targets)
            batch_loss = loss_validation_dict['loss_classifier'] + loss_validation_dict['loss_box_reg']
            validation_loss_sum += batch_loss.item()
            validation_batches += 1
    # max(..., 1) avoids a division by zero on an empty validation loader.
    loss_validation = validation_loss_sum / max(validation_batches, 1)
    print(f"validadion_loss: {loss_validation}")

    # eval_forward leaves the model in eval mode; model.train() at the top of
    # the next epoch switches it back.
    lr_scheduler.step()
    print("\n\n")

In [None]:
# Save only the learned parameters (state dict), not the whole module object.
# NOTE(review): the file name says "fastrcnnpredictor", but get_model installs
# the custom Classifier head - confirm the name is still the intended one.
weights_name = 'weights_head-fastrcnnpredictor_backbone-resnet50_dataset-coco.pth'
model_state = model.state_dict()
torch.save(model_state, weights_name)

In [None]:
# performance
import cv2
import json
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval


# load model
# model = get_model(91)
# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# cpu
# model.load_state_dict(torch.load(
#     'drive/MyDrive/weights_head-fastrcnnpredictor_backbone-resnet50_dataset-coco.pth',
#      map_location=torch.device('cpu')))

# gpu
# model.load_state_dict(torch.load(
#    '/content/drive/MyDrive/weights_head-fastrcnnpredictor_backbone-resnet50_dataset-coco.pth'))

# Detections scoring at or below this value are not written to the results.
SCORE_THRESHOLD = 0.7

results = []
# Every image that was run through the model, whether or not it produced a
# detection. Evaluating only images with detections would hide all the
# ground-truth objects the model missed entirely and inflate the metrics.
evaluated_image_ids = set()

model.eval()
with torch.no_grad():  # inference only: do not build an autograd graph
    for images, targets in data_loader_test:
        images = [img.to(device) for img in images]
        predictions = model(images)

        for prediction, target in zip(predictions, targets):
            image_id = target["image_id"].item()
            evaluated_image_ids.add(image_id)

            detections = zip(
                prediction["boxes"].tolist(),
                prediction["labels"].tolist(),
                prediction["scores"].tolist(),
            )
            for (x_min, y_min, x_max, y_max), label, score in detections:
                if score > SCORE_THRESHOLD:
                    # model box format  [x_min, y_min, x_max, y_max]
                    # coco box format   [x_min, y_min, width, height]
                    results.append({
                        "image_id": image_id,
                        "category_id": label,
                        "bbox": [x_min, y_min, x_max - x_min, y_max - y_min],
                        "score": score,
                    })

# Serialise in one go so the file is always valid JSON.
with open("results.json", "w") as outfile:
    json.dump(results, outfile)

imgIds = sorted(evaluated_image_ids)

cocoGt = COCO('drive/MyDrive/coco/annotations/instances_val2017.json')
if results:
    cocoDt = cocoGt.loadRes('results.json')
    cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
    cocoEval.params.imgIds = imgIds
    # cocoEval.params.catIds = [1]
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()
else:
    # An empty detection list cannot be loaded as a COCO result set.
    print(f"No detections scored above {SCORE_THRESHOLD}; skipping COCO evaluation.")

In [None]:
# evaluation
import cv2
from google.colab.patches import cv2_imshow

# model = get_model(91)
# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# cpu
# model.load_state_dict(torch.load(
#     'drive/MyDrive/weights_head-fastrcnnpredictor_backbone-resnet50_dataset-coco.pth',
#      map_location=torch.device('cpu')))

# gpu
# model_test.load_state_dict(torch.load(
#    '/content/drive/MyDrive/weights_head-fastrcnnpredictor_backbone-resnet50_dataset-coco.pth'))

# Detections scoring at or below this value are not drawn.
DRAW_SCORE_THRESHOLD = 0.5

model.eval()

with torch.no_grad():  # inference only: do not build an autograd graph
    for images, targets in data_loader_test:
        images = [img.to(device) for img in images]
        predictions = model(images)

        # Pair every prediction with its own target so each image gets its
        # own boxes, whatever the batch size.
        for prediction, target in zip(predictions, targets):
            # File names are built as the image id zero-padded to 12 digits.
            img_name = str(target['image_id'].item()).zfill(12) + '.jpg'
            img = cv2.imread('drive/MyDrive/coco/val2017/' + img_name)
            if img is None:
                # cv2.imread signals a missing/unreadable file with None.
                print(f"Could not read image {img_name}; skipping.")
                continue

            boxes = prediction['boxes']
            labels = prediction['labels']
            scores = prediction['scores']
            for box, label, score in zip(boxes, labels, scores):
                if score > DRAW_SCORE_THRESHOLD:
                    print(box, label, score)
                    # OpenCV needs plain integer pixel coordinates, not
                    # float tensor elements.
                    x_min, y_min, x_max, y_max = (int(round(v)) for v in box.tolist())
                    cv2.rectangle(img, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2)

            cv2_imshow(img)