In [1]:
import collections
import numpy as np
import torch
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import cv2

In [2]:
import models
from dataset import CocoDataset, collater, Resizer, AspectRatioBasedSampler, Augmenter, Normalizer

In [3]:
path = './data/'
train_transforms = transforms.Compose([
    Normalizer(), 
    Augmenter(), 
    Resizer()
])
val_transforms = transforms.Compose([
    Normalizer(),
    Resizer()
])

In [4]:
def collate_fn(batch):
    imgs = [s['img'] for s in batch]
    annots = [s['annot'] for s in batch]

    imgs = torch.stack([torch.from_numpy(img).permute(2, 0, 1) for img in imgs], 0)
    annots = [torch.from_numpy(annot) for annot in annots]

    return {'img': imgs, 'annot': annots}

In [5]:
trainset = CocoDataset(root=path, set_name='train2017', transform=train_transforms, fraction=0.4)
valset = CocoDataset(root=path, set_name='val2017', transform=val_transforms, fraction=0.4)

loading annotations into memory...
Done (t=0.02s)
creating index...
index created!
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!


In [6]:
sampler = AspectRatioBasedSampler(trainset, batch_size=2, drop_last=False)
sampler_val = AspectRatioBasedSampler(valset, batch_size=1, drop_last=False)

In [7]:
trainloader = DataLoader(trainset, num_workers=0, collate_fn=collater, batch_sampler=sampler, pin_memory=True)
valloader = DataLoader(valset, num_workers=0, collate_fn=collater, batch_sampler=sampler_val, pin_memory=True)

In [8]:
retinanet = models.resnet18(num_classes=trainset.num_classes())
retinanet = torch.nn.DataParallel(retinanet)

In [9]:
retinanet.training = True

In [10]:
optimizer = optim.AdamW(retinanet.parameters(), lr=1e-5)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
loss_hist = collections.deque(maxlen=500)




In [11]:
num_epochs = 1

In [12]:
retinanet.train()
retinanet.module.freeze_bn()
print(f'Number of training images : {len(trainloader)}');

Number of training images : 100


In [13]:
for epoch_num in range(num_epochs):

        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []

        for iter_num, data in enumerate(trainloader):
            try:
                optimizer.zero_grad(set_to_none=True)
                classification_loss, regression_loss = retinanet([data['img'].float(), data['annot']])    
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(
                        epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)))
                del classification_loss
                del regression_loss
            except Exception as e:
                 print(e)
                 continue
            scheduler.step(np.mean(epoch_loss))

Epoch: 0 | Iteration: 0 | Classification loss: 1.31181 | Regression loss: 1.02456 | Running loss: 2.33637
Epoch: 0 | Iteration: 1 | Classification loss: 1.35089 | Regression loss: 1.03956 | Running loss: 2.36341
Epoch: 0 | Iteration: 2 | Classification loss: 1.24268 | Regression loss: 1.01728 | Running loss: 2.32893
Epoch: 0 | Iteration: 3 | Classification loss: 1.26288 | Regression loss: 1.04213 | Running loss: 2.32295
Epoch: 0 | Iteration: 4 | Classification loss: 1.18546 | Regression loss: 1.07306 | Running loss: 2.31006
Epoch: 0 | Iteration: 5 | Classification loss: 1.15298 | Regression loss: 1.03791 | Running loss: 2.29020
Epoch: 0 | Iteration: 6 | Classification loss: 1.00447 | Regression loss: 1.01881 | Running loss: 2.25207
Epoch: 0 | Iteration: 7 | Classification loss: 1.12124 | Regression loss: 1.01179 | Running loss: 2.23719
Epoch: 0 | Iteration: 8 | Classification loss: 1.09549 | Regression loss: 1.07933 | Running loss: 2.23026
Epoch: 0 | Iteration: 9 | Classification loss:

In [14]:
torch.save(retinanet.module.state_dict(), 'retinanet_model_1.pt')

In [15]:
from pycocotools.cocoeval import COCOeval
import json

def evaluate_coco(dataset, model, threshold=0.05):
    model.eval()
    with torch.no_grad():
        results = []
        image_ids = []

        for index in range(len(dataset)):
            data = dataset[index]
            scale = data['scale']

            img = data['img'].unsqueeze(0).permute(0, 3, 1, 2).float()

            if torch.cuda.is_available():
                scores, labels, boxes = model(img.cuda())
            else:
                scores, labels, boxes = model(img)

            scores = scores.cpu()
            labels = labels.cpu()
            boxes = boxes.cpu()

            boxes /= scale

            if boxes.shape[0] > 0:
                boxes[:, 2] -= boxes[:, 0]
                boxes[:, 3] -= boxes[:, 1]

                for box_id in range(boxes.shape[0]):
                    score = float(scores[box_id])
                    label = int(labels[box_id])
                    box = boxes[box_id, :]

                    if score < threshold:
                        break

                    image_result = {
                        'image_id': dataset.image_ids[index],
                        'category_id': dataset.label_to_coco_label(label),
                        'score': float(score),
                        'bbox': box.tolist(),
                    }
                    results.append(image_result)

            image_ids.append(dataset.image_ids[index])
            print('{}/{}'.format(index, len(dataset)), end='\r')

        if not results:
            return {"error": "No detection results"}

        json.dump(results, open(f'{dataset.set_name}_bbox_results.json', 'w'), indent=4)

        coco_true = dataset.coco
        coco_pred = coco_true.loadRes(f'{dataset.set_name}_bbox_results.json')

        coco_eval = COCOeval(coco_true, coco_pred, 'bbox')
        coco_eval.params.imgIds = image_ids
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

        return coco_eval.stats