In [1]:
import os
import glob
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib.patches as patches
from bs4 import BeautifulSoup

In [2]:
from PIL import Image
import cv2
import numpy as np
import time
import torch
import torchvision
from torch.utils.data import Dataset
from torchvision import transforms
import albumentations
import albumentations.pytorch
from matplotlib import pyplot as plt
import os
import random

In [3]:
# CUDA
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [4]:
# helper function for dataset
def generate_box(obj):
    
    xmin = float(obj.find('xmin').text)
    ymin = float(obj.find('ymin').text)
    xmax = float(obj.find('xmax').text)
    ymax = float(obj.find('ymax').text)
    
    return [xmin, ymin, xmax, ymax]

In [5]:
def generate_label(obj):
    # only have ships
    return 0 

In [6]:
def generate_target(file): 
    with open(file) as f:
        data = f.read()
        soup = BeautifulSoup(data, "xml")
        objects = soup.find_all("object")

        num_objs = len(objects)

        boxes = []
        labels = []
        for i in objects:
            boxes.append(generate_box(i))
            labels.append(generate_label(i))


        boxes = torch.as_tensor(boxes, dtype=torch.float32) 
        labels = torch.as_tensor(labels, dtype=torch.int64) 
        
        target = {}

        
        target["boxes"] = boxes
        target["labels"] = labels
        
        return target


In [7]:
list(sorted(os.listdir("annotations_yolo/")))[0][:-3]

'01_10_12.'

In [8]:
class ShipDataset:
    def __init__(self, path, transform=None):
        self.path = path
        self.files = list(sorted(os.listdir("annotations_yolo/")))
        self.transform = transform
        
    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        file_image = self.files[idx][:-3] + 'jpg'
        file_label = self.files[idx][:-3] + 'xml'

        img_path = os.path.join("images/", file_image)
        label_path = os.path.join("annotations/", file_label)
        
        #print(img_path)
        # Read an image with OpenCV, gray scale
        image = cv2.imread(img_path,0)
        #image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        image = torch.tensor(image,dtype=torch.float32)
        image = torch.unsqueeze(image, dim=0)
        target = generate_target(label_path)

        #start_t = time.time()
        if self.transform:
            augmented = self.transform(image=image)
            #total_time = (time.time() - start_t)
            image = augmented['image']
        
            
        return image, target #, total_time

In [9]:
# TODO: Include transformations/albumnations for dataset
# resize to 640, include mirror images, as well as gaussian noise 

In [10]:
dataset = ShipDataset(
    path = 'images/'
)
len(dataset)

1859

In [11]:
train_set, val_set = torch.utils.data.random_split(dataset, [1400,459])
len(train_set), len(val_set)

(1400, 459)

In [12]:
#train_set[0]

(tensor([[[175., 192., 242.,  ...,  65.,  52.,  51.],
          [146., 183., 248.,  ...,  57.,  41.,  40.],
          [164., 154., 233.,  ...,  49.,  47.,  45.],
          ...,
          [ 67.,  53.,  46.,  ...,  46.,  42.,  60.],
          [ 50.,  35.,  29.,  ...,  48.,  38.,  50.],
          [ 39.,  34.,  31.,  ...,  53.,  65.,  71.]]]),
 {'boxes': tensor([[297., 442., 317., 454.],
          [375., 327., 388., 346.],
          [420., 260., 444., 277.],
          [525., 632., 553., 658.],
          [672., 628., 705., 647.],
          [634., 726., 667., 744.],
          [790., 771., 800., 778.],
          [ 95., 364., 108., 371.],
          [112., 342., 133., 355.],
          [103., 246., 120., 260.],
          [ 93., 227., 106., 235.],
          [115., 217., 125., 228.],
          [122., 198., 133., 214.],
          [137., 209., 150., 223.],
          [410.,  95., 419., 106.],
          [ 76., 469.,  88., 478.],
          [350., 114., 363., 126.]]),
  'labels': tensor([0, 0, 0, 0, 0, 

In [13]:
def collate_fn(batch):
    return tuple(zip(*batch))

In [14]:
train_loader = torch.utils.data.DataLoader(train_set, batch_size=4, collate_fn=collate_fn)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=4,collate_fn=collate_fn)

In [15]:
retina = torchvision.models.detection.retinanet_resnet50_fpn(num_classes = 1, weights=False, pretrained_backbone = True)




In [16]:
num_epochs = 5
retina.to(device)
    
# parameters
params = [p for p in retina.parameters() if p.requires_grad] # select parameters that require gradient calculation
optimizer = torch.optim.SGD(params, lr=0.0001,
                                momentum=0.9, weight_decay=0.0005)

len_dataloader = len(train_loader)

# about 4 min per epoch on Colab GPU
for epoch in range(num_epochs):
    start = time.time()
    retina.train()

    i = 0    
    epoch_loss = 0
    for images, targets in train_loader:
        images = list(image.to(device) for image in images)
        
        #print(targets)
        

        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        #print(images)
        loss_dict = retina(images, targets) 

        losses = sum(loss for loss in loss_dict.values()) 

        i += 1

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
        
        epoch_loss += losses 
    print(epoch_loss, f'time: {time.time() - start}')

tensor(nan, device='cuda:0', grad_fn=<AddBackward0>) time: 176.85456132888794
tensor(nan, device='cuda:0', grad_fn=<AddBackward0>) time: 175.33253264427185


KeyboardInterrupt: 

In [None]:
# getting a Nan value for loss, possible causes:
#   Gradients exploding, use gradient clipping
#   Data is not normalized, reduce values of data