# Training

## Clean Dataset

Cleaning the dataset consists of:
1. Removing images that are all black
2. Removing images that I consider too big to process on my computer (width or height of 3000 pixels or more)

In [3]:
import sys
from tqdm import tqdm
import time
import os
from PIL import Image
import numpy as np
import warnings
warnings.filterwarnings('ignore')

# Default dataset location, used when no usable directory is given on the
# command line.
DEFAULT_DATA_DIR = "../full/train/"

# Optional override: the first command-line argument, accepted only when it
# names an existing directory. This keeps the cell working both when no
# argument is passed (no IndexError) and when sys.argv[1] is not a path at all,
# e.g. a launcher flag.
_cli_dir = sys.argv[1] if len(sys.argv) > 1 else None
ROOT_DATA_DIR = _cli_dir if _cli_dir and os.path.isdir(_cli_dir) else DEFAULT_DATA_DIR

# Code that concatenates ROOT_DATA_DIR directly with an image id relies on a
# trailing separator, so guarantee one.
ROOT_DATA_DIR = os.path.join(ROOT_DATA_DIR, "")

print(ROOT_DATA_DIR)

def get_directories(path, max_side=3000):
    """Return the sorted ids of the images under ``path`` that are usable for training.

    Every immediate sub-directory ``<id>`` of ``path`` is expected to contain
    the image ``<id>/<id>_PAN.tif``. An id is kept when the image is smaller
    than ``max_side`` pixels in both width and height and contains at least one
    non-zero (non-black) pixel after conversion to 8-bit greyscale.

    Args:
        path: Root directory of the dataset, with or without a trailing
            separator.
        max_side: Exclusive upper bound, in pixels, for both image width and
            height.

    Returns:
        Sorted list of image id strings.

    Errors raised while opening an image (for example a sub-directory without
    its ``_PAN.tif``) are not caught and propagate to the caller.
    """
    directories = []
    with os.scandir(path) as entries:
        for entry in entries:
            if not entry.is_dir():
                continue
            img_id = entry.name
            # Build the path from the `path` argument (not a global) so the
            # function works for any root directory.
            img_path = os.path.join(path, img_id, img_id + "_PAN.tif")
            # Keep the opened file inside the `with` so its handle is released.
            with Image.open(img_path) as img:
                width, height = img.size
                # Check the size first: oversized images are rejected without
                # converting their pixel data.
                if width >= max_side or height >= max_side:
                    continue
                pixels = np.asarray(img.convert("L"))
            if pixels.any():  # at least one non-black pixel
                directories.append(img_id)
    return sorted(directories)

start_time = time.time()

# Scan the dataset once and keep the number of usable image ids.
directories = get_directories(ROOT_DATA_DIR)
total_files = len(directories)
print(total_files)
print(directories[:5])

# Report the wall-clock time spent so far as HH:MM:SS.ss.
elapsed_time = time.time() - start_time
hours, rem = divmod(elapsed_time, 3600)
minutes, seconds = divmod(rem, 60)
duration_fields = (int(hours), int(minutes), seconds)
print("{:0>2}:{:0>2}:{:05.2f}".format(*duration_fields))

../full/train/
3892
['000f527d366c24e3f0d87e7baf5847f7', '003580d9b1e88aa1191734ec71a3a78c', '003e45460a331abb8624e62c6746c088', '0044f18fb09548e3a98b23d61d373e1d', '00464d3a7df4b98dea66bed12915ef6e']
00:00:51.08


## Create PyTorch Dataset Class

In [4]:
import os
import numpy as np
import torch
from PIL import Image
import fiona
import rasterio
import rasterio.mask

class CircleFinderDataset(torch.utils.data.Dataset):
    """Detection dataset pairing each panchromatic image with its annotated boxes.

    For an image id ``<id>`` the files ``<root>/<id>/<id>_PAN.tif`` (image) and
    ``<root>/<id>/<id>_anno.geojson`` (annotations) are read on every access.

    Args:
        images_ids: Sequence of image id strings.
        transforms: Optional callable ``(img, target) -> (img, target)`` applied
            to every sample.
        root_dir: Optional dataset root directory. When ``None`` the
            module-level ``ROOT_DATA_DIR`` is looked up at access time.
    """

    def __init__(self, images_ids, transforms=None, root_dir=None):
        self.transforms = transforms
        self.imgs = images_ids
        self.root_dir = root_dir

    def _path(self, idx, suffix):
        """Return the path of the file ``<id><suffix>`` that belongs to sample ``idx``."""
        root = ROOT_DATA_DIR if self.root_dir is None else self.root_dir
        img_id = self.imgs[idx]
        return os.path.join(root, img_id, img_id + suffix)

    @staticmethod
    def _pixel_box(bounds, to_pixel):
        """Map ``(left, bottom, right, top)`` bounds to ``[xmin, ymin, xmax, ymax]``.

        ``to_pixel`` is any object for which ``to_pixel * (x, y)`` yields the
        transformed ``(x, y)`` pair. Both corners are transformed, rounded to
        integers, and ordered so that the minimum comes first on each axis.
        """
        col0, row0 = to_pixel * (bounds[0], bounds[1])
        col1, row1 = to_pixel * (bounds[2], bounds[3])
        xmin, xmax = sorted((round(col0), round(col1)))
        ymin, ymax = sorted((round(row0), round(row1)))
        return [xmin, ymin, xmax, ymax]

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(img, target)``.

        ``target`` is a dict with the keys ``boxes`` (float32, shape (N, 4)),
        ``labels`` (int64, all ones), ``image_id``, ``area`` (float32) and
        ``iscrowd`` (int64, all zeros).
        """
        with fiona.open(self._path(idx, "_anno.geojson"), "r") as shapefile:
            shapes = [feature["geometry"] for feature in shapefile]

        # The raster is opened with rasterio only to obtain the inverse of its
        # transform, which is used to map annotation bounds into pixel space.
        with rasterio.open(self._path(idx, "_PAN.tif")) as src:
            to_pixel = ~src.transform

        # Convert inside the `with` so the underlying file handle is released.
        with Image.open(self._path(idx, "_PAN.tif")) as pan:
            img = pan.convert("L")

        boxes = []
        areas = []
        for shape in shapes:
            # NOTE(review): `rasterio.features` is not imported explicitly in
            # this notebook; it is presumably loaded as a side effect of
            # `import rasterio.mask` — confirm, or import it explicitly.
            bounds = rasterio.features.bounds(shape, transform=None)
            xmin, ymin, xmax, ymax = self._pixel_box(bounds, to_pixel)
            # Drop boxes that collapse to zero width or height after rounding;
            # the detector requires x1 < x2 and y1 < y2.
            if xmax <= xmin or ymax <= ymin:
                continue
            boxes.append([xmin, ymin, xmax, ymax])
            areas.append((xmax - xmin) * (ymax - ymin))

        num_objs = len(boxes)
        # reshape keeps the (N, 4) layout even when there are no boxes at all.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        areas = torch.as_tensor(areas, dtype=torch.float32)

        target = {}
        target["boxes"] = boxes
        # there is only one class
        target["labels"] = torch.ones((num_objs,), dtype=torch.int64)
        target["image_id"] = torch.tensor([idx])
        target["area"] = areas
        # suppose all instances are not crowd
        target["iscrowd"] = torch.zeros((num_objs,), dtype=torch.int64)

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        """Return the number of image ids in the dataset."""
        return len(self.imgs)

## Load PyTorch Pretrained Model

In [5]:
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
      
def build_model(num_classes):
    """Build a Faster R-CNN detector (ResNet-50 FPN) with a new box predictor.

    The network is created with ``pretrained=True`` and
    ``trainable_backbone_layers=2``; its box predictor is then replaced by a
    fresh ``FastRCNNPredictor`` that outputs ``num_classes`` classes.

    Args:
        num_classes: Number of classes the new predictor should output.

    Returns:
        The model with the replaced box predictor.
    """
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(
        pretrained=True, trainable_backbone_layers=2)

    # The new head must accept the same feature width as the head it replaces.
    old_head = detector.roi_heads.box_predictor
    detector.roi_heads.box_predictor = FastRCNNPredictor(
        old_head.cls_score.in_features, num_classes)

    return detector

## Split Train and Validation sets and Create Data Loaders

In [6]:
from engine import train_one_epoch, evaluate
import utils
import transforms as T

# Number of images held out for evaluation.
TEST_EXAMPLES = 100
# Number of images used overall (training + evaluation).
TRAINING_TEST_EXAMPLES = total_files

def get_transform(train):
    """Compose the transforms applied to every ``(image, target)`` sample.

    Args:
        train: When truthy, a random horizontal flip with probability 0.5 is
            appended as data augmentation.

    Returns:
        A ``T.Compose`` that starts with ``T.ToTensor()``.
    """
    # ToTensor comes first: it converts the PIL image into a PyTorch tensor.
    steps = [T.ToTensor()]
    if train:
        # Augmentation for training only.
        steps.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(steps)

# Two views over the same image ids: the training view adds the training
# transforms, the evaluation view uses the non-training ones.
dataset = CircleFinderDataset(directories, get_transform(train=True))
dataset_test = CircleFinderDataset(directories, get_transform(train=False))

# Shuffle once with a fixed seed so the split is reproducible.
torch.manual_seed(1)
indices = torch.randperm(len(dataset)).tolist()[:TRAINING_TEST_EXAMPLES]

# Split at an explicit position instead of with negative slices: with
# TEST_EXAMPLES == 0, `indices[:-0]` is empty and `indices[-0:]` is everything,
# which would silently invert the split.
n_test = min(max(TEST_EXAMPLES, 0), len(indices))
split_at = len(indices) - n_test
if split_at == 0:
    # Fail here with a clear message rather than later inside the DataLoader.
    raise ValueError(
        "No training examples left: {} image(s) available, {} requested for testing".format(
            len(indices), TEST_EXAMPLES))
dataset = torch.utils.data.Subset(dataset, indices[:split_at])
dataset_test = torch.utils.data.Subset(dataset_test, indices[split_at:])

# define training and validation data loaders
data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=1, shuffle=True, num_workers=0,
    collate_fn=utils.collate_fn)

data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=1, shuffle=False, num_workers=0,
    collate_fn=utils.collate_fn)

## Choose Hyperparameters

In [7]:
# Train on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Two classes only: background and circle.
num_classes = 2

# Build the detector with the helper and place it on the chosen device.
model = build_model(num_classes)
model.to(device)

# Hand the optimizer only the parameters that require gradients.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(
    params, lr=0.005, momentum=0.8, weight_decay=0.00001)

# Step schedule: the learning rate is multiplied by 0.1 every 3 epochs.
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer, step_size=3, gamma=0.1)

## Train Model

In [8]:
import time
# Total number of passes over the training loader.
num_epochs = 7

start_time = time.time()
for epoch in range(num_epochs):
    # One training pass, logging every 100 iterations.
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=100)
    # Advance the learning-rate schedule once per epoch.
    lr_scheduler.step()
    # Evaluate the current weights on the test loader.
    evaluate(model, data_loader_test, device=device)

# Wall-clock duration of the whole run, in seconds.
elapsed_time = time.time() - start_time

Epoch: [0]  [   0/3792]  eta: 1:48:07  lr: 0.000010  loss: 0.9363 (0.9363)  loss_classifier: 0.8102 (0.8102)  loss_box_reg: 0.1200 (0.1200)  loss_objectness: 0.0022 (0.0022)  loss_rpn_box_reg: 0.0039 (0.0039)  time: 1.7108  data: 0.0738  max mem: 818
Epoch: [0]  [ 100/3792]  eta: 0:22:02  lr: 0.000509  loss: 0.2467 (0.5099)  loss_classifier: 0.0996 (0.2149)  loss_box_reg: 0.1221 (0.1201)  loss_objectness: 0.0041 (0.1532)  loss_rpn_box_reg: 0.0036 (0.0217)  time: 0.3442  data: 0.0236  max mem: 1322
Epoch: [0]  [ 200/3792]  eta: 0:21:22  lr: 0.001009  loss: 0.1746 (0.4164)  loss_classifier: 0.0452 (0.1506)  loss_box_reg: 0.0944 (0.1338)  loss_objectness: 0.0061 (0.1100)  loss_rpn_box_reg: 0.0037 (0.0221)  time: 0.3626  data: 0.0335  max mem: 1461
Epoch: [0]  [ 300/3792]  eta: 0:20:35  lr: 0.001508  loss: 0.1009 (0.3532)  loss_classifier: 0.0361 (0.1209)  loss_box_reg: 0.0455 (0.1178)  loss_objectness: 0.0101 (0.0952)  loss_rpn_box_reg: 0.0025 (0.0193)  time: 0.3392  data: 0.0248  max mem

Epoch: [0]  [3300/3792]  eta: 0:02:55  lr: 0.005000  loss: 0.1726 (0.1926)  loss_classifier: 0.0595 (0.0613)  loss_box_reg: 0.0496 (0.0763)  loss_objectness: 0.0190 (0.0403)  loss_rpn_box_reg: 0.0018 (0.0146)  time: 0.3605  data: 0.0238  max mem: 2438
Epoch: [0]  [3400/3792]  eta: 0:02:19  lr: 0.005000  loss: 0.0738 (0.1909)  loss_classifier: 0.0167 (0.0608)  loss_box_reg: 0.0353 (0.0759)  loss_objectness: 0.0045 (0.0398)  loss_rpn_box_reg: 0.0018 (0.0145)  time: 0.3543  data: 0.0296  max mem: 2438
Epoch: [0]  [3500/3792]  eta: 0:01:43  lr: 0.005000  loss: 0.0372 (0.1897)  loss_classifier: 0.0138 (0.0605)  loss_box_reg: 0.0229 (0.0758)  loss_objectness: 0.0013 (0.0390)  loss_rpn_box_reg: 0.0013 (0.0144)  time: 0.3461  data: 0.0261  max mem: 2438
Epoch: [0]  [3600/3792]  eta: 0:01:08  lr: 0.005000  loss: 0.0587 (0.1897)  loss_classifier: 0.0352 (0.0606)  loss_box_reg: 0.0250 (0.0760)  loss_objectness: 0.0036 (0.0386)  loss_rpn_box_reg: 0.0031 (0.0145)  time: 0.3719  data: 0.0442  max me

Epoch: [1]  [2100/3792]  eta: 0:09:45  lr: 0.005000  loss: 0.0293 (0.1315)  loss_classifier: 0.0128 (0.0426)  loss_box_reg: 0.0185 (0.0587)  loss_objectness: 0.0030 (0.0191)  loss_rpn_box_reg: 0.0007 (0.0110)  time: 0.3404  data: 0.0167  max mem: 2438
Epoch: [1]  [2200/3792]  eta: 0:09:11  lr: 0.005000  loss: 0.1317 (0.1327)  loss_classifier: 0.0428 (0.0432)  loss_box_reg: 0.0691 (0.0596)  loss_objectness: 0.0012 (0.0188)  loss_rpn_box_reg: 0.0046 (0.0111)  time: 0.3601  data: 0.0232  max mem: 2438
Epoch: [1]  [2300/3792]  eta: 0:08:37  lr: 0.005000  loss: 0.0914 (0.1328)  loss_classifier: 0.0313 (0.0430)  loss_box_reg: 0.0332 (0.0593)  loss_objectness: 0.0202 (0.0193)  loss_rpn_box_reg: 0.0023 (0.0112)  time: 0.3398  data: 0.0218  max mem: 2438
Epoch: [1]  [2400/3792]  eta: 0:08:02  lr: 0.005000  loss: 0.1218 (0.1337)  loss_classifier: 0.0402 (0.0437)  loss_box_reg: 0.0402 (0.0594)  loss_objectness: 0.0149 (0.0195)  loss_rpn_box_reg: 0.0018 (0.0111)  time: 0.3577  data: 0.0259  max me

Epoch: [2]  [ 900/3792]  eta: 0:16:35  lr: 0.005000  loss: 0.0428 (0.1192)  loss_classifier: 0.0153 (0.0396)  loss_box_reg: 0.0269 (0.0583)  loss_objectness: 0.0007 (0.0111)  loss_rpn_box_reg: 0.0011 (0.0103)  time: 0.3397  data: 0.0164  max mem: 2438
Epoch: [2]  [1000/3792]  eta: 0:16:03  lr: 0.005000  loss: 0.0363 (0.1199)  loss_classifier: 0.0128 (0.0396)  loss_box_reg: 0.0138 (0.0583)  loss_objectness: 0.0008 (0.0113)  loss_rpn_box_reg: 0.0005 (0.0107)  time: 0.3333  data: 0.0165  max mem: 2438
Epoch: [2]  [1100/3792]  eta: 0:15:28  lr: 0.005000  loss: 0.0603 (0.1202)  loss_classifier: 0.0215 (0.0398)  loss_box_reg: 0.0340 (0.0578)  loss_objectness: 0.0012 (0.0114)  loss_rpn_box_reg: 0.0011 (0.0113)  time: 0.3565  data: 0.0282  max mem: 2438
Epoch: [2]  [1200/3792]  eta: 0:14:56  lr: 0.005000  loss: 0.0581 (0.1215)  loss_classifier: 0.0198 (0.0398)  loss_box_reg: 0.0381 (0.0588)  loss_objectness: 0.0003 (0.0115)  loss_rpn_box_reg: 0.0023 (0.0114)  time: 0.3474  data: 0.0189  max me

Epoch: [3]  [   0/3792]  eta: 0:22:14  lr: 0.000500  loss: 0.0591 (0.0591)  loss_classifier: 0.0144 (0.0144)  loss_box_reg: 0.0427 (0.0427)  loss_objectness: 0.0015 (0.0015)  loss_rpn_box_reg: 0.0005 (0.0005)  time: 0.3520  data: 0.0150  max mem: 2438
Epoch: [3]  [ 100/3792]  eta: 0:21:42  lr: 0.000500  loss: 0.0457 (0.1079)  loss_classifier: 0.0111 (0.0354)  loss_box_reg: 0.0252 (0.0527)  loss_objectness: 0.0006 (0.0123)  loss_rpn_box_reg: 0.0006 (0.0076)  time: 0.3494  data: 0.0272  max mem: 2438
Epoch: [3]  [ 200/3792]  eta: 0:21:00  lr: 0.000500  loss: 0.0483 (0.1200)  loss_classifier: 0.0154 (0.0383)  loss_box_reg: 0.0261 (0.0581)  loss_objectness: 0.0018 (0.0118)  loss_rpn_box_reg: 0.0007 (0.0118)  time: 0.3502  data: 0.0252  max mem: 2438
Epoch: [3]  [ 300/3792]  eta: 0:20:20  lr: 0.000500  loss: 0.0287 (0.1070)  loss_classifier: 0.0120 (0.0341)  loss_box_reg: 0.0167 (0.0541)  loss_objectness: 0.0010 (0.0095)  loss_rpn_box_reg: 0.0002 (0.0094)  time: 0.3609  data: 0.0231  max me

Epoch: [3]  [3300/3792]  eta: 0:02:54  lr: 0.000500  loss: 0.0364 (0.0924)  loss_classifier: 0.0106 (0.0298)  loss_box_reg: 0.0292 (0.0478)  loss_objectness: 0.0005 (0.0076)  loss_rpn_box_reg: 0.0004 (0.0073)  time: 0.3735  data: 0.0313  max mem: 2438
Epoch: [3]  [3400/3792]  eta: 0:02:19  lr: 0.000500  loss: 0.0272 (0.0920)  loss_classifier: 0.0099 (0.0296)  loss_box_reg: 0.0157 (0.0476)  loss_objectness: 0.0007 (0.0075)  loss_rpn_box_reg: 0.0005 (0.0073)  time: 0.3495  data: 0.0205  max mem: 2438
Epoch: [3]  [3500/3792]  eta: 0:01:43  lr: 0.000500  loss: 0.0217 (0.0919)  loss_classifier: 0.0094 (0.0296)  loss_box_reg: 0.0095 (0.0475)  loss_objectness: 0.0004 (0.0075)  loss_rpn_box_reg: 0.0004 (0.0073)  time: 0.3521  data: 0.0237  max mem: 2438
Epoch: [3]  [3600/3792]  eta: 0:01:08  lr: 0.000500  loss: 0.0446 (0.0925)  loss_classifier: 0.0168 (0.0298)  loss_box_reg: 0.0283 (0.0478)  loss_objectness: 0.0005 (0.0075)  loss_rpn_box_reg: 0.0009 (0.0074)  time: 0.3871  data: 0.0402  max me

Epoch: [4]  [2100/3792]  eta: 0:10:01  lr: 0.000500  loss: 0.0621 (0.0877)  loss_classifier: 0.0172 (0.0285)  loss_box_reg: 0.0292 (0.0469)  loss_objectness: 0.0014 (0.0061)  loss_rpn_box_reg: 0.0006 (0.0062)  time: 0.3686  data: 0.0349  max mem: 2438
Epoch: [4]  [2200/3792]  eta: 0:09:25  lr: 0.000500  loss: 0.0320 (0.0874)  loss_classifier: 0.0156 (0.0284)  loss_box_reg: 0.0143 (0.0467)  loss_objectness: 0.0010 (0.0061)  loss_rpn_box_reg: 0.0003 (0.0063)  time: 0.3580  data: 0.0249  max mem: 2438
Epoch: [4]  [2300/3792]  eta: 0:08:49  lr: 0.000500  loss: 0.0403 (0.0864)  loss_classifier: 0.0122 (0.0282)  loss_box_reg: 0.0210 (0.0461)  loss_objectness: 0.0006 (0.0060)  loss_rpn_box_reg: 0.0006 (0.0061)  time: 0.3497  data: 0.0207  max mem: 2438
Epoch: [4]  [2400/3792]  eta: 0:08:14  lr: 0.000500  loss: 0.0407 (0.0861)  loss_classifier: 0.0140 (0.0280)  loss_box_reg: 0.0202 (0.0461)  loss_objectness: 0.0005 (0.0059)  loss_rpn_box_reg: 0.0009 (0.0061)  time: 0.3525  data: 0.0197  max me

Epoch: [5]  [ 900/3792]  eta: 0:16:59  lr: 0.000500  loss: 0.0280 (0.0806)  loss_classifier: 0.0081 (0.0268)  loss_box_reg: 0.0132 (0.0437)  loss_objectness: 0.0003 (0.0052)  loss_rpn_box_reg: 0.0005 (0.0050)  time: 0.3364  data: 0.0156  max mem: 2438
Epoch: [5]  [1000/3792]  eta: 0:16:19  lr: 0.000500  loss: 0.0213 (0.0796)  loss_classifier: 0.0082 (0.0263)  loss_box_reg: 0.0153 (0.0433)  loss_objectness: 0.0003 (0.0050)  loss_rpn_box_reg: 0.0004 (0.0050)  time: 0.3367  data: 0.0188  max mem: 2438
Epoch: [5]  [1100/3792]  eta: 0:15:44  lr: 0.000500  loss: 0.0622 (0.0804)  loss_classifier: 0.0125 (0.0264)  loss_box_reg: 0.0396 (0.0439)  loss_objectness: 0.0007 (0.0050)  loss_rpn_box_reg: 0.0007 (0.0050)  time: 0.3534  data: 0.0212  max mem: 2438
Epoch: [5]  [1200/3792]  eta: 0:15:09  lr: 0.000500  loss: 0.0160 (0.0815)  loss_classifier: 0.0075 (0.0266)  loss_box_reg: 0.0100 (0.0445)  loss_objectness: 0.0003 (0.0051)  loss_rpn_box_reg: 0.0003 (0.0053)  time: 0.3418  data: 0.0218  max me

Epoch: [6]  [   0/3792]  eta: 0:24:42  lr: 0.000050  loss: 0.0661 (0.0661)  loss_classifier: 0.0324 (0.0324)  loss_box_reg: 0.0315 (0.0315)  loss_objectness: 0.0003 (0.0003)  loss_rpn_box_reg: 0.0020 (0.0020)  time: 0.3910  data: 0.0580  max mem: 2438
Epoch: [6]  [ 100/3792]  eta: 0:21:28  lr: 0.000050  loss: 0.0505 (0.1075)  loss_classifier: 0.0166 (0.0327)  loss_box_reg: 0.0341 (0.0589)  loss_objectness: 0.0002 (0.0067)  loss_rpn_box_reg: 0.0007 (0.0092)  time: 0.3458  data: 0.0241  max mem: 2438
Epoch: [6]  [ 200/3792]  eta: 0:20:35  lr: 0.000050  loss: 0.0288 (0.0877)  loss_classifier: 0.0106 (0.0268)  loss_box_reg: 0.0132 (0.0486)  loss_objectness: 0.0005 (0.0051)  loss_rpn_box_reg: 0.0005 (0.0073)  time: 0.3380  data: 0.0214  max mem: 2438
Epoch: [6]  [ 300/3792]  eta: 0:20:07  lr: 0.000050  loss: 0.0384 (0.0906)  loss_classifier: 0.0114 (0.0273)  loss_box_reg: 0.0197 (0.0473)  loss_objectness: 0.0002 (0.0068)  loss_rpn_box_reg: 0.0012 (0.0092)  time: 0.3510  data: 0.0245  max me

Epoch: [6]  [3300/3792]  eta: 0:02:50  lr: 0.000050  loss: 0.0386 (0.0817)  loss_classifier: 0.0132 (0.0259)  loss_box_reg: 0.0308 (0.0445)  loss_objectness: 0.0002 (0.0050)  loss_rpn_box_reg: 0.0005 (0.0063)  time: 0.3715  data: 0.0326  max mem: 2438
Epoch: [6]  [3400/3792]  eta: 0:02:16  lr: 0.000050  loss: 0.0301 (0.0810)  loss_classifier: 0.0079 (0.0257)  loss_box_reg: 0.0171 (0.0442)  loss_objectness: 0.0003 (0.0049)  loss_rpn_box_reg: 0.0003 (0.0062)  time: 0.3416  data: 0.0187  max mem: 2438
Epoch: [6]  [3500/3792]  eta: 0:01:41  lr: 0.000050  loss: 0.0230 (0.0811)  loss_classifier: 0.0057 (0.0257)  loss_box_reg: 0.0163 (0.0444)  loss_objectness: 0.0002 (0.0049)  loss_rpn_box_reg: 0.0005 (0.0061)  time: 0.3487  data: 0.0193  max mem: 2438
Epoch: [6]  [3600/3792]  eta: 0:01:06  lr: 0.000050  loss: 0.0271 (0.0809)  loss_classifier: 0.0123 (0.0257)  loss_box_reg: 0.0120 (0.0442)  loss_objectness: 0.0004 (0.0049)  loss_rpn_box_reg: 0.0007 (0.0061)  time: 0.3585  data: 0.0180  max me

In [9]:
# Break the measured duration into hours, minutes and seconds and print it as
# HH:MM:SS.ss.
hours, rem = divmod(elapsed_time, 3600)
minutes, seconds = divmod(rem, 60)
duration_fields = (int(hours), int(minutes), seconds)
print("{:0>2}:{:0>2}:{:05.2f}".format(*duration_fields))

02:46:15.07


In [10]:
# Write the trained model object itself (not only a state_dict) to disk.
# NOTE(review): a whole-object save is pickle-based, so loading it presumably
# needs the same torch/torchvision class definitions to be importable —
# confirm on the loading side before changing the format.
torch.save(model, 'circledetectionModel.pt')