In [None]:
# Based on https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html

In [1]:
import os

import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2
from datetime import datetime
from dotenv import load_dotenv
import json
import matplotlib.pyplot as plt
import numpy as np
import torch
from torchvision.utils import draw_bounding_boxes
import torchvision.transforms.functional as F
from torchvision.transforms import ToTensor

from koger_detection.obj_det.engine import train, get_detection_model
from koger_detection.obj_det.mydatasets import CocoDetection
from koger_detection.utils.dataset import get_ious

ModuleNotFoundError: No module named 'albumentations'

In [2]:
# im = np.zeros((1024, 1024, 3), dtype=np.uint8)
# score_thresh = .7
# boxes_drawn = 10

# _ = cv2.putText(im, f"score thresh: {score_thresh}. Detections: {boxes_drawn}", 
#             org=[20, 50], fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, 
#             color=[255, 255, 255], thickness=2, lineType=cv2.LINE_AA)
# plt.imshow(im)

### Define training config

In [3]:
# Assumes we have a local .env file that stores things like ROOT
load_dotenv()

# Fail fast with a readable message: without this check a missing ROOT
# only surfaces as an opaque TypeError inside os.path.join(None, ...).
root = os.environ.get("ROOT")
if root is None:
    raise RuntimeError(
        "Environment variable ROOT is not set; define it in the shell "
        "or in a local .env file before running this notebook."
    )
project = "high-elevation"

# Image directory and COCO-style annotation files for the chosen project
image_folder = os.path.join(root, "data", project, "images")
train_json_path = os.path.join(root, "data", project, "train_annotations_2_12_24.json")
val_json_path = os.path.join(root, "data", project, "val_annotations_4_24_23.json")
# train_json_path = os.path.join(root, "data", "train_annotations_gt_178_anns.json")
# val_json_path = os.path.join(root, "data", "train_annotations_gt_178_anns.json")


# Where info about the training run will be saved (including run cfg).
# The folder name carries a timestamp, so every execution of this cell
# creates a fresh directory (os.makedirs raises if it already exists).
now = datetime.now() # current date and time
date_time = now.strftime("%m-%d-%Y-%H-%M-%S")
run_folder = f"/home/koger/Dropbox/UWWRF/detection/high-altitude/runs/main/high-salmon-{date_time}"
os.makedirs(run_folder)


# Detector hyper-parameters. The whole dict is unpacked as keyword
# arguments into get_detection_model when the model is created.
model_cfg = {
    'model_type': "bbox_v2",
    'num_classes': 3,  # Background counts as a class (0)
    'trainable_backbone_layers': 5,
    'rpn_batch_size_per_image': 256,
    'rpn_pre_nms_top_n_train': 4000,
    'rpn_post_nms_top_n_train': 2000,
    'rpn_pre_nms_top_n_test': 4000,
    'rpn_post_nms_top_n_test': 2000,
    'box_detections_per_img': 700,
    'box_nms_thresh': 0.7,
    'box_batch_size_per_image': 512,
    'box_positive_fraction': 0.5,
    'fixed_size': [1024, 1024],
}

# Optimizer settings; everything except 'name' is forwarded to torch.optim.SGD.
optimizer_cfg = {
    'name': 'SGD',
    'lr': 0.005,
    'momentum': 0.9,
    'weight_decay': 0.0005,
}

# Scheduler settings; 'name' selects the scheduler class in get_lr_scheduler
# and the remaining entries are passed on as keyword arguments.
lr_scheduler_cfg = {
    'name': 'ReduceOnPlateau',
    'patience': 4,
    'factor': 0.3,
}

# Data locations and training-loop settings.
training_cfg = {
    'image_folder': image_folder,
    'train_json_path': train_json_path,
    'val_json_path': val_json_path,
    'batch_size': 4,
    'num_workers': 4,
    'num_epochs': 30,
    'run_folder': run_folder,
    'epochs_per_val': 1,
    'optimizer': optimizer_cfg,
    'lr_scheduler': lr_scheduler_cfg,
}

# Full run configuration (also serialised to cfg.json in the run folder).
cfg = {
    'model': model_cfg,
    'training': training_cfg,
}

# ---------- DEFINE AUGMENTATIONS -----------------------------

# Shared bounding-box handling for both pipelines: boxes use the
# 'pascal_voc' format and 'class_labels' / 'area' are declared as label fields.
bbox_params = A.BboxParams(format='pascal_voc',
                           label_fields=['class_labels', 'area'])

# Training pipeline: flips, random rescale, pad + crop to 1024x1024,
# photometric jitter, occasional blur, then conversion to a tensor.
train_steps = [
    A.ToFloat(max_value=255),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=.3),
    A.geometric.resize.RandomScale(.2, interpolation=cv2.INTER_LINEAR, p=.75),
    A.geometric.transforms.PadIfNeeded(
        min_height=1024, min_width=1024,
        border_mode=cv2.BORDER_CONSTANT, value=0, p=1.0),
    A.crops.transforms.RandomCrop(1024, 1024, p=1.0),
    A.RandomBrightnessContrast(
        brightness_limit=.3, contrast_limit=.1,
        brightness_by_max=True, p=.75),
    A.Blur(p=.1),
    ToTensorV2(),
]
train_data_transform = A.Compose(train_steps, bbox_params=bbox_params)

# Validation pipeline: only float conversion and tensor conversion.
val_steps = [
    A.ToFloat(max_value=255),
    ToTensorV2(),
]
val_data_transform = A.Compose(val_steps, bbox_params=bbox_params)

# Record the serialised pipelines in the run config.
for aug_key, pipeline in (('train_aug', train_data_transform),
                          ('val_aug', val_data_transform)):
    cfg[aug_key] = pipeline.to_dict()

# Free-text note describing this run; stored together with the config.
cfg['readme'] = "First training with train_annotations_2_12_24. Using full current augmentaion regieme."
# --------- SAVE CFG ------------------------

# The config file lives inside this run's output folder
run_dir = cfg['training']['run_folder']
cfg_path = os.path.join(run_dir, "cfg.json")

# Serialise the whole config dictionary and write it out as JSON
with open(cfg_path, 'w') as cfg_file:
    cfg_file.write(json.dumps(cfg))

In [4]:
# This is important for data augmentation to work properly
# with parallel data loading. By default, when each worker process
# is created in parallel, its seed is initialized to the same value,
# so augmentation won't be random across images in a batch
# (specifically for numpy-based preprocessing).
def worker_init_fn(worker_id):
    """Give each DataLoader worker its own numpy random seed.

    Intended to be passed as ``worker_init_fn`` to
    ``torch.utils.data.DataLoader``.

    Args:
        worker_id: Integer id of the worker process being initialised.

    The seed mixes the current microsecond with the worker id, so it is
    different per worker but not reproducible between runs.
    """
    # `datetime` is imported as the class (`from datetime import datetime`),
    # so the call is `datetime.now()`; `datetime.datetime.now()` raises
    # AttributeError with that import.
    seed = datetime.now().microsecond + worker_id * 1000000
    # np.random.seed only accepts values in [0, 2**32 - 1]
    np.random.seed(seed % (2 ** 32))

# val_dataloader = data.DataLoader(val_dataset, batch_size=batch_size,
#                                  shuffle=True, num_workers=7, pin_memory=True,
#                                  worker_init_fn=worker_init_fn)

# worker_init_fn=worker_init_fn

### Create training objects

In [5]:
def get_lr_scheduler(optimizer, name, **cfg_lr):
    """Build the learning-rate scheduler selected by ``name``.

    Args:
        optimizer: Optimizer whose learning rate will be scheduled.
        name: Either "StepLR" or "ReduceOnPlateau".
        **cfg_lr: Extra keyword arguments forwarded to the scheduler
            constructor (e.g. ``step_size`` or ``patience``/``factor``).

    Returns:
        The constructed ``torch.optim.lr_scheduler`` object.

    Raises:
        ValueError: If ``name`` is not one of the supported schedulers.
            Previously an unknown name silently returned ``None``.
    """
    if name == "StepLR":
        return torch.optim.lr_scheduler.StepLR(optimizer, **cfg_lr)
    if name == "ReduceOnPlateau":
        return torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, verbose=True, **cfg_lr)
    raise ValueError(
        f"Unknown lr scheduler name {name!r}; "
        "expected 'StepLR' or 'ReduceOnPlateau'."
    )
        

In [6]:
# Build the detector from the model section of the config and collect
# the parameters that will actually be optimised.
model = get_detection_model(**cfg['model'])
params = [p for p in model.parameters() if p.requires_grad]
cfg_t = cfg['training']

# 'name' is not an SGD keyword argument, so it is removed before unpacking.
# The default makes the pop a no-op on a second execution of this cell
# (a bare pop('name') raises KeyError once the key is already gone).
cfg_t['optimizer'].pop('name', None) # TODO use name to choose optimizer in seperate function
optimizer = torch.optim.SGD(params, **cfg_t['optimizer'])

lr_scheduler = get_lr_scheduler(optimizer, **cfg_t['lr_scheduler'])

train_dataset = CocoDetection(image_folder,
                              train_json_path,
                              transform=train_data_transform)

val_dataset = CocoDetection(image_folder,
                            val_json_path,
                            transform=val_data_transform)

# Report dataset size and the largest number of boxes on a single sample
# for each split. Samples are read through the dataset, so the counts are
# taken after its transform has been applied.
for split_name, dataset in (("training", train_dataset),
                            ("validation", val_dataset)):
    num_annotations = [d[1]['boxes'].shape[0] for d in dataset]
    print(f"{len(dataset)} {split_name} images.")
    # default=0 keeps an empty split from raising
    print(f"max annoations per image {max(num_annotations, default=0)}")

loading annotations into memory...
Done (t=0.02s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
235 training images.
max annoations per image 327
36 validation images.
max annoations per image 188


In [9]:
# Show the learning rate currently set on the optimizer's first parameter group.
optimizer.param_groups[0]["lr"]

0.005

In [15]:
# Number of samples in the training dataset.
len(train_dataset)

235

### Visualize image and annotations

In [5]:
# Set to True to also write each rendered figure to disk.
save_fig = False

# One colour per foreground class; labels start at 1 because class 0 is
# the background, hence the `- 1` when indexing.
colors = [(230,215,255), (255, 0, 0)]

for im_ind in range(230, 233):
    # Each sample is (image tensor, target dict with 'boxes' and 'labels')
    ann = train_dataset[im_ind]

    boxes = ann[1]['boxes']
    labels = ann[1]['labels']
    box_colors = [colors[int(l) - 1] for l in labels]
    if len(boxes) > 0:
        # draw_bounding_boxes is given a uint8 image, so the float image
        # is rescaled by 255 first.
        display = draw_bounding_boxes((ann[0]*255).type(torch.uint8),
                                      boxes=boxes, colors=box_colors,
                                      width=4)

    else:
        display = ann[0]
        print("No annotations")

    plt.figure(figsize=(5, 5))
    plt.imshow(F.to_pil_image(display))
    plt.axis('off')
    if save_fig:
        # The file name carries the image index; a fixed name would make
        # every iteration overwrite the previous figure.
        plt.savefig(f"...Dropbox/UWWRF/detection/high-altitude/media/bounding-boxes-train-{im_ind}.jpg", dpi=300,
                   bbox_inches="tight", pad_inches=0)

In [12]:
def collate_fn(batch):
    """Transpose a batch of samples into per-field tuples.

    A list such as ``[(img0, tgt0), (img1, tgt1)]`` becomes
    ``((img0, img1), (tgt0, tgt1))``. Nothing is stacked, so samples in
    a batch do not need to share a shape.
    """
    per_field = []
    for field_values in zip(*batch):
        per_field.append(field_values)
    return tuple(per_field)

In [13]:
# Loader over the (augmented) training set, used to preview batches.
# Batch size and worker count come from the training config; collate_fn
# keeps the samples of a batch as tuples instead of stacking them.
preview_cfg = cfg['training']
data_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=preview_cfg['batch_size'],
    shuffle=False,
    num_workers=preview_cfg['num_workers'],
    collate_fn=collate_fn,
)

In [4]:
# Preview the loader output: one figure per image for batches
# start <= ind < end, with the centre of every target box marked in blue.
start = 0
end = 20
for ind, (images, targets) in enumerate(data_loader):
    if ind < start:
        continue
    if ind >= end:
        # Leave the batch loop itself, so no further batches are loaded.
        break
    # Walk over the samples of the batch. Enumerating the
    # (images, targets) pair instead would always yield exactly two
    # indices regardless of the batch size.
    for batch_ind, image in enumerate(images):
        # CHW tensor -> HWC array for matplotlib
        plt.figure(figsize=(5,5))
        plt.imshow(image.permute(1, 2, 0).numpy())
        plt.title(ind)

        # reshape keeps the column indexing valid when a sample has no boxes
        boxes = targets[batch_ind]['boxes'].reshape(-1, 4).numpy()
        plt.scatter((boxes[:, 0] + boxes[:, 2]) / 2,
                    (boxes[:, 1] + boxes[:, 3]) / 2, s=3, c='b')

In [15]:
# Launch training with the run config, the model, its optimizer and LR
# scheduler, and both augmentation pipelines.
# NOTE(review): the cell output shows the annotation files being loaded
# again, so train() presumably builds its own datasets/loaders from cfg —
# confirm in koger_detection.obj_det.engine.
train(cfg, model, optimizer, lr_scheduler, train_data_transform,
      val_data_transform)

loading annotations into memory...
Done (t=0.02s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Epoch: [0]  [ 0/59]  eta: 0:01:19  lr: 0.000091  loss: 2.6334 (2.6334)  loss_classifier: 0.8659 (0.8659)  loss_box_reg: 0.1056 (0.1056)  loss_objectness: 1.4432 (1.4432)  loss_rpn_box_reg: 0.2187 (0.2187)  time: 1.3411  data: 0.1908  max mem: 10683
Epoch: [0]  [50/59]  eta: 0:00:03  lr: 0.004397  loss: 1.0494 (1.6117)  loss_classifier: 0.2321 (0.4272)  loss_box_reg: 0.5409 (0.5074)  loss_objectness: 0.1153 (0.5283)  loss_rpn_box_reg: 0.0810 (0.1489)  time: 0.3775  data: 0.0021  max mem: 12320
Epoch: [0]  [58/59]  eta: 0:00:00  lr: 0.005000  loss: 1.0544 (1.5503)  loss_classifier: 0.2256 (0.4024)  loss_box_reg: 0.6244 (0.5398)  loss_objectness: 0.0821 (0.4666)  loss_rpn_box_reg: 0.0898 (0.1415)  time: 0.3748  data: 0.0021  max mem: 12320
Epoch: [0] Total time: 0:00:23 (0.3925 s / it)
Test:  [ 0/36]  eta: 0:00:14  model_time

In [17]:
# Loader over the validation set, one image per batch. This rebinds
# `data_loader`, which previously referred to the training preview loader.
# No custom collate_fn is passed here (collate_fn=collate_fn was tried and
# dropped), so PyTorch's default collation is used.
data_loader = torch.utils.data.DataLoader(
    dataset=val_dataset,
    batch_size=1,
    shuffle=False,
    num_workers=4,
)

In [18]:
# Put the model into evaluation mode before running predictions; as the
# last expression of the cell, the returned module is also displayed.
model.eval()

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
       

### Compare ground truth to predictions

In [2]:
# Only predictions with at least this confidence are drawn.
score_thresh = .7
# Run on whatever device the model already lives on instead of assuming CUDA.
device = next(model.parameters()).device

for ind, (images, tar) in enumerate(data_loader):
    images = [img.to(device) for img in images]
    # Inference only: no_grad avoids building an autograd graph, so the
    # outputs need no detach() and memory use stays low.
    with torch.no_grad():
        res = model(images)[0]
    boxes = res['boxes'].cpu().numpy().astype(np.uint32)
    scores = res['scores'].cpu().numpy()
    # transpose() returns a non-contiguous view, which cv2 drawing
    # functions cannot write into; ascontiguousarray makes a proper
    # HWC array (this is why the earlier `.copy()` was needed).
    im = np.ascontiguousarray(images[0].cpu().numpy().transpose(1, 2, 0))
    im_preds = im.copy()
    # Filled magenta dot at the centre of every confident prediction
    for box, score in zip(boxes, scores):
        if score < score_thresh:
            continue
        x = np.mean([box[0], box[2]])
        y = np.mean([box[1], box[3]])
        cv2.circle(im_preds, (int(x), int(y)), 3, (1.0, 0, 1.0), -1)
    # White ring at the centre of every ground-truth box. The loader uses
    # default collation with batch_size=1, hence the leading [0] index.
    for box in tar['boxes'][0]:
        box = box.numpy()
        x = np.mean([box[0], box[2]])
        y = np.mean([box[1], box[3]])
        cv2.circle(im_preds, (int(x), int(y)), 6, (1.0, 1.0, 1.0), 1)

    plt.figure(figsize=(10,10))
    plt.imshow(im_preds)
    # Total number of boxes returned by the model (before thresholding)
    print(len(boxes))