<a href="https://colab.research.google.com/github/cjdolan/HighResolutionSemanticClassification/blob/main/BiFPN_RemoteSensingObjectDetection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

R-CNN with BiFPN Notebook
Author: Connor Dolan

In [None]:
!pip install rasterio

In [None]:
import pandas as pd
import requests
from collections import defaultdict
import time
from tqdm import tqdm
from urllib.parse import urljoin
from shapely.geometry import Polygon
import numpy as np
from multiprocessing.pool import ThreadPool
import zipfile
from glob import glob
from google.colab import drive
drive.mount('/content/drive')
from google.colab import auth
import os
import shutil
from matplotlib import pyplot as plt
import random
from sklearn.model_selection import train_test_split
import rasterio
from ast import literal_eval
import cv2
from tqdm.notebook import tqdm

In [None]:
df = pd.read_csv('/content/drive/MyDrive/ATML_Dataset_Largest.csv')
df.head(11)

In [None]:
df['bbox'] = df['bbox'].apply(lambda x: literal_eval(x))
df['Class'] = df['Class'].apply(lambda x: literal_eval(x))

In [None]:
def correct_bbox(fileName, bbox, target_size=1500,
                 img_dir='/content/drive/MyDrive/ATMLData/atml_bucket/'):
  """Rescale bounding boxes from source-raster pixels to a square target grid.

  The GeoTIFF ``img_dir + fileName.lower() + '.tif'`` is opened only to read
  its height and width. Each box coordinate is divided by the matching
  dimension (x by width, y by height), multiplied by ``target_size``,
  truncated to ``int`` and clamped to ``[0, target_size]``.

  Args:
    fileName: Image name without extension; lower-cased to build the path.
    bbox: Iterable of boxes, each indexable as ``[x1, y1, x2, y2]``.
    target_size: Side length of the target grid (default 1500).
    img_dir: Directory prefix holding the ``.tif`` files.

  Returns:
    A list of ``[x1, y1, x2, y2]`` integer boxes, or ``[]`` when the raster
    cannot be opened or any box is malformed.
  """
  try:
    path = img_dir + fileName.lower() + '.tif'
    with rasterio.open(path) as img:
      # Dimensions are the same for every box, so read them once.
      height, width = img.shape[0], img.shape[1]
  except (AttributeError, TypeError, rasterio.errors.RasterioIOError):
    # Missing / unreadable raster or a non-string file name: no boxes.
    return []

  # Even indices are x coordinates (scaled by width), odd ones are y.
  extents = (width, height, width, height)
  new_bbox = []
  try:
    for box in bbox:
      scaled = []
      for i in range(4):
        value = int((float(box[i]) / float(extents[i])) * target_size)
        scaled.append(min(max(value, 0), target_size))
      new_bbox.append(scaled)
  except (TypeError, ValueError, IndexError, ZeroDivisionError):
    # Drop the whole record rather than return a partial list, so the boxes
    # never fall out of step with the per-box class labels.
    return []
  return new_bbox

# Quick smoke test of correct_bbox on one record.
# NOTE(review): the file name comes from row 4 but the boxes from row 8 —
# confirm the mismatch is intentional.
print(correct_bbox(df.at[4, 'fileName'], df.at[8, 'bbox']))

In [None]:
tqdm.pandas()

In [None]:
# Replace every row's boxes with their rescaled version (one raster open per
# row, shown with a tqdm progress bar). This overwrites the 'bbox' column in place.
df['bbox'] = df[['fileName', 'bbox']].progress_apply(lambda x: correct_bbox(x.fileName, x.bbox), axis=1)

In [None]:
df.to_csv('/content/drive/MyDrive/ATML_Dataset_1500.csv')

In [None]:
# Reload the rescaled dataset from Drive.
df = pd.read_csv('/content/drive/MyDrive/ATML_Dataset_1500.csv')
# Parse the stringified 'bbox' and 'Class' columns back into Python objects.
df['bbox'] = df['bbox'].apply(lambda x: literal_eval(x))
df['Class'] = df['Class'].apply(lambda x: literal_eval(x))
# Integer id for each class name.
encodings = {'airport':0, 'stadium':1, 'power_plant':2}

# 'Label' holds, per row, the list of class ids matching the names in 'Class'.
df['Label'] = df['Class'].apply(lambda x: [encodings[y] for y in x])

In [None]:
# Keep only the records that still have at least one bounding box
# (correct_bbox returns an empty list when it cannot process a record).
# A boolean mask replaces the row-by-row DataFrame growth: it is a single
# vectorised pass and keeps the values of every column; reset_index gives the
# result a fresh 0..n-1 index like the manually built frame had.
df = df[df['bbox'].map(len) > 0].reset_index(drop=True)

In [None]:
IMG_DIR = '/content/drive/MyDrive/NAIP_Lower_Res_1500_v3/'

In [None]:
!pip install d2l

In [None]:
from d2l import torch as d2l

def bbox_to_rect(bbox, color):
    """Build an unfilled matplotlib rectangle patch for a corner-format box.

    `bbox` is (upper-left x, upper-left y, lower-right x, lower-right y);
    matplotlib wants the upper-left corner plus a width and a height.
    """
    left, top = bbox[0], bbox[1]
    box_width = bbox[2] - left
    box_height = bbox[3] - top
    return d2l.plt.Rectangle(
        xy=(left, top), width=box_width, height=box_height,
        fill=False, edgecolor=color, linewidth=2)

In [None]:
# Record to preview.
idx = 60

# Load the record's PNG and keep only rows 905:946 / columns 502:527.
# NOTE(review): the rectangle drawn below uses the stored bbox coordinates
# without subtracting this crop offset — confirm the overlay lines up.
# NOTE(review): cv2.imread presumably returns BGR while imshow expects RGB —
# verify the displayed colours.
img = cv2.imread(IMG_DIR + df.at[idx, 'fileName'].lower() + '.png')[905:946,502:527]

fig, ax = plt.subplots()

# Display the image
ax.imshow(img)
# Overlay the record's first bounding box in blue.
ax.add_patch(bbox_to_rect(df.at[idx, 'bbox'][0], 'blue'))

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [None]:
def overlap(rect1,rect2):
  """Return True when two axis-aligned rectangles share at least one point.

  Both rectangles are ``[x1, y1, x2, y2]``; corner order within an axis does
  not matter. Rectangles that merely touch count as overlapping.

  The previous version listed the polygon vertices in an order that traced a
  self-intersecting "bowtie", so rectangles overlapping near the left or
  right edge were reported as disjoint. A direct interval test on each axis
  avoids building any geometry.

  Returns False for malformed input (not indexable, too short, or holding
  values that cannot be compared).
  """
  try:
    ax_lo, ax_hi = sorted((rect1[0], rect1[2]))
    ay_lo, ay_hi = sorted((rect1[1], rect1[3]))
    bx_lo, bx_hi = sorted((rect2[0], rect2[2]))
    by_lo, by_hi = sorted((rect2[1], rect2[3]))
    # Two closed intervals intersect iff each one starts before the other ends.
    return bool(ax_lo <= bx_hi and bx_lo <= ax_hi and
                ay_lo <= by_hi and by_lo <= ay_hi)
  except (TypeError, IndexError, KeyError, ValueError):
    return False

In [None]:
from torch.nn.utils.rnn import pad_sequence

In [None]:
class Compose(object):
    """Chain several paired (image, target) transforms into a single callable."""

    def __init__(self, transforms):
        # Sequence of callables, each mapping (image, target) -> (image, target).
        self.transforms = transforms

    def __call__(self, image, target):
        """Run every transform in order, feeding each one the previous result."""
        current = (image, target)
        for step in self.transforms:
            next_image, next_target = step(*current)
            current = (next_image, next_target)
        return current

In [None]:
%%shell
git clone https://github.com/pytorch/vision.git
cd vision
git checkout v0.15.1

cp references/detection/utils.py ../
cp references/detection/transforms.py ../
cp references/detection/coco_eval.py ../
cp references/detection/engine.py ../
cp references/detection/coco_utils.py ../

In [None]:
import transforms as T


def get_transform(train):
    """Build the image/target pipeline: tensor conversion, plus a random
    horizontal flip (p=0.5) when `train` is truthy."""
    steps = [T.PILToTensor()]
    if train:
      steps += [T.RandomHorizontalFlip(0.5)]
    return Compose(steps)

In [None]:
from PIL import Image

In [None]:
class RemoteDataset(Dataset):
  """Detection dataset serving CROP_SIZE x CROP_SIZE crops of large PNG tiles.

  Each record is one image plus its boxes (``[x1, y1, x2, y2]`` in full-image
  pixels) and per-box class ids. A crop window is placed at random so that it
  covers (as far as it fits) the record's first box; every box is then
  translated into crop coordinates and clipped to the crop.

  * ``validation=False``: full images are kept in memory and a new random
    crop is drawn on every ``__getitem__`` call.
  * ``validation=True``: one crop per image is drawn at construction time and
    reused, so validation samples stay fixed.

  Records whose image cannot be read, or that have no boxes, are skipped;
  their file names are collected in ``self.skipped``. ``self.images``,
  ``self.bboxes`` and ``self.classes`` hold only the kept records, so they
  stay index-aligned and ``len(dataset)`` matches what can be indexed.
  """

  IMAGE_SIZE = 1500   # side of the source images the boxes were rescaled to
  CROP_SIZE = 640     # side of the crops served to the model
  SHIFT_STEP = 100    # step used when sliding the window towards the box

  def __init__(self, fileNames, bboxes, classes, transformations=None, validation=False):
    """Load every readable image; in validation mode also pre-compute crops.

    Args:
      fileNames: Image names without extension (lower-cased, read from IMG_DIR).
      bboxes: Per-image list of ``[x1, y1, x2, y2]`` boxes.
      classes: Per-image list of class ids, index-aligned with ``bboxes``.
      transformations: Optional callable ``(image, target) -> (image, target)``.
      validation: When True, crops are generated once here.
    """
    self.images = []
    self.targets = []
    self.bboxes = []
    self.classes = []
    self.skipped = []
    self.validation = validation
    self.transforms = transformations
    for i, f in enumerate(tqdm(fileNames, position=0, leave=True)):
      # cv2.imread signals an unreadable / missing file by returning None.
      img = cv2.imread(IMG_DIR + f.lower() + '.png')
      if img is None or len(bboxes[i]) == 0:
        self.skipped.append(f)
        continue
      img = img / 255.0
      if validation:
        # len(self.images) is the index this sample will have in the dataset.
        img, target = self._make_sample(img, bboxes[i], classes[i], len(self.images))
        self.targets.append(target)
      self.images.append(img)
      self.bboxes.append(bboxes[i])
      self.classes.append(classes[i])

  @classmethod
  def _random_window_start(cls, lo, hi):
    """Pick the crop origin along one axis for a box spanning ``[lo, hi]``.

    A box nearer the low edge gets a random start at or before ``lo``, slid
    forward until the window reaches ``hi``; otherwise a random end at or
    after ``hi`` is slid backward until the window reaches ``lo``. The result
    is clamped so the window always lies inside the image (the unclamped
    search could produce a negative or too-large origin).
    """
    max_start = cls.IMAGE_SIZE - cls.CROP_SIZE
    if lo < (cls.IMAGE_SIZE - hi):
      start = random.randint(0, min(lo, max_start))
      while start + cls.CROP_SIZE < hi:
        start += cls.SHIFT_STEP
    else:
      end = random.randint(max(hi, cls.CROP_SIZE), cls.IMAGE_SIZE)
      while end - cls.CROP_SIZE > lo:
        end -= cls.SHIFT_STEP
      start = end - cls.CROP_SIZE
    return min(max(start, 0), max_start)

  @classmethod
  def _clip_boxes(cls, bboxes, labels, start_x, start_y):
    """Translate boxes into crop coordinates and clip them to the crop.

    Boxes left with no positive width or height (outside the crop) are
    dropped. That extent check is the overlap test, so no separate geometry
    call is needed. Kept labels are shifted by +1 so 0 stays the background
    class.

    Returns:
      ``(boxes, labels)`` as plain Python lists.
    """
    kept_boxes = []
    kept_labels = []
    for j, b in enumerate(bboxes):
      clipped = [max(b[0] - start_x, 0),
                 max(b[1] - start_y, 0),
                 min(b[2] - start_x, cls.CROP_SIZE),
                 min(b[3] - start_y, cls.CROP_SIZE)]
      if clipped[0] < clipped[2] and clipped[1] < clipped[3]:
        kept_boxes.append(clipped)
        kept_labels.append(labels[j] + 1)
    return kept_boxes, kept_labels

  def _make_sample(self, img, bboxes, labels, index):
    """Draw a random crop around the first box and build its target dict.

    Args:
      img: Full image as an ``H x W x C`` float array scaled to [0, 1].
      bboxes: Boxes of this image in full-image pixels.
      labels: Class ids aligned with ``bboxes``.
      index: Dataset index, stored as ``target["image_id"]``.

    Returns:
      ``(image, target)`` after the optional transforms.
    """
    anchor = bboxes[0]
    start_x = self._random_window_start(anchor[0], anchor[2])
    start_y = self._random_window_start(anchor[1], anchor[3])
    crop = img[start_y:start_y + self.CROP_SIZE, start_x:start_x + self.CROP_SIZE, :]

    final_boxes, final_classes = self._clip_boxes(bboxes, labels, start_x, start_y)
    if len(final_boxes) == 0:
      final_boxes = torch.zeros((0, 4), dtype=torch.float32)
      # NOTE(review): this leaves one background label next to zero boxes —
      # confirm the downstream model accepts the length mismatch.
      final_classes.append(0)
    else:
      # bounding box to tensor
      final_boxes = torch.as_tensor(final_boxes, dtype=torch.float32)
    # area of the bounding boxes
    area = (final_boxes[:, 3] - final_boxes[:, 1]) * (final_boxes[:, 2] - final_boxes[:, 0])
    # no crowd instances
    iscrowd = torch.zeros((final_boxes.shape[0],), dtype=torch.int64)
    # labels to tensor
    final_classes = torch.as_tensor(final_classes, dtype=torch.int64)
    # prepare the final `target` dictionary
    target = {}
    target["boxes"] = final_boxes
    target["labels"] = final_classes
    target["area"] = area
    target["iscrowd"] = iscrowd
    target["image_id"] = torch.tensor([index])
    # NOTE(review): the array comes from cv2.imread (presumably BGR) and is
    # only relabelled as RGB here, not channel-swapped — confirm intended.
    crop = Image.fromarray((crop * 255).astype(np.uint8)).convert("RGB")
    if self.transforms is not None:
      crop, target = self.transforms(crop, target)
    # Aliases under the key names used by the effdet training bench.
    target["bbox"] = target["boxes"]
    target["cls"] = target["labels"]
    return crop, target

  def __len__(self):
    """Number of samples that were actually loaded."""
    return len(self.images)

  def __getitem__(self, idx):
    """Return ``(image / 255.0, target)`` for sample ``idx``.

    Training mode draws a fresh random crop; validation mode returns the crop
    prepared in ``__init__``.
    """
    if self.validation:
      img = self.images[idx]
      target = self.targets[idx]
    else:
      img, target = self._make_sample(self.images[idx], self.bboxes[idx],
                                      self.classes[idx], idx)

    return img/255.0, target

In [None]:
def collate_fn(batch):
    """Transpose a batch of (image, target) samples into per-field tuples,
    so images and targets of differing sizes can be batched without stacking."""
    per_field = zip(*batch)
    return tuple(per_field)

In [None]:
from sklearn.model_selection import train_test_split

# 80/20 train/validation split with a fixed seed.
# NOTE(review): df.iloc[0:10] limits the split to the first 10 rows — this
# looks like a debugging subset; confirm before a full training run.
train_df, val_df = train_test_split(df.iloc[0:10], test_size=0.2, shuffle=True, random_state=42)
# Training samples are re-cropped on every access and may be flipped;
# validation samples are cropped once at construction and never flipped.
train_dataset = RemoteDataset(train_df['fileName'].tolist(), train_df['bbox'].tolist(), train_df['Label'].tolist(), get_transform(True), validation=False)
val_dataset = RemoteDataset(val_df['fileName'].tolist(), val_df['bbox'].tolist(), val_df['Label'].tolist(), get_transform(False), validation=True)

# collate_fn keeps each batch as tuples of images / targets instead of stacking.
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=0, collate_fn=collate_fn)
# NOTE(review): the validation loader is shuffled too — confirm that is wanted.
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=True, num_workers=0, collate_fn=collate_fn)

In [None]:
# Draw one training sample (a fresh random crop) together with its boxes.
idx = 2
sample = train_dataset[idx]
img = sample[0]
print(img.shape)
targets = sample[1]
bboxes = targets['boxes']
print(targets['labels'])
print(bboxes)
# Move the channel axis last (C, H, W -> H, W, C) for matplotlib.
img = np.transpose(img.detach().numpy(),(1,2,0))#[int(bbox[1]):int(bbox[3]),int(bbox[0]):int(bbox[2])]

fig, ax = plt.subplots()

# Display the image
ax.imshow(img)
# Overlay every box whose left edge is non-negative.
for box in bboxes.detach().numpy():
  if box[0] >= 0:
    ax.add_patch(bbox_to_rect(box, 'blue'))

In [None]:
train_dataset[2]

In [None]:
def iou(y_true, y_pred):
  """Intersection-over-union of two [x1, y1, x2, y2] boxes.

  Widths and heights are measured as ``hi - lo + 1`` (both edge pixels
  counted); an empty intersection contributes zero.
  """
  zero = np.array(0.)

  # Corners of the intersection rectangle.
  left = np.maximum(y_true[0], y_pred[0])
  top = np.maximum(y_true[1], y_pred[1])
  right = np.minimum(y_true[2], y_pred[2])
  bottom = np.minimum(y_true[3], y_pred[3])

  inter_w = np.maximum(right - left + 1, zero)
  inter_h = np.maximum(bottom - top + 1, zero)
  intersection = inter_w * inter_h

  true_area = (y_true[2] - y_true[0] + 1) * (y_true[3] - y_true[1] + 1)
  pred_area = (y_pred[2] - y_pred[0] + 1) * (y_pred[3] - y_pred[1] + 1)

  return intersection / (true_area + pred_area - intersection)

In [None]:
from collections import defaultdict

In [None]:
def _rasterize_boxes(boxes, size):
  """Paint boxes onto a ``size`` x ``size`` boolean mask.

  Coordinates are truncated to int, both corner pixels are included, and
  boxes are clipped to the canvas (parts outside it are ignored).
  """
  mask = np.zeros((size, size), dtype=bool)
  for box in boxes:
    x_lo, x_hi = sorted((int(box[0]), int(box[2])))
    y_lo, y_hi = sorted((int(box[1]), int(box[3])))
    x_lo, y_lo = max(x_lo, 0), max(y_lo, 0)
    x_hi, y_hi = min(x_hi, size - 1), min(y_hi, size - 1)
    # After clamping, an inverted range means the box missed the canvas.
    if x_lo <= x_hi and y_lo <= y_hi:
      mask[y_lo:y_hi + 1, x_lo:x_hi + 1] = True
  return mask


def multi_iou(y_true, y_pred, size=640):
  """Pixel-level IoU, precision and recall between two sets of boxes.

  Both box sets are rasterised onto a ``size`` x ``size`` canvas (the union
  of all boxes in each set) and compared pixel by pixel.

  Args:
    y_true: Iterable of ground-truth ``[x1, y1, x2, y2]`` boxes.
    y_pred: Iterable of predicted boxes in the same format.
    size: Canvas side length in pixels (default 640).

  Returns:
    ``(iou, precision, recall)`` as floats, or ``(-1, -1, -1)`` when there
    are no predictions, so callers can skip the sample.
  """
  # Nothing to score: bail out before doing any rasterisation.
  if len(y_pred) == 0:
    return -1,-1,-1

  true_mask = _rasterize_boxes(y_true, size)
  pred_mask = _rasterize_boxes(y_pred, size)

  TP = int(np.count_nonzero(true_mask & pred_mask))
  FN = int(np.count_nonzero(true_mask & ~pred_mask))
  FP = int(np.count_nonzero(~true_mask & pred_mask))

  # Pad the denominators by one when either mask is empty so that no
  # division by zero can occur.
  added = 1 if (TP + FN) == 0 or (TP + FP) == 0 else 0

  iou = float(TP) / float((FN + FP + TP + added))
  precision = float(TP) / float((TP + FP + added))
  recall = float(TP) / float((TP + FN + added))

  return iou, precision, recall

In [None]:
import torchvision
from torch.nn import Identity

In [None]:
%%shell
git clone https://github.com/rwightman/efficientdet-pytorch.git
cd efficientdet-pytorch

cp effdet/efficientdet.py ../
cp effdet/anchors.py ../
cp effdet/__init__.py ../
cp -r effdet/config/ ../

In [None]:
!pip install timm

In [None]:
!git clone https://github.com/rwightman/efficientdet-pytorch.git

In [None]:
!pip install effdet

In [None]:
import timm
from collections import OrderedDict

In [None]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from effdet.config import get_fpn_config, get_efficientdet_config
from effdet.efficientdet import BiFpn, get_feature_info, create_model, HeadNet


class BiFPN_Backbone(nn.Module):
  """Feature backbone followed by a BiFPN and two effdet HeadNet heads.

  The forward pass feeds the input through ``backbone``, passes the result to
  ``bifpn`` and then applies the class head and the box head to the BiFPN
  output.
  """
  def __init__(self, backbone, bifpn, layers, config):
    """Store the submodules and build both heads from ``config``.

    ``layers`` is stored but not used by ``forward``.
    """
    super().__init__()
    self.backbone = backbone
    self.bifpn = bifpn
    self.layers = layers
    self.config = config
    # Both heads are created with num_outputs=4.
    # NOTE(review): confirm 4 is the intended output count for the class head.
    self.class_net = HeadNet(self.config, num_outputs=4)
    self.box_net = HeadNet(self.config, num_outputs=4)
  def forward(self, X):
    """Return ``(cls, box)``: the two head outputs for input batch ``X``."""
    x = self.backbone(X)
    x = self.bifpn(x)

    cls = self.class_net(x)
    box = self.box_net(x)
    return cls, box

# class BiFPN_Backbone(nn.Module):
#   def __init__(self, backbone, bifpn, layers):
#     super().__init__()
#     self.backbone = backbone
#     self.bifpn = bifpn
#     self.layers = layers

#   def forward(self, X):
#     x = self.backbone(X)
#     x = self.bifpn(x)
#     d = {}
#     for i in range(len(self.layers)):
#       d[i] = x[i]
#       print(x[i].shape)

#     return d

def create_model_bifpn(num_classes):
    """Assemble a ResNet-50 + BiFPN detector wrapped in ``BiFPN_Backbone``.

    Starts from effdet's 'resdet50' preset, overrides the FPN width, pyramid
    levels and class count, builds a feature-only ResNet-50 (no pretrained
    weights) and a 'bifpn_sum' BiFPN on top of it.

    The previous version also instantiated a full torchvision Faster R-CNN
    that was overwritten before being used; that construction is removed.

    Args:
      num_classes: Kept for call compatibility. It is currently unused; the
        class count is fixed at 3 in the config below.
        NOTE(review): confirm whether it should drive the config / heads.

    Returns:
      A ``BiFPN_Backbone`` module.
    """
    config = get_efficientdet_config('resdet50')
    config['fpn_channels'] = 256
    config['num_levels'] = 5
    config['max_level'] = 7
    config['min_level'] = 3
    config['num_classes'] = 3
    backbone = create_model(
            'resnet50',
            features_only=True,
            out_indices=config.backbone_indices or (2,3,4),
            pretrained=False,
            **config.backbone_args
        )
    config['fpn_config'] = get_fpn_config('bifpn_sum')
    bifpn = BiFpn(config, get_feature_info(backbone))
    return BiFPN_Backbone(backbone, bifpn, (2,3,4), config)

model = create_model_bifpn(4).cuda()

In [None]:
from effdet.efficientdet import EfficientDet
from effdet.bench import DetBenchTrain
# Start from effdet's 'resdet50' preset, then override the FPN width and the
# number of classes.
config = get_efficientdet_config('resdet50')
config['fpn_channels'] = 88
config['num_classes'] = 3
# NOTE(review): `backbone` is built here but not passed to EfficientDet below
# — confirm whether this construction is still needed.
backbone = create_model(
        'resnet50',
        features_only=True,
        out_indices=(0,1,2,3,4),
        pretrained=False,
        **config.backbone_args
    )
bifpn_config = get_fpn_config('bifpn_sum')
# bifpn_config['num_levels'] = 3
# bifpn_config['norm_layer'] = None
# bifpn_config['norm_kwargs'] = None
# bifpn_config['act_type'] = 'swish'
config['fpn_config'] = bifpn_config

# This replaces the `model` built by create_model_bifpn and keeps it on the CPU.
# NOTE(review): the positional True / False flags are presumably
# pretrained_backbone / alternate_init — verify against the effdet signature.
model = EfficientDet(config, True, False).cpu()

In [None]:
print(model)

In [None]:
train_dataset[0][1]['bbox'] = train_dataset[0][1]['bbox'].contiguous()

In [None]:
train_dataset[0][1]

In [None]:
print(config)

In [None]:
img.unsqueeze(0).shape

In [None]:
model = DetBenchTrain(model).cuda()

In [None]:
print(config.keys())

In [None]:
# Single-sample forward pass through the training bench.
model.train()
sample = train_dataset[4]
img = sample[0]
# Tensor.cuda() returns a copy on the GPU and leaves the original untouched,
# so the results must be kept: rebuild the target dict from the moved tensors.
target = {key: value.cuda() for key, value in sample[1].items()}

print(target)
model(img.unsqueeze(0).cuda(), target=target)

In [None]:
def read_data(X):
  """Move one collated batch to the GPU.

  `X[0]` is the sequence of images and `X[1]` the sequence of target dicts;
  every image and every dict value is sent through `.cuda()`.
  Returns `(images, targets)` as two lists.
  """
  raw_images, raw_targets = X[0], X[1]
  images = [image.cuda() for image in raw_images]
  targets = []
  for t in raw_targets:
    targets.append({k: v.cuda() for k, v in t.items()})
  return images, targets
# For Training
# Pull one batch, move it to the GPU and run it through the model in train
# mode, then again in eval mode to obtain predictions.
model.train()
X = next(iter(train_loader))
images, targets = read_data(X)
output = model(images,targets)   # Returns losses and detections
#print(output)
model.eval()
train_preds = model(images)
# Score each image using at most as many predicted boxes as it has true boxes.
# NOTE(review): the multi_iou result is not stored or printed here.
for i in range(len(train_preds)):
  num_true_boxes = len(targets[i]['boxes'])

  multi_iou(targets[i]['boxes'][0:num_true_boxes], 
            train_preds[i]['boxes'][0:num_true_boxes])


In [None]:
for i in range(len(train_preds)):
  num_true_boxes = len(targets[i]['boxes'])

  multi_iou(targets[i]['boxes'][0:num_true_boxes], 
            train_preds[i]['boxes'][0:num_true_boxes])

In [None]:
model.eval()
model.cuda()
images = [t.cuda() for t in images]
model(images)

In [None]:
model.to('cuda')
print('')

In [None]:
# model = torchvision.models.resnet50()
# req_layers = list(model.children())[:8]
# backbone = nn.Sequential(*req_layers)
# out = backbone(torch.unsqueeze(train_dataset[0][0], 0))

In [None]:
# model = torchvision.models.resnet50()
# for p in backbone.named_parameters():
#   print(p[1].requires_grad)

In [None]:
# out_c, out_h, out_w = out.size(dim=1), out.size(dim=2), out.size(dim=3)

In [None]:
from torchvision import ops

In [None]:
!mv /content/drive/MyDrive/utils.py /content/

In [None]:
import torch.nn.functional as F

In [None]:
# Probe the feature-map size of a truncated ResNet-50.
# NOTE(review): this rebinds the global `model`, replacing the detector
# built earlier — confirm the cell order is intended.
model = torchvision.models.resnet50()
# Keep only the first eight child modules of the network.
req_layers = list(model.children())[:8]
backbone = nn.Sequential(*req_layers)
# Run one dataset image (with a batch axis added) through the truncated net.
out = backbone(torch.unsqueeze(train_dataset[0][0], 0))
out_c, out_h, out_w = out.size(dim=1), out.size(dim=2), out.size(dim=3)
# Same probe on an all-zero 600x600 input.
dummy_img = torch.zeros((1, 3, 600, 600)).float()
print(out_c)
print(out_h)
print(out_w)
out_map = backbone(dummy_img)
print(out_map.size())

In [None]:
def read_data(X):
  """Send a collated batch to the GPU and return `(images, targets)`.

  `X[0]` holds the images and `X[1]` the target dicts; each image and each
  dict value goes through `.cuda()`. This repeats an identical definition
  made in an earlier cell.
  """
  batch_images, batch_targets = X[0], X[1]
  images = [image.cuda() for image in batch_images]
  targets = []
  for t in batch_targets:
    moved = {k: v.cuda() for k, v in t.items()}
    targets.append(moved)
  return images, targets

In [None]:
from engine import evaluate

In [None]:
def _mean_batch_metrics(y_true, preds):
  """Average multi_iou's (IoU, precision, recall) over one batch.

  Images without ground-truth boxes, and images for which multi_iou reports
  a negative IoU (no predictions), are left out of the average. For each
  image at most as many predicted boxes as there are true boxes are scored.

  Returns:
    ``(iou, precision, recall)``; all zeros when no image could be scored.
  """
  iou = 0.0
  recall = 0.0
  precision = 0.0
  total_count = 0
  for target, pred in zip(y_true, preds):
    num_true_boxes = len(target['boxes'])
    if num_true_boxes == 0:
      continue
    temp_iou, temp_precision, temp_recall = multi_iou(target['boxes'][0:num_true_boxes],
                                                      pred['boxes'][0:num_true_boxes])
    if temp_iou >= 0:
      iou += temp_iou
      recall += temp_recall
      precision += temp_precision
      total_count += 1

  if total_count > 0:
    iou /= total_count
    recall /= total_count
    precision /= total_count
  return iou, precision, recall


def train(model, train_loader, val_loader, epochs=300, lr=0.1):
  """Train a detection model and log per-epoch metrics.

  Each epoch runs one optimisation pass over ``train_loader`` (Adam, with the
  learning rate cut by 10x at 50% and 75% of the epochs), scores the training
  batches and the validation set with pixel-level IoU / precision / recall,
  and finally runs the COCO-style ``evaluate`` on the validation loader.

  Args:
    model: Detection model that returns a dict of losses when called with
      ``(images, targets)`` in train mode and a list of dicts with a
      ``'boxes'`` entry in eval mode.
    train_loader: Loader yielding collated ``(images, targets)`` batches.
    val_loader: Loader for validation batches, same format.
    epochs: Number of epochs.
    lr: Initial learning rate.

  Returns:
    A DataFrame with one row per epoch. 'Val Loss' is NaN because no
    validation loss is computed.
  """
  optimizer = torch.optim.Adam(model.parameters(), lr=lr)
  scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[int(epochs*0.5),int(epochs*0.75)], gamma=0.1)
  # The comma after 'Train Precision' matters: without it Python concatenates
  # the two adjacent string literals into a single bogus column name.
  training_data = pd.DataFrame(columns=['Epoch', 'Train Loss', 'Train IOU', 'Train Precision',
                                        'Train Recall', 'Val Loss', 'Val IOU', 'Val Precision', 'Val Recall'])
  for e in range(epochs):
    model.train()
    tbar = tqdm(train_loader, position=0, leave=True)

    train_loss_temp = []
    train_iou_temp = []
    train_precision_temp = []
    train_recall_temp = []

    for X in tbar:
      # The metric pass below switches to eval mode, so switch back each step.
      model.train()
      img, y_true = read_data(X)

      optimizer.zero_grad()
      loss_dict = model(img, y_true)
      losses = sum(loss_dict.values())
      losses.backward()
      optimizer.step()

      train_loss_temp.append(losses.item())

      # Predictions are only used for metrics: skip building the autograd graph.
      model.eval()
      with torch.no_grad():
        train_preds = model(img)
      iou, precision, recall = _mean_batch_metrics(y_true, train_preds)

      train_iou_temp.append(iou)
      train_precision_temp.append(precision)
      train_recall_temp.append(recall)

      tbar.set_description('Epoch: %i, Loss: %f, IoU: %f, Precision: %f, Recall: %f' % (e+1, np.round(np.mean(train_loss_temp),4),
                                                                                        np.round(np.mean(train_iou_temp),4),
                                                                                        np.round(np.mean(train_precision_temp),4),
                                                                                        np.round(np.mean(train_recall_temp),4)))

    model.eval()
    vbar = tqdm(val_loader, position=0, leave=True)
    val_loss_temp = []
    val_iou_temp = []
    val_precision_temp = []
    val_recall_temp = []
    with torch.no_grad():
      for X in vbar:
        img, y_true = read_data(X)

        y_preds = model(img, y_true)
        iou, precision, recall = _mean_batch_metrics(y_true, y_preds)

        val_iou_temp.append(iou)
        val_precision_temp.append(precision)
        val_recall_temp.append(recall)

        vbar.set_description('Epoch: %i, Val IoU: %f, Val Precision: %f, Val Recall: %f' % (e+1,
                                                                                            np.round(np.mean(val_iou_temp),4),
                                                                                            np.round(np.mean(val_precision_temp),4),
                                                                                            np.round(np.mean(val_recall_temp),4)))
    evaluate(model, val_loader, 'cuda')
    training_data.at[e, 'Epoch'] = e+1
    training_data.at[e, 'Train Loss'] = np.round(np.mean(train_loss_temp),4)
    training_data.at[e, 'Train IOU'] = np.round(np.mean(train_iou_temp),4)
    training_data.at[e, 'Train Precision'] = np.round(np.mean(train_precision_temp),4)
    training_data.at[e, 'Train Recall'] = np.round(np.mean(train_recall_temp),4)
    # No validation loss is collected; record NaN without taking the mean of
    # an empty list (which would emit a RuntimeWarning).
    training_data.at[e, 'Val Loss'] = np.round(np.mean(val_loss_temp),4) if val_loss_temp else np.nan
    training_data.at[e, 'Val IOU'] = np.round(np.mean(val_iou_temp),4)
    training_data.at[e, 'Val Precision'] = np.round(np.mean(val_precision_temp),4)
    training_data.at[e, 'Val Recall'] = np.round(np.mean(val_recall_temp),4)
    scheduler.step()

  return training_data

In [None]:
training_data = train(model, train_loader, val_loader, epochs=300, lr=0.005)

In [None]:
!mkdir /content/drive/MyDrive/ATMLModels
torch.save(model.state_dict(), '/content/drive/MyDrive/ATMLModels/RCNN_FPN.pt')

In [None]:
!mkdir /content/drive/MyDrive/ATMLResults
training_data.to_csv('/content/drive/MyDrive/ATMLResults/RCNN_FPN_RESULTS.csv')