*based on https://towardsdatascience.com/building-your-own-object-detector-pytorch-vs-tensorflow-and-how-to-even-get-started-1d314691d4ae*

In [None]:
import os,sys, random
from shutil import copy
import glob
import pandas as pd
import xml.etree.ElementTree as ET

!pip install --upgrade wandb
!wandb login 1d9174288139c1b3c01e1aeed9df5ed89511e203

import wandb

Load dataset

In [None]:
# Create the working directory used later for the torchvision helper scripts
# and the directory the dataset archive is unpacked into.
!mkdir -p "/content/pytorch object detection" "/content/data"
# Unpack the dataset archive from the mounted Drive into /content/data (-q: quiet).
!unzip -q "/content/drive/MyDrive/Masterthesis/datasets/sinsheim_random_C/sinsheim_random_C.zip"  -d /content/data #'/content/drive/My Drive/Masterthesis/datasets/sinsheim_random460/sinsheim_random460.zip' -d /content/data

# Switch into the dataset directory so the `find` below only touches the dataset.
os.chdir("/content/data")
# Remove macOS '.DS_Store' files; SinsheimDataset lists every file in the
# images folder, so stray files there would be treated as images.
!find . -name '.DS_Store' -type f -delete

Create csv for dataset from .xml annotations

In [None]:
def xml_to_csv(path):
  """Collect the bounding boxes of every ``*.xml`` annotation file in ``path``.

  The files are expected to follow a Pascal-VOC-like layout: a ``filename``
  tag, a ``size`` tag holding ``width``/``height`` and one ``object`` tag per
  annotated box containing ``name`` and ``bndbox`` (``xmin``, ``ymin``,
  ``xmax``, ``ymax``).

  Args:
    path: Directory that contains the annotation files.

  Returns:
    pandas.DataFrame with one row per annotated object and the columns
    ``filename, width, height, class, xmin, ymin, xmax, ymax``. Files without
    any ``object`` tag contribute no rows.
  """
  def child(node, tag, position):
    # Look the element up by name so that a different child order (e.g. a
    # missing <pose> tag) cannot shift the values; fall back to the original
    # positional lookup for files that do not use the expected tag names.
    found = node.find(tag)
    return found if found is not None else node[position]

  xml_list = []
  # Sorted so the row order of the table is reproducible between runs.
  for xml_file in sorted(glob.glob(path + '/*.xml')):
    root = ET.parse(xml_file).getroot()
    members = root.findall('object')
    if not members:
      continue
    filename = root.find('filename').text
    size = root.find('size')
    width = int(child(size, 'width', 0).text)
    height = int(child(size, 'height', 1).text)
    for member in members:
      bndbox = child(member, 'bndbox', 4)
      xml_list.append((filename,
                       width,
                       height,
                       child(member, 'name', 0).text,
                       int(child(bndbox, 'xmin', 0).text),
                       int(child(bndbox, 'ymin', 1).text),
                       int(child(bndbox, 'xmax', 2).text),
                       int(child(bndbox, 'ymax', 3).text)))
  column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
  return pd.DataFrame(xml_list, columns=column_name)

# Directory holding the .xml annotations. The second argument is absolute, so
# os.path.join discards the current working directory and yields exactly it.
image_path = os.path.join(os.getcwd(), '/content/data/annotations')

# Flatten all annotations into one table and persist it next to the dataset.
xml_df = xml_to_csv(image_path)
xml_df.to_csv(
    '/content/data/labels.csv',
    index=None,
)
print('Successfully converted xml to csv.')

Install torchvision

In [None]:
# Work from the helper directory: the next cell copies the torchvision
# reference scripts into it and they are later imported from the working directory.
os.chdir("/content/pytorch object detection")

In [None]:
%%bash
# Fetch the torchvision sources and pin them to the v0.3.0 tag.
git clone https://github.com/pytorch/vision.git
cd vision
git checkout v0.3.0
# Copy the detection reference helpers one level up (the notebook's working
# directory); utils, transforms, coco_eval and coco_utils are imported from
# there by later cells.
cp references/detection/utils.py ../
cp references/detection/transforms.py ../
cp references/detection/coco_eval.py ../
cp references/detection/engine.py ../
cp references/detection/coco_utils.py ../

In [None]:
import pycocotools
import numpy as np
import torch
import torch.utils.data
import torch.nn as nn

from PIL import Image, ImageDraw
import pandas as pd
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import utils

import transforms as T

# Configuring the model

Helper functions

In [None]:
from functools import lru_cache


@lru_cache(maxsize=8)
def _load_annotations(path_to_data_file, mtime_ns):
  """Read the annotation CSV once per (path, modification time).

  ``mtime_ns`` is only part of the cache key: when the file is rewritten its
  modification time changes and the table is read again.
  """
  return pd.read_csv(path_to_data_file)


def parse_one_annot(path_to_data_file, filename):
  """Return the bounding boxes annotated for one image.

  Args:
    path_to_data_file: CSV file with at least the columns ``filename``,
      ``xmin``, ``ymin``, ``xmax`` and ``ymax``.
    filename: Value of the ``filename`` column to select.

  Returns:
    numpy array of shape (num_boxes, 4) with the columns
    ``xmin, ymin, xmax, ymax``; it has zero rows when the image has no entry.
  """
  # The dataset calls this for every sample, so the parsed table is cached
  # instead of re-reading the whole CSV each time.
  mtime_ns = os.stat(path_to_data_file).st_mtime_ns
  data = _load_annotations(path_to_data_file, mtime_ns)
  return data.loc[data["filename"] == filename, ["xmin", "ymin", "xmax", "ymax"]].values

class SinsheimDataset(torch.utils.data.Dataset):
  """Detection dataset backed by an image folder and a CSV of bounding boxes.

  Images are read from ``<root>/images`` (in sorted file-name order) and the
  boxes of each image are looked up by file name in ``data_file``.
  """

  def __init__(self, root, data_file, transforms=None):
    """Store the locations and index the image files.

    Args:
      root: Dataset directory containing an ``images`` sub-folder.
      data_file: Path of the CSV file with the box annotations.
      transforms: Optional callable ``(img, target) -> (img, target)``.
    """
    self.root = root
    self.transforms = transforms
    image_dir = os.path.join(root, "images")
    self.imgs = sorted(os.listdir(image_dir))
    self.path_to_data_file = data_file

  def __len__(self):
    """Number of image files found in the images folder."""
    return len(self.imgs)

  def __getitem__(self, idx):
    """Return ``(image, target)`` for the ``idx``-th image.

    ``target`` is a dict with the keys ``boxes``, ``labels``, ``image_id``,
    ``area`` and ``iscrowd``.
    """
    file_name = self.imgs[idx]
    img = Image.open(os.path.join(self.root, "images", file_name)).convert("RGB")

    # boxes come as rows of (xmin, ymin, xmax, ymax)
    box_list = parse_one_annot(self.path_to_data_file, file_name)
    boxes = torch.as_tensor(box_list, dtype=torch.float32)
    num_objs = len(box_list)

    heights = boxes[:, 3] - boxes[:, 1]
    widths = boxes[:, 2] - boxes[:, 0]

    target = {
      "boxes": boxes,
      # single object class: every box gets label 1
      "labels": torch.ones((num_objs,), dtype=torch.int64),
      "image_id": torch.tensor([idx]),
      "area": heights * widths,
      # no instance is marked as crowd
      "iscrowd": torch.zeros((num_objs,), dtype=torch.int64),
    }

    if self.transforms is not None:
      img, target = self.transforms(img, target)
    return img, target

In [None]:
# Smoke test: build the dataset without transforms and display its first sample.
dataset = SinsheimDataset(
    root= "/content/data/",
    data_file= "/content/data/labels.csv",
)
dataset[0]

In [None]:
def get_model(num_classes):
   """Build a Faster R-CNN (ResNet-50 FPN) whose box head predicts ``num_classes`` classes.

   The detector is created with ``pretrained=True`` and only its box predictor
   is swapped for a new ``FastRCNNPredictor``.

   Args:
      num_classes: Number of outputs of the new classification head.

   Returns:
      The detection model with the replaced box predictor.
   """
   detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
   roi_heads = detector.roi_heads
   # the new head must accept the same feature size as the head it replaces
   roi_heads.box_predictor = FastRCNNPredictor(
      roi_heads.box_predictor.cls_score.in_features,
      num_classes)
   return detector

In [None]:
def get_transform(train):
   """Compose the (image, target) transforms for a dataset.

   Args:
      train: When truthy, a random horizontal flip (probability 0.5) is added
         after the tensor conversion as data augmentation.

   Returns:
      ``T.Compose`` of the selected transforms.
   """
   # the PIL image always has to become a tensor first
   steps = [T.ToTensor()]
   if train:
      steps.append(T.RandomHorizontalFlip(0.5))
   return T.Compose(steps)

Preparing dataset

In [None]:
TRAINING_SPLIT = 0.8

# Two views of the same data: the training view adds augmentation, the test
# view does not.
dataset = SinsheimDataset(root= "/content/data",
                         data_file= "/content/data/labels.csv",
                         transforms = get_transform(train=True))
dataset_test = SinsheimDataset(root= "/content/data",
                              data_file= "/content/data/labels.csv",
                              transforms = get_transform(train=False))

# Derive the split point from the freshly built FULL dataset. `dataset` is
# rebound to the training subset below, so computing the split before the
# dataset is rebuilt would use the subset's length when this cell is re-run.
num_examples = len(dataset)
train_split = round(num_examples * TRAINING_SPLIT)

# split the dataset in train and test set (fixed seed -> same split every run)
torch.manual_seed(1)
indices = torch.randperm(num_examples).tolist()
dataset = torch.utils.data.Subset(dataset, indices[:train_split])
dataset_test = torch.utils.data.Subset(dataset_test, indices[train_split:])

print("We have: {} examples, {} are training and {} testing".format(len(indices), len(dataset), len(dataset_test)))

Loading the model

In [None]:

# Pick the GPU when one is visible to torch, otherwise fall back to the CPU.
gpu_available = torch.cuda.is_available()
device = torch.device('cuda' if gpu_available else 'cpu')
print("GPU is available" if gpu_available else "GPU is not available!")

# two output classes: damage and not damage
num_classes = 2
# build the detector with the helper defined above ...
model = get_model(num_classes)
# ... and place it on the selected device
model.to(device)


In [None]:
# Mark every parameter of the detector as trainable (the whole network is fine-tuned).
for parameter in model.parameters():
  parameter.requires_grad = True
  

Set parameters

In [None]:
# Number of images per training batch
TRAINING_BATCH_SIZE = 2

# Optimizer settings (passed to SGD in set_parameters)
LR = 0.005
MOMENTUM = 0.9
WEIGHT_DECAY = 0.0005

# Learning-rate schedule settings (passed to StepLR in set_parameters)
STEP_SIZE = 3
GAMMA = 0.1

# Training loader: shuffled mini-batches of the training subset
data_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=TRAINING_BATCH_SIZE,
    shuffle=True,
    num_workers=4,
    collate_fn=utils.collate_fn,
)
# Test loader: one image at a time, in fixed order
data_loader_test = torch.utils.data.DataLoader(
    dataset_test,
    batch_size=1,
    shuffle=False,
    num_workers=4,
    collate_fn=utils.collate_fn,
)

def set_parameters(LR,MOMENTUM,WEIGHT_DECAY,STEP_SIZE,GAMMA):
  """Create the optimizer and learning-rate scheduler for the global ``model``.

  Args:
    LR: Learning rate of the SGD optimizer.
    MOMENTUM: SGD momentum.
    WEIGHT_DECAY: SGD weight decay.
    STEP_SIZE: Number of scheduler steps between two learning-rate decays.
    GAMMA: Factor the learning rate is multiplied with at each decay.

  Returns:
    Tuple ``(optimizer, lr_scheduler)``.
  """
  # only parameters that are flagged as trainable are handed to the optimizer
  trainable = [weight for weight in model.parameters() if weight.requires_grad]
  sgd = torch.optim.SGD(
      trainable,
      lr=LR,
      momentum=MOMENTUM,
      weight_decay=WEIGHT_DECAY,
  )
  step_schedule = torch.optim.lr_scheduler.StepLR(
      sgd,
      step_size=STEP_SIZE,
      gamma=GAMMA,
  )
  return sgd, step_schedule



Engine functions

In [None]:
import torchvision.models.detection.mask_rcnn

from coco_utils import get_coco_api_from_dataset
from coco_eval import CocoEvaluator
import utils, math, time

def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq):
  """Train ``model`` for one pass over ``data_loader``.

  Args:
    model: Detection model; called as ``model(images, targets)`` and expected
      to return a dict of losses.
    optimizer: Optimizer that updates the model parameters.
    data_loader: Iterable of ``(images, targets)`` batches.
    device: Device the images and target tensors are moved to.
    epoch: Epoch index; used for the log header, the wandb log and to enable
      the learning-rate warm-up during epoch 0.
    print_freq: Passed to ``metric_logger.log_every``.

  The summed loss of every iteration is logged to wandb. A non-finite loss
  prints the loss dict and terminates via ``sys.exit(1)``.
  """
  model.train()
  metric_logger = utils.MetricLogger(delimiter="  ")
  metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
  header = 'Epoch: [{}]'.format(epoch)

  # a per-iteration warm-up schedule exists only during the very first epoch
  if epoch == 0:
    warmup_factor = 1. / 1000
    warmup_iters = min(1000, len(data_loader) - 1)
    lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)
  else:
    lr_scheduler = None

  for images, targets in metric_logger.log_every(data_loader, print_freq, header):
    images = [image.to(device) for image in images]
    targets = [{key: value.to(device) for key, value in target.items()}
               for target in targets]

    loss_dict = model(images, targets)
    # total loss that is back-propagated
    losses = sum(loss_dict.values())

    # reduced copy of the losses, used for logging only
    loss_dict_reduced = utils.reduce_dict(loss_dict)
    losses_reduced = sum(loss_dict_reduced.values())
    loss_value = losses_reduced.item()

    # abort before the optimizer step when the loss has diverged
    if not math.isfinite(loss_value):
      print("Loss is {}, stopping training".format(loss_value))
      print(loss_dict_reduced)
      sys.exit(1)

    optimizer.zero_grad()
    losses.backward()
    optimizer.step()

    wandb.log({'loss': losses_reduced, 'epoch': epoch})

    if lr_scheduler is not None:
      lr_scheduler.step()

    metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
    metric_logger.update(lr=optimizer.param_groups[0]["lr"])


def _get_iou_types(model):
  """List the IoU types to evaluate for ``model``.

  ``"bbox"`` is always included; ``"segm"`` is added for a ``MaskRCNN`` and
  ``"keypoints"`` for a ``KeypointRCNN``. A ``DistributedDataParallel``
  wrapper is unwrapped before the type checks.
  """
  if isinstance(model, torch.nn.parallel.DistributedDataParallel):
    wrapped = model.module
  else:
    wrapped = model
  optional_types = (
      (torchvision.models.detection.MaskRCNN, "segm"),
      (torchvision.models.detection.KeypointRCNN, "keypoints"),
  )
  return ["bbox"] + [iou_type for model_class, iou_type in optional_types
                     if isinstance(wrapped, model_class)]


@torch.no_grad()
def evaluate(model, data_loader, device):
  """Run COCO-style evaluation of ``model`` on ``data_loader``.

  Args:
    model: Detection model; put into eval mode and called as ``model(images)``.
    data_loader: Loader whose ``dataset`` is converted with
      ``get_coco_api_from_dataset`` and whose batches are ``(images, targets)``.
    device: Device the images and targets are moved to for inference.

  Returns:
    The ``CocoEvaluator`` after ``accumulate()`` and ``summarize()``.
  """
  n_threads = torch.get_num_threads()
  # FIXME remove this and make paste_masks_in_image run on the GPU
  torch.set_num_threads(1)
  cpu_device = torch.device("cpu")
  model.eval()
  metric_logger = utils.MetricLogger(delimiter="  ")
  header = 'Test:'

  coco = get_coco_api_from_dataset(data_loader.dataset)
  iou_types = _get_iou_types(model)
  coco_evaluator = CocoEvaluator(coco, iou_types)

  for image, targets in metric_logger.log_every(data_loader, 100, header):
    image = list(img.to(device) for img in image)
    targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

    # Wait for pending GPU work so the timing below only covers this forward
    # pass. Guarded because the notebook falls back to the CPU when no GPU is
    # present, and synchronize() fails without CUDA.
    if torch.cuda.is_available():
      torch.cuda.synchronize()
    model_time = time.time()
    outputs = model(image)

    # predictions are moved to the CPU before they are handed to the evaluator
    outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
    model_time = time.time() - model_time

    # map every prediction to the image id of its target
    res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
    evaluator_time = time.time()
    coco_evaluator.update(res)
    evaluator_time = time.time() - evaluator_time
    metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)

  # gather the stats from all processes
  metric_logger.synchronize_between_processes()
  print("Averaged stats:", metric_logger)
  coco_evaluator.synchronize_between_processes()

  # accumulate predictions from all images
  coco_evaluator.accumulate()
  coco_evaluator.summarize()
  # restore the thread count that was active before the evaluation
  torch.set_num_threads(n_threads)
  return coco_evaluator

Training

In [None]:
# Start a wandb run and record the hyper-parameters used for it.
wandb.init(project="rdd-pytorch")
run_config = {
    "Training batch size": TRAINING_BATCH_SIZE,
    "Learning rate": LR,
    "Momentum": MOMENTUM,
    "Weight decay": WEIGHT_DECAY,
    "Step size": STEP_SIZE,
    "Gamma": GAMMA,
}
wandb.config.update(run_config)

# number of passes over the training data
num_epochs = 10
optimizer, lr_scheduler = set_parameters(LR,MOMENTUM,WEIGHT_DECAY,STEP_SIZE,GAMMA)

for epoch in range(num_epochs):
  # one training pass; progress is printed every 20 iterations
  train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=20)
  # advance the epoch-level learning-rate schedule
  lr_scheduler.step()
  # score the current weights on the held-out images
  evaluate(model, data_loader_test, device=device)



In [None]:
# Persist the trained weights on Drive and register the same file with wandb.
model_path = "/content/drive/MyDrive/Masterthesis/Colab_Notebooks/object_detection/pytorch_transfer_learning/models/model_2"
torch.save(model.state_dict(), model_path)
wandb.save(model_path)

### Hyperparameter optimization

In [None]:
# Number used in the file name of the first model saved by this sweep.
model_number = 8
# Lower-case loop variables so the sweep does not overwrite the global
# LR / WEIGHT_DECAY constants of the configuration cell.
for weight_decay in [0.005]:
  for learning_rate in [0.001,0.01,0.1]:
    # One wandb run per configuration, so every loss curve is stored together
    # with the hyper-parameters that produced it.
    wandb.init(project="rdd-pytorch", reinit=True,
               config={"Training batch size":TRAINING_BATCH_SIZE,
                       "Learning rate" : learning_rate,
                       "Momentum" : MOMENTUM,
                       "Weight decay":weight_decay,
                       "Step size":STEP_SIZE,
                       "Gamma":GAMMA,
                       "Model number": model_number
                       })
    # Start every configuration from the same pre-trained detector; otherwise
    # each run would continue from the weights the previous run left behind
    # and the results would not be comparable. `model` is rebound at cell
    # level because set_parameters reads the global `model`.
    model = get_model(num_classes)
    model.to(device)
    optimizer, lr_scheduler = set_parameters(learning_rate,MOMENTUM,weight_decay,STEP_SIZE,GAMMA)
    for epoch in range(10):
      # train for one epoch, printing every 10 iterations
      train_one_epoch(model, optimizer, data_loader, device, epoch,
                    print_freq=10)
      # update the learning rate
      lr_scheduler.step()
      # evaluate on the test dataset
      evaluate(model, data_loader_test, device=device)

    torch.save(model.state_dict(), "/content/drive/My Drive/Masterthesis/Colab_Notebooks/transferlearning/pytorch_transfer_learning/models/model_{}".format(model_number))
    model_number += 1
    # close this configuration's run before the next one is started
    wandb.join()


# Predictions with the Model

In [None]:
def show_inference(idx,dataset,model_number = "1",threshold = 0.8, model = None):
  """Draw ground truth and predictions for one sample and save the picture.

  Args:
    idx: Index of the sample within ``dataset``.
    dataset: Dataset yielding ``(image_tensor, target)`` with
      ``target["boxes"]``; image and ground truth are both taken from it.
    model_number: Used only to build the output path
      (``.../model_<model_number>_results/image<idx>.jpg``).
    threshold: Predictions are drawn only when their score is greater than this.
    model: Detector to use; defaults to the global ``loaded_model``.

  Returns:
    The PIL image with ground-truth boxes in green and predicted boxes
    (annotated with their score) in red.
  """
  if model is None:
    model = loaded_model
  # Fetch the sample once, from the dataset that was passed in, so the image
  # and its ground-truth boxes always belong together.
  img, target = dataset[idx]
  label_boxes = np.array(target["boxes"])
  #put the model in evaluation mode
  model.eval()
  # Run on whatever device the model currently lives on (a later cell moves
  # loaded_model to the GPU in place).
  model_device = next(model.parameters()).device
  with torch.no_grad():
    prediction = model([img.to(model_device)])
  # tensor (C, H, W) with values in [0, 1] -> uint8 image (H, W, C)
  image = Image.fromarray(img.mul(255).permute(1, 2,0).byte().numpy())
  draw = ImageDraw.Draw(image)
  # draw groundtruth
  for label_box in label_boxes:
    draw.rectangle([(label_box[0], label_box[1]),
    (label_box[2], label_box[3])],
    outline ="green", width =3)
  # draw every prediction whose score exceeds the threshold
  for element in range(len(prediction[0]["boxes"])):
    boxes = prediction[0]["boxes"][element].cpu().numpy()
    score = np.round(prediction[0]["scores"][element].cpu().numpy(),
                    decimals= 4)
    if score > threshold:
      draw.rectangle([(boxes[0], boxes[1]), (boxes[2], boxes[3])],
      outline ="red", width =3)
      draw.text((boxes[0], boxes[1]), text = str(score))
  image.save("/content/drive/MyDrive/Masterthesis/Colab_Notebooks/object_detection/pytorch_transfer_learning/models/model_{}_results/image{}.jpg".format(model_number,idx))
  return image

Test all test-images

In [None]:
MODEL_NUMBER = 2

# Folder on Drive that holds the saved state dicts and the result images.
MODELS_DIR = "/content/drive/MyDrive/Masterthesis/Colab_Notebooks/object_detection/pytorch_transfer_learning/models"

loaded_model = get_model(num_classes = 2)
# map_location lets weights that were saved from the GPU be restored on a
# CPU-only runtime as well.
loaded_model.load_state_dict(torch.load(os.path.join(MODELS_DIR, "model_" + str(MODEL_NUMBER)),
                                        map_location="cpu"))

# Imported as a module (and called as tqdm.tqdm) so the name `tqdm` means the
# same thing here as in the later `import csv,tqdm` cell.
import tqdm

# Create the results folder if needed; an existing folder is fine, any other
# failure is reported instead of being silently ignored.
os.makedirs(os.path.join(MODELS_DIR, "model_{}_results".format(str(MODEL_NUMBER))), exist_ok=True)
for idx in tqdm.tqdm(range(len(dataset_test))):
  show_inference(idx, dataset_test, str(MODEL_NUMBER),0.5)

# Get damage features of all Sinsheim images

Load cropped images

In [None]:
import csv,tqdm

# CSV whose rows are read as (image name, IRI value, <third column>) further below.
csv_path = '/content/drive/My Drive/Masterthesis/datasets/ka_si_BC_IRI.csv'
# The space is backslash-escaped because this value is substituted unquoted
# into a shell command (`!unzip -q $zip_path ...`). Raw string: '\ ' is not a
# valid escape sequence in a normal string literal.
zip_path = r'/content/drive/My\ Drive/Masterthesis/datasets/ka_si_C.zip'
datadir = '/content/data/'
# Folder the archive unpacks to: <datadir>/<archive name without extension>
img_folder_path = os.path.join(datadir,zip_path.split("/")[-1].split(".")[0])

In [None]:
# Unpack the archive into the data directory; $zip_path / $datadir are the
# Python variables defined in the previous cell.
!unzip -q $zip_path -d $datadir
# Remove macOS '.DS_Store' files from the data directory, then return to /content.
os.chdir(datadir)
!find . -name '.DS_Store' -type f -delete
os.chdir("/content")

Extract information

In [None]:
def calculate_diag(box):
  """Return the length of the diagonal of ``box`` = (x0, y0, x1, y1)."""
  delta_x = box[0]-box[2]
  delta_y = box[1]-box[3]
  return np.sqrt(delta_x**2 + delta_y**2)

def get_features(pred_result,threshold = 0.5):
  """Summarise the detections of one image.

  Args:
    pred_result: Mapping with ``"scores"`` and ``"boxes"`` entries that both
      offer ``.tolist()``; boxes are (x0, y0, x1, y1).
    threshold: Minimum score for a detection to be counted.

  Returns:
    Tuple ``(num_damages, sum_diagonals, num_horizontal, num_vertical,
    sum_horizontal, sum_vertical)``. ``sum_diagonals`` is truncated to int. A
    box counts as horizontal when its width is at least 7 times its height,
    otherwise as vertical when its height is at least 7 times its width.
  """
  num_damages = num_horizontal = num_vertical = 0
  sum_diagonals = sum_horizontal = sum_vertical = 0

  for score, box in zip(pred_result["scores"].tolist(), pred_result["boxes"].tolist()):
    if score < threshold:
      continue
    num_damages += 1
    sum_diagonals += calculate_diag(box)
    width = abs(box[0] - box[2])
    height = abs(box[1] - box[3])
    if width >= 7*height:
      num_horizontal += 1
      sum_horizontal += width
    elif height >= 7*width:
      num_vertical += 1
      sum_vertical += height

  return num_damages, int(sum_diagonals), num_horizontal, num_vertical, sum_horizontal, sum_vertical
  
def read_csv(path):
  """Read a CSV file into a list of rows.

  Args:
    path: Path of the CSV file.

  Returns:
    List with one list of strings per row (a header row, if present, is
    returned like any other row).
  """
  # newline='' is what the csv module asks for: it lets the reader handle line
  # endings itself, so newlines inside quoted fields are kept unchanged.
  with open(path, mode='r', newline='') as infile:
    return list(csv.reader(infile))

In [None]:
# Rows of the CSV: (image name, IRI value, <ignored third column>)
csv_list = read_csv(csv_path)

# Inference runs on the GPU with the restored detector in eval mode.
model = loaded_model.cuda()
model.eval()

feature_list = []
with torch.no_grad():
  for img_name,iri_val,_ in tqdm.tqdm(csv_list):
    img_path = os.path.join(img_folder_path,img_name)
    img,_ = T.ToTensor()(Image.open(img_path).convert("RGB"),"")
    # add a leading batch dimension and move the image to the GPU
    img = img.unsqueeze(0).to('cuda')
    prediction = model(img)
    # one output row: name, IRI value, then the six detection features
    feature_list.append([img_name, iri_val, *get_features(prediction[0])])

features_df = pd.DataFrame(feature_list)
features_df.to_csv("ka_si_BC_IRI_annotated.csv", index = False,header = False)

### Save state dict for image classification with IRI
!!! This step changes the model !!!!

In [None]:
# Create a local models directory (not referenced by the remaining lines of this cell).
!mkdir -p "/content/models/"

# `model` is whatever the name is currently bound to; when the notebook is run
# top to bottom that is `loaded_model` on the GPU (see the feature cell above).
model_backbone = model.backbone.body
# add_module registers the layers on the backbone object itself, i.e. the
# detector referenced by `model` is modified in place.
model_backbone.add_module('avgpool',nn.AdaptiveAvgPool2d(output_size=(1,1)))
model_backbone.add_module('fc',nn.Linear(2048,1000,True))
# NOTE(review): 'avgpool' and 'fc' are newly constructed here, so the saved
# state dict contains their initial (untrained) weights next to the backbone
# weights — presumably so the keys match a ResNet-50 classifier; confirm.
torch.save(model_backbone.state_dict(), "/content/drive/MyDrive/Masterthesis/datasets/trained_resnet50_backbone_2")