In [None]:
import torch
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
from PIL import Image
from xml.dom.minidom import parse
torch.manual_seed(42)

In [None]:
!pip install pycocotools

In [None]:
from shutil import copyfile

# Stage the helper scripts next to the notebook so they can be imported as
# regular modules (each destination keeps its .py suffix).
_SCRIPT_COPIES = (
    ("../input/scripts1/engine.py", "../working/engine.py"),
    ("../input/scripts1/utils.py", "../working/utils.py"),
    ("../input/scripts1/transforms.py", "../working/transforms.py"),
    ("../input/scripts1/coco_utils.py", "../working/coco_utils.py"),
    ("../input/scripts1/coco_eval.py", "../working/coco_eval.py"),
)
for _src_path, _dst_path in _SCRIPT_COPIES:
    copyfile(src=_src_path, dst=_dst_path)

# import all our functions
from engine import *
import transforms as T
import utils

In [None]:
import math
import sys
import time
import torch

import torchvision.models.detection.mask_rcnn

from coco_utils import get_coco_api_from_dataset
from coco_eval import CocoEvaluator
import utils

def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq):
    """Run one optimisation pass over ``data_loader``.

    Args:
        model: detection model; called as ``model(images, targets)`` and
            expected to return a dict of loss tensors.
        optimizer: optimizer stepped once per batch.
        data_loader: iterable yielding ``(images, targets)`` batches.
        device: device every image and target tensor is moved to.
        epoch: epoch index; the warm-up scheduler is only built for epoch 0.
        print_freq: logging interval forwarded to ``MetricLogger.log_every``.

    Returns:
        None. Exits the process via ``sys.exit(1)`` when the reduced loss is
        not finite.
    """
    model.train()

    metric_logger = utils.MetricLogger(delimiter="  ")
    lr_meter = utils.SmoothedValue(window_size=1, fmt='{value:.6f}')
    metric_logger.add_meter('lr', lr_meter)
    header = 'Epoch: [{}]'.format(epoch)

    # A per-iteration warm-up schedule is only used during the first epoch.
    if epoch == 0:
        warmup_factor = 1. / 1000
        warmup_iters = min(1000, len(data_loader) - 1)
        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)
    else:
        lr_scheduler = None

    for images, targets in metric_logger.log_every(data_loader, print_freq, header):
        images = [image.to(device) for image in images]
        targets = [
            {key: value.to(device) for key, value in target.items()}
            for target in targets
        ]

        loss_dict = model(images, targets)
        total_loss = sum(loss_dict.values())

        # Reduced copies of the losses are used for logging and the sanity check only;
        # the backward pass below runs on the un-reduced total.
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        total_loss_reduced = sum(loss_dict_reduced.values())
        loss_value = total_loss_reduced.item()

        if not math.isfinite(loss_value):
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        if lr_scheduler is not None:
            lr_scheduler.step()

        metric_logger.update(loss=total_loss_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])
  
def _get_iou_types(model):
    """Return the IoU type names to evaluate for ``model``.

    ``"bbox"`` is always included; ``"segm"`` is added for a ``MaskRCNN`` and
    ``"keypoints"`` for a ``KeypointRCNN``. A ``DistributedDataParallel``
    wrapper is unwrapped first so the check sees the underlying module.
    """
    is_wrapped = isinstance(model, torch.nn.parallel.DistributedDataParallel)
    core_model = model.module if is_wrapped else model

    detection = torchvision.models.detection
    optional_types = (
        (detection.MaskRCNN, "segm"),
        (detection.KeypointRCNN, "keypoints"),
    )
    return ["bbox"] + [
        type_name
        for model_cls, type_name in optional_types
        if isinstance(core_model, model_cls)
    ]

@torch.no_grad()
def evaluate(model, data_loader, device):
    """Evaluate ``model`` on ``data_loader`` with the COCO evaluator.

    Args:
        model: detection model; called as ``model(images)`` after being put
            into eval mode (it is left in eval mode on return).
        data_loader: loader yielding ``(images, targets)`` batches; its
            ``dataset`` is converted with ``get_coco_api_from_dataset``.
        device: device the images and targets are moved to before inference.

    Returns:
        The ``CocoEvaluator`` after ``accumulate()`` and ``summarize()``.
    """
    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    try:
        cpu_device = torch.device("cpu")
        model.eval()
        metric_logger = utils.MetricLogger(delimiter="  ")
        header = 'Test:'

        coco = get_coco_api_from_dataset(data_loader.dataset)
        iou_types = _get_iou_types(model)
        coco_evaluator = CocoEvaluator(coco, iou_types)

        for image, targets in metric_logger.log_every(data_loader, 100, header):
            image = list(img.to(device) for img in image)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            # Synchronize only when CUDA exists: the unconditional call raises on
            # CPU-only machines, which the notebook otherwise supports.
            if torch.cuda.is_available():
                torch.cuda.synchronize()
            model_time = time.time()
            outputs = model(image)

            outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
            model_time = time.time() - model_time

            # Key each prediction by the integer image id stored in its target.
            res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
            evaluator_time = time.time()
            coco_evaluator.update(res)
            evaluator_time = time.time() - evaluator_time
            metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)

        # gather the stats from all processes
        metric_logger.synchronize_between_processes()
        print("Averaged stats:", metric_logger)
        coco_evaluator.synchronize_between_processes()

        # accumulate predictions from all images
        coco_evaluator.accumulate()
        coco_evaluator.summarize()
    finally:
        # Restore the caller's thread count even if evaluation fails part-way.
        torch.set_num_threads(n_threads)
    return coco_evaluator


In [None]:
class MarkDataset(torch.utils.data.Dataset):
    """Detection dataset pairing training images with PASCAL VOC XML boxes.

    Images are read from ``<root>/train/images`` and annotations from
    ``<root>/train/annotations/PASCAL_VOC_xml``. Both directory listings are
    sorted and paired by position, so the two folders must contain matching
    files in the same sort order.

    Args:
        root: dataset root directory.
        transforms: optional callable ``(img, target) -> (img, target)``
            applied to every sample.
    """

    def __init__(self, root, transforms=None):
        self.root = root
        self.transforms = transforms
        # load all image files, sorting them to ensure that they are aligned
        self.imgs = sorted(os.listdir(os.path.join(root, "train/images")))
        self.bbox_xml = sorted(os.listdir(os.path.join(root, "train/annotations/PASCAL_VOC_xml")))

    @staticmethod
    def _read_boxes(xml_path):
        """Parse one VOC XML file into an ``(N, 4)`` float32 box tensor.

        Each row is ``[xmin, ymin, xmax, ymax]``; coordinates are parsed as
        floats and truncated to integers. A file without any ``<object>``
        element yields an empty ``(0, 4)`` tensor.
        """
        document = parse(xml_path).documentElement
        boxes = []
        for object_ in document.getElementsByTagName('object'):
            bndbox = object_.getElementsByTagName('bndbox')[0]
            # Built-in float replaces np.float, which NumPy >= 1.24 no longer provides.
            boxes.append([
                int(float(bndbox.getElementsByTagName(tag)[0].childNodes[0].nodeValue))
                for tag in ('xmin', 'ymin', 'xmax', 'ymax')
            ])
        if not boxes:
            # Keep the tensor 2-D so column indexing still works for empty images.
            return torch.zeros((0, 4), dtype=torch.float32)
        return torch.as_tensor(boxes, dtype=torch.float32)

    def __getitem__(self, idx):
        """Return ``(img, target)`` for sample ``idx``.

        ``target`` holds ``boxes``, ``labels`` (all ones: there is a single
        foreground class), ``image_id``, ``area`` and ``iscrowd`` (all zeros).
        """
        img_path = os.path.join(self.root, "train/images", self.imgs[idx])
        bbox_xml_path = os.path.join(self.root, "train/annotations/PASCAL_VOC_xml", self.bbox_xml[idx])
        img = Image.open(img_path).convert("RGB")

        boxes = self._read_boxes(bbox_xml_path)
        num_objs = boxes.shape[0]

        # there is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        # Detection only, so no "masks" entry is produced.
        target = {
            "boxes": boxes,
            "labels": labels,
            "image_id": image_id,
            "area": area,
            "iscrowd": iscrowd,
        }

        if self.transforms is not None:
            # These transforms receive and return the target as well, so the
            # boxes are transformed together with the image.
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        """Number of images found in the training image directory."""
        return len(self.imgs)

# df = MarkDataset(r'../input/evraz-data/data_task2/')
# df[50]
# (<PIL.Image.Image image mode=RGB size=1920x1080 at 0x7FA8341A4F10>,
#  {'boxes': tensor([[ 888.,  485., 1059., 1011.],
#           [ 742.,  507., 1018., 1049.]]),
#   'labels': tensor([1, 1]),
#   'image_id': tensor([50]),
#   'area': tensor([ 89946., 149592.]),
#   'iscrowd': tensor([0, 0])})

In [None]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
 
      
def get_object_detection_model(num_classes):
    """Build a Faster R-CNN (ResNet-50 FPN) with a fresh box predictor head.

    Args:
        num_classes: number of output classes of the new head, background
            included (the notebook passes 2).

    Returns:
        The torchvision model with ``roi_heads.box_predictor`` replaced by a
        ``FastRCNNPredictor`` sized for ``num_classes``.
    """
    # load an object detection model pre-trained on COCO
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    # Lighter alternative that was tried:
    #     torchvision.models.detection.fasterrcnn_mobilenet_v3_large_320_fpn(pretrained=True)

    # The caller's num_classes is honoured; it used to be overwritten with a
    # hard-coded 2 here, which made the parameter a no-op.
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    return model

In [None]:
import utils
import transforms as T
 
def get_transform(train):
    """Compose the sample transforms: tensor conversion, plus a random
    horizontal flip (probability 0.5) when ``train`` is truthy."""
    steps = [T.ToTensor()]
    if train:
        steps.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(steps)

In [None]:
import os

root = r'../input/evraz-data/data_task2/'

# Two views over the same files: one with training-time augmentation, one without.
dataset = MarkDataset(root, get_transform(train=True))
dataset_test = MarkDataset(root, get_transform(train=False))

# Seeded shuffle of the sample indices; the last 50 become the validation split.
torch.manual_seed(42)
indices = torch.randperm(len(dataset)).tolist()
train_indices, valid_indices = indices[:-50], indices[-50:]
dataset = torch.utils.data.Subset(dataset, train_indices)
dataset_valid = torch.utils.data.Subset(dataset_test, valid_indices)

# Options shared by both loaders.
loader_options = dict(num_workers=2, collate_fn=utils.collate_fn)

data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=4, shuffle=True, **loader_options)

data_loader_valid = torch.utils.data.DataLoader(
    dataset_valid, batch_size=2, shuffle=False, **loader_options)

In [None]:
import torch
# Ask PyTorch to release GPU memory it has cached but is not currently using,
# before the model is created in the next cell.
torch.cuda.empty_cache()

In [None]:
# Prefer the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Two output classes: label 0 is background, label 1 is the single annotated object class.
num_classes = 2

model = get_object_detection_model(num_classes)
model.to(device)

# Hand only the parameters that require gradients to the optimizer.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(
    params,
    lr=0.0005,
    momentum=0.9,
    weight_decay=0.0005,
)

# Scale the learning rate by 0.1 after every 5 scheduler steps.
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=5,
    gamma=0.1,
)

In [None]:
# Number of full passes over the training loader.
num_epochs = 2
import math
for epoch in range(num_epochs):
    # One optimisation pass over the training loader, logging every 50 iterations.
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=50)
    
    # Advance the StepLR schedule once per epoch.
    lr_scheduler.step()
    
    # Score the model on the held-out validation loader after each epoch.
    evaluate(model, data_loader_valid, device=device)

In [None]:
# Save the trained detector; the file name carries the epoch count.
# NOTE(review): this serialises the whole module object rather than its
# state_dict, so loading it back presumably needs the same torchvision class
# definitions to be importable - confirm before reusing the file elsewhere.
torch.save(model, f'model_epoch_{num_epochs}.pkl')

# Predict

In [None]:
class TestDataset(torch.utils.data.Dataset):
    """Inference dataset over the images in ``<root>/test/images``.

    Args:
        root: dataset root directory.
        transforms: optional callable applied to the RGB PIL image; when it
            is ``None`` the PIL image is returned unchanged.
    """

    def __init__(self, root, transforms=None):
        self.root = root
        self.transforms = transforms
        # Sorted so that the index -> file name mapping is deterministic.
        self.imgs = sorted(os.listdir(os.path.join(root, "test/images")))

    def __getitem__(self, idx):
        """Return ``(img, file_name)`` for the image at position ``idx``."""
        image_idx = self.imgs[idx]
        img_path = os.path.join(self.root, "test/images", image_idx)
        img = Image.open(img_path).convert("RGB")

        # transforms defaults to None, so it must not be called unconditionally.
        if self.transforms is not None:
            img = self.transforms(img)
        return img, image_idx

    def __len__(self):
        """Number of images found in the test image directory."""
        return len(self.imgs)

In [None]:
trans = transforms.Compose([transforms.ToTensor()])
root = r'../input/evraz-data/data_task2/'
test_dataset = TestDataset(root, transforms=trans)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=2, shuffle=False)

# Single confidence cut-off applied to boxes AND scores. Boxes used to be cut at
# a hard-coded 0.9 while scores were cut at 0.5, so the two arrays could differ
# in length. 0.9 is kept because the boxes are what the submission consumes.
detection_threshold = 0.9

# Same CPU fallback as the training cells instead of a hard-coded 'cuda'.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

results = []   # one dict per image, in loader order
values = {}    # file name -> {'boxes': ..., 'score': ...}

# Do not rely on evaluate() having left the model in eval mode.
model.to(device)
model.eval()

with torch.no_grad():
    for images, image_ids in test_loader:
        images = [image.to(device) for image in images]
        outputs = model(images)

        for image_id, output in zip(image_ids, outputs):
            boxes = output['boxes'].cpu().numpy()
            scores = output['scores'].cpu().numpy()

            keep = scores >= detection_threshold
            boxes = boxes[keep].astype(np.int32)
            scores = scores[keep]

            # Convert the box format from [xmin, ymin, xmax, ymax] to [xmin, ymin, w, h].
            boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
            boxes[:, 3] = boxes[:, 3] - boxes[:, 1]

            values[image_id] = {
                'boxes': boxes,
                'score': scores,
            }

            result = {
                'image_id': image_id,
                'boxes': boxes,
                'score': scores,
            }
            results.append(result)


In [None]:
def sort_bboxes(x):
    """Sort box rows lexicographically by their first four columns.

    Args:
        x: 2-D array-like of boxes, one row per box.

    Returns:
        A new array with the rows ordered by column 0, then 1, 2 and 3, and
        with the input's dtype. Inputs that are not 2-D or have fewer than
        four columns are returned unsorted. An empty ``(0, k)`` input keeps
        its shape and dtype instead of collapsing to a 1-D float array.
    """
    x = np.asarray(x)
    if x.ndim != 2 or x.shape[1] <= 3:
        return x
    # np.lexsort treats the LAST key as the primary one, hence the reversed order.
    order = np.lexsort((x[:, 3], x[:, 2], x[:, 1], x[:, 0]))
    return x[order]

In [None]:
import json

# Load the sample submission; its structure is kept and only the 'bbox'
# values of existing annotations are replaced.
with open('../input/samplesubmit/submission_example.json') as example_file:
    example = json.load(example_file)

# Group the annotation dicts by image id once, instead of rescanning the whole
# annotation list for every image.
annotations_by_image = {}
for annotation in example['annotations']:
    annotations_by_image.setdefault(annotation['image_id'], []).append(annotation)

for image_info in example['images']:
    # id in images == image_id in annotations
    file_name = image_info['file_name']
    image_id = image_info['id']
    box = sort_bboxes(values[file_name]['boxes'])

    # Give the k-th sorted prediction to the k-th annotation of this image.
    # Every annotation used to be overwritten with each box in turn, so all of
    # them ended up holding the last box. Annotations beyond the number of
    # predictions keep their sample value; surplus predictions are dropped.
    # NOTE(review): confirm this pairing matches the competition's scoring rules.
    for annotation, predicted_box in zip(annotations_by_image.get(image_id, []), box):
        annotation['bbox'] = list(map(int, predicted_box))

with open('submit10.json', 'w') as submission_file:
    json.dump(example, submission_file)