In [1]:
import os
import random
import math

import numpy as np
import pandas as pd
from PIL import Image, ImageDraw


import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler

import torchvision
from torchvision import datasets, models, transforms
from torchvision.models.detection.retinanet import RetinaNet
from torchvision.models.detection.faster_rcnn import FasterRCNN
import  torchvision.transforms.functional as F
from torchvision.models.detection.anchor_utils import AnchorGenerator


from typing import Any, Callable, Dict, List, Optional, Tuple, Union
import xml.etree.ElementTree as ET
import collections
from torchvision.datasets.voc import VisionDataset

from functions import *
from functions_torch import *

In [2]:
# Shared settings. 'target_size' is handed to get_transform(...) and its two
# components are also reused as the detector's min_size / max_size.
params = {'target_size': (2000, 1500)}

# Dataset roots: the combined training set followed by the per-season LADD subsets.
DSRoots = [
    '../../ladd-and-weights/dataset/' + subdir
    for subdir in (
        'full_train_ds',
        'LADD/spring_korolev_2019',
        'LADD/summer_moscow_2019',
        'LADD/summer_tambov_2019',
        'LADD/winter_moscow_2018',
    )
]

In [3]:
# Anchor configuration: for each base size x in [16, 32, 64, 128, 256] use three
# sizes (x, x*2^(1/3), x*2^(2/3), truncated to int) and four aspect ratios.
anchor_sizes = tuple((x, int(x * 2 ** (1.0 / 3)), int(x * 2 ** (2.0 / 3))) for x in [16, 32, 64, 128, 256])
aspect_ratios = ((0.5, 1.0, 2.0, 3.0),) * len(anchor_sizes)
anchor_generator = AnchorGenerator(
    anchor_sizes, aspect_ratios
)

# NOTE(review): with target_size == (2000, 1500) this passes min_size=2000 and
# max_size=1500, i.e. min_size > max_size, which looks swapped. The values are
# left untouched because the checkpoint below was presumably trained with this
# exact configuration -- confirm against the training notebook before changing.
model = torchvision.models.detection.retinanet_resnet50_fpn(
    pretrained=False,
    num_classes=2,
    pretrained_backbone=False,
    min_size=params['target_size'][0],
    max_size=params['target_size'][1],
    anchor_generator=anchor_generator,
)

# Deserialize the checkpoint onto the CPU first: without map_location, torch.load
# tries to restore every tensor onto the device it was saved from, which fails
# (or grabs the wrong GPU) when that device does not exist on this machine.
# The weights are moved to the GPU together with the model right below.
state_dict = torch.load('resnet50_RN_LADD_epoch_18.pth', map_location='cpu')
model.load_state_dict(state_dict, strict=True)
model = model.to(torch.device('cuda'))
# Switch to inference mode; as the last expression this also displays the model.
model.eval()


RetinaNet(
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=1e-05)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=1e-05)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=1e-05)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=1e-05)
          (relu): ReLU(inplace=True)
          (downsample): Sequential(
            (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (1): FrozenBatchNorm2d(256, e

In [4]:
# model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False, num_classes=2, pretrained_backbone=True, 
#                                                            min_size=params['target_size'][0], max_size = params['target_size'][1],
#                                                            trainable_backbone_layers = 5)
# model.load_state_dict(torch.load('/app/host/lacmus/weights/resnet50_FRCNN_LADD_epoch_9.pth'), strict=True)
# model = model.to(torch.device('cuda'))

In [5]:
class LADDDataSET(torchvision.datasets.VisionDataset):
    """Single-class detection dataset stored in a Pascal-VOC style layout.

    ``root`` is expected to contain ``JPEGImages/<name>.jpg``,
    ``Annotations/<name>.xml`` and ``ImageSets/Main/<image_set>.txt``; the
    split file lists one sample name per line.

    Args:
        root: Dataset directory.
        image_set: Name of the split file without the ``.txt`` extension.
        transforms: Optional callable invoked as ``transforms(img, target)``
            that returns the transformed ``(img, target)`` pair.

    Raises:
        RuntimeError: If ``root`` is not an existing directory.
    """

    def __init__(
            self,
            root: str,
            image_set: str,
            transforms: Optional[Callable] = None):
        super(LADDDataSET, self).__init__(root, transforms=transforms)
        self.image_set = image_set

        voc_root = root
        if not os.path.isdir(voc_root):
            raise RuntimeError('Dataset not found or corrupted.')

        image_dir = os.path.join(voc_root, 'JPEGImages')
        annotation_dir = os.path.join(voc_root, 'Annotations')
        splits_dir = os.path.join(voc_root, 'ImageSets/Main')
        split_f = os.path.join(splits_dir, image_set.rstrip('\n') + '.txt')

        with open(split_f, "r") as f:
            # Skip blank lines (e.g. an extra empty line at the end of the
            # split file) so they do not become bogus ".jpg"/".xml" paths.
            file_names = [name for name in (line.strip() for line in f) if name]

        self.images = [os.path.join(image_dir, x + ".jpg") for x in file_names]
        self.annotations = [os.path.join(annotation_dir, x + ".xml") for x in file_names]
        assert (len(self.images) == len(self.annotations))

    def __getitem__(self, index: int) -> Tuple[Any, Any]:
        """
        Args:
            index (int): Index

        Returns:
            tuple: (image, target) where target is a dict with
                ``"boxes"``: float32 tensor of shape (N, 4) holding
                ``[xmin, ymin, xmax, ymax]`` per annotated object, and
                ``"labels"``: int64 tensor of N ones (single class).
                If ``transforms`` was given, both are passed through it.
        """
        img = Image.open(self.images[index]).convert('RGB')
        description = LADDDataSET.parse_voc_xml(
            ET.parse(self.annotations[index]).getroot())

        # parse_voc_xml always yields a list under 'object' for an
        # <annotation> root (empty when the image has no objects).
        objects = description['annotation']['object']
        num_objs = len(objects)

        # float() accepts both "12" and "12.0"; for integer strings the
        # resulting float32 values are identical to parsing with int().
        boxes = [
            [float(obj['bndbox'][key]) for key in ('xmin', 'ymin', 'xmax', 'ymax')]
            for obj in objects
        ]

        target = {}
        # reshape(-1, 4) keeps the (N, 4) layout even when there are no
        # objects; a bare as_tensor([]) would have shape (0,) instead.
        target["boxes"] = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        target["labels"] = torch.ones((num_objs,), dtype=torch.int64)  # there is only one class

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self) -> int:
        """Number of samples listed in the split file."""
        return len(self.images)

    @staticmethod
    def parse_voc_xml(node: ET.Element) -> Dict[str, Any]:
        """Recursively convert an XML element into nested dictionaries.

        A leaf element with text becomes ``{tag: stripped_text}``. An element
        with children becomes ``{tag: {child_tag: value, ...}}`` where repeated
        child tags are collected into a list. For an ``annotation`` element the
        ``object`` entry is always a list, even with zero or one object.

        Args:
            node: Element to convert.

        Returns:
            Dictionary representation of ``node`` (empty for a childless
            element without text).
        """
        voc_dict: Dict[str, Any] = {}
        children = list(node)
        if children:
            def_dic: Dict[str, Any] = collections.defaultdict(list)
            for dc in map(LADDDataSET.parse_voc_xml, children):
                for ind, v in dc.items():
                    def_dic[ind].append(v)
            if node.tag == 'annotation':
                # Wrap once more so the single-element unwrapping below leaves
                # 'object' as a list regardless of how many objects exist.
                def_dic['object'] = [def_dic['object']]
            voc_dict = {
                node.tag:
                    {ind: v[0] if len(v) == 1 else v
                     for ind, v in def_dic.items()}
            }
        if node.text:
            text = node.text.strip()
            if not children:
                voc_dict[node.tag] = text
        return voc_dict

In [None]:
# for ds in DSRoots:
#     dataset = LADDDataSET(ds,'test',get_transform(train=False,target_size=params['target_size'])) 
#     data_loader = torch.utils.data.DataLoader(
#         dataset, batch_size=1, shuffle=False, num_workers=1,collate_fn=collate_fn)
#     inference_res = evaluate(model,data_loader)
#     print("Inference for %s, computing mAp : "%ds)
#     print(evaluate_res(inference_res, iou_threshold = 0.5, score_threshold = 0.05))    
#     print(evaluate_res(inference_res, iou_threshold = 0.6, score_threshold = 0.05))
    


In [6]:
# Run the detector over the 'test' split of the dataset root at index 4 of DSRoots.
dataset = LADDDataSET(
    root=DSRoots[4],
    image_set='test',
    transforms=get_transform(train=False, target_size=params['target_size']),
)
data_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=1,
    shuffle=False,
    num_workers=1,
    collate_fn=collate_fn,
)

# Collect the per-image results consumed by evaluate_res(...) further down.
inference_res = evaluate(model, data_loader)


In [7]:
# Inspect the raw evaluation output. Judging by the printed value, each entry
# pairs a tuple of target dicts ('boxes', 'labels') with a list of prediction
# dicts ('boxes', 'scores', 'labels') -- presumably ground truth vs. model
# output; TODO confirm against `evaluate` in the star-imported helper modules.
inference_res

[(({'boxes': tensor([[1321.8750,  274.6667, 1348.8750,  312.6667]]),
    'labels': tensor([1])},),
  [{'boxes': tensor([[1323.5446,  275.2711, 1350.2203,  315.9069],
            [ 119.2671, 1227.5459,  171.1528, 1277.9830],
            [ 240.5078, 1147.3656,  270.7879, 1205.0126],
            [ 140.9097, 1231.9814,  173.9967, 1285.4724]]),
    'scores': tensor([0.9568, 0.3488, 0.1418, 0.0771]),
    'labels': tensor([1, 1, 1, 1])}]),
 (({'boxes': tensor([[ 798.7500, 1357.3334,  837.7500, 1396.6666],
            [ 829.1250,  777.3333,  873.3750,  818.0000],
            [ 886.5000,   34.6667,  916.5000,   77.3333]]),
    'labels': tensor([1, 1, 1])},),
  [{'boxes': tensor([[ 796.5808, 1354.9255,  836.5349, 1395.8972],
            [ 828.8597,  777.9426,  872.5923,  819.8660],
            [ 887.7839,   37.1435,  914.8092,   76.3933]]),
    'scores': tensor([0.9452, 0.8701, 0.8424]),
    'labels': tensor([1, 1, 1])}]),
 (({'boxes': tensor([[ 907.1250, 1071.3334,  939.3750, 1108.6666]]),
    

In [8]:
# Summarise detection quality from the collected results. The meaning of
# iou_threshold / score_threshold and of the two returned numbers is defined by
# evaluate_res in the star-imported helper modules -- TODO confirm there.
evaluate_res(inference_res, iou_threshold = 0.5, score_threshold = 0.05)

(0.9660113110583429, 0.6716417910447762)

In [None]:
# Load one standalone image from disk and convert it to 3-channel RGB for a
# single-image inference run.
img = Image.open('../../ladd-and-weights/dataset/MAX_0147.JPG').convert('RGB')

In [None]:
# Preview the loaded image before any transform is applied.
img.show()

In [None]:
# Apply the evaluation-time transform to the single image. The transform is
# called with (image, target), so a dummy target with an empty 'boxes' array is
# supplied; element 0 of the result is the transformed image used below.
transformed = get_transform(train=False,target_size=params['target_size'])(img,{'boxes':np.array([])})

In [None]:
# The model is called with a list of images, so wrap the single transformed
# image in a list and move it to the GPU.
# (Batch equivalent: list(img.to(device) for img in images))
at_device = [transformed[0].to('cuda')]

In [None]:
# Pure inference: disable autograd so no computation graph (and its activation
# memory) is kept for this large input, and the returned tensors are detached.
with torch.no_grad():
    outputs = model(at_device)

In [None]:
# Show the raw detections; outputs[0] carries the 'boxes' and 'scores' entries
# that the drawing cell below reads for the single input image.
outputs

In [None]:
# Render the detections on top of the transformed image.
im = F.to_pil_image(transformed[0])
draw = ImageDraw.Draw(im)

detections = outputs[0]
# Scale the scores on the tensor first and only then convert to Python floats,
# so the line widths come from the same float32 arithmetic.
scaled_scores = (detections['scores'] * 10).tolist()
for (x_min, y_min, x_max, y_max), scaled in zip(detections['boxes'].tolist(), scaled_scores):
    # Closed rectangle outline; higher-scoring detections get thicker lines.
    corners = [(x_min, y_min), (x_min, y_max), (x_max, y_max),
               (x_max, y_min), (x_min, y_min)]
    draw.line(corners, width=math.ceil(scaled), fill=(255, 0, 0))


im.show()