In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname,_,filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(filename)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import torch
import torchvision 
from torchvision import datasets, transforms, models
from torch import optim as optim
import matplotlib.pyplot as plt
from PIL import Image
import cv2
import matplotlib
import matplotlib.patches as patches
import glob
import xml.etree.ElementTree as ET
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

In [None]:
# Index the image tree with torchvision's ImageFolder (one sub-directory per class).
dataset = datasets.ImageFolder(
    root="../input/stanford-dogs-dataset/images/Images/",
)

In [None]:
# Alphabetically ordered entries of the image root; indexed by class index elsewhere in the notebook.
label = sorted(os.listdir("../input/stanford-dogs-dataset/images/Images/"))

In [None]:
# Flat, index-aligned lists of annotation / image file names (filled by the next cell).
list_annotations, list_images = [], []

In [None]:
# Rebuild both lists from scratch so that re-running this cell does not append
# every file name a second time (the lists are first created in another cell).
list_annotations = []
list_images = []
for breed_dir in label:
    breed_annotations = sorted(os.listdir(os.path.join("../input/stanford-dogs-dataset/annotations/Annotation", breed_dir)))
    breed_images = sorted(os.listdir(os.path.join("../input/stanford-dogs-dataset/images/Images", breed_dir)))
    # Dogs.__getitem__ pairs list_images[idx] with list_annotations[idx]; a
    # count mismatch in any directory would silently shift every later pair.
    if len(breed_annotations) != len(breed_images):
        raise ValueError(
            f"{breed_dir}: {len(breed_images)} images but {len(breed_annotations)} annotations"
        )
    list_annotations += breed_annotations
    list_images += breed_images

In [None]:
class Dogs(torch.utils.data.Dataset):
    """Single-class detection dataset: one image plus one bounding box per item.

    Items are ``(image, target)`` pairs where ``target`` is a dict with the keys
    ``boxes``, ``labels``, ``image_id``, ``area`` and ``iscrowd``.

    File names come from the notebook-level lists ``label``, ``list_images`` and
    ``list_annotations``; they must be index-aligned with ``dataset``.
    """

    def __init__(self, dataset, transform):
        """Store the wrapped dataset and the optional augmentation pipeline.

        Args:
            dataset: Indexable dataset yielding ``(image, class_index)`` pairs;
                only the class index and the length are used.
            transform: Callable invoked as ``transform(image=..., bboxes=...,
                labels=...)`` that returns a mapping with ``'image'`` and
                ``'bboxes'``, or ``None`` to skip augmentation.
        """
        self.img_path = "../input/stanford-dogs-dataset/images/Images/"
        self.annotation_path = "../input/stanford-dogs-dataset/annotations/Annotation/"
        self.dataset = dataset
        self.transform = transform

    def _class_index(self, idx):
        """Return the class index of sample ``idx``, avoiding an image decode when possible."""
        targets = getattr(self.dataset, "targets", None)
        # Fast path: torchvision folder datasets expose the raw class indices.
        # Skipped when a target_transform is set, since that would change what
        # indexing the dataset returns.
        if targets is not None and getattr(self.dataset, "target_transform", None) is None:
            return int(targets[idx])
        # Fallback: indexing the dataset loads the image just to read its class.
        _, class_index = self.dataset[idx]
        return class_index

    def __getitem__(self, idx):
        """Load image ``idx`` and its box, apply the transform, and build the target dict.

        Returns:
            ``(image, target)``. ``image`` is an RGB float32 array scaled to
            [0, 1] when no transform is set, otherwise whatever the transform
            returns under ``'image'``.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        type_ = self._class_index(idx)
        breed_dir = label[type_]

        image_file = self.img_path + breed_dir + "/" + list_images[idx]
        image = cv2.imread(image_file, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {image_file}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.

        tree = ET.parse(self.annotation_path + breed_dir + "/" + list_annotations[idx])
        root = tree.getroot()
        # NOTE(review): positional lookup - presumably the first <object>'s
        # <bndbox> with xmin, ymin, xmax, ymax children; confirm against the
        # annotation files. Only this one box is read per image.
        boxes = [[int(coord.text) for coord in root[5][4]]]

        if self.transform is not None:
            # Plain lists (not tensors) are handed to the augmentation library.
            sample = self.transform(image=image, bboxes=boxes, labels=[1] * len(boxes))
            image = sample['image']
            boxes = sample['bboxes']

        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        if boxes.numel() == 0:
            # Keep an (N, 4) shape even if the transform dropped every box.
            boxes = boxes.reshape(0, 4)
        num_boxes = boxes.shape[0]

        target = {}
        target["boxes"] = boxes
        # Every box belongs to the single foreground class (index 1).
        target["labels"] = torch.ones((num_boxes,), dtype=torch.int64)
        target["image_id"] = torch.tensor([idx])
        # Computed after the transform so it matches the (possibly resized) boxes.
        target["area"] = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        target["iscrowd"] = torch.zeros((num_boxes,), dtype=torch.int64)

        return image, target

    def __len__(self):
        """Number of samples, taken from the wrapped dataset."""
        return len(self.dataset)

In [None]:
# Albumentations
def get_train_transform():
    """Build the training augmentation pipeline.

    Applies a random flip with probability 0.5, resizes to 256x256 and converts
    the image to a tensor. Boxes are in ``pascal_voc`` format and are kept in
    sync with the ``labels`` field.
    """
    return A.Compose([
        # The probability must be passed by keyword: the first positional
        # parameter of the transform is `always_apply`, so `A.Flip(0.5)` would
        # flip every image.
        A.Flip(p=0.5),
        A.Resize(256, 256),
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

def get_valid_transform():
    """Build the validation pipeline: tensor conversion only, no augmentation.

    Boxes are declared in ``pascal_voc`` format with ``labels`` as the label
    field.
    """
    return A.Compose([
        ToTensorV2(p=1.0)
        # 'pascal_voc' replaces the invalid format name 't', which the library rejects.
    ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})


In [None]:
# Wrap the image-folder dataset with the detection targets and training augmentations.
dog_dataset = Dogs(dataset=dataset, transform=get_train_transform())

In [None]:
# Fetch one (image, target) sample for a sanity check.
rs = dog_dataset[1000]

In [None]:
# Inspect the raw (image, target) pair fetched above.
print(rs)

In [None]:
# Show the sampled image with its bounding box and breed name.
sample_image, sample_target = rs
x_min, y_min, x_max, y_max = sample_target["boxes"][0].tolist()
# target["labels"] holds the constant detection class (1), not the breed, so
# the breed is looked up through the sample index stored in image_id.
breed_name = label[dataset.targets[int(sample_target["image_id"])]]

fig, ax = plt.subplots()
ax.imshow(sample_image.numpy().transpose(1, 2, 0))  # CHW tensor -> HWC array
rect = patches.Rectangle((x_min, y_min), x_max - x_min, y_max - y_min, linewidth=2, edgecolor="r", facecolor='none')
ax.add_patch(rect)
ax.text(x_min, y_min - 5, breed_name, color="r", fontsize=10);

In [None]:
def my_collate(batch):
    """Transpose a batch of per-sample tuples into a tuple of per-field tuples.

    A batch ``[(img0, tgt0), (img1, tgt1)]`` becomes
    ``((img0, img1), (tgt0, tgt1))``; nothing is stacked.
    """
    per_field = zip(*batch)
    return tuple(per_field)

In [None]:
# Shuffled loader; my_collate keeps images and targets as tuples instead of stacking them.
dt_loader = torch.utils.data.DataLoader(
    dog_dataset,
    batch_size=32,
    shuffle=True,
    collate_fn=my_collate,
)

In [None]:
# Draw a single batch and turn it into the list-of-tensors / list-of-dicts form.
itr = iter(dt_loader)
imgs, targets = next(itr)
images = list(imgs)
targets = [dict(t) for t in targets]

In [None]:
# Display the per-image target dicts of the batch drawn above.
targets

In [None]:
# Two output classes are configured for the replacement box predictor.
num_classes = 2

# Start from the pretrained detector and swap its box predictor for a fresh
# head sized for `num_classes`.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(
    in_features,
    num_classes,
)

In [None]:
# backbone = torchvision.models.mobilenet_v2(pretrained=True).features
# backbone.out_channels = 1280

# backbone

In [None]:
# anchor_generator = torchvision.models.detection.rpn.AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
#                                                                    aspect_ratios=((0.5, 1.0, 2.0),))
# roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0], output_size=7, sampling_ratio=2)
# model = models.detection.FasterRCNN(backbone, num_classes=2, rpn_anchor_generator=anchor_generator, box_roi_pool=roi_pooler)

In [None]:
# model = models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

In [None]:
def predict(image, model, device, detection_threshold, transform=None,
            class_names=("__background__", "dog")):
    """Run the detector on one image and keep detections above a score threshold.

    Args:
        image: Input image; anything ``transform`` accepts.
        model: Detection model that, given a batched image tensor, returns a
            list whose first element is a dict with ``'boxes'``, ``'labels'``
            and ``'scores'`` tensors.
        device: Device the image tensor is moved to. The model is not moved;
            it is expected to live on the same device already.
        detection_threshold: Minimum score (inclusive) for a detection to be kept.
        transform: Callable turning ``image`` into an image tensor. Defaults to
            ``torchvision.transforms.ToTensor()``.
        class_names: Sequence mapping a label index to a display name. The
            default matches the two-class head configured in this notebook.

    Returns:
        ``(boxes, pred_classes, labels)`` for the kept detections only:
        ``boxes`` is an int32 array of box coordinates, ``pred_classes`` the
        list of class names and ``labels`` the label tensor, all index-aligned.
    """
    if transform is None:
        transform = transforms.ToTensor()
    batch = transform(image).to(device).unsqueeze(0)

    # The model is switched to eval mode for the call, without gradient
    # tracking; the caller's train/eval mode is restored afterwards.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            detections = model(batch)[0]
    finally:
        model.train(was_training)

    # One mask for boxes, labels and names keeps the three outputs aligned.
    keep = detections['scores'] >= detection_threshold
    labels = detections['labels'][keep]
    boxes = detections['boxes'][keep].detach().cpu().numpy().astype(np.int32)
    pred_classes = [class_names[i] for i in labels.tolist()]

    return boxes, pred_classes, labels

In [None]:
def draw_bboxes(boxes, classes, labels, image, colors=None):
    """Plot ``image`` with one rectangle and caption per detection.

    Args:
        boxes: Iterable of ``(x_min, y_min, x_max, y_max)`` boxes.
        classes: Class names, index-aligned with ``boxes``.
        labels: Integer class labels, index-aligned with ``boxes``; used only
            to pick a colour.
        image: Anything ``Axes.imshow`` accepts.
        colors: Optional mapping or sequence from label index to a matplotlib
            colour. When omitted, the default colour cycle is used.
    """
    fig, ax = plt.subplots()
    ax.imshow(image)
    for i, box in enumerate(boxes):
        label_index = int(labels[i])
        if colors is not None:
            color = colors[label_index]
        else:
            # "C0".."C9" refer to matplotlib's default colour cycle.
            color = f"C{label_index % 10}"
        rect = patches.Rectangle((box[0], box[1]), (box[2]-box[0]), (box[3]-box[1]), linewidth=2, edgecolor=color, facecolor='none')
        ax.add_patch(rect)
        ax.text(box[0], box[1]-5, classes[i], color=color, fontsize=10)

In [None]:
class Averager:
    """Running arithmetic mean of the values passed to ``send``."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Discard everything accumulated so far."""
        self.current_total = 0.0
        self.iterations = 0.0

    def send(self, value):
        """Add one observation to the running total."""
        self.current_total += value
        self.iterations += 1

    @property
    def value(self):
        """Mean of the observations so far, or ``0`` when there are none."""
        if self.iterations:
            return self.current_total / self.iterations
        return 0


In [None]:
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# num_epochs = 2
# loss_hist = Averager()
# model = model.to(device)
# itr = 1
# from torch import optim

# params = [p for p in model.parameters() if p.requires_grad]
# optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
# for epoch in range(num_epochs):
#     for images, targets in dt_loader:
#         images = list(image.to(device) for image in images)
#         targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
#         loss_dict = model(images, targets)
#         losses = sum(loss for loss in loss_dict.values())
#         loss_value = losses.item()
#         loss_hist.send(loss_value)
#         optimizer.zero_grad()
#         losses.backward()
#         optimizer.step()
#         if itr % 50 == 0:
#             print(f"Iteration #{itr} loss: {loss_value}")
#         itr += 1
#     # update the learning rate
#     if lr_scheduler is not None:
#         lr_scheduler.step()
#     print(f"Epoch #{epoch} loss: {loss_hist.value}")