In [1]:
%load_ext autoreload
%autoreload 2    
%matplotlib inline

In [12]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
import os
import numpy as np
import glob
from PIL import Image, ExifTags

# pip install torchsummary
from tqdm.notebook import tqdm
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms
import torchvision.transforms.functional as fn
from torchvision import models
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from time import time
import pandas as pd
import random
import itertools

import matplotlib.pyplot as plt
import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2
import csv

from matplotlib.patches import Polygon, Rectangle
import pylab
pylab.rcParams['figure.figsize'] = (6,6)

# Own imports 
from config import * 
from utils import *
from data_loader import TacoDataset
from eval import *

# speed-up using multithreads
cv2.setUseOptimized(True);
cv2.setNumThreads(8);

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
img_size = 512

train_transform = A.Compose([
    A.Resize(img_size, img_size),
    A.HorizontalFlip(p=0.5),
    A.Rotate(limit=30, p=0.7),
    A.RandomBrightnessContrast(p=0.2),
    A.RGBShift(10, 10, 10, p=0.3),
    A.GaussNoise(p=0.5),
    A.Normalize(), # If you want to visualize - comment this line 
    ToTensorV2(),
], bbox_params=A.BboxParams(format='coco',
                            label_fields=['labels'],
                            min_visibility=0.3, # min visibility of the original area in case of a crop
                           )
)

test_transform = A.Compose([
    A.Resize(img_size, img_size),
    A.Normalize(),
    ToTensorV2(),
], bbox_params=A.BboxParams(format='coco',
                            label_fields=['labels'],
                           )
)

In [4]:
BATCH_SIZE = 16

In [5]:
trainset = TacoDataset( 'train', transforms=train_transform, test_size=0.2) # test_transform for no augment
valset   = TacoDataset('val', transforms=test_transform, test_size=0.2)
testset  = TacoDataset('test', transforms=test_transform, test_size=0.2)

train_loader = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=12, collate_fn=lambda x: x)# persistent_workers=True, pin_memory=True)
val_loader = DataLoader(valset, batch_size=1, shuffle=False, num_workers=12, collate_fn=lambda x: x)# persistent_workers=True, pin_memory=True)
test_loader = DataLoader(testset, batch_size=1, shuffle=False, num_workers=12, collate_fn=lambda x: x)# persistent_workers=True, pin_memory=True)

In [6]:
loss_function = nn.BCELoss()

model = models.resnet34(weights=models.ResNet34_Weights.IMAGENET1K_V1)
model = transfer_model_set(model, freeze_convs=False)
optimizer = torch.optim.Adam(model.parameters(), HEAD_LEARNING_RATE)

lr_scheduler = None

Downloading: "https://download.pytorch.org/models/resnet34-b627a593.pth" to /zhome/dc/f/181253/.cache/torch/hub/checkpoints/resnet34-b627a593.pth
100%|███████████████████████████████████████████████████████████████████████████████| 83.3M/83.3M [00:00<00:00, 177MB/s]


In [10]:
NUM_EPOCHS = 3
EDGE_BOXES_IMG_SIZE_IN = img_size
EDGE_BOXES_IMG_SIZE_OUT = 150

In [9]:
out_dict = {
    'train_loss': [],
    'train_acc': [],
    'test_map': [],
    'test_map50': [],
    'test_map_small': [],
    'test_map_large': []
}

for epoch in range(NUM_EPOCHS):
    
    model.train()
    print(f"EPOCH {epoch}/{NUM_EPOCHS}")

    train_loss = []
    train_correct = 0
    train_len = 0
    
    for minibatch_no, batch in tqdm(enumerate(train_loader), total=len(train_loader)):
        
        images = [image for image, _, _ in batch]
        bboxes = [bbox for _, bbox, _ in batch]
        labels = [label for _, _, label in batch]
        
        # Selective search
                
        # Edge boxes
        cropped_images_all, proposals_all, predictions_all = edge_boxes_train(images, bboxes, img_size = EDGE_BOXES_IMG_SIZE_OUT)         
        if len(cropped_images_all) ==0:
            print('no boxes detected')
            continue 
        data, target = torch.stack(cropped_images_all).to(device), torch.FloatTensor(predictions_all).to(device)
        
        # CNN
        optimizer.zero_grad()
        output = model(data)[:,0]
        loss = loss_function(output, target)
        loss.backward()
        optimizer.step()

        train_loss.append(loss.item())
        predicted = output > 0.5
        correct_in_the_item = (target==predicted).sum().cpu().item()
        train_correct += correct_in_the_item
        train_len += data.shape[0]
        print(f'train_loss: {loss:.5f}        Accuracy {correct_in_the_item / len(target):.3f}')

        # for now we break
        
    # Test evaluation
    model.eval()
    for batch in tqdm(val_loader, total=len(val_loader)): # Keep loader at 1 
        test_images, test_bboxes, test_labels = batch[0]
        
        # Selective search

        test_cropped_images_all, test_proposals_all = edge_boxes_test(test_images, test_bboxes, img_size = EDGE_BOXES_IMG_SIZE_OUT) 
        test_data = torch.stack(test_cropped_images_all).to(device)
        test_proposals_all = test_proposals_all[0]
        
        with torch.no_grad():
            outputs = model(test_data)[:,0]
        
        bboxes_xywh = torch.stack(test_proposals_all).to(device).to(float)
        bboxes_xyxy = copy.deepcopy(bboxes_xywh)
        bboxes_xyxy[:, 2] = bboxes_xyxy[:, 2] + bboxes_xyxy[:, 0]
        bboxes_xyxy[:, 3] = bboxes_xyxy[:, 3] + bboxes_xyxy[:, 1]
        
        
        bboxes_indices = torchvision.ops.nms(bboxes_xyxy, outputs, iou_threshold=0.1)
        
        final_bboxes = bboxes_xywh[bboxes_indices]
        
        outputs = outputs[bboxes_indices]
        
        # Reshaping
        outputs = outputs.tolist()
        
        pred = [dict(
            boxes=final_bboxes,
            scores=torch.FloatTensor(outputs).to(device),
            labels=torch.FloatTensor(np.ones(len(outputs))).to(device) # Simplification for Binary
        )]
        
        target = [dict(
            boxes=torch.FloatTensor(test_bboxes).to(device),
            labels=torch.FloatTensor(test_labels).to(device)
        ) ]
                  
        # Computing mAP
        metric = MeanAveragePrecision(box_format='xywh')
        metric.update(pred, target)
    maps = metric.compute()
    
    out_dict['train_loss'].append(np.mean(train_loss))
    out_dict['train_acc'].append(train_correct/train_len)
    out_dict['test_map'].append(float(maps["map"].detach().cpu()))
    out_dict['test_map50'].append(float(maps["map_50"].detach().cpu()))
    out_dict['test_map_small'].append(float(maps["map_small"].detach().cpu()))
    out_dict['test_map_large'].append(float(maps["map_large"].detach().cpu()))
    
    print(f'MAP: {float(maps["map"].detach().cpu()):.3f}          MAP@50: {float(maps["map_50"].detach().cpu()):.3f}          MAP_small: {float(maps["map_small"].detach().cpu()):.3f}          MAP_large: {float(maps["map_large"].detach().cpu()):.3f}')
    

EPOCH 0/3


  0%|          | 0/57 [00:00<?, ?it/s]

  data, target = torch.stack(cropped_images_all).to(device), torch.FloatTensor(predictions_all).to(device)


train_loss: 0.28786        Accuracy 0.893
train_loss: 0.34695        Accuracy 0.857
train_loss: 0.48315        Accuracy 0.786
train_loss: 0.42447        Accuracy 0.821
train_loss: 0.37799        Accuracy 0.857
train_loss: 0.32364        Accuracy 0.860
train_loss: 0.36827        Accuracy 0.867
train_loss: 0.28735        Accuracy 0.907
train_loss: 0.29295        Accuracy 0.906
train_loss: 0.20849        Accuracy 0.923
train_loss: 0.24810        Accuracy 0.927
train_loss: 0.28536        Accuracy 0.900
train_loss: 0.30670        Accuracy 0.889
train_loss: 0.26814        Accuracy 0.894
train_loss: 0.21246        Accuracy 0.933
train_loss: 0.32941        Accuracy 0.844
train_loss: 0.27817        Accuracy 0.883
train_loss: 0.30539        Accuracy 0.858
train_loss: 0.17382        Accuracy 0.951
train_loss: 0.27983        Accuracy 0.882
train_loss: 0.27821        Accuracy 0.882
train_loss: 0.29764        Accuracy 0.907
train_loss: 0.32002        Accuracy 0.859
train_loss: 0.25825        Accurac

  0%|          | 0/300 [00:00<?, ?it/s]

MAP: 0.077          MAP@50: 0.257          MAP_small: 0.000          MAP_large: -1.000
EPOCH 1/3


  0%|          | 0/57 [00:00<?, ?it/s]

train_loss: 0.13698        Accuracy 0.948
train_loss: 0.42969        Accuracy 0.826
train_loss: 0.37019        Accuracy 0.852
train_loss: 0.39819        Accuracy 0.838
train_loss: 0.36805        Accuracy 0.849
train_loss: 0.31596        Accuracy 0.872
train_loss: 0.32378        Accuracy 0.845
train_loss: 0.35034        Accuracy 0.838
train_loss: 0.32885        Accuracy 0.857
train_loss: 0.28346        Accuracy 0.890
train_loss: 0.25062        Accuracy 0.906
train_loss: 0.27295        Accuracy 0.898
train_loss: 0.17781        Accuracy 0.954
train_loss: 0.28077        Accuracy 0.848
train_loss: 0.31771        Accuracy 0.839
train_loss: 0.30871        Accuracy 0.867
train_loss: 0.20269        Accuracy 0.934
train_loss: 0.19856        Accuracy 0.931
train_loss: 0.27359        Accuracy 0.894
train_loss: 0.19687        Accuracy 0.936
train_loss: 0.27734        Accuracy 0.878
train_loss: 0.38679        Accuracy 0.856
train_loss: 0.28276        Accuracy 0.899
train_loss: 0.18782        Accurac

  0%|          | 0/300 [00:00<?, ?it/s]

MAP: 0.000          MAP@50: 0.000          MAP_small: 0.000          MAP_large: -1.000
EPOCH 2/3


  0%|          | 0/57 [00:00<?, ?it/s]

train_loss: 0.22599        Accuracy 0.901
train_loss: 0.28698        Accuracy 0.888
train_loss: 0.34447        Accuracy 0.869
train_loss: 0.22959        Accuracy 0.897
train_loss: 0.26103        Accuracy 0.913
train_loss: 0.29715        Accuracy 0.907
train_loss: 0.23706        Accuracy 0.924
train_loss: 0.30658        Accuracy 0.878
train_loss: 0.29810        Accuracy 0.844
train_loss: 0.29346        Accuracy 0.869
train_loss: 0.25389        Accuracy 0.892
train_loss: 0.34531        Accuracy 0.880
train_loss: 0.17595        Accuracy 0.955
train_loss: 0.27188        Accuracy 0.867
train_loss: 0.27874        Accuracy 0.886
train_loss: 0.26888        Accuracy 0.863
train_loss: 0.21709        Accuracy 0.931
train_loss: 0.27277        Accuracy 0.880
train_loss: 0.26280        Accuracy 0.895
train_loss: 0.24192        Accuracy 0.896
train_loss: 0.29892        Accuracy 0.849
train_loss: 0.25396        Accuracy 0.907
train_loss: 0.31803        Accuracy 0.840
train_loss: 0.21152        Accurac

  0%|          | 0/300 [00:00<?, ?it/s]

MAP: 0.077          MAP@50: 0.257          MAP_small: 0.000          MAP_large: -1.000


In [13]:
with open(f'logs/resnet34_{NUM_EPOCHS}_epochs_img_size_{EDGE_BOXES_IMG_SIZE_IN}_in_{EDGE_BOXES_IMG_SIZE_OUT}_out', 'w') as csvFile:
        writer = csv.writer(csvFile)
        writer.writerow(out_dict.keys())
        writer.writerows(zip(*out_dict.values()))

In [None]:
# fig,ax = plt.subplots(1)
# img = test_images.detach().cpu()

# img = denormalize(img)

# plt.imshow(img.detach().cpu().permute(1,2,0))
# plt.xticks([])
# plt.yticks([])

# # Show annotations
# for i, ann in enumerate(final_bboxes.cpu().numpy()):
    
#     [x, y, x1, y1] = ann
#     if predictions_all[i]:
#         edge_col = 'green'
#     else:
#         edge_col = 'red'
#     rect = Rectangle((x,y),x1,y1,linewidth=2,edgecolor=edge_col,
#                      facecolor='none', alpha=0.7)
#     ax.add_patch(rect)
# #plt.savefig(f'imgs/output_{img_idx}.png')
# plt.show()

In [None]:
# images = [image for image, _, _ in batch]
# bboxes = [bbox for _, bbox, _ in batch]
# labels = [label for _, _, label in batch]

In [None]:
# k = 3

# cropped_images_all, proposals_all, predictions_all = edge_boxes_train([images[k]], [bboxes[k]])       
# img = images[k]##.cpu().numpy()

# #plt.imshow()

In [None]:
# def denormalize(img):
#     mean=[0.485, 0.456, 0.406]
#     std=[0.229, 0.224, 0.225]
    
#     denormalize = transforms.Normalize(mean=[-0.485, -0.456, -0.406], 
#                          std=[1/0.229, 1/0.224, 1/0.225])

#     denorm_image = denormalize(img)
#     x = ((denorm_image - denorm_image.min())/(denorm_image - denorm_image.min()).max() * 255).to(torch.int64)
#     return x