In [1]:
# Reload imported modules automatically before each cell runs, so edits to the
# project's own .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2    
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
import os
import numpy as np
import glob
from PIL import Image, ExifTags

# pip install torchsummary
from tqdm.notebook import tqdm
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms
import torchvision.transforms.functional as fn
from torchvision import models
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from time import time
import pandas as pd
import random
import itertools

import matplotlib.pyplot as plt
import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2

from matplotlib.patches import Polygon, Rectangle
import pylab
pylab.rcParams['figure.figsize'] = (6,6)

# Own imports 
from config import * 
from utils import *
from data_loader import TacoDataset
from eval import *

# Let OpenCV use its optimised code paths and a pool of 8 threads.
cv2.setUseOptimized(True)
cv2.setNumThreads(8)

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [3]:
# Side length (pixels) every image is resized to before augmentation.
img_size = 512

# Training pipeline: resize, random geometric/photometric augmentation,
# normalisation, then conversion to a tensor. Boxes are in COCO format and are
# transformed together with the image.
train_transform = A.Compose(
    [
        A.Resize(height=img_size, width=img_size),
        A.HorizontalFlip(p=0.5),
        A.Rotate(limit=30, p=0.7),
        A.RandomBrightnessContrast(p=0.2),
        A.RGBShift(r_shift_limit=10, g_shift_limit=10, b_shift_limit=10, p=0.3),
        A.GaussNoise(p=0.5),
        A.Normalize(),  # If you want to visualize - comment this line
        ToTensorV2(),
    ],
    bbox_params=A.BboxParams(
        format='coco',
        label_fields=['labels'],
        # min visibility of the original area in case of a crop
        min_visibility=0.3,
    ),
)

# Evaluation pipeline: deterministic resize + normalisation only.
test_transform = A.Compose(
    [
        A.Resize(height=img_size, width=img_size),
        A.Normalize(),
        ToTensorV2(),
    ],
    bbox_params=A.BboxParams(format='coco', label_fields=['labels']),
)

In [4]:
# Number of samples per training minibatch (passed to the training DataLoader).
BATCH_SIZE = 16

In [5]:
# One dataset per split, each built with test_size=0.2. Only the training split
# goes through the augmenting transform (swap in test_transform for no augment).
trainset = TacoDataset('train', transforms=train_transform, test_size=0.2)
valset = TacoDataset('val', transforms=test_transform, test_size=0.2)
testset = TacoDataset('test', transforms=test_transform, test_size=0.2)

# Options shared by all three loaders. The identity collate function hands each
# batch over as a plain list of samples instead of stacking them into tensors.
# Not enabled (kept from the original as a note): persistent_workers=True, pin_memory=True
_loader_opts = dict(num_workers=12, collate_fn=lambda x: x)

train_loader = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, **_loader_opts)
val_loader = DataLoader(valset, batch_size=1, shuffle=False, **_loader_opts)
test_loader = DataLoader(testset, batch_size=1, shuffle=False, **_loader_opts)

In [6]:
# Binary cross-entropy on probabilities: nn.BCELoss expects inputs in [0, 1].
# NOTE(review): this assumes `transfer_model_set` ends the network with a
# sigmoid - confirm in utils.
loss_function = nn.BCELoss()

# ImageNet-pretrained ResNet-34, adapted by the project helper for this task.
model = models.resnet34(weights=models.ResNet34_Weights.IMAGENET1K_V1)
model = transfer_model_set(model, freeze_convs=False)
# Every batch is moved to `device` before the forward pass, so the model has to
# live there too. This is a no-op if the helper already moved it.
model = model.to(device)

# Created after the move so the optimizer tracks the parameters on `device`.
optimizer = torch.optim.Adam(model.parameters(), lr=HEAD_LEARNING_RATE)

# No learning-rate schedule is used.
lr_scheduler = None

In [7]:
# Number of iterations of the outer training loop (`range(NUM_EPOCHS)`).
NUM_EPOCHS = 3

In [None]:
# Debug switch kept from the original notebook, which stopped every epoch right
# after its first optimisation step. Set to None to train on the full loader.
MAX_TRAIN_STEPS_PER_EPOCH = 1


def _boxes_to_tensor(boxes):
    """Return `boxes` as a float32 CPU tensor of shape (N, 4); N may be 0."""
    if torch.is_tensor(boxes):
        return boxes.detach().to(torch.float32).cpu().reshape(-1, 4)
    rows = [[float(value) for value in box] for box in boxes]
    return torch.tensor(rows, dtype=torch.float32).reshape(-1, 4)


def _xywh_to_xyxy(boxes_xywh):
    """Convert (x, y, w, h) rows to (x1, y1, x2, y2) without modifying the input."""
    boxes_xyxy = boxes_xywh.clone()
    boxes_xyxy[:, 2:] += boxes_xyxy[:, :2]
    return boxes_xyxy


for epoch in range(NUM_EPOCHS):
    # ------------------------------------------------------------------ train
    model.train()
    print(f"EPOCH {epoch}/{NUM_EPOCHS}")

    train_loss = []
    train_correct = 0
    train_len = 0

    for minibatch_no, batch in tqdm(enumerate(train_loader), total=len(train_loader)):
        # The loader uses an identity collate function, so a batch is a list of
        # (image, bboxes, labels) samples.
        images = [image for image, _, _ in batch]
        bboxes = [bbox for _, bbox, _ in batch]
        labels = [label for _, _, label in batch]

        # Edge boxes: proposal crops plus one binary target per crop.
        cropped_images_all, proposals_all, predictions_all = edge_boxes_train(images, bboxes)
        if len(cropped_images_all) == 0:
            print('no boxes detected')
            continue
        data = torch.stack(cropped_images_all).to(device)
        target = torch.FloatTensor(predictions_all).to(device)

        # CNN: one optimisation step on the crops of this minibatch.
        optimizer.zero_grad()
        output = model(data)[:, 0]
        loss = loss_function(output, target)
        loss.backward()
        optimizer.step()

        train_loss.append(loss.item())
        predicted = output > 0.5
        correct_in_the_item = (target == predicted).sum().cpu().item()
        train_correct += correct_in_the_item
        train_len += data.shape[0]
        print(f'train_loss: {loss.item():.5f}        Accuracy {correct_in_the_item / len(target):.3f}')

        # Counts successful steps only, so a skipped (box-less) batch does not
        # use up the debug budget - same behaviour as the original `break`.
        if MAX_TRAIN_STEPS_PER_EPOCH is not None and len(train_loss) >= MAX_TRAIN_STEPS_PER_EPOCH:
            break

    if train_len:
        print(f'epoch train_loss: {np.mean(train_loss):.5f}        epoch accuracy: {train_correct / train_len:.3f}')

    # ------------------------------------------------------------- validation
    model.eval()
    # One metric object per epoch so mAP is accumulated over the whole
    # validation set instead of being recomputed from a single image.
    # Boxes are handed over as xyxy (the default box format of torchmetrics'
    # MeanAveragePrecision - confirm if `eval` provides a different class).
    metric = MeanAveragePrecision()

    for batch in tqdm(val_loader, total=len(val_loader), desc='validation'):  # Keep loader at 1
        test_images, test_bboxes, test_labels = batch[0]

        test_cropped_images_all, test_proposals_all = edge_boxes_test(test_images, test_bboxes)

        if len(test_cropped_images_all) == 0:
            # No proposals: still register the image so its ground-truth boxes
            # count as missed detections.
            pred_boxes = torch.zeros((0, 4), dtype=torch.float32)
            pred_scores = torch.zeros((0,), dtype=torch.float32)
        else:
            test_data = torch.stack(test_cropped_images_all).to(device)
            with torch.no_grad():
                outputs = model(test_data)[:, 0]

            # Same dtype/device as the scores: torchvision's nms rejects mixed
            # dtypes, and the original `.to(float)` produced float64 boxes.
            bboxes_xywh = torch.stack(test_proposals_all).to(device=outputs.device, dtype=outputs.dtype)
            bboxes_xyxy = _xywh_to_xyxy(bboxes_xywh)

            bboxes_indices = torchvision.ops.nms(bboxes_xyxy, outputs, iou_threshold=0.1)

            final_bboxes = bboxes_xywh[bboxes_indices]
            outputs = outputs[bboxes_indices]

            pred_boxes = bboxes_xyxy[bboxes_indices].to(torch.float32).cpu()
            pred_scores = outputs.to(torch.float32).cpu()

        # One dict per image (not one per box). Labels are integer class ids.
        pred = [dict(
            boxes=pred_boxes,
            scores=pred_scores,
            labels=torch.ones(len(pred_scores), dtype=torch.long),  # Simplification for Binary
        )]

        # Ground truth arrives in COCO xywh (see the transform's bbox_params).
        # It is collapsed to the same single class as the predictions because the
        # classifier only separates object from background; switch to
        # `test_labels` once the predictions carry real class ids.
        gt_boxes = _xywh_to_xyxy(_boxes_to_tensor(test_bboxes))
        target = [dict(
            boxes=gt_boxes,
            labels=torch.ones(len(gt_boxes), dtype=torch.long),
        )]

        metric.update(pred, target)

    # Computing mAP over every validation image seen in this epoch.
    maps = metric.compute()
    print(f'MAP: {float(maps["map"].detach().cpu()):.3f}          MAP@50: {float(maps["map_50"].detach().cpu()):.3f}          MAP_small: {float(maps["map_small"].detach().cpu()):.3f}          MAP_large: {float(maps["map_large"].detach().cpu()):.3f}')
        

EPOCH 0/3


  0%|          | 0/57 [00:00<?, ?it/s]



In [None]:
# Walk the whole validation loader; once the loop ends, the three names hold
# the single sample of the last batch (the loader yields one-element lists).
for batch in val_loader:
    test_images, test_bboxes, test_labels = batch[0]

In [None]:
# Run the edge-box helper on the validation sample unpacked above. The two
# results are stacked into tensors by later cells (crops as model input,
# proposals as boxes). The helper is defined outside this notebook.
test_cropped_images_all, test_proposals_all = edge_boxes_test(test_images, test_bboxes) 


# Reshaping
#outputs = outputs.tolist()



In [None]:
# Stack the proposals into one tensor with a row per box. float32 (not Python
# `float`, i.e. float64) so the boxes share the dtype of the model scores:
# torchvision's nms rejects boxes and scores of different dtypes.
bboxes_xywh = torch.stack(test_proposals_all).to(device).to(torch.float32)

# Independent copy for the corner format. Tensor.clone() replaces
# copy.deepcopy, whose module is not imported by this notebook.
bboxes_xyxy = bboxes_xywh.clone()
# (x, y, w, h) -> (x1, y1, x2, y2): add the origin to width and height.
bboxes_xyxy[:, 2] = bboxes_xyxy[:, 2] + bboxes_xyxy[:, 0]
bboxes_xyxy[:, 3] = bboxes_xyxy[:, 3] + bboxes_xyxy[:, 1]

        

In [None]:
# Non-maximum suppression: among boxes whose IoU exceeds 0.1, keep only the
# highest-scoring one. `outputs` must hold one score per row of `bboxes_xyxy`.
bboxes_indices = torchvision.ops.nms(bboxes_xyxy, outputs, iou_threshold=0.1)

In [None]:
# Apply the NMS selection to the xywh boxes and to their scores so both stay aligned.
# NOTE: not idempotent - re-running this cell filters `outputs` a second time.
final_bboxes = bboxes_xywh[bboxes_indices]
outputs = outputs[bboxes_indices]

In [None]:
# Display the boxes selected by NMS.
final_bboxes

In [None]:
# The cell that builds `final_bboxes` already filtered `outputs` with
# `bboxes_indices`. Those indices refer to the unfiltered score tensor, so
# applying them again selects the wrong scores or raises IndexError.
# Just display the already-filtered scores instead.
outputs

In [None]:
fig, ax = plt.subplots(1)

# Undo the normalisation so the image is displayable.
# NOTE: `denormalize` is defined in a cell further down (unless utils also
# provides it), so that cell has to run first on a fresh kernel.
img = denormalize(test_images.detach().cpu())

ax.imshow(img.detach().cpu().permute(1, 2, 0))
ax.set_xticks([])
ax.set_yticks([])

# Show annotations: one rectangle per box kept by NMS, coloured by the score of
# that same box (green above the 0.5 decision threshold, red otherwise).
# The original indexed `predictions_all`, i.e. the training targets of a
# different proposal set, which is unrelated to these boxes.
# Assumes `outputs` is aligned with `final_bboxes` (filtered exactly once).
for (x, y, w, h), score in zip(final_bboxes.cpu().numpy(), outputs):
    edge_col = 'green' if float(score) > 0.5 else 'red'
    # Boxes are (x, y, width, height), which is what Rectangle expects.
    rect = Rectangle((x, y), w, h, linewidth=2, edgecolor=edge_col,
                     facecolor='none', alpha=0.7)
    ax.add_patch(rect)
#plt.savefig(f'imgs/output_{img_idx}.png')
plt.show()

In [None]:
# Keep only the scores / proposals selected by the boolean mask `predicted`.
# NOTE(review): every visible assignment of `predicted` derives it from the
# training-batch `output`, not from these test proposals - confirm that the mask
# really matches `outputs` and `test_proposals_all` in length and meaning.
outputs = np.array(outputs)[predicted]
test_proposals = np.array(test_proposals_all)[predicted]


In [None]:
# Display the raw test proposals as a NumPy array.
np.array(test_proposals_all)

In [None]:
# Stack the proposal crops into one input batch and turn their binary targets
# into a float tensor, both on the compute device.
data = torch.stack(cropped_images_all).to(device)
target = torch.FloatTensor(predictions_all).to(device)

In [None]:
# Shape of the stacked crop batch.
data.shape

In [None]:
# Forward pass over the crop batch; displays the raw model output.
model(data)

In [None]:
# Keep column 0 of the model output as one score per crop.
outputs = model(data)[:, 0]

In [None]:
# Shape of the score tensor.
outputs.shape

In [None]:

# Score the test crops without tracking gradients; column 0 is the per-crop score.
with torch.no_grad():
    outputs = model(test_data)[:,0]

In [None]:
# Threshold the training scores at 0.5 and add the number of matches to the counter.
# NOTE: `train_correct` grows on every re-run of this cell.
predicted = output > 0.5
train_correct += (target==predicted).sum().cpu().item()
        

In [None]:
# Number of crops whose thresholded prediction equals the target.
(target==predicted).sum().cpu().item()

In [None]:
# NOTE(review): this rebinds `time` to the module. The import cell at the top
# binds `time` to the function (`from time import time`), so any later bare
# `time()` call stops working once this cell has run.
import time 

In [None]:
# A batch is a list of (image, bboxes, labels) samples; split it into three
# parallel lists.
images = [sample_image for (sample_image, _bbox, _label) in batch]
bboxes = [sample_bbox for (_image, sample_bbox, _label) in batch]
labels = [sample_label for (_image, _bbox, sample_label) in batch]

In [None]:
# Index of the batch sample to inspect.
k = 3

# Run the edge-box training helper on that single sample (wrapped in one-element lists).
cropped_images_all, proposals_all, predictions_all = edge_boxes_train([images[k]], [bboxes[k]])       
# Keep the image of the selected sample for the plotting cells below.
img = images[k]##.cpu().numpy()

#plt.imshow()

In [None]:
def denormalize(img):
    """Undo the ImageNet mean/std normalisation and rescale to integers in [0, 255].

    The forward normalisation is ``(x - mean) / std`` per channel, so the
    inverse is ``x * std + mean``. The previous implementation applied
    ``(x + mean) * std``, which is not the inverse and distorted the colour
    channels relative to each other.

    Args:
        img: image tensor with the 3 colour channels on the third-to-last axis,
            e.g. (3, H, W). Non-float input is converted to float32.

    Returns:
        torch.int64 tensor of the same shape. Values are min-max stretched over
        the whole tensor to the range [0, 255]; a constant image maps to zeros.
    """
    if not torch.is_floating_point(img):
        img = img.to(torch.float32)

    # Per-channel statistics, shaped to broadcast over (..., 3, H, W).
    mean = torch.tensor([0.485, 0.456, 0.406], dtype=img.dtype, device=img.device).view(-1, 1, 1)
    std = torch.tensor([0.229, 0.224, 0.225], dtype=img.dtype, device=img.device).view(-1, 1, 1)

    denorm_image = img * std + mean

    # Stretch to the full displayable range, as the original did.
    shifted = denorm_image - denorm_image.min()
    span = shifted.max()
    if span == 0:
        # Constant image: avoid 0/0, which would put NaNs into the integer cast.
        return torch.zeros_like(shifted, dtype=torch.int64)
    return (shifted / span * 255).to(torch.int64)

In [None]:
# Convert the selected image to displayable integer values.
# NOTE: not idempotent - a re-run feeds the previous result back into denormalize.
img = denormalize(img)

In [None]:
# Show the image; permute moves the channel axis last, as imshow expects.
plt.imshow(img.permute(1,2,0))

In [None]:
fig, ax = plt.subplots(1)

# Draw the image (channels moved last) without axis ticks.
ax.imshow(img.permute(1, 2, 0))
ax.set_xticks([])
ax.set_yticks([])

# Show annotations: one green outline per ground-truth box of sample k.
for ann in bboxes[k]:
    # The last two values are handed to Rectangle as width and height.
    x, y, x1, y1 = ann
    rect = Rectangle(
        (x, y), x1, y1,
        linewidth=2, edgecolor='green', facecolor='none', alpha=0.7,
    )
    ax.add_patch(rect)
#plt.savefig(f'imgs/output_{img_idx}.png')
plt.show()

In [None]:
fig, ax = plt.subplots(1)

# Draw the image (channels moved last) without axis ticks.
ax.imshow(img.permute(1, 2, 0))
ax.set_xticks([])
ax.set_yticks([])

# Show annotations: proposals whose entry in `predictions_all` is truthy are
# outlined in green, all others in red.
for i, ann in enumerate(proposals_all[0]):
    x, y, x1, y1 = ann
    edge_col = 'green' if predictions_all[i] else 'red'
    rect = Rectangle(
        (x, y), x1, y1,
        linewidth=2, edgecolor=edge_col, facecolor='none', alpha=0.7,
    )
    ax.add_patch(rect)
#plt.savefig(f'imgs/output_{img_idx}.png')
plt.show()

In [None]:
# Stack the crops of sample k into one input batch and turn their binary
# targets into a float tensor, both on the compute device.
data = torch.stack(cropped_images_all).to(device)
target = torch.FloatTensor(predictions_all).to(device)
        

In [None]:
# Show the window rows y..y+h, columns x..x+w of sample k, sliced from the
# channels-last NumPy view of the image.
# NOTE(review): `h` and `w` are not assigned in any visible cell, and `x` / `y`
# are whatever the last box-drawing loop left behind - confirm before use.
plt.imshow(images[k].permute(1,2,0).detach().cpu().numpy()[y:y+h, x:x+w])

In [None]:
# Same window as the slicing cell above, cut with torchvision's functional crop.
# The original line had no image argument (a syntax error) and passed the box
# as x, y, w, h; the signature is crop(img, top, left, height, width), so rows
# (y / h) come before columns (x / w).
# NOTE(review): `w` and `h` must be defined first - no visible cell assigns them.
cropped = fn.crop(images[k], int(y), int(x), int(h), int(w))
# The crop is channels-first; imshow needs channels last.
plt.imshow(cropped.permute(1, 2, 0).detach().cpu().numpy())

In [None]:
# Display the binary target tensor of the current crops.
target

In [None]:
# Show the second proposal crop (index 1), channels moved last for imshow.
plt.imshow(cropped_images_all[1].detach().permute(1,2,0).cpu().numpy())

In [None]:
# Display the binary target tensor again for comparison with the crop above.
target

In [None]:
# Display the labels of the validation sample unpacked earlier.
test_labels

In [None]:
# Show the fourth test crop (index 3), channels moved last for imshow.
plt.imshow(test_data[3].detach().permute(1,2,0).cpu().numpy())

In [None]:

# CNN: one manual optimisation step on the current (data, target) batch.
# Clear old gradients, score the crops (column 0), compute the loss,
# back-propagate and update the weights.
optimizer.zero_grad()
output = model(data)[:,0]
loss = loss_function(output, target)
loss.backward()
optimizer.step()

# Book-keeping. NOTE: the three accumulators must already exist (the training
# loop creates them) and they grow on every re-run of this cell.
train_loss.append(loss.item())
predicted = output > 0.5
train_correct += (target==predicted).sum().cpu().item()
train_len += data.shape[0]
print(f'train_loss: {loss:.5f}')
# break
        

In [None]:
# Reset the gradients, then run a forward pass; column 0 is the per-crop score.
optimizer.zero_grad()
output = model(data)[:,0]
        