In [None]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
import os
import numpy as np
import glob
from PIL import Image, ExifTags

# pip install torchsummary
from tqdm.notebook import tqdm
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torchvision.transforms.functional as fn
from torchvision import models
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from time import time
import pandas as pd
import random

import matplotlib.pyplot as plt
import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2

from matplotlib.patches import Polygon, Rectangle
import pylab
# Default every matplotlib figure created in this notebook to a 6x6 canvas.
pylab.rcParams.update({'figure.figsize': (6, 6)})

# Own imports 
from config import * 
from utils import *
from data_loader import TacoDataset
from eval import *

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [None]:
# Evaluation pipeline: deterministic resize + normalisation only, so that
# validation / test images are never randomly perturbed.
test_transform = A.Compose(
    [
        A.Resize(IMG_SIZE, IMG_SIZE),
        A.Normalize(),
        ToTensorV2(),
    ],
    bbox_params=A.BboxParams(format='pascal_voc', label_fields=['labels']),
)

# Training pipeline: same resize / normalisation, with random geometric and
# photometric augmentations in between. Bounding boxes are transformed together
# with the image; the 'labels' field is kept in sync with the surviving boxes.
train_transform = A.Compose(
    [
        A.Resize(IMG_SIZE, IMG_SIZE),
        A.HorizontalFlip(p=0.5),
        A.Rotate(limit=30, p=0.7),
        A.RandomBrightnessContrast(p=0.2),
        A.RGBShift(10, 10, 10, p=0.3),
        A.GaussNoise(p=0.5),
        A.Normalize(),  # comment this line out if you want to visualise the augmented images
        ToTensorV2(),
    ],
    bbox_params=A.BboxParams(
        format='pascal_voc',
        label_fields=['labels'],
        # Drop a box when less than 30% of its original area is still visible
        # after the augmentation.
        min_visibility=0.3,
    ),
)

In [None]:
# Build the three dataset splits in train -> val -> test order. Only the training
# split is augmented (swap in test_transform there to train without augmentation).
trainset, valset, testset = (
    TacoDataset(split, transforms=split_transform, test_size=0.2)
    for split, split_transform in (
        ('train', train_transform),
        ('val', test_transform),
        ('test', test_transform),
    )
)

# One loader per split; only the training loader reshuffles. The identity
# collate_fn hands each batch over as a plain list of dataset samples instead of
# stacking them into tensors.
train_loader, val_loader, test_loader = (
    DataLoader(
        dataset,
        batch_size=BATCH_SIZE,
        shuffle=reshuffle,
        num_workers=8,
        collate_fn=lambda x: x,
        # persistent_workers=True, pin_memory=True,
    )
    for dataset, reshuffle in ((trainset, True), (valset, False), (testset, False))
)

In [None]:
def selective_search(img):
    """
    Generate region proposals for one image with OpenCV selective search.

    Args:
        img: the image as an np.array (not a torch Tensor).

    Returns:
        np.array with one row per proposal (number of bboxes x 4); each row is
        a box in x, y, w, h format (see the demo notebook for an example).
    """
    searcher = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()
    searcher.setBaseImage(img)

    # "Fast" mode trades recall for speed. "Quality" mode
    # (searcher.switchToSelectiveSearchQuality()) has higher recall but takes
    # much longer; switch to it only if the fast proposals turn out too poor.
    searcher.switchToSelectiveSearchFast()

    region_proposals = searcher.process()

    # TODO: comment out after making the whole trainset work
    print('Total Number of Region Proposals: {}'.format(len(region_proposals)))
    return region_proposals

In [None]:
def transfer_model_set(model, freeze_convs=False, size_hidden=512, out_features=1):
    """
    Swap the final classification layer of a torchvision backbone for a new head.

    The new head is
    Linear -> Dropout(DROP_OUT_RATE) -> ReLU -> BatchNorm1d -> Linear -> Sigmoid,
    so every output lies in (0, 1).

    Args:
        model: a torchvision DenseNet (head stored in ``model.classifier``) or
            ResNet (head stored in ``model.fc``). It is modified in place.
        freeze_convs: when True, ``requires_grad`` is switched off for every
            parameter that exists before the new head is attached, so only the
            freshly created head stays trainable.
        size_hidden: width of the hidden layer of the new head.
        out_features: number of outputs of the new head.

    Returns:
        The same model, with the new head attached, moved to ``device``.

    Raises:
        NotImplementedError: if ``model`` is neither a DenseNet nor a ResNet.
    """
    # Resolve where the classifier lives *before* touching the model, so an
    # unsupported architecture is rejected with the intended error and without
    # having had its parameters frozen as a side effect.
    if isinstance(model, models.densenet.DenseNet):
        head_attr = 'classifier'
    elif isinstance(model, models.resnet.ResNet):
        head_attr = 'fc'
    else:
        raise NotImplementedError('Not implemented the classifier for this type of model')

    if freeze_convs:
        print('Freezing Convs')
        # Freeze everything that currently exists; the head created below is
        # new and therefore keeps requires_grad=True.
        for param in model.parameters():
            param.requires_grad = False

    in_features = getattr(model, head_attr).in_features

    # NOTE(review): the head ends in a Sigmoid, so the loss used with it should
    # expect probabilities rather than raw logits - confirm against LOSS_FN.
    head = nn.Sequential(
        nn.Linear(in_features, size_hidden),
        nn.Dropout(DROP_OUT_RATE),
        nn.ReLU(),
        nn.BatchNorm1d(size_hidden),
        nn.Linear(size_hidden, out_features),
        nn.Sigmoid(),
    )
    setattr(model, head_attr, head)

    return model.to(device)

In [None]:
# ImageNet-pretrained DenseNet-121 whose classifier is replaced by the custom
# head; the backbone is left trainable (freeze_convs=False).
model = transfer_model_set(
    models.densenet121(weights=models.DenseNet121_Weights.IMAGENET1K_V1),
    freeze_convs=False,
)

# Adam over every model parameter, with the learning rate taken from LR.
optimizer = torch.optim.Adam(model.parameters(), lr=LR)

In [None]:
k = 0.5   # IoU threshold: a proposal whose best IoU with a ground-truth box exceeds k is a positive
p = 0.05  # Probability of keeping a non-positive (background) proposal as a negative sample

def train(model, train_loader, test_loader, loss_function, optimizer, num_epochs, model_name, lr_scheduler=None, save_model=False):
    """
    Build proposal-level training batches for the region classifier.

    For every image of every minibatch, selective-search proposals are labelled
    by their best IoU against the image's ground-truth boxes (positive when it
    exceeds the module-level ``k``). All positives, plus each remaining proposal
    with probability ``p``, are cropped from the image and resized to
    1024x1024, then stacked into ``data`` / ``target`` tensors on ``device``.

    The optimisation step and the evaluation loop are still commented out, so
    the function currently only exercises this data path.

    Args:
        model: network to train; only ``model.train()`` is called for now.
        train_loader: iterable (with ``len``) of batches, each batch an iterable
            of ``(image, bboxes, label)`` triples. ``image`` must support
            ``permute([1, 2, 0]).numpy()`` (presumably a CPU CHW tensor).
        test_loader, loss_function, optimizer, model_name, lr_scheduler,
        save_model: accepted for the commented-out training / evaluation code;
            currently unused.
        num_epochs: number of passes over ``train_loader``.

    Returns:
        dict with keys 'train_acc', 'test_acc', 'train_loss', 'test_loss'; the
        lists stay empty until the commented-out code is enabled.
    """
    out_dict = {'train_acc': [],
                'test_acc': [],
                'train_loss': [],
                'test_loss': []}

    for epoch in tqdm(range(num_epochs), unit='epoch'):
        model.train()
        # Accumulators consumed by the (currently commented-out) training step.
        train_correct = 0
        train_len = 0
        train_loss = []
        for minibatch_no, batch in tqdm(enumerate(train_loader), total=len(train_loader)):
            predictions_all = []
            cropped_images_all = []
            for image, img_bboxes, _ in batch:
                # NOTE(review): the loaders apply A.Normalize(), so this array is
                # presumably float and normalised, while selective search is
                # normally run on 8-bit images - confirm the proposals are sane.
                proposals = selective_search(image.permute([1, 2, 0]).numpy())

                for proposal in proposals:
                    # NOTE(review): proposals are (x, y, w, h) while the dataset
                    # boxes are requested in pascal_voc format - confirm that
                    # IoU() expects exactly this pair of formats.
                    # default=0.0 makes an image without ground-truth boxes
                    # yield only background proposals instead of crashing max().
                    best_iou = max((IoU(proposal, bbox) for bbox in img_bboxes), default=0.0)
                    prediction = bool(best_iou > k)

                    # Keep every positive, and a random fraction p of the rest.
                    if prediction or random.random() < p:
                        # fn.crop takes (top, left, height, width); a proposal is
                        # (x, y, w, h), so the axes have to be mapped explicitly.
                        x, y, w, h = (int(v) for v in proposal)
                        cropped_image = fn.crop(image, top=y, left=x, height=h, width=w)
                        resized_image = fn.resize(cropped_image, size=[1024, 1024])
                        cropped_images_all.append(resized_image)
                        predictions_all.append(prediction)

            # torch.stack rejects an empty list: skip batches without any crop.
            if not cropped_images_all:
                continue

            data = torch.stack(cropped_images_all).to(device)
            target = torch.tensor(predictions_all, dtype=torch.float32).to(device)

            # TODO: enable the optimisation step once the data path is validated.
            # CNN
#             optimizer.zero_grad()
#             output = model(data)[:,0]
#             loss = loss_function(output, target)
#             loss.backward()
#             optimizer.step()
#             if lr_scheduler is not None:
#                 lr_scheduler.step()
#             train_loss.append(loss.item())
#             predicted = output > 0.5
#             train_correct += (target==predicted).sum().cpu().item()
#             train_len += data.shape[0]

        # TODO: the evaluation loop below still assumes (data, target) batches;
        # it must be adapted to the proposal-based batches built above.
#         test_loss = []
#         test_correct = 0
#         test_len = 0
#         model.eval()
#         for data, target in test_loader:
#             data, target = data.to(device), target.to(torch.float32).to(device)
#             with torch.no_grad():
#                 output = model(data)[:,0]
#             test_loss.append(loss_function(output, target).cpu().item())
#             predicted = output > 0.5
#             test_correct += (target==predicted).sum().cpu().item()
#             test_len += data.shape[0]

#         if save_model and epoch > 0 and test_correct/test_len > max(out_dict['test_acc']):
#             torch.save(model, 'models/' + model_name)


#         out_dict['train_acc'].append(train_correct/train_len)
#         out_dict['test_acc'].append(test_correct/test_len)
#         out_dict['train_loss'].append(np.mean(train_loss))
#         out_dict['test_loss'].append(np.mean(test_loss))


#         print(f"Loss train: {np.mean(train_loss):.3f}\t test: {np.mean(test_loss):.3f}\t",
#               f"Accuracy train: {out_dict['train_acc'][-1]*100:.1f}%\t test: {out_dict['test_acc'][-1]*100:.1f}%")

    return out_dict

In [None]:
# Evaluate (and pick the checkpoint to save) on the validation split: with
# save_model=True the loader passed as `test_loader` drives model selection, so
# the held-out test split must not be used here.
out_dict = train(
    model,
    train_loader,
    val_loader,
    LOSS_FN,
    optimizer,
    EPOCHS,
    save_model=True,
    model_name='densenet121_full_Adam',
)

In [None]:
# Pull a single minibatch for interactive inspection. Unlike a loop with an
# immediate `break`, this raises StopIteration on an empty loader instead of
# silently leaving `batch` undefined (or stale from an earlier run).
batch = next(iter(train_loader))

In [None]:
# Keep only the image of every (image, bboxes, labels) sample in the batch.
images = [image for (image, _bboxes, _labels) in batch]

In [None]:
# Sanity check: the images must share one shape to be stackable into a batch tensor.
torch.stack(images, dim=0).shape