In [1]:
%load_ext autoreload
%autoreload 2    
%matplotlib inline

In [12]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
import os
import numpy as np
import glob
from PIL import Image, ExifTags

# pip install torchsummary
from tqdm.notebook import tqdm
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torchvision.transforms.functional as fn
from torchvision import models
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from time import time
import pandas as pd
import random

import matplotlib.pyplot as plt
import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2

from matplotlib.patches import Polygon, Rectangle
import pylab
pylab.rcParams['figure.figsize'] = (6,6)

# Own imports 
from config import * 
from utils import *
from data_loader import TacoDataset
from eval import *

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [22]:
img_size = 512

train_transform = A.Compose([
    A.Resize(img_size, img_size),
    A.HorizontalFlip(p=0.5),
    A.Rotate(limit=30, p=0.7),
    A.RandomBrightnessContrast(p=0.2),
    A.RGBShift(10, 10, 10, p=0.3),
    A.GaussNoise(p=0.5),
    A.Normalize(), # If you want to visualize - comment this line 
    ToTensorV2(),
], bbox_params=A.BboxParams(format='pascal_voc',
                            label_fields=['labels'],
                            min_visibility=0.3, # min visibility of the original area in case of a crop
                           )
)

test_transform = A.Compose([
    A.Resize(img_size, img_size),
    A.Normalize(),
    ToTensorV2(),
], bbox_params=A.BboxParams(format='pascal_voc',
                            label_fields=['labels'],
                           )
)

In [64]:
BATCH_SIZE = 16

trainset = TacoDataset( 'train', transforms=train_transform, test_size=0.2) # test_transform for no augment
valset   = TacoDataset('val', transforms=test_transform, test_size=0.2)
testset  = TacoDataset('test', transforms=test_transform, test_size=0.2)

train_loader = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=8, collate_fn=lambda x: x)# persistent_workers=True, pin_memory=True)
val_loader = DataLoader(valset, batch_size=BATCH_SIZE, shuffle=False, num_workers=8, collate_fn=lambda x: x)# persistent_workers=True, pin_memory=True)
test_loader = DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False, num_workers=8, collate_fn=lambda x: x)# persistent_workers=True, pin_memory=True)

In [65]:
def selective_search(img):
    """
    Takes image as an input (np.array not Tensor!)
    Returns np.array (number of bboxes x 4)
    Bboxes in format x, y, w, h (see demo notebook for example)
    """
    # create selective search segmentation object
    ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()
    ss.setBaseImage(img) 
    # Choose between fast or accurate selective Search method: fast but low recall V.S. high recall but slow 
    ss.switchToSelectiveSearchFast()
    # AM: Quality takes a looong time, maybe better to try with fast for now and see the results, if bad then change to quality
    # ss.switchToSelectiveSearchQuality() 
    # run selective search
    rects = ss.process()

    # get rectangles to x1, y1, x2, y2 as this is format in IoU 
    rects[:, 2] = rects[:, 0] + rects[:, 2]
    rects[:, 3] = rects[:, 1] + rects[:, 3]

    rects = rects[:250, :]
    
    print('Total Number of Region Proposals: {}'.format(len(rects))) # TODO: comment out after making the whole trainset work
    return rects

In [66]:
def transfer_model_set(model, freeze_convs=False,):
    
    if freeze_convs:
        print('Freezing Convs')
        # freeze the feature extractors
        for param in model.parameters():
            param.requires_grad = False
    
    if type(model) == models.densenet.DenseNet:
        in_features = model.classifier.in_features
    
    elif type(model) == models.resnet.ResNet:
        in_features = model.fc.in_features
    
    
    size_hidden = 512
    out_features = 1
    
    head = nn.Sequential(
                    nn.Linear(in_features, size_hidden),
                    nn.Dropout(DROP_OUT_RATE),
                    nn.ReLU(),
                    nn.BatchNorm1d(size_hidden),
                    nn.Linear(size_hidden, out_features),
                    nn.Sigmoid()        
    )
                    
    
    if type(model) == models.densenet.DenseNet:
        model.classifier = head
    
    elif type(model) == models.resnet.ResNet:
        model.fc = head

    else:
        raise Exception('Not implemented the classifier for this type of model')

    model = model.to(device)

    return model

In [67]:
HEAD_LEARNING_RATE = 0.001
NUM_EPOCHS = 5
loss = nn.BCELoss()
DROP_OUT_RATE = 0.5

model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
model = transfer_model_set(model, freeze_convs=True)
optimizer = torch.optim.Adam(model.parameters(), HEAD_LEARNING_RATE)

Freezing Convs


In [68]:
import copy

In [69]:
k = 0.5 # Threshold for classification
p = 0.05 # Probability of cropping background image
num_epochs = NUM_EPOCHS

# def train(model, train_loader, test_loader, loss_function, optimizer, num_epochs, model_name, lr_scheduler=None, save_model=False ):

#     def loss_fun(output, target):
#         return F.cross_entropy(output, target)

out_dict = {'train_acc': [],
            'test_acc': [],
            'train_loss': [],
            'test_loss': []}

for epoch in tqdm(range(num_epochs), unit='epoch'):
    model.train()
    train_correct = 0
    train_len = 0
    train_loss = []
    for minibatch_no, batch in tqdm(enumerate(train_loader), total=len(train_loader)):
        images = [image for image, _, _ in batch]
        bboxes = [bbox for _, bbox, _ in batch]
        labels = [label for _, _, label in batch]
        # images, bboxes, labels = torch.FloatTensor(images).to(device), torch.FloatTensor(bboxes).to(device), torch.FloatTensor(labels).to(device)
        # print(images.shape)
        
        # Selective search
        proposals_all = []
        predictions_all = []
        cropped_images_all = []
        for image, img_bboxes in zip(images, bboxes):
            proposals = selective_search(image.permute([1,2,0]).numpy()) # .cpu()
            proposals_all.append(proposals)
            
            # IoU
            print('IoU processing')
            for proposal in proposals:

                proposal_wh = copy.copy(proposal)
                proposal_wh[2] = proposal_wh[2] - proposal_wh[0]
                proposal_wh[3] = proposal_wh[3] - proposal_wh[1]
                
                scores_all = []
                for bbox in img_bboxes:
                    score = IoU(proposal, bbox)
                    scores_all.append(score)
                
                prediction = max(scores_all) > k
                print(scores_all)
                # Extract image
                if prediction or random.random() < p:
                    cropped_image = fn.crop(image, *proposal_wh)
                    resized_image = fn.resize(cropped_image, size=[300, 300]) # pretrained size
                    cropped_images_all.append(resized_image)
                    predictions_all.append(prediction)
        # print(f"Len: {len(predictions_all)}, sum: {sum(predictions_all)}")
        
        data, target = torch.stack(cropped_images_all).to(device), torch.FloatTensor(predictions_all).to(device)
        print('stacked images')
        # CNN
#             optimizer.zero_grad()
#             output = model(data)[:,0]
#             loss = loss_function(output, target)
#             loss.backward()
#             optimizer.step()
#             if lr_scheduler is not None:
#                 lr_scheduler.step()
#             train_loss.append(loss.item())
#             predicted = output > 0.5
#             train_correct += (target==predicted).sum().cpu().item()
#             train_len += data.shape[0]
        
#         test_loss = []
#         test_correct = 0
#         test_len = 0
#         model.eval()
#         for data, target in test_loader:
#             data, target = data.to(device), target.to(torch.float32).to(device)
#             with torch.no_grad():
#                 output = model(data)[:,0]
#             test_loss.append(loss_function(output, target).cpu().item())
#             predicted = output > 0.5
#             test_correct += (target==predicted).sum().cpu().item()
#             test_len += data.shape[0]

#         if save_model and epoch > 0 and test_correct/test_len > max(out_dict['test_acc']):
#             torch.save(model, 'models/' + model_name)
        
        
#         out_dict['train_acc'].append(train_correct/train_len)
#         out_dict['test_acc'].append(test_correct/test_len)
#         out_dict['train_loss'].append(np.mean(train_loss))
#         out_dict['test_loss'].append(np.mean(test_loss))

    
#         print(f"Loss train: {np.mean(train_loss):.3f}\t test: {np.mean(test_loss):.3f}\t",
#               f"Accuracy train: {out_dict['train_acc'][-1]*100:.1f}%\t test: {out_dict['test_acc'][-1]*100:.1f}%")

# return out_dict

  0%|          | 0/5 [00:00<?, ?epoch/s]

  0%|          | 0/57 [00:00<?, ?it/s]

Total Number of Region Proposals: 250
IoU processing
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.09074793902215067]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.006745202167354463]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.023071726674227955]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.030931729610462307]
[0.13846094160060082]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0322806376938127]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.14106183554199223]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.07468824305279792]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.44122273800424977]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fbb54dcf760>
Traceback (most recent call last):
  File "/zhome/8d/e/198218/dlincv/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/zhome/8d/e/198218/dlincv/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1442, in _shutdown_workers
    w.join(timeout=_utils.MP_STATUS_CHECK_INTERVAL)
  File "/appl/python/3.10.11/lib/python3.10/multiprocessing/process.py", line 149, in join
    res = self._popen.wait(timeout)
  File "/appl/python/3.10.11/lib/python3.10/multiprocessing/popen_fork.py", line 40, in wait
    if not wait([self.sentinel], timeout):
  File "/appl/python/3.10.11/lib/python3.10/multiprocessing/connection.py", line 931, in wait
    ready = selector.select(timeout)
  File "/appl/python/3.10.11/lib/python3.10/selectors.py", line 416, in select
    fd_event_list = self._selector.poll(timeout)
KeyboardInterrupt: 


KeyboardInterrupt: 

In [70]:
proposal

array([ 64, 498,  74, 512], dtype=int32)

In [71]:
proposal_wh

array([ 64, 498,  10,  14], dtype=int32)

In [50]:
scores_all

[0.0, 0.0]

In [49]:
img_bboxes

[(146.86839320457761,
  15.894666336524068,
  268.1166215125553,
  77.78430964029158),
 (144.1386701465699, 224.5796505222911, 422.1630538010227, 448.076537534678)]

In [48]:
proposal

array([398, 448, 102,  64], dtype=int32)

In [46]:
data.shape

torch.Size([20, 3, 300, 300])

In [47]:
target

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       device='cuda:0')

In [None]:
# out_dict = train(model,
#                           train_loader,
#                           test_loader,
#                           loss,
#                           optimizer,
#                           NUM_EPOCHS, 
#                           save_model=True, 
#                           model_name='densenet121_full_Adam')

In [9]:
for it, data in tqdm(enumerate(train_loader), total=len(train_loader)):
    batch = data
    break

  0%|          | 0/450 [00:00<?, ?it/s]

In [10]:
images = [image for image, _, _ in batch]

In [13]:
torch.stack(images).shape

torch.Size([2, 3, 1024, 1024])