# Fast R-CNN

To make R-CNN faster, Girshick (2015) improved the training procedure by unifying three independent models into one jointly trained framework and increasing the amount of shared computation; the resulting model is named Fast R-CNN. Instead of extracting CNN feature vectors independently for each region proposal, this model aggregates them into one CNN forward pass over the entire image, and the region proposals share this feature matrix. The same feature matrix is then branched out to be used for learning the object classifier and the bounding-box regressor. In short, computation sharing is what speeds up R-CNN.

In [2]:

#First, pre-train a convolutional neural network on image classification tasks.
#Propose regions by selective search (~2k candidates per image).

In [3]:
import imgaug as ia
import imgaug.augmenters as iaa
from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage

In [5]:
import os 
import glob
import pandas as pd
from collections import Counter 
import cv2
import numpy as np

from sklearn.model_selection import train_test_split


import torch

import torch.nn as nn
from torch.utils.data import Dataset,DataLoader

from torchsummary import summary


from torchvision import models
from torchvision import transforms as T

import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib.patches as patches

from dataload import xml_to_csv,PetData,Sub_region_train,Sub_region

from tqdm import tqdm
from utills import ssearch,misc
from utills.misc import create_label,balance_df

In [6]:
##Loading data
# Dataset root; the commented-out lines are alternative locations kept for reference.
#root_path="D:/Dataset/Pet_Data/"
root_path="D:/Dataset/Pet_Data/"
#root_path="D:/Dataset/Oxford"

# Image folder and the folder of per-image XML annotation files under the root.
img_path=os.path.join(root_path,"images")
annotation_path=os.path.join(root_path,"annotations/xmls")           
annots = glob.glob(annotation_path+"/*.xml")
seed=0  # shared random_state for the per-class sampling and the train/valid split
# xml_to_csv is a project helper; presumably it returns one DataFrame row per
# annotation (it must at least expose a 'target' column, used below) - TODO confirm.
df=xml_to_csv(annots,img_path)
df.head()

## Make Balanced Dataset (To save time, but don't do this in real research!!)
# Down-sample every 'target' group to the size of the smallest group.
g = df.groupby('target')
balanced_df = pd.DataFrame(g.apply(lambda x: x.sample(g.size().min(),random_state=seed).reset_index(drop=True),))  
## 
# Hold out 30% of the balanced rows for validation.
train, valid = train_test_split(balanced_df, test_size=0.3,random_state=seed)  

BATCH_SIZE = 1




## Updated Dataset Class (Added Selective Search "inside" original class)

In [11]:
class PetData(Dataset):
    """Dataset of resized pet images with their ground-truth box.

    Each row of ``dataframe`` is unpacked as
    ``(file name, target, xmin, ymin, xmax, ymax)``. The image is read with
    OpenCV, converted to RGB and resized to 512x512 together with its
    bounding box. When ``ssearch`` is true, selective search is run on the
    resized image and the de-duplicated proposals are returned as well.
    """

    def __init__(self, dataframe,train=False,ssearch=False):
        """Store the annotation table and build the image transforms.

        Args:
            dataframe: table whose rows unpack into six values (see class doc).
            train: accepted for interface compatibility; not used here.
            ssearch: if true, ``__getitem__`` also returns region proposals.
        """
        self.df=dataframe
        self.ssearch=ssearch

        # Geometric transform applied to the image and its box together.
        resize_step=iaa.Resize((512,512))
        self.transform=iaa.Sequential([resize_step])

        # Tensor conversion followed by per-channel normalisation (mean 0.5, std 0.5).
        to_tensor=T.ToTensor()
        normalise=T.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
        self.torch_transform=T.Compose([to_tensor,normalise])

    def __len__(self):
        """Return the number of rows in the annotation table."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image tensor, box tensor, proposals)`` for row ``idx``.

        The box tensor has one row ``[x1, y1, x2, y2, label]`` per augmented
        box, so the label must be numeric. ``proposals`` is ``None`` unless
        ``ssearch`` was enabled, in which case it is a DataFrame with
        duplicate ``'rect'`` entries removed.
        """
        path,label,x_min,y_min,x_max,y_max=self.df.iloc[idx]

        # OpenCV loads BGR; the rest of the pipeline works on RGB.
        bgr=cv2.imread(path)
        rgb=cv2.cvtColor(bgr,cv2.COLOR_BGR2RGB)

        # Wrap the ground-truth box so it is resized consistently with the image.
        gt_box=BoundingBox(x_min,y_min,x_max,y_max,label=label)
        boxes_on_image=BoundingBoxesOnImage([gt_box], shape=rgb.shape)
        image_aug, bbs_aug = self.transform(image=rgb, bounding_boxes=boxes_on_image)

        proposals=None
        if self.ssearch:
            # `ssearch` here is the project module, not the boolean flag on self.
            raw=ssearch.selective_search(image_aug, scale=50, sigma=0.8, min_size=30)
            proposals=pd.DataFrame.from_dict(raw)
            proposals=proposals.drop_duplicates(subset=['rect'])

        image_tensor=self.torch_transform(image_aug)
        box_rows=[]
        for bb in bbs_aug:
            box_rows.append(torch.tensor([bb.x1,bb.y1,bb.x2,bb.y2,bb.label]))
        return image_tensor, torch.stack(box_rows), proposals

In [24]:
train_ds = PetData(train, ssearch=True)
valid_ds= PetData(valid, ssearch=True)


def collate_fn(batch):
    """Transpose a batch of samples into per-field groups.

    ``batch`` is a sequence of sample tuples; the result is a one-shot ``zip``
    iterator that yields one tuple per field (all first elements, then all
    second elements, and so on). No stacking or padding is performed.
    """
    per_field = zip(*batch)
    return per_field
train_dl = torch.utils.data.DataLoader(train_ds, batch_size=BATCH_SIZE
                                       , collate_fn=collate_fn,shuffle=False)
valid_dl = torch.utils.data.DataLoader(valid_ds, batch_size=BATCH_SIZE
                                       , collate_fn=collate_fn)

In [104]:
sample_ds=train_ds[0]

## <center>    First, pre-train a convolutional neural network on image classification tasks. </center>


In [32]:
model = models.vgg16(pretrained=True)
def count_parameters(model):
    """Return the total element count of the model's trainable parameters.

    Only parameters with ``requires_grad`` set are counted.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total
                              
print(f'Num of parameters: {count_parameters(model)}')


Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to C:\Users\HyunSeung/.cache\torch\checkpoints\vgg16-397923af.pth
100%|███████████████████████████████████████████████████████████████████████████████| 528M/528M [00:09<00:00, 60.9MB/s]


Num of parameters: 138357544


## <center>    We replace Last Maxpooling with ROI pooling. </center>


In [152]:
model= model.features[0:-1]


In [172]:
class roi_pooling(nn.Module):
    """RoI max pooling: crop one box from a feature map and pool it to a fixed size.

    The box is given in image pixels and mapped onto the feature grid by the
    ratio ``ft_shape / img_shape`` (both treated as square sizes).
    """

    def __init__(self,ft_shape=32,img_shape=512,output_size=(7,7)):
        '''
        Args:
            ft_shape: side length of the feature map, in cells.
            img_shape: side length of the input image, in pixels.
            output_size: spatial size of the pooled output (int or (H, W)).
        '''
        super(roi_pooling, self).__init__()

        #Layers
        self.img_shape=img_shape
        self.ft_shape=ft_shape
        # Pass the output size as a single argument: the second positional
        # argument of AdaptiveMaxPool2d is return_indices, not the width.
        self.adaptivepool=nn.AdaptiveMaxPool2d(output_size)

    def forward(self,ft,region_box):
        '''
        Args:
            ft: feature map indexed as [batch, channel, row, col].
            region_box: box whose first four entries are x1, y1, x2, y2 in
                image pixels; any further entries (e.g. a label) are ignored.

        Returns:
            The pooled region, with the batch and channel dimensions of ``ft``
            and ``output_size`` spatial dimensions.
        '''
        # Pixel coordinates -> feature-grid indices (truncated toward zero).
        normalised=region_box[0:4]/self.img_shape
        x1,y1,x2,y2=[int(v*self.ft_shape) for v in normalised]

        # Keep the crop inside the map and at least one cell wide/high, so a
        # box smaller than one feature cell does not produce an empty slice.
        height,width=ft.shape[-2],ft.shape[-1]
        x1=min(max(x1,0),width-1)
        y1=min(max(y1,0),height-1)
        x2=min(max(x2,x1+1),width)
        y2=min(max(y2,y1+1),height)

        roi=ft[:,:,y1:y2,x1:x2]
        return self.adaptivepool(roi)
    

In [None]:
class Fast_RCNN(nn.Module):
    """Shared VGG16 feature extractor followed by per-region RoI pooling.

    The image is pushed through the convolutional backbone once; every region
    proposal is then pooled from that single shared feature map.
    """

    def __init__(self,ft_shape=32,img_shape=512):
        '''
        Args:
            ft_shape: side length of the backbone's output feature map.
            img_shape: side length of the input image, in pixels.
        '''
        super(Fast_RCNN, self).__init__()

        self.ft_shape=ft_shape
        self.img_shape=img_shape

        #Layers
        # VGG16 convolutional stack with its final max-pooling layer dropped.
        self.ft_net=models.vgg16(pretrained=True).features[0:-1]
        # Forward the sizes so the pooling layer uses the same geometry.
        self.roi=roi_pooling(ft_shape=ft_shape,img_shape=img_shape)

    def forward(self,imgs,regions):
        '''
        Args:
            imgs: image batch fed to the backbone.
            regions: iterable of boxes, each with x1, y1, x2, y2 (image pixels)
                as its first four entries.
                NOTE(review): assumes a tensor/sequence of boxes, not the raw
                selective-search DataFrame - confirm against the caller.

        Returns:
            Pooled RoI features concatenated along dim 0, one entry per region
            when ``imgs`` holds a single image.

        Raises:
            ValueError: if ``regions`` is empty.
        '''
        # One backbone pass shared by all regions.
        ft=self.ft_net(imgs)
        pooled=[self.roi(ft,region_box) for region_box in regions]
        if not pooled:
            raise ValueError("regions must contain at least one box")
        return torch.cat(pooled,dim=0)
    

In [168]:
roi_pool=roi_pooling()

In [171]:
roi_pool(fts,bbox).shape

torch.Size([1, 512, 7, 7])

In [166]:
roi_pool(fts,bbox)[1]

tensor([[[[  0,   1,   2,  ...,   5,   6,   7],
          [  9,  10,  11,  ...,  32,  15,  16],
          [ 27,  28,  29,  ...,  32,  33,  34],
          ...,
          [ 73,  74,  74,  ...,  68,  69,  70],
          [ 81,  82,  83,  ...,  86,  87,  88],
          [ 99, 100, 101,  ..., 104, 105, 106]],

         [[  0,   1,   2,  ...,   5,   6,   7],
          [  9,  10,  11,  ...,  14,  15,  16],
          [ 27,  28,  29,  ...,  32,  33,  34],
          ...,
          [ 82,  82,  65,  ...,  68,  69,  70],
          [ 99,  82,  83,  ...,  86,  87,  88],
          [ 99, 100, 101,  ..., 104, 105, 106]],

         [[  0,   1,   2,  ...,   5,  16,  17],
          [  9,  10,  11,  ...,  14,  16,  26],
          [ 27,  28,  29,  ...,  32,  33,  53],
          ...,
          [ 63,  64,  65,  ...,  68,  69,  70],
          [ 81,  82,  83,  ..., 105, 106, 106],
          [ 99, 100, 101,  ..., 114, 114, 106]],

         ...,

         [[  0,   1,  12,  ...,   5,   6,   7],
          [  9,  10,  

In [139]:
fts=model.features[0:-1](sample_ds[0].unsqueeze(0))

In [81]:
sample_ds[1]

tensor([[201.7280, 117.2048, 339.9680, 333.1084,   0.0000]])

In [130]:
bboxs=sample_ds[1]
for bbox in bboxs:
    print(bbox)
    #bbox

tensor([201.7280, 117.2048, 339.9680, 333.1084,   0.0000])


In [131]:
# Map the pixel-space box onto feature-map grid indices.
scaled_bbox=[0]*4
img_shape=sample_ds[0].shape[1]  # size of dim 1 of the image tensor
scaled_bbox[0:4]=bbox[0:4]/img_shape  # normalise the four box coordinates by the image size
# Scale to a 32-cell grid and truncate to int (32 is presumably the spatial size of fts - confirm).
scaled_bbox=[int(x*32) for x in scaled_bbox]


In [133]:
scaled_bbox[0]

12

In [143]:
roi=fts[:,:,scaled_bbox[1]:scaled_bbox[3],scaled_bbox[0]:scaled_bbox[2]]

In [148]:
size = (7, 7)
# NOTE(review): AdaptiveMaxPool2d's signature is (output_size, return_indices=False),
# so size[1] is taken as return_indices here. Being truthy, it makes the layer return
# an (output, indices) pair, and output_size is the single int 7 (a square 7x7 output).
adaptive_max_pool = nn.AdaptiveMaxPool2d(size[0], size[1])

In [149]:
adaptive_max_pool

AdaptiveMaxPool2d(output_size=7)

In [150]:
adaptive_max_pool(roi)[0].shape

torch.Size([1, 512, 7, 7])

In [151]:
adaptive_max_pool(roi)[1].shape

torch.Size([1, 512, 7, 7])