# Fast R-CNN

To make R-CNN faster, Girshick (2015) improved the training procedure by unifying the three independent models into one jointly trained framework and by sharing more of the computation; the resulting model is named Fast R-CNN. Instead of extracting CNN feature vectors independently for each region proposal, this model runs a single CNN forward pass over the entire image, and all region proposals share the resulting feature matrix. The same feature matrix is then branched out to learn both the object classifier and the bounding-box regressor. In conclusion, computation sharing is what speeds up R-CNN.

In [2]:
import imgaug as ia
import imgaug.augmenters as iaa
from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage

In [467]:
import os 
import glob
import pandas as pd
from collections import Counter 
import cv2
import numpy as np
import random
from sklearn.model_selection import train_test_split


import torch

import torch.nn as nn
from torch.utils.data import Dataset,DataLoader

from torchsummary import summary


from torchvision import models
from torchvision import transforms as T

import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib.patches as patches

from dataload import xml_to_csv,PetData,Sub_region_train,Sub_region

from tqdm import tqdm
from utills import ssearch,misc
from utills.misc import create_label,balance_df

In [4]:
## Loading data
# Dataset root (an alternative location used earlier: "D:/Dataset/Oxford").
root_path = "D:/Dataset/Pet_Data/"
img_path = os.path.join(root_path, "images")
annotation_path = os.path.join(root_path, "annotations/xmls")

# Gather every annotation XML and let xml_to_csv build the annotation table.
annots = glob.glob(annotation_path + "/*.xml")
seed = 0
df = xml_to_csv(annots, img_path)
df.head()

## Make Balanced Dataset (To save time, but don't do this in real research!!)
# Every class is down-sampled to the size of the smallest class.
g = df.groupby('target')


def _sample_class(group):
    """Draw as many rows from `group` as the smallest class contains."""
    return group.sample(g.size().min(), random_state=seed).reset_index(drop=True)


balanced_df = pd.DataFrame(g.apply(_sample_class))

# 70 % / 30 % train / validation split, reproducible through `seed`.
train, valid = train_test_split(balanced_df, random_state=seed, test_size=0.3)

BATCH_SIZE = 1




## Updated Dataset Class (Added Selective Search "inside" original class)

In [631]:
class PetData(Dataset):
    """Detection dataset yielding an image, its ground-truth box and region proposals.

    This definition replaces the ``PetData`` imported from ``dataload`` and
    runs selective search inside ``__getitem__``.

    Each dataframe row is unpacked as ``(fn, target, xmin, ymin, xmax, ymax)``.
    The image is resized to ``IMG_SIZE`` x ``IMG_SIZE`` (the box is transformed
    with it), converted to a tensor and normalised with mean 0.5 / std 0.5 per
    channel.

    Args:
        dataframe: table with one annotated image per row.
        train: when True, a random subset of at most ``samples`` proposals is
            kept per image.
        ssearch: when True, selective search is run on the resized image.
        samples: number of proposals to keep per image when ``train`` is True.
    """

    IMG_SIZE = 224  # side length of the resized image, in pixels
    ROI_PAD = 16    # margin added around every proposal before clamping

    def __init__(self, dataframe,train=False,ssearch=False,samples=16):
        self.df=dataframe
        self.ssearch=ssearch
        self.transform=iaa.Sequential([iaa.Resize((self.IMG_SIZE,self.IMG_SIZE))])
        self.torch_transform=T.Compose([T.ToTensor(),
                                        T.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])
        self.samples=samples
        self.train=train

    def __len__(self):
        return len(self.df)

    def _regions_to_tensor(self, regions):
        """Turn selective-search proposals into a padded, clamped ``(R, 4)`` tensor.

        Duplicated proposals are dropped. In training mode at most
        ``self.samples`` of them are drawn at random; fewer are returned when
        fewer unique proposals exist (``random.sample`` would otherwise raise).
        Each rect is widened by ``ROI_PAD`` on every side and clamped to
        ``[0, IMG_SIZE]``.

        NOTE(review): the rect is treated as ``(x1, y1, x2, y2)``; many
        selective-search implementations return ``(x, y, w, h)`` - confirm
        what ``utills.ssearch`` produces.
        """
        unique = [dict(items) for items in {tuple(r.items()) for r in regions}]
        if self.train:
            unique = random.sample(unique, min(self.samples, len(unique)))
        if not unique:
            # np.stack cannot handle an empty list; return an empty box tensor.
            return torch.zeros((0, 4), dtype=torch.float64)

        # np.float64 is what the removed ``np.float`` alias used to mean.
        rects = torch.from_numpy(
            np.stack([np.array(r['rect'], dtype=np.float64) for r in unique]))
        pad, limit = self.ROI_PAD, self.IMG_SIZE
        return torch.stack([torch.clamp(rects[:, 0] - pad, 0, limit),
                            torch.clamp(rects[:, 1] - pad, 0, limit),
                            torch.clamp(rects[:, 2] + pad, 0, limit),
                            torch.clamp(rects[:, 3] + pad, 0, limit)], dim=1)

    def __getitem__(self, idx):
        """Return ``(image_tensor, gt_boxes, proposals)`` for row ``idx``.

        ``gt_boxes`` holds one ``[x1, y1, x2, y2, label]`` row per bounding
        box. ``proposals`` is an ``(R, 4)`` tensor when selective search is
        enabled and an empty list otherwise.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        fn,target,xmin,ymin,xmax,ymax=self.df.iloc[idx]
        bgr=cv2.imread(fn)
        if bgr is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {fn}")
        im=cv2.cvtColor(bgr,cv2.COLOR_BGR2RGB)

        bbs=BoundingBoxesOnImage([BoundingBox(xmin,ymin,xmax,ymax,label=target)], shape=im.shape)
        image_aug, bbs_aug = self.transform(image=im, bounding_boxes=bbs)

        region_np=[]
        if self.ssearch:
            regions=ssearch.selective_search(image_aug, scale=50, sigma=0.8, min_size=20)
            region_np=self._regions_to_tensor(regions)

        gt_boxes=torch.stack([torch.tensor([bb.x1,bb.y1,bb.x2,bb.y2,bb.label]) for bb in bbs_aug])
        return self.torch_transform(image_aug), gt_boxes, region_np

In [632]:
# Both splits run selective search and, because train=True, keep a random
# sample of proposals per image (the validation split included).
ds_kwargs = dict(train=True, ssearch=True)
train_ds = PetData(train, **ds_kwargs)
valid_ds = PetData(valid, **ds_kwargs)

# Number of images per mini-batch.
BATCH_SIZE = 2
def collate_fn(batch):
    """Transpose a list of per-sample tuples into a tuple of per-field tuples.

    ``[(img0, box0, roi0), (img1, box1, roi1)]`` becomes
    ``((img0, img1), (box0, box1), (roi0, roi1))``; nothing is stacked, so
    fields may have different shapes per sample.

    A real tuple is returned rather than a bare ``zip`` object: a ``zip`` is
    exhausted after one pass and supports neither ``len()`` nor indexing,
    while a tuple still unpacks exactly as before.
    """
    return tuple(zip(*batch))
# Both loaders hand batches to collate_fn instead of the default collation and
# iterate their dataset in order (shuffling is off).
train_dl = DataLoader(
    dataset=train_ds,
    batch_size=BATCH_SIZE,
    shuffle=False,
    collate_fn=collate_fn,
)
valid_dl = DataLoader(
    dataset=valid_ds,
    batch_size=BATCH_SIZE,
    collate_fn=collate_fn,
)

## <center>    First, pre-train a convolutional neural network on image classification tasks. </center>


In [633]:
# Load torchvision's VGG16 with its pretrained weights; used below only to
# report the parameter count.
model = models.vgg16(pretrained=True)
def count_parameters(model):
    """Return the total element count of the model's trainable parameters.

    Parameters whose ``requires_grad`` flag is False are not counted.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total
                              
# Report how many trainable parameters `model` has.
print(f'Num of parameters: {count_parameters(model)}')


Num of parameters: 138357544


In [634]:
# Pull the first batch from the training loader for a quick sanity check.
single_batch=next(iter(train_dl))

In [635]:
# Split the batch into its three per-field sequences, in the order the dataset
# returns them: images, ground-truth boxes, region proposals.
img,bbx,regions=single_batch

In [636]:
# Number of region proposals kept for the first image of the batch.
len(regions[0])

16

## Fast RCNN Network Architecture


![ROI](https://i.imgur.com/yYIDM5h.png)

In [686]:
class roi_pooling(nn.Module):
    """ROI max-pooling: crop every region from the feature map and pool it to a fixed size.

    Args:
        ft_shape: side length of the feature map, in cells; used together with
            ``img_shape`` to scale image coordinates onto the feature map.
        img_shape: side length of the input image, in pixels.
        size: ``(height, width)`` of the pooled output of each region.
    """

    def __init__(self,ft_shape,img_shape,size):
        super(roi_pooling, self).__init__()
        self.size=size
        self.img_shape=img_shape
        self.ft_shape=ft_shape
        # The output size has to be ONE tuple argument: the second positional
        # parameter of AdaptiveMaxPool2d is ``return_indices``, not the width.
        self.adaptivepool=nn.AdaptiveMaxPool2d((size[0], size[1]))

    def forward(self,ft,rois):
        """Pool every region of every image to ``size``.

        Args:
            ft: feature maps, indexed as ``ft[i]`` -> ``(C, H, W)``.
            rois: one ``(R, 4)`` tensor of ``(x1, y1, x2, y2)`` boxes in image
                pixels per image; ``R`` must be the same for every image so
                the results can be stacked.

        Returns:
            Tensor of shape ``(len(rois), R, C, size[0], size[1])``.
        """
        height, width = ft.shape[-2], ft.shape[-1]
        pooled_batch=[]
        for i, roi in enumerate(rois):
            roi=torch.as_tensor(roi)
            ft_img=ft[i].unsqueeze(0)

            # Image pixels -> feature-map cells (same operation order as before
            # so rounding at cell borders is unchanged).
            scaled=(roi[:, :4]/self.img_shape)*self.ft_shape
            # Round outwards, keep the window inside the map, and force it to
            # cover at least one cell: an empty slice cannot be pooled.
            x1=torch.floor(scaled[:,0]).clamp(0, width-1).long()
            y1=torch.floor(scaled[:,1]).clamp(0, height-1).long()
            x2=torch.max(torch.ceil(scaled[:,2]).clamp(max=width).long(), x1+1)
            y2=torch.max(torch.ceil(scaled[:,3]).clamp(max=height).long(), y1+1)

            windows=zip(x1.tolist(), y1.tolist(), x2.tolist(), y2.tolist())
            # Each pooled crop keeps its leading batch axis: (1, C, h, w).
            pooled=[self.adaptivepool(ft_img[:, :, top:bottom, left:right])
                    for left, top, right, bottom in windows]
            pooled_batch.append(torch.cat(pooled, dim=0))
        return torch.stack(pooled_batch, dim=0)
    

In [691]:
class Fast_RCNN(nn.Module):
    """Fast R-CNN detector built on torchvision's pretrained VGG16.

    Pipeline: conv features of the whole image -> ROI pooling to 7x7 per
    region -> VGG16 fully connected layers -> two linear heads (class scores
    and per-class box outputs).

    Args:
        ft_shape: unused; kept only so existing calls keep working. The
            feature-map size is derived from ``img_shape`` instead.
        img_shape: side length of the (square) input images, in pixels.
        num_class: number of object classes; both heads emit ``num_class+1``
            entries (presumably the extra one is background - confirm).
    """

    # ``features[0:-1]`` drops the last of VGG16's five 2x2 max-pools, which
    # leaves four of them: the feature map is 1/16 of the input size.
    BACKBONE_STRIDE = 16

    def __init__(self,ft_shape=32,img_shape=224,num_class=2):
        super(Fast_RCNN, self).__init__()

        self.num_class=num_class

        #Layers
        self.pretrained=models.vgg16(pretrained=True)
        # Conv backbone without its final layer.
        self.ft_net=self.pretrained.features[0:-1]
        # Feature-map size follows the image size (224 -> 14) instead of being
        # hard-coded, so ROIs are scaled correctly for other ``img_shape``s.
        self.roi=roi_pooling(ft_shape=img_shape // self.BACKBONE_STRIDE,
                             img_shape=img_shape,size=(7,7))
        # VGG16 classifier without its final layer; the heads below replace it.
        self.classifier_net=self.pretrained.classifier[0:-1]
        self.cls_score = nn.Linear(4096, num_class+1)
        self.bbox = nn.Linear(4096, 4*(num_class+1))

    def forward(self,imgs,regions):
        """Score and regress every region of every image.

        Args:
            imgs: batch of images accepted by the VGG16 feature extractor.
            regions: per-image region proposals, as expected by ``roi_pooling``.

        Returns:
            ``(cls_score, bbox)`` with shapes
            ``(batch, regions, num_class+1)`` and
            ``(batch, regions, num_class+1, 4)``.
        """
        fts=self.ft_net(imgs)
        # NOTE(review): detach() stops gradients here, so the conv backbone is
        # never updated - confirm that freezing it is intended.
        o_roi=self.roi(fts,regions).detach()

        batch_size, regions_num = o_roi.shape[0], o_roi.shape[1]

        # Flatten every pooled region to one feature vector for the FC layers.
        ft=self.classifier_net(o_roi.reshape(batch_size*regions_num,-1))

        cls_score = self.cls_score(ft).view(batch_size,regions_num,-1)
        bbox = self.bbox(ft).view(batch_size,regions_num, self.num_class+1, 4)
        return cls_score,bbox
    

In [692]:
# Build the detector with its default arguments and switch it to evaluation mode.
fastRCNN=Fast_RCNN()
fastRCNN.eval()
# Presumably printed only so the cell does not end on eval(), whose return
# value (the module itself) the notebook would otherwise echo - confirm.
print("n")

n


In [693]:
# Run the detector on the sample batch.
# NOTE(review): `img_variable` is not defined in any visible cell - presumably
# the images in `img` stacked into one batch tensor; confirm before re-running.
cls_score,bbox=fastRCNN(img_variable,regions)

In [694]:
# Display the raw class scores: one row of num_class+1 values per region, grouped by image.
cls_score

tensor([[[-8.7149e-02,  3.9112e-01,  2.9003e-01],
         [-2.7656e-01, -9.3987e-02, -4.3276e-01],
         [ 2.4046e-01, -3.2364e-01,  5.7712e-01],
         [ 3.9255e-02, -9.7825e-01,  2.0765e-01],
         [ 3.7966e-01, -4.8139e-01,  3.1358e-01],
         [ 2.6359e-02, -3.6416e-01,  2.1985e-01],
         [ 1.1144e-01,  2.5747e-02,  7.2403e-01],
         [-3.0191e-01,  7.5240e-01,  1.6367e-01],
         [-8.8209e-04, -7.0917e-01,  3.4709e-01],
         [ 2.2450e-02,  7.8700e-01,  1.3294e-01],
         [ 3.1192e-01, -2.9238e-01,  3.2987e-01],
         [-2.2055e-01, -9.7430e-02, -4.1331e-01],
         [ 2.6927e-01,  2.9613e-01,  6.4216e-01],
         [ 1.2704e-01,  3.6364e-01, -2.4357e-01],
         [ 2.4838e-01,  5.9044e-02, -1.0102e-03],
         [ 2.5836e-01,  2.0476e-01, -5.6644e-01]],

        [[-2.2907e-01, -2.7186e-01, -3.1579e-01],
         [ 6.3522e-02,  2.9129e-01, -2.4769e-01],
         [ 1.0205e+00,  7.1147e-02, -1.1284e-01],
         [ 1.7305e-01,  4.1358e-01, -1.7060e-01]