# Object Detection with R-CNN

I will test R-CNN, Fast R-CNN, and Faster R-CNN to compare how they perform. I have tried other methods for detecting my dog, but the results were unsatisfactory.

First, let's start with R-CNN.


## Libraries and Dependencies

In [1]:
import tensorflow as tf
import cv2
import numpy as np
from keras.preprocessing import image
import os
import json

In [2]:
# Avoid OOM errors by enabling GPU memory growth, so TensorFlow allocates GPU
# memory as it is needed rather than reserving it all at start-up.
# Uses the same device-listing call as the check in the next cell.
gpus = tf.config.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

In [3]:
# Show which GPU devices TensorFlow detected.
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

## Preprocessing and Selective Search

In [4]:
def selective_search(img, max_proposals = 200):
    """Generate region proposals for ``img`` with OpenCV selective search.

    The segmentation runs in its "fast" mode. At most ``max_proposals``
    rectangles are returned, in the order OpenCV produced them. Callers in
    this file unpack each rectangle as ``(x, y, w, h)``.
    """
    segmenter = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()
    segmenter.setBaseImage(img)
    segmenter.switchToSelectiveSearchFast()
    return segmenter.process()[:max_proposals]

## Model Creation

In [5]:
from keras.applications.vgg16 import VGG16, preprocess_input

# VGG16 with ImageNet weights. include_top=False leaves off the fully connected
# classifier layers, so the network is used here as a convolutional feature
# extractor rather than as an ImageNet classifier.
base = VGG16(weights = 'imagenet', include_top=False)

In [6]:
def load_data(labelDir):
    """Load rectangle annotations from every ``.json`` file in ``labelDir``.

    Each JSON file is expected to hold an ``imagePath`` string and a ``shapes``
    list whose entries carry ``label``, ``shape_type`` and ``points`` (this
    looks like the LabelMe export format - confirm against the labelling tool).
    Only shapes whose ``shape_type`` is ``'rectangle'`` are kept.

    Args:
        labelDir: Directory containing the annotation files. Image paths are
            resolved relative to this same directory.

    Returns:
        A pair ``(image_files, annotations)`` of equal-length lists.
        ``image_files[i]`` is the image path for the i-th annotation file and
        ``annotations[i]`` is a list of ``{'class': label, 'bbox': [xmin,
        ymin, xmax, ymax]}`` dicts (empty when the file has no rectangles).
        Files are visited in sorted name order so the result is reproducible.

    Raises:
        KeyError: If a JSON file lacks one of the expected keys.
    """
    image_files = []
    annotations = []

    # os.listdir() order is arbitrary; sort so that later slicing of the
    # result (e.g. "first three samples") always picks the same files.
    for file in sorted(os.listdir(labelDir)):
        if not file.endswith('.json'):
            continue

        json_file = os.path.join(labelDir, file)
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(json_file, 'r', encoding='utf-8') as f:
            data = json.load(f)

        image_files.append(os.path.join(labelDir, data['imagePath']))

        image_annotations = []
        for shape in data['shapes']:
            if shape['shape_type'] != 'rectangle':
                continue
            x0, y0 = shape['points'][0]
            x1, y1 = shape['points'][1]
            # The two stored corners are not guaranteed to be (top-left,
            # bottom-right); normalise so that min <= max on both axes.
            bbox = [min(x0, x1), min(y0, y1), max(x0, x1), max(y0, y1)]
            image_annotations.append({
                'class': shape['label'],
                'bbox': bbox,
            })
        annotations.append(image_annotations)

    return image_files, annotations

In [7]:
def iou(rect1, rect2):
    """Return the intersection-over-union of two axis-aligned rectangles.

    Args:
        rect1: Rectangle as ``(x, y, w, h)`` - minimum-coordinate corner plus
            width and height.
        rect2: Second rectangle in the same ``(x, y, w, h)`` form.

    Returns:
        Intersection area divided by union area. The result is 0 when the
        rectangles do not overlap, and also when the union has no area
        (e.g. both rectangles are degenerate), instead of dividing by zero.
    """
    x1, y1, w1, h1 = rect1
    x2, y2, w2, h2 = rect2

    # Extent of the overlap along each axis; a non-positive value on either
    # axis means the rectangles do not intersect.
    overlap_w = min(x1 + w1, x2 + w2) - max(x1, x2)
    overlap_h = min(y1 + h1, y2 + h2) - max(y1, y2)
    if overlap_w > 0 and overlap_h > 0:
        intersect_area = overlap_w * overlap_h
    else:
        intersect_area = 0

    union_area = w1 * h1 + w2 * h2 - intersect_area
    if union_area <= 0:
        return 0.0
    return intersect_area / union_area

In [8]:
def assign_labels(rects, annotations, iou_threshold=0.5):
    """Label each proposal with the class of its best-overlapping annotation.

    Args:
        rects: Iterable of proposals, each ``(x, y, w, h)``.
        annotations: Ground-truth objects for ONE image: a list of dicts with
            a ``'class'`` label and a ``'bbox'`` given as
            ``[xmin, ymin, xmax, ymax]``.
        iou_threshold: Minimum IoU with a ground-truth box for a proposal to
            take that box's class.

    Returns:
        A list with one label per proposal, in order. Proposals whose best IoU
        is below ``iou_threshold`` (or when there are no annotations) are
        labelled ``'background'``.
    """
    # iou() works on (x, y, w, h), while annotations store corner coordinates.
    # Convert each ground-truth box once, up front, so both arguments passed
    # to iou() are in the same form.
    ground_truth = []
    for obj in annotations:
        xmin, ymin, xmax, ymax = obj['bbox']
        ground_truth.append((obj['class'], (xmin, ymin, xmax - xmin, ymax - ymin)))

    labels = []
    for rect in rects:
        max_iou = 0
        max_iou_label = 'background'

        for obj_class, obj_rect in ground_truth:
            iou_score = iou(rect, obj_rect)
            # Strict comparison: on ties the earliest annotation wins.
            if iou_score > max_iou:
                max_iou = iou_score
                max_iou_label = obj_class

        labels.append(max_iou_label if max_iou >= iou_threshold else 'background')
    return labels

In [9]:
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.applications.vgg16 import preprocess_input

def extract_features(img, rects, base_model):
    """Compute one flat feature vector per proposal using ``base_model``.

    Each proposal is cropped from ``img``, resized to 224x224, run through
    ``preprocess_input`` and passed to ``base_model.predict`` in a single
    batch. The model output for each proposal is flattened to 1-D so the
    result can be fed directly to a classifier that expects a 2-D matrix.

    Args:
        img: Image array indexed as ``img[row, column]``.
        rects: Sequence of proposals, each ``(x, y, w, h)`` in pixel
            coordinates of ``img``.
        base_model: Model exposing ``predict(batch)``; it is called with an
            array of shape ``(len(rects), 224, 224, channels)``.

    Returns:
        Array of shape ``(len(rects), n_features)``. With no proposals an
        empty array is returned and the model is not called.
    """
    if len(rects) == 0:
        return np.empty((0, 0), dtype=np.float32)

    # NOTE(review): VGG16's preprocess_input is documented to expect RGB
    # input, while images read with cv2.imread are BGR - confirm whether a
    # cv2.cvtColor(..., cv2.COLOR_BGR2RGB) step is needed before this call.
    rois = []
    for x, y, w, h in rects:
        roi = cv2.resize(img[y:y + h, x:x + w], (224, 224))
        rois.append(img_to_array(roi))

    # One predict() call for the whole batch instead of one call per proposal.
    batch = preprocess_input(np.stack(rois))
    feature_maps = np.asarray(base_model.predict(batch))

    # Flatten everything except the batch axis: (N, ...) -> (N, n_features).
    return feature_maps.reshape(len(rois), -1)

In [10]:
# Read every annotation file under Data/Labels into two parallel lists:
# image paths and, for each image, its list of labelled boxes.
image_files, annotations = load_data('Data/Labels')

In [11]:
# Keep only the first three samples - presumably to keep this trial run
# short; remove both slices to train on the full data set.
image_files = image_files[:3]
annotations = annotations[:3]

In [None]:
# Build the training set: one feature vector and one label per region
# proposal, accumulated over all images.
X_train, y_train = [], []

for img_file, annotation in zip(image_files, annotations):
    img = cv2.imread(img_file)
    if img is None:
        # cv2.imread returns None instead of raising when the file is missing
        # or cannot be decoded; fail here with a message naming the file.
        raise FileNotFoundError(f"Could not read image: {img_file}")

    # Candidate regions for this image.
    rects = selective_search(img)

    # Class name (or 'background') for each candidate region.
    labels = assign_labels(rects, annotation)

    # Feature vector for each candidate region, from the VGG16 base model.
    features = extract_features(img, rects, base)

    X_train.extend(features)
    y_train.extend(labels)

X_train = np.array(X_train)
y_train = np.array(y_train)



In [None]:
from sklearn.svm import LinearSVC
from sklearn.multiclass import OneVsRestClassifier
# Fit a one-vs-rest linear SVM on the region features. y_train holds the
# per-region class names, including 'background'.
svm = OneVsRestClassifier(LinearSVC(C=1, max_iter=1000))
svm.fit(X_train, y_train)