In [1]:
# lib to read files and do math operations
import pandas as pd
import numpy as np

# lib for visualization purpose
import matplotlib.pyplot as plt
import seaborn as sns

# lib for image processing 
import cv2

# lib for NN and CNN
import tensorflow as tf
import tensorflow.keras as keras
from keras.layers import Dense, Conv2D, MaxPool2D, Input
from tensorflow.keras.models import Model

# lib for system and file saving
import os
import pathlib
import joblib

# lib for ML
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

# lib for evaluation
from sklearn.metrics import classification_report, accuracy_score, precision_score, confusion_matrix

# lib for pretrained models
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications import MobileNet
from tensorflow.keras.applications import InceptionV3




### RCNN (Region-based Convolutional Neural Network):

It is a classic object detection framework that localizes objects in an image and classifies them into different categories. The RCNN pipeline consists of several key components:

-> Region Proposal: Generate region proposals using selective search or another region proposal method. These proposals represent potential object locations in the image.

-> Feature Extraction: Extract features from each region proposal using a pre-trained CNN (Convolutional Neural Network). Typically, the CNN is trained on a large dataset like ImageNet for generic feature extraction.

-> Classification: Classify each region proposal into different object categories using a classifier. This step involves training a classifier (e.g., SVM, Softmax classifier) on top of the extracted features.

-> Bounding Box Regression: Refine the bounding box coordinates of each region proposal to better fit the object using regression techniques.

#### Steps For RCNN:

1. Choose a Pre-trained CNN: Select a pre-trained CNN (e.g., VGG, ResNet, or MobileNet) to extract features from region proposals. You can use models pretrained on large datasets like ImageNet.

2. Generate Region Proposals: Use a region proposal method (e.g., selective search) to generate region proposals in the image. These proposals represent potential object locations.

3. Extract Features: For each region proposal, extract features using the chosen CNN. Pass each region through the CNN and extract features from one of the intermediate layers.

4. Classification and Bounding Box Regression: For each extracted feature vector, pass it through a classifier to classify the object category. Additionally, use a regression model to refine the bounding box coordinates of each region proposal.

5. Non-maximum Suppression: Apply non-maximum suppression to remove redundant and overlapping bounding boxes.

6. Post-processing: Optionally, you can perform post-processing steps such as filtering out low-confidence detections or applying a threshold to the confidence scores.

7. Visualize Results: Draw bounding boxes around detected objects and display the classification results

In [2]:

def get_annotations(dir_path):
    # folder path where we have our annotations
    annot_path= dir_path+'\\'+'_annotations.csv'

    # check the directory
    print(os.listdir(dir_path)[0:5])


    # load the annotations
    annotations= pd.read_csv(annot_path)
    annotations.head()
    
    return annotations

In [3]:
# extract and map images name with their respective classes using dict

def get_classes_labels_dict(annot, class_col, img_col):
    class_dict= {}
    label_dict= {}

    for idx, i in enumerate(annot[class_col].unique()):
        class_dict[i]= annot[annot[class_col]==i][img_col]
        label_dict[i]= idx
        
    return class_dict, label_dict

#### Why did we use block5_pool of VGG16 as output?

The layers of a CNN capture increasingly abstract features as you move deeper into the network. Layers closer to the input capture low-level features like edges and textures, while layers deeper in the network capture higher-level features like object parts and whole objects. The "block5_pool" layer, being one of the deepest layers in VGG16, provides a good balance between high-level semantic features and spatial resolution.

This can be beneficial for tasks like object detection, where spatial information about object locations is important for accurate localization.

Computational Efficiency

In [4]:
def get_pretrained_model(pretrain_m):
    
    if pretrain_m == 'VGG16':
        
        # load VGG16   -------- Note if shape not deined, it byu default takes (224,224,3) shape
        base_model= VGG16(weights= 'imagenet', include_top= False)
        model= Model(inputs= base_model.input, outputs= base_model.get_layer('block5_pool').output)
        
        return model

In [5]:
# function to calulate roi manually using patches

def get_roi_manual(patch, actual_bb): # [xmin, xmax, ymin, ymax]
    
    I_xmin, I_ymin= max(patch[0], actual_bb[0]), max(patch[2], actual_bb[2])
    I_xmax, I_ymax= min(patch[1], actual_bb[1]), min(patch[3], actual_bb[3])
    
    # check if patch and actual_bb are not overlapping on eachother
    if I_xmax > I_xmin or I_ymax > I_ymin:
        
        inter_area= 0
    else:   
        # calculate intersection area = inter_l * inter_b
        inter_area= np.abs((I_xmax - I_xmin) * (I_ymax - I_ymin))  # l*b

        # calculate area of the patch and actual_bb = l*b
        area_patch= (patch[1] - patch[0]) * (patch[3] - patch[2])
        area_actual_bb= (actual_bb[1] - actual_bb[0]) * (actual_bb[3] - actual_bb[2])
    
        # calculate area of union = A(patch) + A(actual_bb) - inter_area
        union_area= (area_patch + area_actual_bb) - inter_area
        
        # calculate iou= inter_area/union_are
        iou= inter_area/union_area
        
        
    return iou

In [15]:
def get_valid_proposal(rects, actual_bb, min_iou= 0.5 ):
    
    for region in rects:
        print(region)
        print(actual_bb)
        # calculate iou with actual_bb
        iou= get_roi_manual(region, actual_bb)
        
        if any(iou >= min_iou):
            return region

In [7]:
# function to create region of propsal

def generate_reg_of_proposal(image):
#     print(image)
    # Create a Selective Search segmentation object
    ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()

    # Set input image for Selective Search
    ss.setBaseImage(image)

    # Switch to fast mode (optional, but recommended)
    ss.switchToSelectiveSearchFast()

    # Perform selective search to generate region proposals
    rects = ss.process()

    # Convert rects to list of tuples (x, y, w, h)
    region_proposals = [(int(x), int(y), int(x + w), int(y + h)) for (x, y, w, h) in rects]

    return  region_proposals                        
    

In [8]:
# !pip install opencv-contrib-python

In [9]:
def get_train_test(X, y, split_size):
    
    # transform list to numpy array
    X= np.array(X)
    y= np.array(y)
    
    # split the data in train and test
    X_train, X_test, y_train, y_test= train_test_split(X, y, test_size= split_size, random_state= 0)
    
    
    return (X_train, y_train), (X_test, y_test)

### Training Code

In [10]:
# set dir_path for train folder and annotations
dir_path= r"D:\Datasets\Aquarium Combined.v2-raw-1024.tensorflow\train"

# get the annotations or labels csv
annot= get_annotations(dir_path)

# create dic for images and labels
class_dict, labels_dict= get_classes_labels_dict(annot, 'class', 'filename')


['IMG_2274_jpeg_jpg.rf.2f319e949748145fb22dcb52bb325a0c.jpg', 'IMG_2275_jpeg_jpg.rf.66355520a49ba7fb7082052f7ca6fee0.jpg', 'IMG_2276_jpeg_jpg.rf.7411b1902c81bad8cdefd2cc4eb3a97b.jpg', 'IMG_2280_jpeg_jpg.rf.5abcce5be523f6507bbaf731dd671226.jpg', 'IMG_2282_jpeg_jpg.rf.510f3bc14c3e0aa378b192199d01cae6.jpg']


In [11]:
annot.head()

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
0,IMG_2541_jpeg_jpg.rf.fc997b87790e715d47ce1cc83...,768,1024,starfish,302,410,534,730
1,IMG_8406_jpg.rf.fda4b68f345bda8047e7f15060f70e...,1024,768,shark,106,442,175,640
2,IMG_8406_jpg.rf.fda4b68f345bda8047e7f15060f70e...,1024,768,fish,638,490,678,672
3,IMG_8406_jpg.rf.fda4b68f345bda8047e7f15060f70e...,1024,768,fish,625,107,765,187
4,IMG_8406_jpg.rf.fda4b68f345bda8047e7f15060f70e...,1024,768,fish,818,419,830,499


In [16]:
# load pre-trained model
model = get_pretrained_model('VGG16')

# load classifier (SVM or Softmax)
classifier= SVC()

# extract features and labels
features_train= []
labels_train= []

for i, row in annot.iterrows():
    
#     print(row)
    actual_bb= [row.xmin, row.xmax, row.ymin, row.ymax]
    image= cv2.imread(dir_path+ '\\' +row['filename'])

    if image is None:
        print('wrong path !!', dir_path, image)

    else:

        regions= generate_reg_of_proposal(image)

        for region in regions:

            # run the region through iou to get the only those regions which has 
            # high iou overlap  with the actual_bb--------(extra step added by me)
            region = get_valid_proposal(region, image)

            # set the extracted coordinates 
            (x, y, w, h)= region

            # crop the image to get the region of interest
            roi= image[y:y+h, x:x+w]

            # resize the roi image to (224,224) to fit VGG16 input
            roi_resized= cv2.resize(roi, (224, 224))
            roi_preprocessed= np.expand_dims(cv2.cvtColor(roi_resized, cv2.COLOR_BGR2RGB), axis=0)

            # extract features
            features= model.predict(roi_preprocessed)
            feature_flatten= features.flatten()

            # append features and labels to the list
            features_train.append(features)
            labels_train.append(row['class'])

# Train the classifier
classifier.fit(features_train, labels_train)
        
        
        

115
[[[ 43  33  33]
  [ 47  37  37]
  [ 48  38  38]
  ...
  [101 106 115]
  [ 80  84  95]
  [ 67  71  82]]

 [[ 45  35  35]
  [ 47  37  37]
  [ 47  37  37]
  ...
  [ 83  88  97]
  [ 80  84  95]
  [ 82  86  97]]

 [[ 41  31  31]
  [ 40  30  30]
  [ 40  30  30]
  ...
  [ 67  69  80]
  [ 59  61  72]
  [ 61  63  74]]

 ...

 [[ 88  95 144]
  [104 111 160]
  [108 112 160]
  ...
  [  1   1   1]
  [  1   1   1]
  [  1   1   1]]

 [[ 85  91 138]
  [ 95 101 146]
  [ 96  99 143]
  ...
  [  1   1   1]
  [  1   1   1]
  [  1   1   1]]

 [[ 88  94 139]
  [ 92  98 141]
  [ 94  95 139]
  ...
  [  1   1   1]
  [  1   1   1]
  [  1   1   1]]]


TypeError: 'int' object is not subscriptable

In [None]:
# for row in annot.iterrows():
    
#     print(row)
#     print(row)
for i, rows in annot.iterrows():
    print(rows['filename'])

In [None]:
img= cv2.imread(r"C:\Users\varsha\Pictures\AI-Snaps\1200.jpg")

In [None]:
i = cv2.imread(r"D:\Datasets\Aquarium Combined.v2-raw-1024.tensorflow\train\IMG_2284_jpeg_jpg.rf.99de11cb5727748bd3eae3afe7b415e6.jpg")

In [None]:
# Create a Selective Search segmentation object
ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()

# Set input image for Selective Search
ss.setBaseImage(i)

# Switch to fast mode (optional, but recommended)
ss.switchToSelectiveSearchFast()

# Perform selective search to generate region proposals
rects = ss.process()


In [None]:
rects

In [None]:

for region in region_proposals:
#     print(region)
    (x, y, w, h)= region