In [1]:
# Download dataset from https://www.kaggle.com/datasets/debasisdotcom/racoon-detection
import pandas as pd
train = pd.read_csv("racoons_train.csv")
train.shape

(173, 8)

In [None]:
train.info(verbose=True)

In [3]:
train.head(5)

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
0,raccoon-17.jpg,259,194,raccoon,95,60,167,118
1,raccoon-11.jpg,660,432,raccoon,3,1,461,431
2,raccoon-63.jpg,600,400,raccoon,74,107,280,290
3,raccoon-63.jpg,600,400,raccoon,227,93,403,298
4,raccoon-60.jpg,273,185,raccoon,58,33,197,127


In [None]:
IMAGE_SIZE = 128
coords=train[["width","height","xmin","ymin","xmax","ymax"]]
coords = coords.assign(xmin = coords.xmin.mul(IMAGE_SIZE/coords["width"]))
coords = coords.assign(xmax = coords.xmax.mul(IMAGE_SIZE/coords["width"]))
coords = coords.assign(ymin = coords.ymin.mul(IMAGE_SIZE/coords["height"]))
coords = coords.assign(ymax = coords.ymax.mul(IMAGE_SIZE/coords["height"]))

coords.drop(["width","height"],axis =1,inplace=True)
coords.head()

In [None]:
paths = train["filename"]
len(paths)

In [6]:
import numpy as np
from PIL import Image
from keras.applications.mobilenet import MobileNet, preprocess_input

images = "Racoon Images/images/"
batch_images = np.zeros((len(paths), IMAGE_SIZE, IMAGE_SIZE,3), dtype=np.float32)

for i, f in enumerate(paths):
  #print(f)
    img = Image.open(images+f)
    img = img.resize((IMAGE_SIZE, IMAGE_SIZE))
    img = img.convert('RGB')
    batch_images[i] = preprocess_input(np.array(img, dtype=np.float32))

In [7]:
import tensorflow as tf
from keras import Model
from keras.applications.mobilenet import MobileNet, preprocess_input
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, Callback
from keras.layers import Conv2D, Reshape
from keras.utils import Sequence
from keras.backend import epsilon

In [8]:
ALPHA = 1.0
model = MobileNet(input_shape=(IMAGE_SIZE,IMAGE_SIZE,3), include_top=False, alpha=ALPHA)

for layers in model.layers:
  layers.trainable = False

x = model.layers[-1].output                         # remove output layer of default MobileNet model
x = Conv2D(4, kernel_size = 4, name="coords")(x)
x = Reshape((4,))(x)

model = Model(inputs = model.inputs, outputs = x)

In [9]:
# Define customized loss function to minimize: Intersection over Union
# IOU = Area of intersection of boxes / Area of union of boxes
# The aim of this model would be to keep improving until the blue box (prediction) and the
# green box (target) perfectly overlap i.e the IOU between the two boxes becomes equal to 1.

def loss(gt,pred):
    
    intersections = 0
    unions = 0
    
    diff_width = np.minimum(gt[:,0] + gt[:,2], pred[:,0] + pred[:,2]) - np.maximum(gt[:,0], pred[:,0])
    diff_height = np.minimum(gt[:,1] + gt[:,3], pred[:,1] + pred[:,3]) - np.maximum(gt[:,1], pred[:,1])
    intersection = diff_width * diff_height
    
    # Compute union
    area_gt = gt[:,2] * gt[:,3]
    area_pred = pred[:,2] * pred[:,3]
    union = area_gt + area_pred - intersection

    # Compute intersection and union over multiple boxes
    for j, _ in enumerate(union):
        if union[j] > 0 and intersection[j] > 0 and union[j] >= intersection[j]:
            intersections += intersection[j]
            unions += union[j]

    # Compute IOU. Use epsilon to prevent division by zero
    iou = np.round(intersections / (unions + epsilon()), 4)
    iou = iou.astype(np.float32)
    return iou

def IoU(y_true, y_pred):
    iou = tf.py_function(loss, [y_true, y_pred], tf.float32)
    return iou

In [10]:
gt = coords
PATIENCE=10

model.compile(optimizer = "Adam", loss = "mse", metrics = [IoU])

stop = EarlyStopping(monitor='IoU', patience=PATIENCE, mode="max" )

reduce_lr = ReduceLROnPlateau(monitor='IoU',factor=0.2,patience=PATIENCE, min_lr=1e-7, verbose=1, mode="max" )

model.fit(batch_images, gt, epochs=100,callbacks=[stop,reduce_lr], verbose = 2) 

6/6 - 2s - loss: 4079.5444 - IoU: 0.0856 - lr: 0.0010 - 2s/epoch - 290ms/step


<keras.callbacks.History at 0x2aed6335fd0>