In [1]:
import pandas as pd
train = pd.read_csv("racoons_train.csv")
train.shape

(173, 8)

In [2]:
train.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 173 entries, 0 to 172
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   filename  173 non-null    object
 1   width     173 non-null    int64 
 2   height    173 non-null    int64 
 3   class     173 non-null    object
 4   xmin      173 non-null    int64 
 5   ymin      173 non-null    int64 
 6   xmax      173 non-null    int64 
 7   ymax      173 non-null    int64 
dtypes: int64(6), object(2)
memory usage: 10.9+ KB


In [3]:
train.head(5)

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
0,raccoon-17.jpg,259,194,raccoon,95,60,167,118
1,raccoon-11.jpg,660,432,raccoon,3,1,461,431
2,raccoon-63.jpg,600,400,raccoon,74,107,280,290
3,raccoon-63.jpg,600,400,raccoon,227,93,403,298
4,raccoon-60.jpg,273,185,raccoon,58,33,197,127


In [4]:
IMAGE_SIZE = 128
coords=train[["width","height","xmin","ymin","xmax","ymax"]]
coords = coords.assign(xmin = coords.xmin.mul(IMAGE_SIZE/coords["width"]))
coords = coords.assign(xmax = coords.xmax.mul(IMAGE_SIZE/coords["width"]))
coords = coords.assign(ymin = coords.ymin.mul(IMAGE_SIZE/coords["width"]))
coords = coords.assign(ymax = coords.ymax.mul(IMAGE_SIZE/coords["width"]))

coords.drop(["width","height"],axis =1,inplace=True)
coords.head()

Unnamed: 0,xmin,ymin,xmax,ymax
0,46.949807,29.65251,82.532819,58.316602
1,0.581818,0.193939,89.406061,83.587879
2,15.786667,22.826667,59.733333,61.866667
3,48.426667,19.84,85.973333,63.573333
4,27.194139,15.472527,92.3663,59.545788


In [5]:
paths = train["filename"]
len(paths)

173

In [6]:
import numpy as np
from PIL import Image
from keras.applications.mobilenet import MobileNet, preprocess_input

images = "Racoon Images/images/"
batch_images = np.zeros((len(paths), IMAGE_SIZE, IMAGE_SIZE,3), dtype=np.float32)

for i, f in enumerate(paths):
  #print(f)
  img = Image.open(images+f)
  img = img.resize((IMAGE_SIZE, IMAGE_SIZE))
  img = img.convert('RGB')
  batch_images[i] = preprocess_input(np.array(img, dtype=np.float32))

In [7]:
ALPHA = 1.0

model = MobileNet(input_shape=(IMAGE_SIZE,IMAGE_SIZE,3), include_top=False, alpha=ALPHA)

In [8]:
import tensorflow as tf
from keras import Model
from keras.applications.mobilenet import MobileNet, preprocess_input
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, Callback
from keras.layers import Conv2D, Reshape
from keras.utils import Sequence
from keras.backend import epsilon

In [9]:
for layers in model.layers:
  layers.trainable = False

x = model.layers[-1].output
x = Conv2D(4, kernel_size = 4, name="coords")(x)
x = Reshape((4,))(x)

model = Model(inputs = model.inputs, outputs = x)

In [10]:
def loss(gt,pred):
    intersections = 0
    unions = 0
    diff_width = np.minimum(gt[:,0] + gt[:,2], pred[:,0] + pred[:,2]) - np.maximum(gt[:,0], pred[:,0])
    diff_height = np.minimum(gt[:,1] + gt[:,3], pred[:,1] + pred[:,3]) - np.maximum(gt[:,1], pred[:,1])
    intersection = diff_width * diff_height
    
    # Compute union
    area_gt = gt[:,2] * gt[:,3]
    area_pred = pred[:,2] * pred[:,3]
    union = area_gt + area_pred - intersection

#     Compute intersection and union over multiple boxes
    for j, _ in enumerate(union):
        if union[j] > 0 and intersection[j] > 0 and union[j] >= intersection[j]:
            intersections += intersection[j]
            unions += union[j]

    # Compute IOU. Use epsilon to prevent division by zero
    iou = np.round(intersections / (unions + epsilon()), 4)
    iou = iou.astype(np.float32)
    return iou

def IoU(y_true, y_pred):
    iou = tf.py_function(loss, [y_true, y_pred], tf.float32)
    return iou


In [None]:
gt = coords
PATIENCE=10

model.compile(optimizer = "Adam", loss = "mse", metrics = [IoU])

stop = EarlyStopping(monitor='IoU', patience=PATIENCE, mode="max" )

reduce_lr = ReduceLROnPlateau(monitor='IoU',factor=0.2,patience=PATIENCE, min_lr=1e-7, verbose=1, mode="max" )

model.fit(batch_images, gt, epochs=100,callbacks=[stop,reduce_lr], verbose = 2)

Epoch 1/100
6/6 - 2s - loss: 3325.4368 - IoU: 0.1036 - lr: 0.0010 - 2s/epoch - 298ms/step
Epoch 2/100
6/6 - 1s - loss: 664.9849 - IoU: 0.4398 - lr: 0.0010 - 930ms/epoch - 155ms/step
Epoch 3/100
6/6 - 1s - loss: 681.4551 - IoU: 0.4716 - lr: 0.0010 - 924ms/epoch - 154ms/step
Epoch 4/100
6/6 - 1s - loss: 652.9213 - IoU: 0.4792 - lr: 0.0010 - 953ms/epoch - 159ms/step
Epoch 5/100
6/6 - 1s - loss: 357.3033 - IoU: 0.5736 - lr: 0.0010 - 926ms/epoch - 154ms/step
Epoch 6/100
6/6 - 1s - loss: 292.3316 - IoU: 0.5949 - lr: 0.0010 - 902ms/epoch - 150ms/step
Epoch 7/100
6/6 - 1s - loss: 273.5420 - IoU: 0.6269 - lr: 0.0010 - 931ms/epoch - 155ms/step
Epoch 8/100
6/6 - 1s - loss: 204.5267 - IoU: 0.6764 - lr: 0.0010 - 898ms/epoch - 150ms/step
Epoch 9/100
6/6 - 1s - loss: 179.4036 - IoU: 0.6975 - lr: 0.0010 - 935ms/epoch - 156ms/step
Epoch 10/100
6/6 - 1s - loss: 163.8522 - IoU: 0.7214 - lr: 0.0010 - 977ms/epoch - 163ms/step
Epoch 11/100
6/6 - 1s - loss: 141.8685 - IoU: 0.7409 - lr: 0.0010 - 945ms/epoch -