In [1]:
import csv
import os
import math
import pandas as pd
from PIL import Image
import numpy as np
from tensorflow.keras import Model
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2, preprocess_input
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, Callback
from tensorflow.keras.layers import Conv2D, Reshape
from tensorflow.keras.utils import Sequence
from tensorflow.keras.backend import epsilon

In [16]:
IMAGE_SIZE = 96
EPOCHS = 50
BATCH_SIZE = 32
PATIENCE = 50

MULTI_PROCESSING = False
THREADS = 1

In [3]:
train_csv = "aug.csv"
valid_csv = "valid.csv"

In [4]:
with open (train_csv,'r') as train:
    reader=csv.reader(train, delimiter=',')
    row_count = sum(1 for row in reader)
with open (valid_csv,'r') as valid:
    reader=csv.reader(valid, delimiter=',')
    v_row_count = sum(1 for row in reader)
print('Total Training Images: {}, Total Validation Images: {}'.format(row_count, v_row_count))

Total Training Images: 11934, Total Validation Images: 400


In [5]:
class DataGenerator(Sequence):

    def __init__(self, csv_file):
        self.paths = []

        with open(csv_file, "r") as file:
            self.coords = np.zeros((sum(1 for line in file), 4))
            file.seek(0)

            reader = csv.reader(file, delimiter=",")
            for index, row in enumerate(reader):
                for i, r in enumerate(row[1:7]):
                    row[i+1] = float(r)

                path, image_height, image_width, x0, y0, x1, y1, _, _ = row
                self.coords[index, 0] = x0 * IMAGE_SIZE / image_width
                self.coords[index, 1] = y0 * IMAGE_SIZE / image_height
                self.coords[index, 2] = (x1 - x0) * IMAGE_SIZE / image_width
                self.coords[index, 3] = (y1 - y0) * IMAGE_SIZE / image_height 

                self.paths.append(path)

    def __len__(self):
        return math.ceil(len(self.coords) / BATCH_SIZE)

    def __getitem__(self, idx):
        batch_paths = self.paths[idx * BATCH_SIZE:(idx + 1) * BATCH_SIZE]
        batch_coords = self.coords[idx * BATCH_SIZE:(idx + 1) * BATCH_SIZE]

        batch_images = np.zeros((len(batch_paths), IMAGE_SIZE, IMAGE_SIZE, 3), dtype=np.float32)
        for i, f in enumerate(batch_paths):
            img = Image.open(f)
            img = img.resize((IMAGE_SIZE, IMAGE_SIZE))
            img = img.convert('RGB')

            batch_images[i] = preprocess_input(np.array(img, dtype=np.float32))
            img.close()

        return batch_images, batch_coords

In [6]:
class Validation(Callback):
    def __init__(self, generator):
        self.generator = generator

    def on_epoch_end(self, epoch, logs):
        mse = 0
        intersections = 0
        unions = 0

        for i in range(len(self.generator)):
            batch_images, gt = self.generator[i]
            pred = self.model.predict_on_batch(batch_images)
            mse += np.linalg.norm(gt - pred, ord='fro') / pred.shape[0]

            pred = np.maximum(pred, 0)

            diff_width = np.minimum(gt[:,0] + gt[:,2], pred[:,0] + pred[:,2]) - np.maximum(gt[:,0], pred[:,0])
            diff_height = np.minimum(gt[:,1] + gt[:,3], pred[:,1] + pred[:,3]) - np.maximum(gt[:,1], pred[:,1])
            intersection = np.maximum(diff_width, 0) * np.maximum(diff_height, 0)

            area_gt = gt[:,2] * gt[:,3]
            area_pred = pred[:,2] * pred[:,3]
            union = np.maximum(area_gt + area_pred - intersection, 0)

            intersections += np.sum(intersection * (union > 0))
            unions += np.sum(union)

        iou = np.round(intersections / (unions + epsilon()), 4)
        logs["val_iou"] = iou

        mse = np.round(mse, 4)
        logs["val_mse"] = mse

        print(" - val_iou: {} - val_mse: {}".format(iou, mse))

In [7]:
def create_model(trainable=False):
    model = MobileNetV2(input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3), include_top=False, alpha=1.0)

    for layer in model.layers:
        layer.trainable = trainable

    x = model.layers[-1].output
    x = Conv2D(4, kernel_size=3, name="coords")(x)
    x = Reshape((4,))(x)

    return Model(inputs=model.input, outputs=x)

In [8]:
model = create_model()
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 96, 96, 3)]  0                                            
__________________________________________________________________________________________________
Conv1 (Conv2D)                  (None, 48, 48, 32)   864         input_1[0][0]                    
__________________________________________________________________________________________________
bn_Conv1 (BatchNormalization)   (None, 48, 48, 32)   128         Conv1[0][0]                      
__________________________________________________________________________________________________
Conv1_relu (ReLU)               (None, 48, 48, 32)   0           bn_Conv1[0][0]                   
______________________________________________________________________________________________

In [11]:
train_datagen = DataGenerator(train_csv)
validation_datagen = Validation(generator=DataGenerator(valid_csv))

model.compile(loss="mean_squared_error", optimizer="adam", metrics=['accuracy'])

checkpoint = ModelCheckpoint('model.h5', monitor="val_iou", save_best_only=False,
                             save_weights_only=False, mode="max")
stop = EarlyStopping(monitor="val_iou", patience=50, mode="max")
reduce_lr = ReduceLROnPlateau(monitor="val_iou", factor=0.2, patience=10, min_lr=1e-7, verbose=1, mode="max")



In [17]:
history = model.fit_generator(generator=train_datagen,
                                epochs=EPOCHS,
                                callbacks=[validation_datagen, checkpoint, reduce_lr, stop],
                                workers=THREADS,
                                use_multiprocessing=MULTI_PROCESSING,
                                shuffle=True)

Epoch 1/50
 - val_iou: 0.5416 - val_mse: 50.7989
Epoch 2/50
 - val_iou: 0.5416 - val_mse: 50.799
Epoch 3/50
 - val_iou: 0.5416 - val_mse: 50.7991
Epoch 4/50
 - val_iou: 0.5416 - val_mse: 50.7996
Epoch 5/50
 - val_iou: 0.5416 - val_mse: 50.7993
Epoch 6/50
 - val_iou: 0.5416 - val_mse: 50.8001
Epoch 7/50
 - val_iou: 0.5416 - val_mse: 50.7997
Epoch 8/50
 - val_iou: 0.5416 - val_mse: 50.7998
Epoch 9/50
 - val_iou: 0.5416 - val_mse: 50.7997
Epoch 10/50
 - val_iou: 0.5416 - val_mse: 50.8
Epoch 11/50
 - val_iou: 0.5415 - val_mse: 50.7994

Epoch 00011: ReduceLROnPlateau reducing learning rate to 1e-07.
Epoch 12/50
 - val_iou: 0.5415 - val_mse: 50.7996
Epoch 13/50
 - val_iou: 0.5415 - val_mse: 50.7997
Epoch 14/50
 - val_iou: 0.5416 - val_mse: 50.7999
Epoch 15/50
 - val_iou: 0.5416 - val_mse: 50.8
Epoch 16/50
 - val_iou: 0.5415 - val_mse: 50.7998
Epoch 17/50
 - val_iou: 0.5415 - val_mse: 50.8
Epoch 18/50
 - val_iou: 0.5415 - val_mse: 50.8001
Epoch 19/50
 - val_iou: 0.5415 - val_mse: 50.8001
Epoc

In [18]:
model.save('cat_dog_box_v2,4.h5')