In [1]:
import os
from tqdm import tqdm
import pandas as pd
import xml.etree.ElementTree as ET
import numpy as np
import cv2
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam, RMSprop
import tensorflow as tf

import sys

sys.path.append("../")
from obj_detection import model



In [2]:
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

In [3]:
print(
    "GPU is available" if tf.test.gpu_device_name() else "GPU is NOT available"
)
if tf.test.gpu_device_name():
    physical_devices = tf.config.list_physical_devices("GPU")
    tf.config.experimental.set_memory_growth(physical_devices[0], True)

GPU is NOT available


In [4]:
annotations_path = "../data/annotations/"
images_path = "../data/images/"

In [5]:
annotations = os.listdir(annotations_path)
images = os.listdir(images_path)

In [6]:
img_name_list = []
width_list = []
height_list = []
label_list = []
xmin_list = []
ymin_list = []
xmax_list = []
ymax_list = []

In [7]:
for i in tqdm(range(len(annotations))):
    tree = ET.parse(os.path.join(annotations_path, annotations[i]))
    root = tree.getroot()
    img_name = root.find("filename").text

    size = root.find("size")
    width = int(size.find("width").text)
    height = int(size.find("height").text)

    for group in root.findall("object"):
        label = group.find("name").text
        bbox = group.find("bndbox")
        xmin = int(bbox.find("xmin").text)
        ymin = int(bbox.find("ymin").text)
        xmax = int(bbox.find("xmax").text)
        ymax = int(bbox.find("ymax").text)

        img_name_list.append(img_name)
        width_list.append(width)
        height_list.append(height)
        xmin_list.append(xmin)
        ymin_list.append(ymin)
        xmax_list.append(xmax)
        ymax_list.append(ymax)
        label_list.append(label)

100%|██████████| 877/877 [00:09<00:00, 94.56it/s] 


In [8]:
dataset = pd.DataFrame(
    {
        "img_name": img_name_list,
        "width": width_list,
        "height": height_list,
        "x_min": xmin_list,
        "y_min": ymin_list,
        "x_max": xmax_list,
        "y_max": ymax_list,
        "label": label_list,
    }
)
dataset.head()

Unnamed: 0,img_name,width,height,x_min,y_min,x_max,y_max,label
0,road0.png,267,400,153,147,110,170,trafficlight
1,road1.png,400,283,206,172,104,218,trafficlight
2,road10.png,400,267,175,133,138,260,trafficlight
3,road100.png,400,385,199,165,328,321,speedlimit
4,road101.png,400,200,293,100,197,187,speedlimit


In [9]:
classes = dataset["label"].unique().tolist()
classes

['trafficlight', 'speedlimit', 'crosswalk', 'stop']

In [10]:
label_encoder = LabelBinarizer()

In [11]:
images = []
labels = []
bboxes = []
for index, row in dataset.iterrows():
    img_path = os.path.join(images_path, row["img_name"])
    img = cv2.imread(img_path)
    x_scale = 300.0 / int(row["width"])
    y_scale = 300.0 / int(row["height"])
    img = cv2.resize(img, (300, 300))
    images.append(img)

    x_min = int(row["x_min"] * x_scale) / 300.0
    y_min = int(row["y_min"] * y_scale) / 300.0
    x_max = int(row["x_max"] * x_scale) / 300.0
    y_max = int(row["y_max"] * y_scale) / 300.0
    bboxes.append((x_min, y_min, x_max, y_max))
    labels.append(row["label"])

In [12]:
images = np.array(images, dtype="float32") / 255.0
labels = np.array(labels)
bboxes = np.array(bboxes, dtype="float32")
labels = label_encoder.fit_transform(labels)

In [13]:
labels

array([[0, 0, 0, 1],
       [0, 0, 0, 1],
       [0, 0, 0, 1],
       ...,
       [0, 0, 1, 0],
       [0, 0, 1, 0],
       [0, 0, 1, 0]])

In [14]:
labels

array([[0, 0, 0, 1],
       [0, 0, 0, 1],
       [0, 0, 0, 1],
       ...,
       [0, 0, 1, 0],
       [0, 0, 1, 0],
       [0, 0, 1, 0]])

In [13]:
(
    train_images,
    val_images,
    train_bboxes,
    val_bboxes,
    train_labels,
    val_labels,
) = train_test_split(images, bboxes, labels, test_size=0.20, random_state=12)

In [15]:
losses = {
    "class_label": "categorical_crossentropy",
    "bounding_box": "mean_squared_error",
}

train_targets = {
    "class_label": train_labels,
    "bounding_box": train_bboxes,
}

val_targets = {
    "class_label": val_labels,
    "bounding_box": val_bboxes,
}

In [16]:
base_learning_rate = 0.00015

# opt = RMSprop(learning_rate=base_learning_rate)
opt = Adam(base_learning_rate)

In [17]:
ssd = model.SSD_Model(
    num_classes=len(classes),
    input_size=(300, 300, 3),
    optimizer=opt,
    losses=losses,
)

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_images (InputLayer)      [(None, 300, 300, 3  0           []                               
                                )]                                                                
                                                                                                  
 block1_conv1 (Conv2D)          (None, 300, 300, 64  1792        ['input_images[0][0]']           
                                )                                                                 
                                                                                                  
 block1_conv2 (Conv2D)          (None, 300, 300, 64  36928       ['block1_conv1[0][0]']           
                                )                                                             

In [18]:
ssd.model_fit(
    train_images=train_images,
    train_targets=train_targets,
    val_images=val_images,
    val_targets=val_targets,
    epochs=15,
    batch_size=64,
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15


In [19]:
ssd.model.save("../models/output/model.h5")
ssd.model.save_weights("../models/output/model_weight.h5")