In [1]:
import os
from tqdm import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import xml.etree.ElementTree as ET
from collections import defaultdict
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import cv2
import shutil
from random import shuffle
import sys

sys.path.append("../")
from obj_detection import model

In [2]:
annotations_path = '../data/annotations/'
images_path = '../data/images/'

In [3]:
annotations = os.listdir(annotations_path)
images = os.listdir(images_path)

In [4]:
img_name_list = []
width_list = []
height_list = []
label_list = []
xmin_list = []
ymin_list = []
xmax_list = []
ymax_list = []

In [5]:
for i in tqdm(range(len(annotations))):
    tree = ET.parse(os.path.join(annotations_path, annotations[i]))
    root = tree.getroot()
    img_name = root.find("filename").text

    size = root.find("size")
    width = int(size.find("width").text)
    height = int(size.find("height").text)

    for group in root.findall("object"):
        label = group.find("name").text
        bbox = group.find("bndbox")
        xmin = int(bbox.find("xmin").text)
        ymin = int(bbox.find("ymin").text)
        xmax = int(bbox.find("xmax").text)
        ymax = int(bbox.find("ymax").text)

        img_name_list.append(img_name)
        width_list.append(width)
        height_list.append(height)
        xmin_list.append(xmin)
        ymin_list.append(ymin)
        xmax_list.append(xmax)
        ymax_list.append(ymax)
        label_list.append(label)

100%|██████████| 877/877 [00:00<00:00, 2093.97it/s]


In [6]:
dataset = pd.DataFrame(
    {
        "img_name": img_name_list,
        "width": width_list,
        "height": height_list,
        "xmin": xmin_list,
        "ymin": ymin_list,
        "xmax": xmax_list,
        "ymax": ymax_list,
        "label": label_list,
    }
)
dataset.head()

Unnamed: 0,img_name,width,height,xmin,ymin,xmax,ymax,label
0,road0.png,267,400,98,62,208,232,trafficlight
1,road1.png,400,283,154,63,258,281,trafficlight
2,road10.png,400,267,106,3,244,263,trafficlight
3,road100.png,400,385,35,5,363,326,speedlimit
4,road101.png,400,200,195,7,392,194,speedlimit


In [7]:
classes = dataset['label'].unique().tolist()
classes

['trafficlight', 'speedlimit', 'crosswalk', 'stop']

In [8]:
label_encoder = LabelEncoder()
dataset['class'] = label_encoder.fit_transform(dataset['label'])
dataset

Unnamed: 0,img_name,width,height,xmin,ymin,xmax,ymax,label,class
0,road0.png,267,400,98,62,208,232,trafficlight,3
1,road1.png,400,283,154,63,258,281,trafficlight,3
2,road10.png,400,267,106,3,244,263,trafficlight,3
3,road100.png,400,385,35,5,363,326,speedlimit,1
4,road101.png,400,200,195,7,392,194,speedlimit,1
...,...,...,...,...,...,...,...,...,...
1239,road95.png,300,400,90,101,268,257,stop,2
1240,road96.png,400,267,209,58,326,179,stop,2
1241,road97.png,400,267,42,66,130,154,stop,2
1242,road98.png,400,267,138,17,376,245,stop,2


In [9]:
preprocessed_df = dataset.drop("label", axis=1)
preprocessed_df.head()

Unnamed: 0,img_name,width,height,xmin,ymin,xmax,ymax,class
0,road0.png,267,400,98,62,208,232,3
1,road1.png,400,283,154,63,258,281,3
2,road10.png,400,267,106,3,244,263,3
3,road100.png,400,385,35,5,363,326,1
4,road101.png,400,200,195,7,392,194,1


In [10]:
images = []
labels = []
for index, row in preprocessed_df.iterrows():
    img_path = os.path.join(images_path, row["img_name"])
    img = cv2.imread(img_path)
    x_scale = 300 / img.shape[1]
    y_scale = 300 / img.shape[0]
    img = cv2.resize(img, (300, 300))
    img = img / 255.0
    images.append(img)

    xmin = int(row["xmin"] * x_scale) / 300
    ymin = int(row["ymin"] * y_scale) / 300
    xmax = int(row["xmax"] * x_scale) / 300
    ymax = int(row["ymax"] * y_scale) / 300
    labels.append({"bbox": [xmin, ymin, xmax, ymax], "label": row["class"]})

In [11]:
(
    train_images,
    val_images,
    train_labels,
    val_labels,
) = train_test_split(images, labels, test_size=0.2, random_state=42)

In [12]:
ssd = model.SSD_Model(len(classes), (300, 300, 3))

In [13]:
ssd.model_fit(
    train_images=train_images,
    train_labels=train_labels,
    val_images=val_images,
    val_labels=val_labels,
    batch_size=128,
)

Epoch 1/20


: 