# YOLO (You Only Look Once) Implementation for Traffic Assessment

## Prerequisites

#### Installing Python dependencies

In [1]:
# Uncomment the line below to install the dependencies listed in requirements.txt.
# NOTE(review): `%pip install ...` would target the running kernel's environment.
# !pip install -r requirements.txt

#### Importing required libraries

In [2]:
import os
import random

import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras import layers, models
import numpy as np
import PIL
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.image
import matplotlib.patches as patches
import configparser
import ast
from xml.dom import minidom

from src.data_generator import DataGenerator
from src.data_preview import DataPreview

from tensorflow.python.client import device_lib
# Report the TensorFlow build first, then the name of every local device the
# runtime can see (the list includes CPU entries as well as GPU ones).
print('Tensorflow version: {}'.format(tf.__version__))
local_device_names = [device.name for device in device_lib.list_local_devices()]
print('GPU: {}'.format(local_device_names))

Tensorflow version: 2.1.0
GPU: ['/device:CPU:0', '/device:XLA_CPU:0', '/device:XLA_GPU:0']


#### Setting YOLO Parameters

In [3]:
# Load the class names file; each line of the file becomes one entry of LABELS.
with open('./config/classes.names') as names_handle:
    raw_class_names = names_handle.read()

LABELS = raw_class_names.splitlines()


In [4]:
CONFIG_PATH = 'config/params.config'

config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. An empty list therefore means the config file is
# missing or unreadable; fail here with a clear message instead of hitting an
# opaque KeyError on the first section lookup below.
if not config.read(CONFIG_PATH):
    raise FileNotFoundError(
        'Could not read configuration file: {}'.format(CONFIG_PATH))

# [YOLO] section: image/grid geometry, boxes per cell, class count and the
# score / IoU values, converted from their string form to numbers.
IMAGE_W             = int(config['YOLO']['IMAGE_W'])
IMAGE_H             = int(config['YOLO']['IMAGE_H'])
GRID_W              = int(config['YOLO']['GRID_W'])
GRID_H              = int(config['YOLO']['GRID_H'])
BOXES               = int(config['YOLO']['BOXES'])
CLASSES             = int(config['YOLO']['CLASSES'])
MIN_SCORE           = float(config['YOLO']['MIN_SCORE'])
MIN_IOU             = float(config['YOLO']['MIN_IOU'])
# ANCHORS is stored as a Python literal; literal_eval parses it without
# executing arbitrary code.
ANCHORS             = ast.literal_eval(config['YOLO']['ANCHORS'])

# [TRAINING] section: batch sizes and number of epochs.
TRAIN_BATCH_SIZE    = int(config['TRAINING']['TRAIN_BATCH_SIZE'])
VAL_BATCH_SIZE      = int(config['TRAINING']['VAL_BATCH_SIZE'])
EPOCHS              = int(config['TRAINING']['EPOCHS'])

# [DIR] section: folders passed to the data generator for images and annotations.
IMAGE_FOLDER        = config['DIR']['IMAGES']
ANNOTATION_FOLDER   = config['DIR']['ANNOTATIONS']

## Loading training/validation datasets

In [5]:
# Grid and image sizes are passed as (width, height) pairs.
grid_size = (GRID_W, GRID_H)
image_size = (IMAGE_W, IMAGE_H)

# Build the training dataset from the annotation and image folders.
dataset = DataGenerator.generate_tf_dataset(
    ANNOTATION_FOLDER, IMAGE_FOLDER, LABELS, BOXES, TRAIN_BATCH_SIZE,
    grid_size, image_size)

# Bare expression so the notebook displays the dataset's element spec.
dataset

<PrefetchDataset shapes: ((None, 512, 512, 3), (None, 16, 16, 5, 12)), types: (tf.float32, tf.float32)>

In [6]:
# Optional augmentation step, currently disabled. When uncommented it replaces
# `dataset` with the result of DataGenerator.augment_dataset and displays it.
# dataset = DataGenerator.augment_dataset(
#     dataset,
#     (IMAGE_W, IMAGE_H))

# dataset

## Defining and training the neural network

In [7]:

def _conv_block(filters, pool=False, **conv_kwargs):
    """Build one Conv2D(3x3) -> BatchNormalization -> LeakyReLU(0.1) stage.

    Args:
        filters: number of output channels of the convolution.
        pool: when True, append a 2x2 MaxPooling2D that halves both spatial
            dimensions.
        **conv_kwargs: extra keyword arguments forwarded to Conv2D (used to
            declare `input_shape` on the very first layer of the network).

    Returns:
        A list of freshly created Keras layers, in application order.
    """
    block = [
        # use_bias=False: the convolution is followed directly by BatchNormalization.
        layers.Conv2D(filters, (3, 3), strides=(1, 1), padding='same',
                      use_bias=False, **conv_kwargs),
        layers.BatchNormalization(),
        layers.LeakyReLU(alpha=0.1),
    ]
    if pool:
        block.append(layers.MaxPooling2D(pool_size=(2, 2)))
    return block


# (filters, followed_by_max_pool) for conv_0 .. conv_20, in network order.
# The five pooled stages divide each spatial dimension by 32 overall
# (512 -> 16 with the shipped configuration).
_BACKBONE_SPEC = [
    (32, True),      # conv_0  + max_pool_0
    (64, True),      # conv_1  + max_pool_1
    (128, False),    # conv_2
    (64, False),     # conv_3
    (128, True),     # conv_4  + max_pool_2
    (256, False),    # conv_5
    (128, False),    # conv_6
    (256, True),     # conv_7  + max_pool_3
    (512, False),    # conv_8
    (256, False),    # conv_9
    (512, False),    # conv_10
    (256, False),    # conv_11
    (512, True),     # conv_12 + max_pool_4
    (1024, False),   # conv_13
    (512, False),    # conv_14
    (1024, False),   # conv_15
    (512, False),    # conv_16
    (1024, False),   # conv_17
    (512, False),    # conv_18
    (1024, False),   # conv_19
    (1024, False),   # conv_20 (followed by dropout below)
]

# Only the first layer of a Sequential model needs an input shape; every later
# layer infers its input from the previous layer's output. The shape comes from
# the configuration so the network follows IMAGE_W / IMAGE_H instead of a
# hard-coded 512.
# NOTE(review): the notebook passes sizes as (W, H) everywhere (see the Reshape
# below and the DataGenerator call); that ordering is kept here. It is
# indistinguishable from (H, W) for square inputs - confirm before using a
# non-square configuration.
backbone_layers = []
for stage_index, (stage_filters, stage_pool) in enumerate(_BACKBONE_SPEC):
    first_layer_kwargs = (
        {'input_shape': (IMAGE_W, IMAGE_H, 3)} if stage_index == 0 else {})
    backbone_layers.extend(
        _conv_block(stage_filters, pool=stage_pool, **first_layer_kwargs))

model = models.Sequential(backbone_layers + [
    layers.Dropout(0.3),

    # Detection head: a 1x1 convolution (with bias) producing
    # BOXES * (5 + CLASSES) channels per grid cell, reshaped so each box gets
    # its own axis of 5 + CLASSES values.
    layers.Conv2D(BOXES * (5 + CLASSES), (1, 1), strides=(1, 1), padding='same'),
    layers.Reshape((GRID_W, GRID_H, BOXES, 5 + CLASSES)),
])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 512, 512, 32)      864       
_________________________________________________________________
batch_normalization (BatchNo (None, 512, 512, 32)      128       
_________________________________________________________________
leaky_re_lu (LeakyReLU)      (None, 512, 512, 32)      0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 256, 256, 32)      0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 256, 256, 64)      18432     
_________________________________________________________________
batch_normalization_1 (Batch (None, 256, 256, 64)      256       
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    (None, 256, 256, 64)      0

In [8]:
from src.model_training import ModelTraining

# Adam with an explicitly spelled-out configuration.
adam_optimizer = tf.keras.optimizers.Adam(
    learning_rate=1e-5,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-08,
)

# Train against the project's loss and track its IOU metric.
model.compile(optimizer=adam_optimizer,
              loss=ModelTraining.loss,
              metrics=[ModelTraining.IOU])

Tensor("reshape/Identity:0", shape=(None, 16, 16, 5, 12), dtype=float32) Tensor("reshape_target:0", shape=(None, None, None, None, None), dtype=float32)
Tensor("loss/reshape_loss/Square:0", shape=(), dtype=float32)


In [9]:
# `steps_per_epoch` is the number of batches drawn per epoch, not the number of
# samples per batch, so it must not simply be set to TRAIN_BATCH_SIZE.
# For a finite dataset (or one whose length TensorFlow cannot determine) pass
# None so each epoch runs until the dataset is exhausted. An infinitely
# repeating dataset needs an explicit bound; the previous value is kept there.
dataset_cardinality = int(tf.data.experimental.cardinality(dataset).numpy())
if dataset_cardinality == tf.data.experimental.INFINITE_CARDINALITY:
    steps_per_epoch = TRAIN_BATCH_SIZE
else:
    steps_per_epoch = None

# `detector` holds the History object returned by fit().
detector = model.fit(dataset, epochs=EPOCHS, steps_per_epoch=steps_per_epoch)

Train for 1 steps
Tensor("loss/reshape_loss/Square:0", shape=(), dtype=float32)
Tensor("sequential/reshape/Reshape:0", shape=(None, 16, 16, 5, 12), dtype=float32) Tensor("IteratorGetNext:1", shape=(None, 16, 16, 5, 12), dtype=float32)
Tensor("loss/reshape_loss/Square:0", shape=(), dtype=float32)
Tensor("sequential/reshape/Reshape:0", shape=(None, 16, 16, 5, 12), dtype=float32) Tensor("IteratorGetNext:1", shape=(None, 16, 16, 5, 12), dtype=float32)


In [10]:
from datetime import datetime

# Save into a directory named after the current timestamp, e.g.
# models/tb__2020_06_03__17_20_35/.
run_stamp = datetime.now().strftime('tb__%Y_%m_%d__%H_%M_%S')
export_dir = 'models/{}/'.format(run_stamp)
model.save(export_dir)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: models/tb__2020_06_03__17_20_35/assets
