<a href="https://colab.research.google.com/github/balszeg/Thesis/blob/main/Traffic_classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Data collection and preprocessing

In [None]:
# Clone the OIDv4 ToolKit repository; later cells change into the cloned
# directory and run its `main.py downloader` to fetch the image sets.
!git clone https://github.com/EscVM/OIDv4_ToolKit.git

In [None]:
# install it
%cd /content/OIDv4_ToolKit/
!pip3 install -r requirements.txt

In [None]:
# Collect the train set: run the toolkit downloader for the four vehicle classes
# using the `train` split.
# NOTE(review): `--limit 500` presumably caps the number of images per class and
# `-y` presumably auto-confirms the toolkit's prompts — confirm in the toolkit README.
!python3 main.py downloader -y --classes Car Van Truck Motorcycle --type_csv train --limit 500

In [None]:
# Collect the validation set: same four classes, `validation` split, with a
# smaller `--limit` (100) than the train download.
# NOTE(review): the limit is presumably per class — confirm in the toolkit README.
!python3 main.py downloader -y --classes Car Van Truck Motorcycle --type_csv validation --limit 100

In [None]:
# Collect the test set: same four classes, `test` split, `--limit 100`.
# NOTE(review): the limit is presumably per class — confirm in the toolkit README.
!python3 main.py downloader -y --classes Car Van Truck Motorcycle --type_csv test --limit 100

In [None]:
# import libraries
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns

import keras
from keras.models import Sequential
from keras.layers import Dense, Conv2D , MaxPool2D , Flatten , Dropout 
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam, SGD
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.models import load_model
from sklearn.metrics import classification_report,confusion_matrix

import tensorflow as tf

import cv2
import os

import numpy as np

In [None]:
# set the data path
base_dir = '/content/OIDv4_ToolKit/OID/Dataset'
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')

# one image directory per class inside the train split
train_car_dir = os.path.join(train_dir, 'Car')
train_van_dir = os.path.join(train_dir, 'Van')
train_truck_dir = os.path.join(train_dir, 'Truck')
train_motorcyc_dir = os.path.join(train_dir, 'Motorcycle')


def _list_image_files(directory):
    """Return the sorted names of the regular files found directly in `directory`.

    `os.listdir` also returns sub-directories (for example an annotation folder
    such as `Label` that the OIDv4 ToolKit keeps next to the images) and gives
    no ordering guarantee. Filtering to regular files keeps non-image entries
    out of the preview below, and sorting makes the selection reproducible.
    """
    return sorted(
        name
        for name in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, name))
    )


train_car_fnames = _list_image_files(train_car_dir)
train_van_fnames = _list_image_files(train_van_dir)
train_truck_fnames = _list_image_files(train_truck_dir)
train_motorcyc_fnames = _list_image_files(train_motorcyc_dir)

In [None]:
# examine a few images from the set
nrows = 4
ncols = 4

# The grid has nrows * ncols cells and is shared by four classes, so each class
# may contribute at most a quarter of the cells. (Taking half of the cells per
# class, as in a two-class setup, would request 32 subplots in a 16-cell grid
# and make plt.subplot raise a ValueError.)
n_classes = 4
per_class = (nrows * ncols) // n_classes


def _first_image_paths(directory, fnames, count):
    """Return the full paths of the first `count` regular files among `fnames`."""
    candidates = (os.path.join(directory, fname) for fname in fnames)
    # skip anything that is not a regular file (e.g. sub-directories)
    return [path for path in candidates if os.path.isfile(path)][:count]


fig = plt.gcf()
fig.set_size_inches(ncols * 4, nrows * 4)

next_car_pix = _first_image_paths(train_car_dir, train_car_fnames, per_class)
next_van_pix = _first_image_paths(train_van_dir, train_van_fnames, per_class)
next_truck_pix = _first_image_paths(train_truck_dir, train_truck_fnames, per_class)
next_motorcyc_pix = _first_image_paths(train_motorcyc_dir, train_motorcyc_fnames, per_class)

# subplot positions are 1-based, hence the start=1
preview_paths = next_car_pix + next_van_pix + next_truck_pix + next_motorcyc_pix
for position, img_path in enumerate(preview_paths, start=1):
    plt.subplot(nrows, ncols, position)
    plt.imshow(mpimg.imread(img_path))

plt.show()

In [None]:
# setup the labels
# The position of a name in `labels` is the integer class id used everywhere below.
labels = ['Car', 'Van', 'Truck','Motorcycle']
img_size = 224

def get_data(data_dir):
    """Load the images of every class in `labels` from `data_dir/<label>/`.

    Each readable image is converted from OpenCV's BGR channel order to RGB and
    resized to `img_size` x `img_size`. Entries that are not regular files
    (sub-directories) are skipped, and files that cannot be read or processed
    are reported with `print` and skipped, so one bad file does not abort the
    whole load.

    Parameters:
        data_dir: directory that contains one sub-directory per class label.

    Returns:
        A NumPy object array in which each row is `[image_array, class_num]`;
        an empty object array when no image could be loaded.
    """
    data = []
    for class_num, label in enumerate(labels):
        path = os.path.join(data_dir, label)
        for img in os.listdir(path):
            img_path = os.path.join(path, img)
            if not os.path.isfile(img_path):
                # not an image file (e.g. a sub-directory) - nothing to load
                continue
            try:
                bgr_arr = cv2.imread(img_path)
                if bgr_arr is None:
                    # cv2.imread signals an unreadable file by returning None
                    print(f"Skipping unreadable image: {img_path}")
                    continue
                img_arr = bgr_arr[..., ::-1]  # convert BGR to RGB format
                resized_arr = cv2.resize(img_arr, (img_size, img_size))  # resize to the model input size
                data.append([resized_arr, class_num])
            except Exception as e:
                # best effort: report the problem and continue with the next file
                print(e)
    # The rows mix an image array with an int, i.e. they are ragged; recent NumPy
    # versions refuse to build such an array unless dtype=object is explicit.
    return np.array(data, dtype=object)

In [None]:
# arrange the datasets: load the train, validation and test splits (in that
# order) with get_data, one call per split directory
train, val, test = (
    get_data(split_dir)
    for split_dir in (
        '/content/OIDv4_ToolKit/OID/Dataset/train/',
        '/content/OIDv4_ToolKit/OID/Dataset/validation/',
        '/content/OIDv4_ToolKit/OID/Dataset/test/',
    )
)

In [None]:
# check train set distribution
# map the integer class id stored in each sample back to its class name
train_class_names = {0: "Car", 1: "Van", 2: "Truck", 3: "Motorcycle"}
l = [train_class_names[sample[1]] for sample in train if sample[1] in train_class_names]

sns.set_style('darkgrid')
# pass the values through the `x` keyword: seaborn no longer accepts the data
# variable as a bare positional argument
ax = sns.countplot(x=l)
ax.set_title('Train set class distribution');

In [None]:
# check validation set distribution
# map the integer class id stored in each sample back to its class name
val_class_names = {0: "Car", 1: "Van", 2: "Truck", 3: "Motorcycle"}
k = [val_class_names[sample[1]] for sample in val if sample[1] in val_class_names]

sns.set_style('darkgrid')
# pass the values through the `x` keyword: seaborn no longer accepts the data
# variable as a bare positional argument
ax = sns.countplot(x=k)
ax.set_title('Validation set class distribution');

In [None]:
# check test set distribution
# map the integer class id stored in each sample back to its class name
test_class_names = {0: "Car", 1: "Van", 2: "Truck", 3: "Motorcycle"}
m = [test_class_names[sample[1]] for sample in test if sample[1] in test_class_names]

sns.set_style('darkgrid')
# pass the values through the `x` keyword: seaborn no longer accepts the data
# variable as a bare positional argument
ax = sns.countplot(x=m)
ax.set_title('Test set class distribution');

In [None]:
# setup the sets for modifications
def _split_features_labels(samples):
    """Split `[image, class_id]` rows into a scaled image array and a label array.

    Returns a tuple `(x, y)`: `x` is the stacked images divided by 255 and `y`
    is the array of class ids, both in the order of `samples`.
    """
    features = [feature for feature, _ in samples]
    targets = [label for _, label in samples]
    # normalize the data: divide the pixel values by 255
    return np.array(features) / 255, np.array(targets)


x_train, y_train = _split_features_labels(train)
x_val, y_val = _split_features_labels(val)
# The test arrays must come from `test`; building them from `val` would make
# any "test" evaluation a repeat of the validation set.
x_test, y_test = _split_features_labels(test)

# No reshape is needed: stacking the images already gives arrays of shape
# (samples, img_size, img_size, channels). The former
# `x.reshape(-1, img_size, img_size, 1)` calls discarded their result (reshape
# is not in-place) and named a single channel, whereas the models in this
# notebook are built for 3-channel input.

In [None]:
# use datagenerator for augmenting
augmentation_options = dict(
    # dataset-wide / per-sample normalisation and whitening: all switched off
    featurewise_center=False,             # set input mean to 0 over the dataset
    samplewise_center=False,              # set each sample mean to 0
    featurewise_std_normalization=False,  # divide inputs by std of the dataset
    samplewise_std_normalization=False,   # divide each input by its std
    zca_whitening=False,                  # apply ZCA whitening
    # random geometric transformations
    rotation_range=30,        # randomly rotate images (range given in degrees)
    zoom_range=0.2,           # randomly zoom images
    width_shift_range=0.1,    # randomly shift horizontally (fraction of total width)
    height_shift_range=0.1,   # randomly shift vertically (fraction of total height)
    horizontal_flip=True,     # randomly flip images horizontally
    vertical_flip=False,      # no vertical flipping
)
datagen = ImageDataGenerator(**augmentation_options)

# compute the generator's data-dependent statistics from the training images
datagen.fit(x_train)

## Self-designed model

In [None]:
# build up the self-designed model:
# three convolution + max-pooling stages, dropout, then a dense classifier
# with one softmax output per class
model = Sequential([
    Conv2D(32, 3, padding="same", activation="relu", input_shape=(224, 224, 3)),
    MaxPool2D(),

    Conv2D(32, 3, padding="same", activation="relu"),
    MaxPool2D(),

    Conv2D(64, 3, padding="same", activation="relu"),
    MaxPool2D(),
    Dropout(0.4),

    Flatten(),
    Dense(128, activation="relu"),
    Dense(4, activation="softmax"),
])

model.summary()

In [None]:
# configure the learning parameters
# `learning_rate` is the supported keyword; the old `lr` alias is deprecated and
# rejected by newer Keras releases.
# (alternative that was tried: SGD with learning rate 0.0001 and momentum 0.9)
opt = Adam(learning_rate=0.000001)

# The model's last layer already applies softmax, so it outputs probabilities,
# not logits: the loss must be told so with from_logits=False. With
# from_logits=True softmax would effectively be applied twice, distorting the
# loss values and the gradients.
model.compile(
    optimizer=opt,
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['accuracy'],
)

# keep only the best model seen so far on disk
checkpointer = ModelCheckpoint(filepath='HandMade.h5', save_best_only=True, verbose=1)
# stop once validation accuracy has not improved for 20 epochs
earlystopping = EarlyStopping(monitor='val_accuracy', patience=20, verbose=1)

In [None]:
# start training
# NOTE(review): `datagen` is configured in an earlier cell but is not passed to
# fit here, so training runs on the un-augmented arrays — confirm this is intended.
training_callbacks = [checkpointer, earlystopping]
history = model.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=500,
    validation_data=(x_val, y_val),
    shuffle=True,
    callbacks=training_callbacks,
)

In [None]:
# draw training graphs
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

# Derive the x axis from the recorded history instead of hard-coding the epoch
# count: early stopping can end training at a different epoch on every run,
# and a mismatching length makes plt.plot fail.
epochs_range = range(len(acc))

plt.figure(figsize=(15, 15))
plt.subplot(2, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(2, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

In [None]:
# calculate metrics on the validation set
# `Sequential.predict_classes` has been removed from Keras; taking the argmax
# over the per-class outputs of `predict` gives the same class ids and matches
# what the VGG16 evaluation cells below do.
predictions = np.argmax(model.predict(x_val), axis=1)
print(classification_report(y_val, predictions, target_names = ['Car (Class 0)','Van (Class 1)', 'Truck (Class 2)', 'Motorcycle (Class 3)']))

## VGG16

In [None]:
# download VGG16 with ImageNet weights and without its original top layers
vgg_input = tf.keras.layers.Input(shape=(224, 224, 3))
baseModel = tf.keras.applications.VGG16(
    weights="imagenet",
    include_top=False,
    input_tensor=vgg_input,
)

# attach our own classification head to the base model's output:
# global average pooling -> 1024-unit ReLU layer -> 4-way softmax
output = baseModel.output
for head_layer in (
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(1024, activation="relu"),
    tf.keras.layers.Dense(4, activation='softmax'),
):
    output = head_layer(output)

model = tf.keras.Model(inputs=baseModel.input, outputs=output)

In [None]:
# transfer learning:
# freeze the pre-trained base model so that only the new top layers are trained
for layer in baseModel.layers:
    layer.trainable = False

# Compile with a fresh optimizer (same settings as `opt`): an optimizer instance
# that has already trained another model carries that model's state, and newer
# Keras versions refuse to reuse it with different variables.
vgg_opt = Adam(learning_rate=0.000001)

# The head ends in softmax, i.e. the model outputs probabilities, so the loss
# must use from_logits=False.
model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    optimizer=vgg_opt,
    metrics=["accuracy"],
)

In [None]:
# start the train
# stop once validation accuracy has not improved for 20 epochs
earlystopping = EarlyStopping(monitor='val_accuracy', patience=20, verbose=1)
# keep only the best model seen so far on disk
checkpointer = ModelCheckpoint(filepath='VGG16.h5', save_best_only=True, verbose=1)

head_callbacks = [earlystopping, checkpointer]
history = model.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=500,
    verbose=1,
    validation_data=(x_val, y_val),
    shuffle=True,
    callbacks=head_callbacks,
)

In [None]:
# draw training graphs
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

# Derive the x axis from the recorded history instead of hard-coding the epoch
# count: early stopping can end training at a different epoch on every run,
# and a mismatching length makes plt.plot fail.
epochs_range = range(len(acc))

plt.figure(figsize=(15, 15))
plt.subplot(2, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(2, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

In [None]:
# calculate metrics on the validation set
report_class_names = [
    'Car (Class 0)',
    'Van (Class 1)',
    'Truck (Class 2)',
    'Motorcycle (Class 3)',
]
predictions = model.predict(x_val)
# the predicted class is the index of the highest output in each row
predictions2 = predictions.argmax(axis=1)
print(classification_report(y_val, predictions2, target_names=report_class_names))

In [None]:
# Recompile the VGG16-based model.
# The model ends in a softmax layer, so the loss receives probabilities and must
# use from_logits=False. A fresh optimizer is created because `opt` was first
# used to train a different model.
# NOTE(review): the trainable flags are changed in the next cell; Keras expects
# compile() to be called after such changes, so this call alone does not make
# them take effect reliably.
model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    optimizer=Adam(learning_rate=0.000001),
    metrics=["accuracy"],
)

In [None]:
# fine-tuning: keep the first 15 layers of the model frozen and make every layer
# from index 15 onwards trainable
# NOTE(review): index 15 presumably separates the earlier VGG16 blocks from the
# last convolution block plus the new head — verify with model.summary().
for layer in model.layers[:15]:
    layer.trainable = False
for layer in model.layers[15:]:
    layer.trainable = True

# Changes to `trainable` only take effect reliably once the model is compiled
# again, so recompile here. A fresh optimizer is used because the set of
# trainable variables differs from the previous training phase, and the loss
# uses from_logits=False because the model ends in softmax.
finetune_opt = Adam(learning_rate=0.000001)
model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    optimizer=finetune_opt,
    metrics=["accuracy"],
)

In [None]:
# start the training
# keep only the best model seen so far on disk
checkpointer = ModelCheckpoint(filepath='VGG16_unfrost.h5', save_best_only=True, verbose=1)
# stop once validation accuracy has not improved for 20 epochs
earlystopping = EarlyStopping(monitor='val_accuracy', patience=20, verbose=1)

finetune_callbacks = [checkpointer, earlystopping]
history = model.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=500,
    verbose=1,
    validation_data=(x_val, y_val),
    shuffle=True,
    callbacks=finetune_callbacks,
)

In [None]:
# draw graphs for the improvement due to transfer learning
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

# Derive the x axis from the recorded history instead of hard-coding the epoch
# count: early stopping can end training at a different epoch on every run,
# and a mismatching length makes plt.plot fail.
epochs_range = range(len(acc))

plt.figure(figsize=(15, 15))
plt.subplot(2, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(2, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

In [None]:
# calculate metrics on the validation set
finetune_report_names = [
    'Car (Class 0)',
    'Van (Class 1)',
    'Truck (Class 2)',
    'Motorcycle (Class 3)',
]
predictions = model.predict(x_val)
# the predicted class is the index of the highest output in each row
predictions2 = predictions.argmax(axis=1)
print(classification_report(y_val, predictions2, target_names=finetune_report_names))