In [1]:
from PIL import Image
from zipfile import ZipFile
from pathlib import Path
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelBinarizer

import os
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPool2D, Dropout
from tensorflow.keras.layers.experimental.preprocessing import Rescaling

# Loading Images from Disk

In [2]:
!rm -rf ../data/train_data; rm ../data/train_data

rm: cannot remove '../data/train_data': No such file or directory


In [3]:
data_path = Path('../data/')
data_zips = [data_path / f for f in os.listdir(data_path)]

for i, f in enumerate(data_zips):
    print(f'{i}: {f}')

0: ../data/test_data
1: ../data/.ipynb_checkpoints
2: ../data/train.zip
3: ../data/test.zip


In [6]:
f_index = 2
data_file = data_zips[f_index]
print(data_file)

zip_folder_name = 'train_data'
data_extract_location = data_path / zip_folder_name

../data/train.zip


In [7]:
with ZipFile(data_file) as z:
    z.extractall(data_extract_location)

In [8]:
inner_folder = os.listdir(data_extract_location)[0];
data_folder_unzip = data_extract_location / inner_folder
os.listdir(data_folder_unzip)

['labels.csv', 'images']

In [9]:
labels = pd.read_csv(data_folder_unzip / 'labels.csv')
labels

Unnamed: 0,timestamp,forward/backward,left/right
0,1637291664326,0,0
1,1637291664353,0,0
2,1637291664404,0,0
3,1637291664547,0,0
4,1637291664573,0,0
...,...,...,...
17803,1637292329140,0,0
17804,1637292329188,0,0
17805,1637292329210,0,0
17806,1637292329259,0,0


In [10]:
label_encoded = labels['left/right'].values
encoder = LabelBinarizer()
encoder.fit([-1, 0, 1])
label_encoded = encoder.transform(label_encoded)
print("-1: ", label_encoded[labels['left/right'] == -1][0])
print(" 0: ", label_encoded[labels['left/right'] == 0][0])
print("-1: ", label_encoded[labels['left/right'] == 1][0])

-1:  [1 0 0]
 0:  [0 1 0]
-1:  [0 0 1]


In [97]:
# original (320, 240)
image_rescale_size = (80, 60)
batch_size = 32
data_path = data_folder_unzip
val_split = 0.1
print(f'{data_path}')


train_ds = tf.keras.utils.image_dataset_from_directory(
    str(data_path),
    labels=list(label_encoded),
    label_mode="int",
    color_mode="rgb",
    batch_size=batch_size,
    image_size=image_rescale_size,
    shuffle=True,
    validation_split=val_split,
    subset='training',
    seed=42
)

val_ds = tf.keras.utils.image_dataset_from_directory(
    str(data_path),
    labels=list(label_encoded),
    label_mode="int",
    color_mode="rgb",
    batch_size=batch_size,
    image_size=image_rescale_size,
    shuffle=True,
    validation_split=val_split,
    subset='validation',
    seed=42
)

../data/test_data/dataTrackV2_5
Found 12691 files belonging to 1 classes.
Using 11422 files for training.
Found 12691 files belonging to 1 classes.
Using 1269 files for validation.


In [99]:
for i, el in enumerate(train_ds):
    # print(data.shape, labels.shape)
    # print(cur_labels[:2])
    print(el[0].shape, el[1].shape)
    if i == 5:
        break


(32, 80, 60, 3) (32, 3)
(32, 80, 60, 3) (32, 3)
(32, 80, 60, 3) (32, 3)
(32, 80, 60, 3) (32, 3)
(32, 80, 60, 3) (32, 3)
(32, 80, 60, 3) (32, 3)


# Preprocessing 

## Filter labels and images where the car was moving

In [100]:
filtered_index = (labels['forward/backward'] != 0)
np_filter = np.array([i for i, index in enumerate(filtered_index) if index == False])
bad_indices = tf.convert_to_tensor(np_filter); bad_indices.shape

TensorShape([786])

In [101]:
# def func(index, val):
#     val_tensor = tf.equal(index, bad_indices)
#     ans_tensor = tf.reduce_any(tf.cast(val_tensor, tf.bool)) 
#     # print(ans_tensor)
#     return ans_tensor
# train_ds = train_ds.enumerate().filter(func)
# #train_ds = test.map(lambda i, el: el[1])

# val_ds = val_ds.enumerate().filter(func)
# # val_ds = val_ds.map(lambda index, value: value)

In [102]:
# for i, el in enumerate(train_ds):
#     # print(data.shape, labels.shape)
#     # print(cur_labels[:2])
#     print(el[1][0].shape, el[1][1].shape)
#     if i == 5:
#         break

# Dense Neural Network


In [109]:
model1 = Sequential()
model1.add(Flatten())
model1.add(Rescaling(scale=1./255))
model1.add(Dense(32))
model1.add(Dropout(0.2)),
model1.add(Dense(64))
model1.add(Dropout(0.2)),
model1.add(Dense(128))
model1.add(Dropout(0.2)),
model1.add(Dense(64))
model1.add(Dropout(0.2)),
model1.add(Dense(32))
model1.add(Dense(3, activation='softmax'))

In [110]:
model_save_path = f'../models/dense.h5'

checkpoint = tf.keras.callbacks.ModelCheckpoint(
    model_save_path,
    monitor="val_loss",
    verbose=0,
    save_best_only=True,
    save_weights_only=False,
    mode="auto",
    save_freq="epoch",
)

es = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy', min_delta=1e-6, patience=10, verbose=2,
    mode='min', baseline=0.8, restore_best_weights=False
)

In [111]:
model1.compile(loss=tf.keras.losses.categorical_crossentropy,
              optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              metrics=['accuracy'])
#model.summary()

In [112]:
hist1 = model1.fit(train_ds, validation_data=val_ds, epochs=50, callbacks=[checkpoint])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


# CNN

In [113]:
model2 = Sequential()
model2.add(Rescaling(scale=1./255))
model2.add(Conv2D(32, (3,3)))
model2.add(MaxPool2D(2,2))
model2.add(Conv2D(64, (3,3)))
model2.add(MaxPool2D(2,2))
model2.add(Conv2D(32, (3,3)))
model2.add(Flatten())
model2.add(Dropout(0.2)),
model2.add(Dense(32))
model2.add(Dropout(0.2)),
model2.add(Dense(64))
model2.add(Dense(3, activation='softmax'))

In [114]:
model2.compile(loss=tf.keras.losses.CategoricalCrossentropy(),
              optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
              metrics=['accuracy'])

es = tf.keras.callbacks.EarlyStopping(
    monitor='accuracy', min_delta=1e-6, patience=10, verbose=2,
    mode='min', baseline=0.8, restore_best_weights=False
)

model_save_path = f'../models/cnn.h5'

checkpoint = tf.keras.callbacks.ModelCheckpoint(
    model_save_path,
    monitor="val_loss",
    verbose=0,
    save_best_only=True,
    save_weights_only=False,
    mode="auto",
    save_freq="epoch",
)

In [115]:
hist2 = model2.fit(train_ds, validation_data=val_ds, callbacks=[es, checkpoint], epochs=45)

Epoch 1/45
Epoch 2/45
Epoch 3/45
Epoch 4/45
Epoch 5/45
Epoch 6/45
Epoch 7/45
Epoch 8/45
Epoch 9/45
Epoch 10/45
Epoch 11/45
Epoch 00011: early stopping


# Testing

## Loading Dataset

In [116]:
!rm -rf ../data/test_data; rm ../data/test_data

rm: cannot remove '../data/test_data': No such file or directory


In [117]:
data_path = Path('../data/')
data_zips = [data_path / f for f in os.listdir(data_path)]

for i, f in enumerate(data_zips):
    print(f'{i}: {f}')

0: ../data/.ipynb_checkpoints
1: ../data/train_data
2: ../data/train.zip
3: ../data/test.zip


In [118]:
f_index = 3
data_file = data_zips[f_index]
print(data_file)

zip_folder_name = 'test_data'
data_extract_location = data_path / zip_folder_name

../data/test.zip


In [119]:
with ZipFile(data_file) as z:
    z.extractall(data_extract_location)

In [120]:
inner_folder = os.listdir(data_extract_location)[0];
data_folder_unzip = data_extract_location / inner_folder
os.listdir(data_folder_unzip)

['labels.csv', 'images']

In [121]:
labels = pd.read_csv(data_folder_unzip / 'labels.csv')
labels

Unnamed: 0,timestamp,forward/backward,left/right
0,1637292461661,0,0
1,1637292461900,0,0
2,1637292461951,0,0
3,1637292461993,0,0
4,1637292462034,0,0
...,...,...,...
12686,1637292930962,0,0
12687,1637292930986,0,0
12688,1637292931017,0,0
12689,1637292931063,0,0


In [122]:
labels_orig = labels['left/right'].values
encoder = LabelBinarizer()
encoder.fit([-1, 0, 1])
label_encoded = encoder.transform(labels_orig)
print("-1: ", label_encoded[labels['left/right'] == -1][0])
print(" 0: ", label_encoded[labels['left/right'] == 0][0])
print("-1: ", label_encoded[labels['left/right'] == 1][0])

-1:  [1 0 0]
 0:  [0 1 0]
-1:  [0 0 1]


In [123]:
print(data_folder_unzip)

../data/test_data/dataTrackV2_5


In [124]:
image_rescale_size = (80, 60)
batch_size = 64
data_path = data_folder_unzip
val_split = 0.1
print(f'{data_path}')


test_ds = tf.keras.utils.image_dataset_from_directory(
    str(data_path),
    labels=list(label_encoded),
    label_mode="int",
    color_mode="rgb",
    batch_size=batch_size,
    image_size=image_rescale_size,
    shuffle=False,
)

../data/test_data/dataTrackV2_5
Found 12691 files belonging to 1 classes.


In [125]:
for i, el in enumerate(test_ds):
    # print(data.shape, labels.shape)
    # print(cur_labels[:2])
    print(el[0].shape, el[1].shape)
    if i == 5:
        break


(64, 80, 60, 3) (64, 3)
(64, 80, 60, 3) (64, 3)
(64, 80, 60, 3) (64, 3)
(64, 80, 60, 3) (64, 3)
(64, 80, 60, 3) (64, 3)
(64, 80, 60, 3) (64, 3)


## Load Models

In [126]:
from tensorflow.keras.models import load_model

In [127]:
model_dir = Path('../models')
models = [model_dir / model for model in os.listdir(model_dir)]; models

[PosixPath('../models/dense.h5'),
 PosixPath('../models/.ipynb_checkpoints'),
 PosixPath('../models/cnn.h5')]

In [128]:
cnn_model = load_model('../models/cnn.h5'); cnn_model.summary()

Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling_8 (Rescaling)     (None, 80, 60, 3)         0         
                                                                 
 conv2d_9 (Conv2D)           (None, 78, 58, 32)        896       
                                                                 
 max_pooling2d_6 (MaxPooling  (None, 39, 29, 32)       0         
 2D)                                                             
                                                                 
 conv2d_10 (Conv2D)          (None, 37, 27, 64)        18496     
                                                                 
 max_pooling2d_7 (MaxPooling  (None, 18, 13, 64)       0         
 2D)                                                             
                                                                 
 conv2d_11 (Conv2D)          (None, 16, 11, 32)       

In [129]:
dense_model = load_model('../models/dense.h5'); dense_model.summary()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_7 (Flatten)         (None, 14400)             0         
                                                                 
 rescaling_7 (Rescaling)     (None, 14400)             0         
                                                                 
 dense_36 (Dense)            (None, 32)                460832    
                                                                 
 dropout_17 (Dropout)        (None, 32)                0         
                                                                 
 dense_37 (Dense)            (None, 64)                2112      
                                                                 
 dropout_18 (Dropout)        (None, 64)                0         
                                                                 
 dense_38 (Dense)            (None, 128)              

In [130]:
cnn_pred = cnn_model.predict(test_ds); cnn_pred

array([[2.7352865e-04, 9.9937385e-01, 3.5267646e-04],
       [2.6141937e-04, 9.9943012e-01, 3.0850151e-04],
       [2.6141937e-04, 9.9943012e-01, 3.0850151e-04],
       ...,
       [1.1564973e-02, 9.8838753e-01, 4.7527119e-05],
       [1.1564973e-02, 9.8838753e-01, 4.7527119e-05],
       [1.1762273e-02, 9.8819089e-01, 4.6938232e-05]], dtype=float32)

In [131]:
pred_labels_cnn = np.argmax(cnn_pred, axis=1) - 1; pred_labels_cnn
np.unique(pred_labels_cnn, return_counts=True)

(array([-1,  0,  1]), array([ 1623, 10232,   836]))

In [132]:
dense_pred = dense_model.predict(test_ds); dense_pred

array([[3.4124259e-04, 9.9917668e-01, 4.8201802e-04],
       [3.9053327e-04, 9.9922001e-01, 3.8939880e-04],
       [3.9053327e-04, 9.9922001e-01, 3.8939880e-04],
       ...,
       [7.8583835e-03, 9.8975450e-01, 2.3871285e-03],
       [7.8583835e-03, 9.8975450e-01, 2.3871285e-03],
       [9.5988447e-03, 9.8808753e-01, 2.3136293e-03]], dtype=float32)

In [133]:
pred_labels_dense = np.argmax(dense_pred, axis=1) -1; 
np.unique(pred_labels_dense, return_counts=True)

(array([-1,  0,  1]), array([1880, 9641, 1170]))

In [134]:
from sklearn.metrics import balanced_accuracy_score, accuracy_score

In [135]:
print('CNN')
print('balanced: ', balanced_accuracy_score(pred_labels_cnn, labels_orig))
print('accuracy: ', accuracy_score(pred_labels_cnn, labels_orig))

CNN
balanced:  0.8968845497338159
accuracy:  0.9144275470806084


In [136]:
print('DENSE')
print('balanced: ', balanced_accuracy_score(pred_labels_dense, labels_orig))
print('accuracy: ', accuracy_score(pred_labels_dense, labels_orig))

DENSE
balanced:  0.9042426386665419
accuracy:  0.9468914979119061
