In [37]:
import os

# These variables are read when TensorFlow / Keras are first imported, so they
# must be set *before* `import tensorflow` (and before any `keras` import) to
# have an effect on a fresh kernel.
os.environ['KERAS_BACKEND'] = 'tensorflow'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # 3 = INFO, WARNING, and ERROR messages are not printed

import datetime
import random
import tempfile
import time
from glob import glob

import tensorflow as tf
from sklearn.metrics import classification_report, confusion_matrix
from tqdm import tqdm

import numpy as np
import pandas as pd
from IPython.display import FileLink
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
import seaborn as sns 
%matplotlib inline
from IPython.display import display, Image
import matplotlib.image as mpimg
import cv2

from sklearn.model_selection import train_test_split
from sklearn.datasets import load_files       
from keras.utils import np_utils
from sklearn.utils import shuffle
from sklearn.metrics import log_loss

from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, GlobalAveragePooling2D
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
#from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint,EarlyStopping



In [2]:
# Input geometry shared by the image loaders and the models below.
img_rows, img_cols = 224, 224  # image height (rows) and width (columns) in pixels
color_type = 3                 # number of channels: 1 - grey, 3 - rgb

In [3]:
# Read the image index (columns: subject, classname, img) from the working directory.
df = pd.read_csv("driver_imgs_list.csv")

In [4]:
# Preview the first five rows of the image index.
df.head(5)

Unnamed: 0,subject,classname,img
0,p002,c0,img_44733.jpg
1,p002,c0,img_72999.jpg
2,p002,c0,img_25094.jpg
3,p002,c0,img_69092.jpg
4,p002,c0,img_92629.jpg


In [5]:
# Group the image index by driver id (the 'subject' column) and print the distinct ids.
by_drivers = df.groupby('subject')
unique_drivers = by_drivers.groups.keys()
print(unique_drivers)

dict_keys(['p002', 'p012', 'p014', 'p015', 'p016', 'p021', 'p022', 'p024', 'p026', 'p035', 'p039', 'p041', 'p042', 'p045', 'p047', 'p049', 'p050', 'p051', 'p052', 'p056', 'p061', 'p064', 'p066', 'p072', 'p075', 'p081'])


In [28]:
# The dataset was previously downloaded from Kaggle. Training images are read
# from NUMBER_CLASSES class folders named c0, c1, ... (see load_train).
NUMBER_CLASSES = 10
# Color type: 1 - grey, 3 - rgb

def get_cv2_image(path, img_rows, img_cols, color_type=3):
    """Read an image from disk with OpenCV and resize it.

    Args:
        path: Path of the image file to read.
        img_rows: Target height (number of rows) of the returned array.
        img_cols: Target width (number of columns) of the returned array.
        color_type: 1 loads the image as grayscale, 3 loads it as a
            3-channel colour image (OpenCV channel order).

    Returns:
        The resized image array: ``(img_rows, img_cols)`` for grayscale or
        ``(img_rows, img_cols, 3)`` for colour.

    Raises:
        ValueError: If ``color_type`` is neither 1 nor 3.
        IOError: If OpenCV could not read or decode ``path``.
    """
    # Validate first so an unsupported value fails with a clear message instead
    # of an unbound-variable error further down.
    if color_type == 1:
        read_flag = cv2.IMREAD_GRAYSCALE
    elif color_type == 3:
        read_flag = cv2.IMREAD_COLOR
    else:
        raise ValueError(
            'Unsupported color_type {!r}: expected 1 (grey) or 3 (rgb)'.format(color_type))

    img = cv2.imread(path, read_flag)
    # cv2.imread signals failure by returning None rather than raising.
    if img is None:
        raise IOError('Could not read image: {}'.format(path))

    # cv2.resize expects dsize as (width, height), i.e. (cols, rows); passing it
    # this way makes the result shape (img_rows, img_cols[, channels]).
    return cv2.resize(img, (img_cols, img_rows))

# Training
def load_train(img_rows, img_cols, color_type=3, train_dir='/Bangkit/imgs/train'):
    """Load every training image together with its integer class label.

    Images are read from the sub-folders ``c0`` .. ``c{NUMBER_CLASSES - 1}`` of
    ``train_dir``; the folder index is used as the label.

    Args:
        img_rows: Target image height passed to ``get_cv2_image``.
        img_cols: Target image width passed to ``get_cv2_image``.
        color_type: 1 for grayscale, 3 for colour.
        train_dir: Root folder that contains the per-class sub-folders.

    Returns:
        ``(train_images, train_labels)``: two lists of equal length, ordered by
        class and then by file name.
    """
    start_time = time.time()
    train_images = []
    train_labels = []
    # Loop over the training folder, one class directory at a time
    for classed in tqdm(range(NUMBER_CLASSES)):
        print('Loading directory c{}'.format(classed))
        # Sorted so the sample order (and any seeded split made from it later)
        # does not depend on the file-system listing order.
        files = sorted(glob(os.path.join(train_dir, 'c' + str(classed), '*.jpg')))
        for file in files:
            train_images.append(get_cv2_image(file, img_rows, img_cols, color_type))
            train_labels.append(classed)
    print("Data Loaded in {} second".format(time.time() - start_time))
    # No shuffling here: shuffling the images alone would break their pairing
    # with train_labels. Callers that need a random order must shuffle both.
    return train_images, train_labels

def read_and_normalize_train_data(img_rows, img_cols, color_type):
    """Load the training images and split them into train / validation arrays.

    Despite the name, pixel values are not rescaled: the arrays keep the
    ``uint8`` values produced by the image loader.

    Args:
        img_rows: Target image height.
        img_cols: Target image width.
        color_type: Number of channels (1 or 3).

    Returns:
        ``(x_train, x_test, y_train, y_test)`` where the ``x_*`` arrays have
        shape ``(n, img_rows, img_cols, color_type)`` and the ``y_*`` arrays are
        one-hot encoded with ``NUMBER_CLASSES`` columns.
    """
    X, labels = load_train(img_rows, img_cols, color_type)
    # Use the shared constant so the encoding width follows the class count.
    y = np_utils.to_categorical(labels, NUMBER_CLASSES)
    # 80/20 split with a fixed seed.
    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    x_train = np.array(x_train, dtype=np.uint8).reshape(-1, img_rows, img_cols, color_type)
    x_test = np.array(x_test, dtype=np.uint8).reshape(-1, img_rows, img_cols, color_type)

    return x_train, x_test, y_train, y_test

# Validation
def load_test(size=200000, img_rows=64, img_cols=64, color_type=3):
    """Load up to ``size`` images from the test folder.

    Files are taken in sorted file-name order from ``/Bangkit/imgs/test``.

    Args:
        size: Maximum number of images to load; values <= 0 load nothing.
        img_rows: Target image height.
        img_cols: Target image width.
        color_type: 1 for grayscale, 3 for colour.

    Returns:
        ``(X_test, X_test_id)``: the list of loaded images and the parallel list
        of their base file names.
    """
    path = os.path.join('/Bangkit/imgs/test', '*.jpg')
    # Slicing enforces the sample limit up front (the slice is clamped to the
    # number of files), so no counter or break is needed inside the loop.
    files = sorted(glob(path))[:max(size, 0)]
    X_test, X_test_id = [], []
    for file in tqdm(files):
        X_test.append(get_cv2_image(file, img_rows, img_cols, color_type))
        # The id is recorded for every loaded image so both lists stay aligned.
        X_test_id.append(os.path.basename(file))
    return X_test, X_test_id

def read_and_normalize_sampled_test_data(size, img_rows, img_cols, color_type=3):
    """Load a sample of test images as one uint8 array plus their file names.

    Args:
        size: Maximum number of test images to load.
        img_rows: Target image height.
        img_cols: Target image width.
        color_type: Number of channels (1 or 3).

    Returns:
        ``(test_data, test_ids)``: an array reshaped to
        ``(-1, img_rows, img_cols, color_type)`` and the list of ids returned by
        ``load_test``.
    """
    images, test_ids = load_test(size, img_rows, img_cols, color_type)
    batch_shape = (-1, img_rows, img_cols, color_type)
    test_data = np.array(images, dtype=np.uint8).reshape(batch_shape)
    return test_data, test_ids

In [29]:
# Load all training images and split them into train / validation arrays.
train_val_split = read_and_normalize_train_data(img_rows, img_cols, color_type)
x_train, x_test, y_train, y_test = train_val_split

n_train = x_train.shape[0]
print('Train shape:', x_train.shape)
print(n_train, 'train samples')

  0%|                                                                                           | 0/10 [00:00<?, ?it/s]

Loading directory c0


 10%|████████▎                                                                          | 1/10 [00:37<05:40, 37.82s/it]

Loading directory c1


 20%|████████████████▌                                                                  | 2/10 [01:10<04:50, 36.36s/it]

Loading directory c2


 30%|████████████████████████▉                                                          | 3/10 [01:44<04:09, 35.71s/it]

Loading directory c3


 40%|█████████████████████████████████▏                                                 | 4/10 [02:20<03:34, 35.75s/it]

Loading directory c4


 50%|█████████████████████████████████████████▌                                         | 5/10 [02:55<02:56, 35.35s/it]

Loading directory c5


 60%|█████████████████████████████████████████████████▊                                 | 6/10 [03:58<02:55, 43.76s/it]

Loading directory c6


 70%|██████████████████████████████████████████████████████████                         | 7/10 [04:41<02:10, 43.54s/it]

Loading directory c7


 80%|██████████████████████████████████████████████████████████████████▍                | 8/10 [05:15<01:21, 40.76s/it]

Loading directory c8


 90%|██████████████████████████████████████████████████████████████████████████▋        | 9/10 [05:58<00:41, 41.37s/it]

Loading directory c9


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [06:33<00:00, 39.34s/it]


Data Loaded in 393.45102310180664 second
Train shape: (17939, 224, 224, 3)
17939 train samples


In [None]:
# Load a small sample of the (unlabelled) test images for later inspection.
nb_test_samples = 200
test_data, test_ids = read_and_normalize_sampled_test_data(nb_test_samples, img_rows, img_cols, color_type)
# The loaded array is `test_data`; the previously printed `test_files` was never defined.
print('Test shape:', test_data.shape)
print(test_data.shape[0], 'Test samples')

In [8]:
# Statistics
# Class names are the names of the sub-folders of the training directory.
# os.path.basename is used instead of a fixed character slice so the result
# does not depend on the length of the directory path.
names = [os.path.basename(item) for item in sorted(glob('/Bangkit/imgs/train/*'))]
# Count the test images on disk (not only the sampled ones loaded in memory).
test_files_size = len(glob(os.path.join('/Bangkit/imgs/test', '*.jpg')))
x_train_size = len(x_train)
categories_size = len(names)
x_test_size = len(x_test)
print('There are %s total images.\n' % (test_files_size + x_train_size + x_test_size))
print('There are %d training images.' % x_train_size)
print('There are %d total training categories.' % categories_size)
print('There are %d validation images.' % x_test_size)
print('There are %d test images.'% test_files_size)

There are 102150 total images.

There are 17939 training images.
There are 10 total training categories.
There are 4485 validation images.
There are 79726 test images.


In [None]:
# Bar chart of the number of indexed images per class.
fig, ax = plt.subplots(figsize=(10, 10))
sns.countplot(x='classname', data=df, ax=ax)
# Label the y axis and give the chart a title
ax.set_ylabel('Count')
ax.set_title('Categories Distribution')
plt.show()

In [None]:
# Human-readable description for each class folder name.
activity_map = dict(
    c0='Safe driving',
    c1='Texting - right',
    c2='Talking on the phone - right',
    c3='Texting - left',
    c4='Talking on the phone - left',
    c5='Operating the radio',
    c6='Drinking',
    c7='Reaching behind',
    c8='Hair and makeup',
    c9='Talking to passenger',
)

In [None]:
# Show one sample image per class folder in a two-column grid.
plt.figure(figsize = (12, 20))
BASE_URL = '/Bangkit/imgs/train/'

# Keep visible sub-directories only; sorted so panels appear in a stable order
# instead of whatever order os.listdir happens to return.
class_dirs = sorted(
    entry for entry in os.listdir(BASE_URL)
    if not entry.startswith('.') and os.path.isdir(os.path.join(BASE_URL, entry))
)
n_cols = 2
n_rows = -(-len(class_dirs) // n_cols)  # ceiling division: grid always has enough cells

image_count = 1
for directory in class_dirs:
    class_files = sorted(os.listdir(os.path.join(BASE_URL, directory)))
    if not class_files:
        # Nothing to display for an empty class folder.
        continue
    # Named sample_img so the imported `image` module (keras.preprocessing)
    # is not overwritten by this cell.
    sample_img = mpimg.imread(os.path.join(BASE_URL, directory, class_files[0]))
    plt.subplot(n_rows, n_cols, image_count)
    image_count += 1
    plt.imshow(sample_img)
    # Fall back to the folder name when it has no entry in activity_map.
    plt.title(activity_map.get(directory, directory))
plt.show()

In [9]:
bs = 10   # batch size used by the fit() calls
ep = 100  # maximum number of training epochs

desired_accuracy = 0.999


class myCallback(tf.keras.callbacks.Callback):
    """Stop training as soon as training accuracy exceeds ``desired_accuracy``."""

    def on_epoch_end(self, epoch, logs=None):
        """Check the epoch's training accuracy and request a stop if it is high enough.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Metric dict supplied by Keras; may be None.
        """
        logs = logs or {}
        # The metric is logged as 'accuracy' by tf.keras 2.x and as 'acc' by
        # older releases; accept either and skip the check if neither exists.
        acc = logs.get('accuracy', logs.get('acc'))
        if acc is not None and acc > desired_accuracy:
            print("\nReached {:.1%} accuracy so cancelling training!".format(desired_accuracy))
            self.model.stop_training = True


# Instance passed to model1.fit(..., callbacks=[callbacks]).
callbacks = myCallback()


In [None]:
# Baseline CNN trained from scratch: four Conv2D + MaxPooling2D stages followed
# by a dense classifier with one softmax output per class.
model1 = tf.keras.models.Sequential([
    # Conv2D needs a 3-D (height, width, channels) input shape, and it has to
    # match the arrays produced by read_and_normalize_train_data.
    tf.keras.layers.Conv2D(512, (3, 3), activation='relu',
                           input_shape=(img_rows, img_cols, color_type)),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(256, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation=tf.nn.relu),
    tf.keras.layers.Dense(NUMBER_CLASSES, activation=tf.nn.softmax)])

# Labels are one-hot encoded, hence categorical cross-entropy.
model1.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model1.summary()

In [None]:
# Train the baseline CNN; `callbacks` must already be bound by an earlier cell.
history = model1.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=ep,
    batch_size=bs,
    callbacks=[callbacks],
)

In [None]:
def plot_train_history(history):
    """Plot the training and validation curves, first accuracy and then loss.

    Args:
        history: Object whose ``history`` attribute is a dict holding the
            per-epoch series 'accuracy', 'val_accuracy', 'loss' and 'val_loss'.
    """
    curves = history.history
    # One figure per metric; each shows the training series and its 'val_' twin.
    for metric in ('accuracy', 'loss'):
        plt.plot(curves[metric])
        plt.plot(curves['val_' + metric])
        plt.title('Model ' + metric)
        plt.ylabel(metric)
        plt.xlabel('epoch')
        plt.legend(['train', 'test'], loc='upper left')
        plt.show()

In [None]:
# Show the accuracy and loss curves recorded while fitting model1.
plot_train_history(history)


In [None]:
# Evaluate the model on the training split and on the held-out validation split.
# NOTE(review): with metrics=['accuracy'], evaluate() presumably returns
# [loss, accuracy], so these names hold both values, not only accuracy — confirm.
train_acc = model1.evaluate(x_train, y_train)
test_acc = model1.evaluate(x_test, y_test)


In [46]:
# MobileNet backbone with ImageNet weights and without its classification head.
IMG_SHAPE = (img_rows, img_cols, color_type)

mobnet = tf.keras.applications.MobileNet(
    weights='imagenet',
    include_top=False,
    input_shape=IMG_SHAPE,
)


In [51]:
# Transfer-learning model: MobileNet backbone, one extra convolution, global
# average pooling and a 10-way softmax classifier.
model2 = tf.keras.Sequential()
model2.add(mobnet)
model2.add(tf.keras.layers.Conv2D(1024, 3, activation='relu'))
model2.add(tf.keras.layers.GlobalAveragePooling2D())
model2.add(tf.keras.layers.Dense(10, activation='softmax'))

In [52]:
# Callbacks are taken from tf.keras so they match the tf.keras model they are
# attached to.
# Keep only the weights with the best (lowest) validation loss seen so far.
checkpointer = tf.keras.callbacks.ModelCheckpoint('mobnet_best.hdf5', monitor='val_loss',
                                                  verbose=1, save_best_only=True)
# The model is compiled with metrics=['accuracy'], so the validation metric is
# logged as 'val_accuracy'; monitoring 'val_acc' would never find a value and
# early stopping would not trigger.
earlystop = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=10, verbose=1)
model2.compile(optimizer=tf.keras.optimizers.Adam(),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model2.summary()

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
mobilenet_1.00_224 (Model)   (None, 7, 7, 1024)        3228864   
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 5, 5, 1024)        9438208   
_________________________________________________________________
global_average_pooling2d_6 ( (None, 1024)              0         
_________________________________________________________________
dense_9 (Dense)              (None, 10)                10250     
Total params: 12,677,322
Trainable params: 12,655,434
Non-trainable params: 21,888
_________________________________________________________________


In [53]:
# Fine-tune the MobileNet-based model with early stopping and best-model checkpointing.
history1 = model2.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=ep,
    batch_size=bs,
    callbacks=[earlystop, checkpointer],
    verbose=1,
)

Train on 17939 samples, validate on 4485 samples
Epoch 1/100
Epoch 00001: val_loss improved from inf to 0.09316, saving model to mobnet_best.hdf5
Epoch 2/100

KeyboardInterrupt: 