<a href="https://colab.research.google.com/github/bennykan/ML1020/blob/master/Ml1020_Pretrained_Model_V2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ===========================================================
# ML1020 - Final Project: Mid-Term Proposal
# Team Blue

## Tyler Blakeley
## Benjamin Kan
## Avi Singh
## Justin Kim


# Distracted Driver Detection
# ===========================================================

The project is inspired by a past Kaggle competition hosted by State Farm, an insurance company based in the US.  Competition participants were invited to implement a machine learning algorithm to classify and predict the driver’s behavior based on the images captured from the dashboard cameras installed in the vehicles (https://www.kaggle.com/c/state-farm-distracted-driver-detection/data). The host defined the following 10 driving behavior classifications

| Label | Driver Behavior Descriptions |
| ----- | ---------------------------- |
| c0	| normal driving
| c1	| texting - right
| c2	| talking on the phone - right
| c3	| texting - left
| c4	| talking on the phone - left
| c5	| operating the radio
| c6	| drinking
| c7	| reaching behind
| c8	| hair and makeup
| c9	| talking to passenger


# Import Keras Libraries

In [1]:
import sys
sys.version
import keras

Using TensorFlow backend.


In [2]:
!pwd

/home/jupyter/ML1020


In [3]:
import os
import numpy as np
from keras.preprocessing.image import ImageDataGenerator

base_dir = '/home/jupyter/Data'

train_dir = os.path.join(base_dir, 'train')
print(train_dir)
#validation_dir = os.path.join(base_dir, 'validation')
#test_dir = os.path.join(base_dir, 'test')

datagen = ImageDataGenerator(rescale=1./255)
batch_size = 10

train_features = np.zeros(shape=(22424, 4, 4, 512))
train_labels = np.zeros(shape=(22424,10))
 
train_generator = datagen.flow_from_directory(
    train_dir,
    target_size=(480, 640),
    batch_size=batch_size,
    class_mode='categorical')

/home/jupyter/Data/train
Found 22424 images belonging to 10 classes.


In [4]:
import cv2
import numpy as np
from matplotlib import pyplot as plt

img = cv2.imread(base_dir + '/train/c6/img_380.jpg')
img_cvt=cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
plt.imshow(img_cvt)
plt.show()

<Figure size 640x480 with 1 Axes>

# Loading the VGG16 Pre-Trained Model

In [5]:
#Loading in Pretrained Model

from keras.applications import VGG16

conv_base = VGG16(weights='imagenet',
                  include_top=False,
                  input_shape=(150, 150, 3))

In [6]:
conv_base.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 150, 150, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 150, 150, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 150, 150, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 75, 75, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 75, 75, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 75, 75, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 37, 37, 128)       0         
__________

# Adding One Hidden Layer on Top of VGG16

In [7]:
#Model with Conv Base Included
from keras import models
from keras import layers
from keras import optimizers
model = models.Sequential()
model.add(conv_base)
model.add(layers.Flatten())
model.add(layers.Dense(256, activation='relu', input_dim=4 * 4 * 512))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(10, activation = 'softmax'))

model.summary()



_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 4, 4, 512)         14714688  
_________________________________________________________________
flatten_1 (Flatten)          (None, 8192)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               2097408   
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 10)                2570      
Total params: 16,814,666
Trainable params: 16,814,666
Non-trainable params: 0
_________________________________________________________________


In [8]:
#Set conv_base training weights to false so we dont re train the weights already learned
print(len(model.trainable_weights))
conv_base.trainable = False
print(len(model.trainable_weights))

30
4


# Set the Last Convolution Layer in VGG16 to be Re-Trainable

In [9]:
conv_base.trainable = True

set_trainable = False
for layer in conv_base.layers:
    if layer.name == 'block5_conv1':
        set_trainable = True
    if set_trainable:
        layer.trainable = True
    else:
        layer.trainable = False
print(len(model.trainable_weights))

10


# Train the Model

In [10]:
from keras.preprocessing.image import ImageDataGenerator

base_dir = '/home/jupyter/Data'

train_dir = os.path.join(base_dir, 'train')

train_datagen = ImageDataGenerator(
      rescale=1./255,
      rotation_range=40,
      width_shift_range=0.2,
      height_shift_range=0.2,
      shear_range=0.2,
      zoom_range=0.2,
      horizontal_flip=True,
      fill_mode='nearest')



train_generator = train_datagen.flow_from_directory(
        # This is the target directory
        train_dir,
        # All images will be resized to 150x150
        target_size=(150, 150),
        batch_size=20,
        class_mode='categorical')

#validation_generator = test_datagen.flow_from_directory(
        #validation_dir,
        #target_size=(150, 150),
        #batch_size=20,
        #class_mode='binary')

model.compile(loss='categorical_crossentropy',
              optimizer=optimizers.RMSprop(lr=1e-5),
              metrics=['acc'])

history = model.fit_generator(
      train_generator,
      steps_per_epoch=100,
      epochs=20,
      #validation_data=validation_generator,
      #validation_steps=50,
      #use_multiprocessing = True,
      #workers = 4,
      verbose=2)



Found 22424 images belonging to 10 classes.
Epoch 1/20
 - 75s - loss: 2.4885 - acc: 0.1075
Epoch 2/20
 - 59s - loss: 2.3421 - acc: 0.1300
Epoch 3/20
 - 60s - loss: 2.2728 - acc: 0.1535
Epoch 4/20
 - 60s - loss: 2.2062 - acc: 0.1860
Epoch 5/20
 - 61s - loss: 2.1559 - acc: 0.2115
Epoch 6/20
 - 61s - loss: 2.0695 - acc: 0.2660
Epoch 7/20
 - 62s - loss: 1.9949 - acc: 0.2830
Epoch 8/20
 - 62s - loss: 1.8623 - acc: 0.3370
Epoch 9/20
 - 62s - loss: 1.8019 - acc: 0.3635
Epoch 10/20
 - 65s - loss: 1.7373 - acc: 0.3730
Epoch 11/20
 - 66s - loss: 1.6203 - acc: 0.4170
Epoch 12/20
 - 27s - loss: 1.5457 - acc: 0.4450
Epoch 13/20
 - 17s - loss: 1.5102 - acc: 0.4540
Epoch 14/20
 - 17s - loss: 1.4452 - acc: 0.4940
Epoch 15/20
 - 17s - loss: 1.3868 - acc: 0.5130
Epoch 16/20
 - 17s - loss: 1.2859 - acc: 0.5535
Epoch 17/20
 - 17s - loss: 1.2487 - acc: 0.5770
Epoch 18/20
 - 17s - loss: 1.1828 - acc: 0.5890
Epoch 19/20
 - 17s - loss: 1.1635 - acc: 0.5985
Epoch 20/20
 - 17s - loss: 1.0877 - acc: 0.6255


Below is the list of testing labels and their descriptions



*   c0: safe driving
*   c1: texting - right
*   c2: talking on the phone - right
*   c3: texting - left
*   c4: talking on the phone - left
*   c5: operating the radio
*   c6: drinking
*   c7: reaching behind
*   c8: hair and makeup
*   c9: talking to passenger


In [11]:

#Code to run to save model 
model_json = model.to_json()
with open("/home/jupyter/Saved_Models/model_v3.json", "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
model.save_weights("/home/jupyter/Saved_Models/model_v3.h5")
print("Saved model to disk")

Saved model to disk


In [12]:
from keras.models import model_from_json
from keras import optimizers
# Code to load json and create model
json_file = open('/home/jupyter/Saved_Models/model_v3.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights("/home/jupyter/Saved_Models/model_v3.h5")
print("Loaded model from disk")

loaded_model.compile(optimizer=optimizers.RMSprop(lr=1e-5),
              loss='categorical_crossentropy',
              metrics=['acc'])

Loaded model from disk


In [13]:
loaded_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 4, 4, 512)         14714688  
_________________________________________________________________
flatten_1 (Flatten)          (None, 8192)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               2097408   
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 10)                2570      
Total params: 16,814,666
Trainable params: 9,179,402
Non-trainable params: 7,635,264
_________________________________________________________________


In [14]:
import os
from keras.preprocessing.image import ImageDataGenerator
#test data
base_dir = '/home/jupyter/Data'
test_dir = os.path.join(base_dir, 'test')
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(
        '/home/jupyter/Data/TEST',
        target_size=(150, 150),
        batch_size=20,
        classes=['test'],  # only data, no labels
        shuffle=False
       )


Found 79727 images belonging to 1 classes.


# Run the Predictions 

In [15]:
import time
start = time.time()
probabilities = loaded_model.predict_generator(test_generator, 79727,verbose=1)
end = time.time()
print(end - start)

10085.123625278473


# Export the Results

In [18]:
output_dir = '/home/jupyter/Submission/'
import pandas as pd
df_prob = pd.DataFrame(probabilities)
df_label = pd.DataFrame(test_generator.filenames[0:79727])
df = pd.concat([df_label,df_prob],axis=1)
df.columns = ['img','c0','c1','c2','c3','c4','c5','c6','c7','c8','c9']
df.head()
df.to_csv(output_dir + 'sample_submission2.csv',index=False)

In [20]:
len(probabilities)

1594293