<a href="https://colab.research.google.com/github/bennykan/ML1020/blob/master/Ml1020_Pretrained_Model_V2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ===========================================================
# ML1020 - Final Project: Mid-Term Proposal
# Team Blue

## Tyler Blakeley
## Benjamin Kan
## Avi Singh
## Justin Kim


# Distracted Driver Detection
# ===========================================================

The project is inspired by a past Kaggle competition hosted by State Farm, an insurance company based in the US. Competition participants were invited to implement a machine learning algorithm to classify and predict the driver’s behavior based on images captured from dashboard cameras installed in the vehicles (https://www.kaggle.com/c/state-farm-distracted-driver-detection/data). The host defined the following 10 driving behavior classifications:

| Label | Driver Behavior Descriptions |
| ----- | ---------------------------- |
| c0	| normal driving
| c1	| texting - right
| c2	| talking on the phone - right
| c3	| texting - left
| c4	| talking on the phone - left
| c5	| operating the radio
| c6	| drinking
| c7	| reaching behind
| c8	| hair and makeup
| c9	| talking to passenger


# Import Keras Libraries

In [1]:
import sys
import os
sys.version
import keras

Using TensorFlow backend.


In [2]:
# Shell escape: print the notebook's current working directory.
!pwd

/home/jupyter/ML1020


In [3]:
import os

import cv2
import numpy as np
from matplotlib import pyplot as plt

# Root folder of the dataset; the preview image is taken from train/c6.
base_dir = '/home/jupyter/Data'
sample_path = os.path.join(base_dir, 'train', 'c6', 'img_380.jpg')

img = cv2.imread(sample_path)
# cv2.imread signals a missing or unreadable file by returning None instead of
# raising, which would otherwise surface as an opaque error inside cvtColor.
if img is None:
    raise FileNotFoundError('Could not read sample image: ' + sample_path)

# Convert from OpenCV's BGR channel order to RGB before handing the array to matplotlib.
img_cvt = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
plt.imshow(img_cvt)
plt.show()

<Figure size 640x480 with 1 Axes>

# Loading the VGG16 Pre-Trained Model

In [4]:
# Load the pretrained convolutional base.
from keras.applications import VGG16

# ImageNet weights, built without the top classifier layers (include_top=False),
# and sized for 150x150 images with 3 channels.
conv_base = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(150, 150, 3),
)

In [5]:
# Print the layers of the convolutional base with their output shapes and parameter counts.
conv_base.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 150, 150, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 150, 150, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 150, 150, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 75, 75, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 75, 75, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 75, 75, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 37, 37, 128)       0         
__________

# Adding One Hidden Layer on Top of VGG16

In [6]:
# Classifier = VGG16 convolutional base + one hidden dense layer + 10-way softmax.
from keras import models
from keras import layers
from keras import optimizers

model = models.Sequential([
    conv_base,
    # Flatten the base's feature maps into one vector per image.
    layers.Flatten(),
    layers.Dense(256, activation='relu', input_dim=4 * 4 * 512),
    layers.Dropout(0.5),
    # One output unit per driver-behavior class (c0..c9).
    layers.Dense(10, activation='softmax'),
])

model.summary()



_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 4, 4, 512)         14714688  
_________________________________________________________________
flatten_1 (Flatten)          (None, 8192)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               2097408   
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 10)                2570      
Total params: 16,814,666
Trainable params: 16,814,666
Non-trainable params: 0
_________________________________________________________________


In [7]:
# Freeze the convolutional base so the weights it has already learned are not
# re-trained. The two prints show the number of trainable weight tensors in
# `model` before and after freezing.
print(len(model.trainable_weights))
conv_base.trainable = False
print(len(model.trainable_weights))

30
4


# Set the Last Convolution Block in VGG16 to be Re-Trainable

In [None]:
conv_base.trainable = True

# Walk the base in layer order: everything before 'block5_conv1' stays frozen,
# that layer and every layer after it become trainable.
set_trainable = False
for layer in conv_base.layers:
    set_trainable = set_trainable or layer.name == 'block5_conv1'
    layer.trainable = set_trainable
print(len(model.trainable_weights))

# Train the Model

## Splitting Data into Train and Validation Sets (80% Train / 20% Validation)

In [8]:
from keras.preprocessing.image import ImageDataGenerator

train_dir = os.path.join(base_dir, 'train')

# Fraction of the training folder held out for validation. Both generators
# below must use the same value so the two subsets stay complementary.
VALIDATION_SPLIT = 0.2

# Training images are scaled by 1/255 and randomly augmented.
# NOTE(review): horizontal_flip mirrors images while classes c1-c4 are told
# apart by left/right hand - confirm that flipping is desirable for this task.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    validation_split=VALIDATION_SPLIT,
)

# Validation images are only rescaled. Reusing the augmenting generator for
# validation would score the model on randomly distorted images and make the
# validation metrics noisy and not comparable with the (un-augmented) test set.
validation_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=VALIDATION_SPLIT,
)

train_generator = train_datagen.flow_from_directory(
    # This is the target directory
    train_dir,
    # All images will be resized to 150x150
    target_size=(150, 150),
    batch_size=20,
    class_mode='categorical',
    subset="training",
    shuffle=True,
)

validation_generator = validation_datagen.flow_from_directory(
    # Same directory as training; `subset` selects the held-out part
    train_dir,
    # All images will be resized to 150x150
    target_size=(150, 150),
    batch_size=20,
    class_mode='categorical',
    subset="validation",
    # Shuffling stays on because the fit cells draw only validation_steps=50
    # batches per epoch rather than the whole validation subset.
    shuffle=True,
)






Found 17943 images belonging to 10 classes.
Found 4481 images belonging to 10 classes.


In [None]:
# Configure training: RMSprop with a small learning rate, categorical
# cross-entropy loss, accuracy reported under the key 'acc'.
model.compile(
    optimizer=optimizers.RMSprop(lr=1e-5),
    loss='categorical_crossentropy',
    metrics=['acc'],
)

# 5 epochs of 100 training batches each, with 50 validation batches per epoch.
# The multiprocessing options (use_multiprocessing / workers) are left at their defaults.
history = model.fit_generator(
    train_generator,
    epochs=5,
    steps_per_epoch=100,
    validation_data=validation_generator,
    validation_steps=50,
    verbose=2,
)

Below is the list of testing labels and their descriptions:



*   c0: safe driving
*   c1: texting - right
*   c2: talking on the phone - right
*   c3: texting - left
*   c4: talking on the phone - left
*   c5: operating the radio
*   c6: drinking
*   c7: reaching behind
*   c8: hair and makeup
*   c9: talking to passenger


In [None]:

# Save the model in two files: the architecture as JSON and the weights as HDF5.
# NOTE(review): the file names say "50Epoch" while the fit call for `model`
# uses epochs=5 - confirm which one is intended.
model_json = model.to_json()
with open("/home/jupyter/Saved_Models/model_50Epoch_1Train_1Hidden.json", "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
model.save_weights("/home/jupyter/Saved_Models/model_50Epoch_1Train_1Hidden.h5")
print("Saved model to disk")

# Plot Training and Validation Accuracy of 50 Epoch model 

In [None]:
import matplotlib.pyplot as plt

# Per-epoch metrics recorded by the training run of `model`.
acc, val_acc = history.history['acc'], history.history['val_acc']
loss, val_loss = history.history['loss'], history.history['val_loss']

epochs = range(len(acc))

# One figure per metric: training values as dots, validation values as a line.
curves = [
    ('acc', 'Training and validation accuracy', acc, val_acc),
    ('loss', 'Training and validation loss', loss, val_loss),
]
for position, (metric, title, train_values, val_values) in enumerate(curves):
    if position:
        # Every metric after the first gets its own figure.
        plt.figure()
    plt.plot(epochs, train_values, 'bo', label='Training ' + metric)
    plt.plot(epochs, val_values, 'b', label='Validation ' + metric)
    plt.title(title)
    plt.legend()

plt.show()

# Model V2

## Add a Second Hidden Layer and Re-Train the Last 2 Convolutional Blocks of VGG16

In [9]:
# Model V2 = VGG16 convolutional base + two hidden dense layers + 10-way softmax.
from keras import models
from keras import layers
from keras import optimizers

model_v2 = models.Sequential([
    conv_base,
    layers.Flatten(),
    # First hidden layer, followed by dropout.
    layers.Dense(256, activation='relu', input_dim=4 * 4 * 512),
    layers.Dropout(0.5),
    # Second hidden layer, followed by dropout.
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    # One output unit per driver-behavior class (c0..c9).
    layers.Dense(10, activation='softmax'),
])


model_v2.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 4, 4, 512)         14714688  
_________________________________________________________________
flatten_2 (Flatten)          (None, 8192)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 256)               2097408   
_________________________________________________________________
dropout_2 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 128)               32896     
_________________________________________________________________
dropout_3 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 10)                1290      
Total para

In [10]:
conv_base.trainable = True

# Unfreezing starts at the first layer whose name matches either marker;
# every layer before it stays frozen and every layer from it onward is trainable.
set_trainable = False
for layer in conv_base.layers:
    if layer.name in ('block4_conv1', 'block5_conv1'):
        set_trainable = True
    layer.trainable = set_trainable
print(len(model_v2.trainable_weights))

18


In [11]:
# Same training configuration as the first model: RMSprop with a small
# learning rate, categorical cross-entropy, accuracy reported as 'acc'.
model_v2.compile(
    optimizer=optimizers.RMSprop(lr=1e-5),
    loss='categorical_crossentropy',
    metrics=['acc'],
)

# 100 epochs of 100 training batches each, with 50 validation batches per epoch.
# The multiprocessing options (use_multiprocessing / workers) are left at their defaults.
history_v2 = model_v2.fit_generator(
    train_generator,
    epochs=100,
    steps_per_epoch=100,
    validation_data=validation_generator,
    validation_steps=50,
    verbose=2,
)


Epoch 1/100
 - 46s - loss: 2.5232 - acc: 0.0865 - val_loss: 2.2986 - val_acc: 0.1140
Epoch 2/100
 - 40s - loss: 2.3769 - acc: 0.0970 - val_loss: 2.2925 - val_acc: 0.1320
Epoch 3/100
 - 40s - loss: 2.3340 - acc: 0.1060 - val_loss: 2.2839 - val_acc: 0.1240
Epoch 4/100
 - 40s - loss: 2.3274 - acc: 0.1140 - val_loss: 2.2828 - val_acc: 0.1290
Epoch 5/100
 - 40s - loss: 2.3131 - acc: 0.1115 - val_loss: 2.2777 - val_acc: 0.1702
Epoch 6/100
 - 40s - loss: 2.3026 - acc: 0.1300 - val_loss: 2.2707 - val_acc: 0.1580
Epoch 7/100
 - 39s - loss: 2.2904 - acc: 0.1260 - val_loss: 2.2511 - val_acc: 0.1930
Epoch 8/100
 - 40s - loss: 2.2794 - acc: 0.1375 - val_loss: 2.2248 - val_acc: 0.2080
Epoch 9/100
 - 40s - loss: 2.2436 - acc: 0.1685 - val_loss: 2.1796 - val_acc: 0.2436
Epoch 10/100
 - 26s - loss: 2.2179 - acc: 0.1585 - val_loss: 2.1238 - val_acc: 0.2840
Epoch 11/100
 - 26s - loss: 2.1546 - acc: 0.1970 - val_loss: 2.0756 - val_acc: 0.2930
Epoch 12/100
 - 26s - loss: 2.1388 - acc: 0.2090 - val_loss: 2.

In [16]:
from keras.models import model_from_json
from keras import optimizers

# Rebuild the saved model from its JSON architecture and HDF5 weights.
# These two files are written by the cell that saves `model_v2`; on a fresh
# kernel that cell has to run before this one.
# The `with` block guarantees the file handle is closed even if reading fails.
with open('/home/jupyter/Saved_Models/model_100epoch_2CNN_2hidden.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights("/home/jupyter/Saved_Models/model_100epoch_2CNN_2hidden.h5")
print("Loaded model from disk")

# Compile with the same optimizer, loss and metric used for training.
loaded_model.compile(optimizer=optimizers.RMSprop(lr=1e-5),
              loss='categorical_crossentropy',
              metrics=['acc'])

Loaded model from disk


In [None]:
# Shell escape: list the files in the saved-models folder.
!ls /home/jupyter/Saved_Models


In [15]:
# Save model_v2 in two files: the architecture as JSON and the weights as HDF5.
# The cell that builds `loaded_model` reads these same two paths.
model_json = model_v2.to_json()
with open("/home/jupyter/Saved_Models/model_100epoch_2CNN_2hidden.json", "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
model_v2.save_weights("/home/jupyter/Saved_Models/model_100epoch_2CNN_2hidden.h5")
print("Saved model to disk")

Saved model to disk


In [None]:
# Print the layers of the model restored from disk, to check the architecture.
loaded_model.summary()

In [17]:
import os
from keras.preprocessing.image import ImageDataGenerator

# Test data
base_dir = '/home/jupyter/Data'
# Folder that is actually read: the images sit in its 'test' sub-folder, which
# is selected through `classes` below. Previously test_dir pointed at
# <base_dir>/test and was never used, while this path was hard-coded separately.
test_dir = os.path.join(base_dir, 'TEST')

# Test images get the same 1/255 rescaling as training, without augmentation.
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(
        test_dir,
        target_size=(150, 150),
        batch_size=20,
        classes=['test'],  # only data, no labels
        class_mode=None,   # yield image batches only; there are no labels to return
        shuffle=False      # keep file order so predictions line up with filenames
       )


Found 79727 images belonging to 1 classes.


# Run the Predictions 

In [18]:
import time

# Predict class probabilities for every test batch and report the elapsed
# wall-clock time in seconds.
start = time.time()
probabilities = loaded_model.predict_generator(
    test_generator,
    steps=len(test_generator),
    verbose=1,
)
end = time.time()
print(end - start)

489.02042961120605


# Export the Results

In [19]:
import os

import pandas as pd

output_dir = '/home/jupyter/Submission/'

# One row of 10 class probabilities per test image, in generator order.
df_prob = pd.DataFrame(probabilities)

# The generator's file names are presumably paths relative to the directory it
# was given (e.g. 'test/img_1.jpg' - TODO confirm). The submission's `img`
# column should hold the bare file name, so any folder prefix is stripped.
df_label = pd.DataFrame([os.path.basename(name) for name in test_generator.filenames])

# Replaces the former hard-coded row count (79727): a length mismatch would
# otherwise be silently padded with NaN by the concat below.
if len(df_label) != len(df_prob):
    raise ValueError(
        'Number of test files (%d) does not match number of predictions (%d)'
        % (len(df_label), len(df_prob))
    )

df = pd.concat([df_label, df_prob], axis=1)
df.columns = ['img', 'c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9']
df.to_csv(os.path.join(output_dir, 'sample_submission6.csv'), index=False)

# Last expression of the cell, so the preview is actually displayed.
df.head()

In [20]:
# Sanity check: number of prediction rows.
len(probabilities)

79727

In [21]:
# Sanity check: (rows, columns) of the submission frame - one `img` column plus c0..c9.
df.shape

(79727, 11)