In [17]:
import os
from glob import glob #used to access a file specified by a path
import random
import tensorflow
os.environ['KERAS_BACKEND'] = 'tensorflow'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # 3 = INFO, WARNING, and ERROR messages are not printed

from tqdm import tqdm  #Used to print progress bars

import numpy as np
import pandas as pd
from IPython.display import FileLink
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
from IPython.display import display, Image
import matplotlib.image as mpimg
import cv2 #Computer Vision Library

from sklearn.model_selection import train_test_split
from sklearn.datasets import load_files       
from keras.utils import np_utils
from sklearn.utils import shuffle
from sklearn.metrics import log_loss


In [18]:
import csv
data={}  #This dictionary will contain the classes as keys and a list of images belonging to that class as values
with open('../input/state-farm-distracted-driver-detection/driver_imgs_list.csv') as f:
    reader = csv.reader(f)
    next(reader) #to avoid taking column names
    for row in reader:
        #print(row[1])
        key = row[1].lower()
        if(key in data):
            #print(data[key])
            data[key].append(row[2])
        else:
            data[key] = [row[2]]
        

In [19]:
class_list = list(data.keys())
class_list

['c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9']

In [20]:
import os
os.mkdir("Master_Data")
os.mkdir("Master_Data/Training")
os.mkdir("Master_Data/Testing")

In [21]:
for x in class_list:
    os.mkdir(os.path.join("Master_Data/Training",x))
    os.mkdir(os.path.join("Master_Data/Testing",x))

In [22]:
import shutil as sh
split_size = 0.8

In [23]:
#This Code Snippet would copy 80% of the images in original input folder to the Training Folder and 20% to the Testing folder
for clas,images in data.items():
  train_size = int(len(images)*split_size)
  train_images=images[:train_size]
  test_images=images[train_size:]
  for image in train_images:
    source = os.path.join("../input/state-farm-distracted-driver-detection/imgs/train",clas,image)
    dest = os.path.join("./Master_Data/Training",clas)
    sh.copy(source,dest)
  for image in test_images:
    source = os.path.join("../input/state-farm-distracted-driver-detection/imgs/train",clas,image)
    dest = os.path.join("./Master_Data/Testing",clas)
    sh.copy(source,dest)   

Now we have Two directories, train and test under Master Data, each of which has 10 subdirectories which contain images belonging to that category

In [24]:
# Load the dataset previously downloaded from Kaggle
NUMBER_CLASSES = 10
# Color type: 1 - grey, 3 - rgb

def get_cv2_image(path, img_rows, img_cols, color_type=3):
    # Loading as Grayscale image
    if color_type == 1:
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)  #Converts a jpeg image to its pixel matrix
    elif color_type == 3:
        img = cv2.imread(path, cv2.IMREAD_COLOR)
    # Reduce size
    img = cv2.resize(img, (img_rows, img_cols)) 
    return img

# Training
def load_train(img_rows, img_cols, color_type=3):
    train_images = [] 
    train_labels = []
    # Loop over the training folder 
    for classed in tqdm(range(NUMBER_CLASSES)):   #prints the progress bar as well
        print('Loading directory c{}'.format(classed))
        files = glob(os.path.join('..', 'input', 'state-farm-distracted-driver-detection','imgs','train', 'c' + str(classed), '*.jpg')) # This will fetch all files which end with .jpg
        for file in files:
            img = get_cv2_image(file, img_rows, img_cols, color_type)
            train_images.append(img)
            train_labels.append(classed)
    return train_images, train_labels 

def read_and_normalize_train_data(img_rows, img_cols, color_type):
    X, labels = load_train(img_rows, img_cols, color_type)
    y = np_utils.to_categorical(labels, 10)  #Used to one hot encode the labels
    x_train, x_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
    
    x_train = np.array(x_train, dtype=np.uint8).reshape(-1,img_rows,img_cols,color_type) #reshaping (rows,cols) to (rows,cols,1) to match the CNN input
    x_val = np.array(x_val, dtype=np.uint8).reshape(-1,img_rows,img_cols,color_type)
    
    return x_train, x_val, y_train, y_val



In [25]:
img_rows = 64
img_cols = 64
color_type = 1

In [26]:
x_train, x_val, y_train, y_val = read_and_normalize_train_data(img_rows, img_cols, color_type)
print('Train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')


  0%|          | 0/10 [00:00<?, ?it/s]

Loading directory c0


 10%|█         | 1/10 [00:07<01:06,  7.37s/it]

Loading directory c1


 20%|██        | 2/10 [00:13<00:53,  6.68s/it]

Loading directory c2


 30%|███       | 3/10 [00:19<00:44,  6.40s/it]

Loading directory c3


 40%|████      | 4/10 [00:26<00:39,  6.50s/it]

Loading directory c4


 50%|█████     | 5/10 [00:32<00:32,  6.46s/it]

Loading directory c5


 60%|██████    | 6/10 [00:39<00:26,  6.65s/it]

Loading directory c6


 70%|███████   | 7/10 [00:46<00:19,  6.66s/it]

Loading directory c7


 80%|████████  | 8/10 [00:51<00:12,  6.24s/it]

Loading directory c8


 90%|█████████ | 9/10 [00:57<00:05,  5.97s/it]

Loading directory c9


100%|██████████| 10/10 [01:02<00:00,  6.28s/it]

Train shape: (17939, 64, 64, 1)
17939 train samples





In [27]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, Dropout,BatchNormalization,MaxPooling2D
from keras.regularizers import *

In [28]:
model = Sequential()

## CNN 1
model.add(Conv2D(32,(3,3),activation='relu',input_shape=(img_rows, img_cols, color_type)))
model.add(BatchNormalization())
model.add(Conv2D(32,(3,3),activation='relu',padding='same'))
model.add(BatchNormalization(axis = 3))
model.add(MaxPooling2D(pool_size=(2,2),padding='same'))
model.add(Dropout(0.3))

## CNN 2
model.add(Conv2D(64,(3,3),activation='relu',padding='same'))
model.add(BatchNormalization())
model.add(Conv2D(64,(3,3),activation='relu',padding='same'))
model.add(BatchNormalization(axis = 3))
model.add(MaxPooling2D(pool_size=(2,2),padding='same'))
model.add(Dropout(0.3))

## CNN 3
model.add(Conv2D(128,(3,3),activation='relu',padding='same'))
model.add(BatchNormalization())
model.add(Conv2D(128,(3,3),activation='relu',padding='same'))
model.add(BatchNormalization(axis = 3))
model.add(MaxPooling2D(pool_size=(2,2),padding='same'))
model.add(Dropout(0.5))

## Output
model.add(Flatten())
model.add(Dense(512,activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(128,activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(10,activation='softmax')) # We use softmax in the last layer because it convertsoutput of last layer into probability distribution

model.summary()
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_6 (Conv2D)            (None, 62, 62, 32)        320       
_________________________________________________________________
batch_normalization_7 (Batch (None, 62, 62, 32)        128       
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 62, 62, 32)        9248      
_________________________________________________________________
batch_normalization_8 (Batch (None, 62, 62, 32)        128       
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 31, 31, 32)        0         
_________________________________________________________________
dropout_5 (Dropout)          (None, 31, 31, 32)        0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 31, 31, 64)       

In [29]:
batch_size = 40
nb_epoch = 10
model.fit(x_train, y_train, 
          validation_data=(x_val, y_val),
          epochs=nb_epoch, batch_size=batch_size, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f8d484af110>

In [30]:
#Accessing the test images

test_images = [] 
test_labels = []
# Loop over the training folder 
for classed in tqdm(range(NUMBER_CLASSES)): 
    files = glob(os.path.join('.', 'Master_Data','Testing', 'c' + str(classed), '*.jpg'))
    for file in files:
        img = get_cv2_image(file, img_rows, img_cols, color_type)
        test_images.append(img)
        test_labels.append(classed)

x_test_final = test_images
x_test_final = np.array(x_test_final, dtype=np.uint8).reshape(-1,img_rows,img_cols,color_type)
y_test = np_utils.to_categorical(test_labels, 10)

100%|██████████| 10/10 [00:07<00:00,  1.27it/s]


In [31]:
model.evaluate(x_test_final,y_test)



[0.01966572366654873, 0.9937639236450195]

99% Accuracy

In [32]:
a = [1,2,3,4,5,6,7,8,9,0]
s = 7
t = 8

e = a[s:t]
e

[8]