In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Conv2D,MaxPooling2D,Dense,Flatten,Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping,ModelCheckpoint
from keras_preprocessing.image import ImageDataGenerator
import os
import pandas as pd
import numpy as np

In [2]:
dir = "../input/state-farm-distracted-driver-detection"
train_dir = os.path.join(dir , 'imgs/train/')
test_dir = os.path.join(dir , 'imgs/test/')
data = pd.read_csv(os.path.join(dir , 'driver_imgs_list.csv'))

In [3]:
data.head()

Unnamed: 0,subject,classname,img
0,p002,c0,img_44733.jpg
1,p002,c0,img_72999.jpg
2,p002,c0,img_25094.jpg
3,p002,c0,img_69092.jpg
4,p002,c0,img_92629.jpg


In [4]:
class_counts = data.classname.value_counts()
classes=len(class_counts)
classes

10

**Using Image Data Generator to generate data in format as required by our CNN model**

In [5]:
train_gen = ImageDataGenerator(rescale = 1.0/255, 
                                   shear_range = 0.2, 
                                   zoom_range = 0.2, 
                                   horizontal_flip = True, 
                                   validation_split = 0.2)

train_data=train_gen.flow_from_directory(train_dir,
                                              target_size=(240,240),
                                              class_mode="categorical",
                                              batch_size=32,
                                        subset="training")

val_data=train_gen.flow_from_directory(train_dir,
                                              target_size=(240,240),
                                              class_mode="categorical",
                                              batch_size=32,
                                             subset="validation")

test_gen=ImageDataGenerator(rescale=1.0/255)
test_dir = os.path.join(dir , 'imgs')
test_data = test_gen.flow_from_directory(test_dir ,
                                        target_size=(240,240),
                                        batch_size=32,
                                        classes = ['test'],
                                        shuffle = False)

Found 17943 images belonging to 10 classes.
Found 4481 images belonging to 10 classes.
Found 79726 images belonging to 1 classes.


**Creating Model-CNN**

In [11]:
model=tf.keras.models.Sequential([
    Conv2D(128,(3,3),activation="relu",input_shape=(240,240,3)),
    MaxPooling2D(2,2),
    Dropout(0.3),
    Conv2D(64,(3,3),activation="relu"),
    MaxPooling2D(2,2),
    Dropout(0.3),
    Conv2D(32,(3,3),activation="relu"),
    MaxPooling2D(2,2),
    Dropout(0.3),
    Flatten(),
    Dense(1024,activation="relu"),
    Dense(256,activation="relu"),
    Dense(10,activation="sigmoid")
])

In [12]:
es=EarlyStopping(monitor="val_acc",patience=3)
ck = ModelCheckpoint('best_model.hdf5' , save_best_only=True,monitor='val_acc',mode='min')

In [13]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 238, 238, 128)     3584      
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 119, 119, 128)     0         
_________________________________________________________________
dropout_3 (Dropout)          (None, 119, 119, 128)     0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 117, 117, 64)      73792     
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 58, 58, 64)        0         
_________________________________________________________________
dropout_4 (Dropout)          (None, 58, 58, 64)        0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 56, 56, 32)       

In [14]:
model.compile(loss="categorical_crossentropy",metrics=["accuracy"],optimizer='Adam')
# rmsprop

In [15]:
model.fit(train_data ,
          steps_per_epoch = 17943/32,
          epochs = 10 ,
          validation_data = val_data,
          validation_steps = 4481/32,
          callbacks=[es,ck],
          verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f1e64402350>

**Predicting**

In [16]:
from tensorflow.keras.models import load_model
# model = load_model('best_model.hdf5')
preds = model.predict(test_data)
preds

array([[6.46880686e-01, 4.58720897e-05, 9.76889161e-04, ...,
        2.02701576e-02, 4.95244265e-01, 9.93355036e-01],
       [1.94926232e-01, 1.89310056e-03, 7.08464026e-01, ...,
        6.58268400e-05, 4.48939830e-01, 2.03704804e-01],
       [6.64337456e-01, 4.12233084e-01, 2.27838054e-01, ...,
        3.90346825e-01, 5.33773363e-01, 6.65712297e-01],
       ...,
       [6.95628822e-01, 1.04422055e-01, 2.56466836e-01, ...,
        4.64484215e-01, 4.33876574e-01, 4.77398127e-01],
       [6.59257546e-03, 1.29442720e-03, 9.99132693e-01, ...,
        6.27145842e-02, 9.96552348e-01, 1.56140134e-01],
       [2.33540982e-02, 2.55510258e-06, 2.69264448e-04, ...,
        6.65695146e-02, 3.65564436e-01, 9.93071973e-01]], dtype=float32)

In [17]:
len(preds)

79726

In [18]:
test_imgs = os.path.join(dir , 'imgs/test')
test_ids = sorted(os.listdir(test_imgs))
pred_df = pd.DataFrame(columns=['img','c0','c1','c2','c3','c4','c5','c6','c7','c8','c9'])
for i in range(len(preds)):
    pred_df.loc[i,'img'] = test_ids[i]
    pred_df.loc[i , 'c0':'c9'] = preds[i]

In [19]:
pred_df

Unnamed: 0,img,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9
0,img_1.jpg,0.646881,0.000046,0.000977,0.00022,0.004171,1.0,0.000315,0.02027,0.495244,0.993355
1,img_10.jpg,0.194926,0.001893,0.708464,0.000441,0.304661,1.0,0.051414,0.000066,0.44894,0.203705
2,img_100.jpg,0.664337,0.412233,0.227838,0.409344,0.646531,0.303283,0.599469,0.390347,0.533773,0.665712
3,img_1000.jpg,0.678483,0.05955,0.663945,0.278761,0.654814,0.561894,0.942971,0.00028,0.992025,0.487847
4,img_100000.jpg,0.995611,0.019094,0.000316,0.830371,0.993961,0.96931,0.001428,0.010055,0.370838,0.957253
...,...,...,...,...,...,...,...,...,...,...,...
79721,img_99994.jpg,0.728385,0.463186,0.802038,0.003107,0.040564,0.973405,0.258449,0.320177,0.672092,0.770201
79722,img_99995.jpg,0.91086,0.137871,0.076285,0.978727,0.936109,0.09272,0.154008,0.045749,0.616675,0.805338
79723,img_99996.jpg,0.695629,0.104422,0.256467,0.210666,0.946382,0.170593,0.856107,0.464484,0.433877,0.477398
79724,img_99998.jpg,0.006593,0.001294,0.999133,0.000022,0.025104,0.058454,0.99999,0.062715,0.996552,0.15614
