# Mount Google Drive to access the data

In [0]:
# Mount Google Drive at /content/drive. Every dataset and output path used
# later in this notebook lives under '/content/drive/My Drive/...', so this
# cell has to run first. The google.colab import is only available on Colab.
from google.colab import drive
drive.mount('/content/drive')

# Imports

In [0]:
import os
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv
import matplotlib.pyplot as plt
import tensorflow as tf
import cv2 as cv2
from sklearn.model_selection import train_test_split
from PIL import Image
from sklearn.utils import shuffle
from keras.preprocessing.image import ImageDataGenerator
from keras.applications import MobileNetV2
from keras.layers import Dense, GlobalAveragePooling2D, Flatten
from keras.models import Model
from keras.optimizers import Adam
from keras.metrics import categorical_crossentropy
import math
import h5py

# See which device (CPU/GPU) is in use: open a TF1-style session with
# log_device_placement=True so TensorFlow logs the device each op is placed on.
# `sess` itself is not referenced again in the visible cells.
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))

# Load image file paths into DataFrames

In [0]:
# Training images are stored one sub-folder per class: <root>/<class>/<image>.
# Both directory levels are listed in sorted order so the resulting row order
# is deterministic.
root = '/content/drive/My Drive/Colab Notebooks/Data/vehicle/train/train'
data = [
    (category, os.path.join(root, category, file))
    for category in sorted(os.listdir(root))
    for file in sorted(os.listdir(os.path.join(root, category)))
]
df_train = pd.DataFrame(data, columns=['class', 'file_path'])


# Test images sit directly in one flat folder and carry no label.
root2 = '/content/drive/My Drive/Colab Notebooks/Data/vehicle/test/testset'
test_data = [os.path.join(root2, file) for file in sorted(os.listdir(root2))]
df_test = pd.DataFrame(test_data, columns=['file_path'])

# Split the training data into train and validation sets (20% held out for validation), then convert back to DataFrames for the generators

In [0]:
# Pull paths and labels out as plain arrays for scikit-learn.
X_data = df_train['file_path'].to_numpy()
y_data = df_train['class'].to_numpy()

#X_data, y_data = shuffle(X_data, y_data)

# Hold out 20% of the labelled images for validation; the fixed random_state
# keeps the split reproducible between runs.
X_train, X_validation, y_train, y_validation = train_test_split(
    X_data, y_data, test_size=0.2, random_state=42)

# The image generators consume DataFrames, so wrap each split back into one
# with the same two columns as df_train.
dataframe_train = pd.DataFrame({'file_path': X_train, 'class': y_train})
dataframe_validation = pd.DataFrame({'file_path': X_validation, 'class': y_validation})
dataframe_test = df_test

# Compute class weights because of unbalanced dataset

In [0]:
# Disabled experiment: the code below is wrapped in a string literal, so it is
# never executed and `class_weights` is never defined. The training cell calls
# fit_generator without a class_weight argument.
'''
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced',
                                                 np.unique(y_train),
                                                 y_train)
class_weights = dict(enumerate(class_weights))
class_weights
'''

# Reducing size of dataset



In [0]:
# Disabled: if un-quoted, this would truncate each split to its first 1500
# rows for quicker experiments. As written it is only a string literal, so the
# full DataFrames are used.
'''
dataframe_train = dataframe_train[:1500]
dataframe_validation = dataframe_validation[:1500]
dataframe_test = dataframe_test[:1500]
'''

# Image data generators

In [0]:
batch_size = 8

# Both generators only rescale pixel values from [0, 255] to [0, 1];
# no augmentation is applied.
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Labelled training batches (one-hot targets); shuffled each epoch by default.
train_generator = train_datagen.flow_from_dataframe(
        dataframe_train,
        x_col = 'file_path',
        y_col = 'class',
        target_size=(224, 224),
        batch_size=batch_size,
        class_mode='categorical')

# Labelled validation batches, same preprocessing as training.
validation_generator = test_datagen.flow_from_dataframe(
        dataframe_validation,
        x_col = 'file_path',
        y_col = 'class',
        target_size=(224, 224),
        batch_size=batch_size,
        class_mode='categorical')

# Unlabelled test batches (class_mode=None yields images only).
# shuffle=False is essential here: flow_from_dataframe shuffles by default,
# which would return predictions in a random order that no longer matches the
# row order of dataframe_test when the results are written out with
# sequential ids.
test_generator = test_datagen.flow_from_dataframe(
        dataframe_test,
        x_col = 'file_path',
        target_size=(224, 224),
        batch_size=batch_size,
        class_mode=None,
        shuffle=False)

# Creating model

In [0]:
# ImageNet-pretrained MobileNetV2 without its classification top, used as a
# fixed feature extractor for 224x224 RGB inputs.
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# New classification head: global average pooling -> 512-unit ReLU layer ->
# 17-way softmax (one output per vehicle class).
pooled = GlobalAveragePooling2D()(base_model.output)
hidden = Dense(512, activation="relu")(pooled)
predictions = Dense(17, activation="softmax")(hidden)

model = Model(inputs=base_model.input, outputs=predictions)

# Freeze every pretrained layer so only the new head is trained.
for frozen_layer in base_model.layers:
    frozen_layer.trainable = False

model.compile(loss='categorical_crossentropy', optimizer=Adam(0.0005), metrics=['accuracy'])


# Training

In [0]:
# One epoch = one full pass over each split; round up so the final partial
# batch is included.
train_steps = math.ceil(len(X_train) / batch_size)
validation_steps = math.ceil(len(X_validation) / batch_size)

model.fit_generator(
    train_generator,
    steps_per_epoch=train_steps,
    epochs=10,
    validation_data=validation_generator,
    validation_steps=validation_steps)

# Predicting

In [0]:
# Run the trained model over every batch of the test generator; `predicted`
# holds one row of class probabilities per test image, in the order the
# generator yields them.
predicted = model.predict_generator(generator=test_generator, verbose=1)

# Saving prediction in csv format

In [0]:
# Recover the class name for each softmax column from the mapping the training
# generator actually used ({class_name: column_index}). Indexing a hand-built
# list with `x - 1` was off by one: column 0 wrapped around to the last class
# and every other column was mapped to the previous class.
class_indices = train_generator.class_indices
class_names = sorted(class_indices, key=class_indices.get)

# Most probable class per test image.
predicted_indices = np.argmax(predicted, axis=1)
predicted_labels = [class_names[index] for index in predicted_indices]

# Ids are positional, so there must be exactly one prediction per test row
# (and the test generator must yield rows in df_test order, i.e. unshuffled).
if len(predicted_labels) != len(df_test):
    raise ValueError(
        'Got %d predictions for %d test rows; ids would be misaligned.'
        % (len(predicted_labels), len(df_test)))

output = pd.DataFrame({'Id': np.arange(len(df_test)),
                       'Category': predicted_labels})

output.to_csv('/content/drive/My Drive/Colab Notebooks/mobile_net_v2_prediction.csv', index = None, header=True)

# Saving model for future use

In [0]:
# Persist the trained model to Google Drive as a single HDF5 (.h5) file so it
# can be reloaded later without retraining.
model.save('/content/drive/My Drive/Colab Notebooks/mobilenet_v2.h5')

# Accuracy


<table>
<tr>
<th>Number of epochs</th>
<th>Batch size</th>
<th>Optimizer</th>
<th>Use of class weights</th>
<th>Validation accuracy</th>
<th>Dataset size</th>
</tr>
<tr>
<td>3</td>
<td>32</td>
<td>Adam(0.001)</td>
<td>Yes</td>
<td>65%</td>
<td>15%</td>
</tr>
<tr>
<td>3</td>
<td>64</td>
<td>Adam(0.001)</td>
<td>Yes</td>
<td>69%</td>
<td>15%</td>

</tr>
<tr>
<td>4</td>
<td>32</td>
<td>Adam(0.001)</td>
<td>No</td>
<td>75%</td>
<td>15%</td>

</tr>
<tr>
<td>4</td>
<td>64</td>
<td>Adam(0.001)</td>
<td>No</td>
<td>76%</td>
<td>15%</td>

</tr>
<tr>
<td>4</td>
<td>8</td>
<td>Adam(0.001)</td>
<td>No</td>
<td>77%</td>
<td>15%</td>

</tr>
<tr>
<td>4</td>
<td>32</td>
<td>Adam(0.0001)</td>
<td>No</td>
<td>74%</td>
<td>15%</td>

</tr>
<tr>
<td>10</td>
<td>8</td>
<td>Adam(0.0005)</td>
<td>No</td>
<td>81%</td>
<td>100%</td>

</tr>


</table>

