In [None]:
import os
import imageio
import numpy as np
import keras
import pandas as pd
from sklearn.model_selection import  train_test_split
from skimage.feature import hog
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, InputLayer
from keras.layers import Conv2D, MaxPooling2D


In [None]:
## Load every class folder's PNG images into numpy arrays.
## Each sub-directory ./MyData/<class id>/ contributes images labelled with that id.
data = []
labels = []

num_classes = 43

for i in range(num_classes):
    training_directory = './MyData/' + str(i) + '/'
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # dataset order identical from run to run.
    for filename in sorted(os.listdir(training_directory)):
        if filename.endswith('.png'):
            image = imageio.imread(os.path.join(training_directory, filename))

            # Populate the parallel image / label lists
            data.append(np.array(image))
            labels.append(i)
    # Progress indicator: one class id per finished folder
    print(str(i), end=' ')

## Force it to be np.array
## NOTE(review): assumes every image has the same shape so this stacks into
## a single numeric array -- confirm against the dataset.
data = np.array(data)
labels = np.array(labels)

In [None]:
# Split the data: 70% for training, 30% held out.
# A fixed random_state makes the split (and every metric computed from it)
# reproducible across kernel restarts.
x_train, x_test, y_train, y_test = train_test_split(
    data, labels, test_size=0.3, random_state=42)

In [None]:
# One-hot encode the integer class ids of both splits (num_classes columns each).
y_train, y_test = (
    keras.utils.to_categorical(class_ids, num_classes)
    for class_ids in (y_train, y_test)
)

In [None]:
# Convert both splits to float32 and divide by 255.
# NOTE(review): presumably the inputs are 8-bit images, making the result
# lie in [0, 1] -- confirm. Re-running this cell divides by 255 again.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

In [None]:
def one_hot_to_label(one_hot):
    """Return the index of the largest entry of ``one_hot``.

    For a one-hot (or probability) vector this is the class id. Ties
    resolve to the first maximal index; multi-dimensional input is
    indexed as if flattened.
    """
    vector = np.asarray(one_hot)
    return vector.argmax()

In [None]:
# Read the class-name table and keep it as a plain array.
# Rows are indexed by class id elsewhere in this notebook and column 1 is
# used as the class name -- assumes the CSV rows are ordered by class id.
classes = np.array(pd.read_csv('./labels.csv'))

In [None]:
from matplotlib import pyplot as plt
%matplotlib inline

for i in range(4):
    label = one_hot_to_label(y_train[i])
    plt.figure()
    plt.title("Class id: "+ str(label)+ " and class name: " + classes[label][1])
    plt.imshow(x_train[i])

In [None]:
#Model 1
# Two 3x3 convolutions (32 filters, 'same' padding, ReLU) -> 2x2 max-pool ->
# dropout, then a 512-unit dense layer and a softmax over num_classes.

model = Sequential()
# Only the first layer of a Sequential model needs input_shape; later layers
# infer their input from the previous layer's output.
model.add(Conv2D(32, (3, 3), padding='same', input_shape=x_train.shape[1:]))
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

# categorical_crossentropy matches the one-hot encoded targets.
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [None]:
# Print the layer-by-layer summary of Model 1.
model.summary()

In [None]:
from IPython.display import Image
from keras.utils.vis_utils import model_to_dot

# Render Model 1's layer graph to PNG bytes with the 'dot' program and
# display it inline as the cell output.
model_graph = model_to_dot(model)
graph_png = model_graph.create(prog='dot', format='png')
Image(graph_png)

In [None]:
batch_size = 128
epochs = 30

# Train Model 1; the held-out split is passed as validation data, so the
# 'val_*' entries in the returned history come from x_test / y_test.
history = model.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=epochs,
    batch_size=batch_size,
    shuffle=True,
)

In [None]:
# Save Model 1 to 'k_model.h5' in the current working directory.
model.save('k_model.h5')

In [None]:
import numpy as np
from numpy import argmax

# Class probabilities for every held-out sample, one row per sample.
y_pred = model.predict(x_test)

# Pick the most probable class from the RAW probabilities. Rounding first
# turns any row whose top probability is below 0.5 into all zeros, and argmax
# of an all-zero row silently reports class 0.
predicted_ids = np.argmax(y_pred, axis=1)

# `test` stays a 0/1 array shaped like y_pred, now with exactly one 1 per row
# (at the predicted class).
test = np.zeros_like(y_pred)
test[np.arange(len(y_pred)), predicted_ids] = 1

# One single-element list per sample holding its predicted class id.
results = [[class_id] for class_id in predicted_ids]

In [None]:
#Small test to visualise the model's incorrect guesses
#This may indicate what it finds difficult & where it struggles
# Inspect at most the first 1000 held-out samples; clamping to the actual
# test-set size avoids an IndexError on smaller datasets.
for i in range(min(1000, len(y_test))):
    # argmax over the raw probabilities: rounding first would zero out
    # low-confidence rows and make them look like class-0 predictions.
    pred = np.argmax(y_pred[i])
    ans = np.argmax(y_test[i])

    if pred != ans:
        plt.figure()
        plt.title("prediction: " + classes[pred][1] + " -- actual: " + classes[ans][1])
        plt.imshow(x_test[i])

In [None]:
# These show the model doesn't improve much beyond ~10 epochs

# One figure per metric: training curve first, validation curve second.
for train_key, val_key, heading, y_label in (
    ('accuracy', 'val_accuracy', 'Model accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Model loss', 'Loss'),
):
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(heading)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc='upper left')
    plt.show()

In [None]:
# Trying out a similar model from paper [5]
# I don't think this is as good - just for comparison
# Conv(32, 'same') + ReLU -> max-pool -> dropout -> Conv(32, 'valid') ->
# max-pool -> 128-unit dense layer -> softmax over num_classes.

model2 = Sequential()
# Only the first layer of a Sequential model needs input_shape; later layers
# infer their input from the previous layer's output.
model2.add(Conv2D(32, (3, 3), padding='same', input_shape=x_train.shape[1:]))
model2.add(Activation('relu'))
model2.add(MaxPooling2D(pool_size=(2, 2)))
model2.add(Dropout(0.2))

# NOTE(review): this convolution has no activation before pooling, unlike the
# first one -- confirm whether that is intended by the referenced paper.
model2.add(Conv2D(32, (3, 3), padding='valid'))
model2.add(MaxPooling2D(pool_size=(2, 2)))

model2.add(Flatten())
model2.add(Dense(128))
model2.add(Activation('relu'))
model2.add(Dropout(0.5))

model2.add(Dense(num_classes))
model2.add(Activation('softmax'))

# categorical_crossentropy matches the one-hot encoded targets.
model2.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [None]:
# Print the layer-by-layer summary of the comparison model.
model2.summary()

In [None]:
batch_size = 128
epochs = 20

# Train the comparison model; the held-out split is passed as validation data.
history2 = model2.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=epochs,
    batch_size=batch_size,
    shuffle=True,
)