In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt

import multiprocessing
from multiprocessing.pool import ThreadPool

from sklearn.model_selection import train_test_split

from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import confusion_matrix

import keras
from keras.models import *
from keras.layers import *
from keras.optimizers import *
from keras.callbacks import ModelCheckpoint, LearningRateScheduler
from keras.preprocessing.image import ImageDataGenerator

import datetime

import cv2

import os


In [None]:
## Set local data path
# dataPath: directory holding the .npy / .csv inputs; localPath: prefix used
# later in the notebook for the submission and confusion-matrix CSV files.
dataPath = 'all'
localPath = ''
print(os.listdir(dataPath))

# `encoding` only matters for pickled payloads, so the file presumably stores a
# pickled object array. NumPy >= 1.16.3 refuses to unpickle unless
# allow_pickle=True is passed explicitly.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
images = np.load(dataPath + '/train_images.npy', encoding="bytes", allow_pickle=True)
labels = pd.read_csv(dataPath + '/train_labels.csv')
images.shape


In [None]:
## Contour filtering
def preProcessImage(image, cutoff=127, areaCutoff=14, maxContours=4, fliplr=False):
    """Black out everything outside the bounding boxes of the largest contours.

    The image is cast to uint8 and thresholded at `cutoff`; contours of the
    binary result are sorted by area (largest first). For up to `maxContours`
    of them whose area exceeds `areaCutoff`, the bounding rectangle is kept;
    every pixel outside those rectangles is set to 0.

    Parameters
    ----------
    image : array-like
        Single-channel image; cast to uint8 before processing.
    cutoff : int
        Threshold passed to cv2.threshold (pixels above it become 255).
    areaCutoff : number
        Contours with area <= areaCutoff are ignored.
    maxContours : int
        Only this many of the largest contours are considered.
    fliplr : bool
        When True the filtered image is mirrored left-right before returning.

    Returns
    -------
    numpy.ndarray
        uint8 array reshaped to the shape of the input image.
    """
    image = np.uint8(image)
    _, thresh = cv2.threshold(image, cutoff, 255, cv2.THRESH_BINARY)

    # findContours returns (image, contours, hierarchy) on OpenCV 3.x but
    # (contours, hierarchy) on 2.x / 4.x; the contour list is always the
    # second-to-last element, so index from the end instead of unpacking.
    contours = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]

    mask = np.zeros(image.shape, np.uint8)
    largest_contours = sorted(contours, key=cv2.contourArea, reverse=True)
    for contour in largest_contours[:maxContours]:
        if cv2.contourArea(contour) > areaCutoff:
            x, y, w, h = cv2.boundingRect(contour)
            mask[y:y+h, x:x+w] = 255

    filteredImage = cv2.bitwise_and(image, image, mask=mask)
    if fliplr:
        return np.fliplr(filteredImage).reshape(image.shape)
    return filteredImage.reshape(image.shape)
    

In [None]:
# Visual sanity check of the contour filter on one training image: the first
# figure shows the mirrored image after filtering (only the single largest
# contour, and only if its area exceeds 30), the second the untouched original.
num = 100
image = images[num][1].reshape(100, 100)
betterImage = preProcessImage(np.fliplr(image), areaCutoff=30, maxContours=1)
plt.imshow(betterImage)
plt.figure()
plt.imshow(image)

In [None]:
# One row per image: element 1 of every `images` entry is expanded into the
# feature columns, and the 'Category' column of `labels` is attached as `label`.
allData = pd.DataFrame(np.array(list(images[:,1]))).assign(label=labels['Category'])

In [None]:
# Number of distinct categories; used as the width of every softmax output layer.
num_classes = len(labels['Category'].unique())
# One-hot encoder fitted on the category names (lb.classes_ gives the column order).
lb = LabelBinarizer()
lb.fit(labels['Category'].unique())
# Samples per category, kept for inspection.
valueCounts = labels['Category'].value_counts()

In [None]:
# Training hyper-parameters used by the model.fit calls further down.
epochs = 250
batch_size = 128

In [None]:
# Stratified 85/15 split of the rows (fixed seed so the split is repeatable).
xTrain, xValid = train_test_split(allData, stratify=labels['Category'], test_size=0.15, random_state=12345)
# Remember which original rows ended up in each split.
trainInds = xTrain.index
validInds = xValid.index
# Drop the label column and reshape the flat pixel rows to (n, 100, 100, 1).
xTrainRaw = xTrain.drop('label', axis=1).values.reshape((xTrain.shape[0], 100, 100, 1))
xValidRaw = xValid.drop('label', axis=1).values.reshape((xValid.shape[0], 100, 100, 1))

In [None]:
# Category strings for each split, in the same row order as xTrainRaw / xValidRaw.
# NOTE(review): index labels are used as positions here, which assumes `labels`
# has the default 0..n-1 index and that column 1 is 'Category' -- confirm.
yTrainString = labels.iloc[trainInds, 1].values
yValidString = labels.iloc[validInds, 1].values

In [None]:
# One-hot encode the category strings with the binarizer fitted earlier.
yTrain = lb.transform(yTrainString)
yValid = lb.transform(yValidString)

In [None]:
# For every class name, the row positions (within the train / validation split)
# whose one-hot label has a 1 in that class's column.
trainIndsPerClass = {
    className: np.where(yTrain[:, col] == 1)[0]
    for col, className in enumerate(lb.classes_)
}
validIndsPerClass = {
    className: np.where(yValid[:, col] == 1)[0]
    for col, className in enumerate(lb.classes_)
}



In [None]:
# Contour-filter only the training images labelled 'empty'; the result is kept
# in `example` for inspection.
inds = trainIndsPerClass['empty']
# Index directly with `inds` (already in ascending order, as produced by
# np.where) instead of scanning every row and testing membership in the array,
# and let the context manager release the worker threads when done.
with ThreadPool(multiprocessing.cpu_count()) as pool:
    example = pool.map(preProcessImage, [xTrainRaw[i] for i in inds])


In [None]:
# Contour-filter every training / validation image in a thread pool. The pool
# is released by the context manager even if a worker raises.
with ThreadPool(multiprocessing.cpu_count()) as pool:
    xTrainUnflipped = np.array(pool.map(preProcessImage, list(xTrainRaw)))
    xValidUnflipped = np.array(pool.map(preProcessImage, list(xValidRaw)))

# preProcessImage(im, fliplr=True) is just np.fliplr applied to the filtered
# image, so the mirrored copies are obtained by reversing the width axis of the
# already-filtered batch instead of running the contour filter a second time.
xTrainFlipped = xTrainUnflipped[:, :, ::-1]
xValidFlipped = xValidUnflipped[:, :, ::-1]

# Final arrays: original images first, mirrored copies second.
xTrain = np.concatenate((xTrainUnflipped, xTrainFlipped), axis=0)
xValid = np.concatenate((xValidUnflipped, xValidFlipped), axis=0)

# Build the doubled label arrays from the label strings rather than from the
# current yTrain / yValid, so re-running this cell does not double them again.
yTrainOnce = lb.transform(yTrainString)
yValidOnce = lb.transform(yValidString)
yTrain = np.concatenate((yTrainOnce, yTrainOnce), axis=0)
yValid = np.concatenate((yValidOnce, yValidOnce), axis=0)

In [None]:
# Compare one pre-processed training image (first figure) with its raw version
# (second figure); the cell output is the image's category string.
num = 1011
plt.imshow(xTrain[num][:,:,0])
plt.figure()
plt.imshow(xTrainRaw[num][:,:,0])
yTrainString[num]

In [None]:
## Basic model
# Fully-connected baseline: flatten -> 64 -> 10 -> softmax, with 25% dropout
# after each hidden layer.
modelBasic = Sequential([
    Flatten(input_shape=(100, 100, 1)),
    Dense(64, activation='relu'),
    Dropout(0.25),
    Dense(10, activation='relu'),
    Dropout(0.25),
    Dense(num_classes, activation='softmax'),
])
modelBasic.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer=keras.optimizers.Adadelta(),
    metrics=['accuracy'],
)

In [None]:
## General model taken from online
# Two conv layers, one 5x5 max-pool, then a small dense layer and the softmax.
# NOTE(review): Dense(10) is applied before Flatten, i.e. per spatial position;
# confirm this ordering is intended.
model1 = Sequential([
    Conv2D(15, kernel_size=(3, 3), activation='relu', input_shape=(100, 100, 1)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(5, 5)),
    Dropout(0.25),
    Dense(10, activation='relu'),
    Dropout(0.5),
    Flatten(),
    Dense(num_classes, activation='softmax'),
])
model1.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer=keras.optimizers.Adadelta(),
    metrics=['accuracy'],
)

In [None]:
## Multiple conv layers, no dropout layers
# Two conv/conv/max-pool stages (10x10 pool, then 2x2 pool) feeding the softmax.
model2 = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(100, 100, 1)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(10, 10)),

    Conv2D(64, (3, 3), activation='relu'),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Flatten(),
    Dense(num_classes, activation='softmax'),
])
model2.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer=keras.optimizers.Adadelta(),
    metrics=['accuracy'],
)


In [None]:
## Multiple conv layers, no dropout layers, batch normalization
# Same layout as model2 with a BatchNormalization layer after every convolution.
model3 = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(100, 100, 1)),
    BatchNormalization(),
    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(10, 10)),

    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),

    Flatten(),
    Dense(num_classes, activation='softmax'),
])
model3.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer=keras.optimizers.Adadelta(),
    metrics=['accuracy'],
)


In [None]:
## Multiple conv layers, dropout layers, batch normalization
# Four conv -> batch-norm -> dropout(0.25) stages with max-pooling after the
# third (10x10) and fourth (2x2) stage, then the softmax classifier.

model4 = Sequential()
model4.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(100, 100, 1)))
model4.add(BatchNormalization())
model4.add(Dropout(0.25))

model4.add(Conv2D(64, (3, 3), activation='relu'))
model4.add(BatchNormalization())
# Fixed copy-paste bug: this dropout used to be appended to model1 (after its
# softmax output) instead of to model4.
model4.add(Dropout(0.25))


model4.add(Conv2D(64, (3, 3), activation='relu'))
model4.add(BatchNormalization())
model4.add(Dropout(0.25))

model4.add(MaxPooling2D(pool_size=(10, 10)))

model4.add(Conv2D(64, (3, 3), activation='relu'))
model4.add(BatchNormalization())
model4.add(Dropout(0.25))

model4.add(MaxPooling2D(pool_size=(2, 2)))

model4.add(Flatten())
model4.add(Dense(num_classes, activation='softmax'))
model4.compile(loss = keras.losses.categorical_crossentropy,
             optimizer = keras.optimizers.Adadelta(),
             metrics=['accuracy'])


In [None]:
## Multiple conv layers, dropout layers, batch normalization, average pooling
# Same layout as model4 but with AveragePooling2D instead of MaxPooling2D.

model5 = Sequential()
model5.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(100, 100, 1)))
model5.add(BatchNormalization())
model5.add(Dropout(0.25))

model5.add(Conv2D(64, (3, 3), activation='relu'))
model5.add(BatchNormalization())
# Fixed copy-paste bug: this dropout used to be appended to model1 (after its
# softmax output) instead of to model5.
model5.add(Dropout(0.25))


model5.add(Conv2D(64, (3, 3), activation='relu'))
model5.add(BatchNormalization())
model5.add(Dropout(0.25))

model5.add(AveragePooling2D(pool_size=(10, 10)))

model5.add(Conv2D(64, (3, 3), activation='relu'))
model5.add(BatchNormalization())
model5.add(Dropout(0.25))

model5.add(AveragePooling2D(pool_size=(2, 2)))

model5.add(Flatten())
model5.add(Dense(num_classes, activation='softmax'))
model5.compile(loss = keras.losses.categorical_crossentropy,
             optimizer = keras.optimizers.Adadelta(),
             metrics=['accuracy'])


In [None]:
## Online example of similar problem (modified slightly. Currently has exploding parameter issue)
# Two wide 'same'-padded conv stages with strided max-pooling and heavy dropout,
# followed by two dense layers (384, 192) and the softmax. Trained with Adam.
adam = Adam(lr=1e-4, decay=1e-6)

model6 = Sequential([
    Conv2D(128, kernel_size=(15, 15), padding='same', activation='relu', input_shape=(100, 100, 1)),
    BatchNormalization(momentum=0.99, epsilon=0.00001),
    MaxPooling2D(pool_size=(10, 10), strides=(7, 7), padding='same'),
    Dropout(0.5),

    Conv2D(256, kernel_size=(5, 5), padding='same'),
    BatchNormalization(momentum=0.99, epsilon=0.00001),
    MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same'),
    Dropout(0.5),

    Flatten(),

    Dense(384, activation='relu'),
    Dropout(0.5),

    Dense(192, activation='relu'),
    Dropout(0.5),

    Dense(num_classes, activation='softmax'),
])
model6.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer=adam,
    metrics=['accuracy'],
)


In [None]:
## Model3 + leaky relu
# Each convolution is followed by LeakyReLU(0.1) and then batch normalization;
# pooling is 8x8 after the first pair of convs and 2x2 after the second.
model7 = Sequential([
    Conv2D(32, kernel_size=(3, 3), input_shape=(100, 100, 1)),
    LeakyReLU(alpha=0.1),
    BatchNormalization(momentum=0.99, epsilon=0.00001),
    Conv2D(64, (3, 3)),
    LeakyReLU(alpha=0.1),
    BatchNormalization(momentum=0.99, epsilon=0.00001),
    MaxPooling2D(pool_size=(8, 8), padding='same'),

    Conv2D(64, (3, 3)),
    LeakyReLU(alpha=0.1),
    BatchNormalization(momentum=0.99, epsilon=0.00001),
    Conv2D(64, (3, 3)),
    LeakyReLU(alpha=0.1),
    BatchNormalization(momentum=0.99, epsilon=0.00001),
    MaxPooling2D(pool_size=(2, 2), padding='same'),

    Flatten(),
    Dense(num_classes, activation='softmax'),
])
model7.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer=keras.optimizers.Adadelta(),
    metrics=['accuracy'],
)


In [None]:
## Model3 + leaky relu -1 conv2d layers + dropout layers between layers + SGD
# Three conv -> batch-norm -> LeakyReLU stages (dropout 0.3 and max-pooling
# after the second and third), then Dense(512) and the softmax.
# The model is compiled with the string optimizer 'adam'; the SGD(lr=0.0001)
# alternative and the extra Dropout(0.5) layers the author experimented with
# (after the first stage and after Flatten) are not part of it.
model8 = Sequential([
    Conv2D(32, kernel_size=(5, 5), input_shape=(100, 100, 1)),
    BatchNormalization(momentum=0.99, epsilon=0.00001),
    LeakyReLU(alpha=0.1),

    Conv2D(64, (3, 3)),
    BatchNormalization(momentum=0.99, epsilon=0.00001),
    LeakyReLU(alpha=0.1),
    Dropout(0.3),
    MaxPooling2D(pool_size=(8, 8), padding='same'),

    Conv2D(64, (3, 3)),
    BatchNormalization(momentum=0.99, epsilon=0.00001),
    LeakyReLU(alpha=0.1),
    Dropout(0.3),
    MaxPooling2D(pool_size=(2, 2), padding='same'),

    Flatten(),

    Dense(512),
    LeakyReLU(alpha=0.1),
    Dropout(0.5),

    Dense(num_classes, activation='softmax'),
])
model8.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
## Something from online https://www.iioab.org/articles/IIOABJ_7.S5_337-341.pdf
## inspired by the ts-cnn. Author's notes: for best performance include the BN
## layers (reached about 0.6 validation accuracy), use 512 units for the
## second-to-last dense layer, and use MaxPooling rather than AveragePooling.
## Maybe try adding another dense layer at the end. Also use the string 'adam'
## as optimizer, not the object called adam.
## (An SGD(lr=0.0001) optimizer was also tried and left disabled.)
adam = Adam(lr=1e-4, decay=1e-6)

model9 = Sequential([
    Conv2D(96, kernel_size=(15, 15), input_shape=(100, 100, 1)),
    BatchNormalization(momentum=0.99, epsilon=0.00001),
    MaxPooling2D(pool_size=(3, 3), padding='same'),
    LeakyReLU(alpha=0.1),
    Dropout(0.5),

    Conv2D(192, (15, 15)),
    BatchNormalization(momentum=0.99, epsilon=0.00001),
    MaxPooling2D(pool_size=(3, 3), padding='same'),
    LeakyReLU(alpha=0.1),
    Dropout(0.5),

    Conv2D(192, (3, 3)),
    BatchNormalization(momentum=0.99, epsilon=0.00001),
    LeakyReLU(alpha=0.1),

    Flatten(),
    Dropout(0.5),

    Dense(512),
    LeakyReLU(alpha=0.1),
    Dropout(0.5),

    Dense(256),
    LeakyReLU(alpha=0.1),

    Dense(num_classes, activation='softmax'),
])
model9.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer=adam,
    metrics=['accuracy'],
)

In [None]:
## Interesting model
# Four conv stages (7x7, 5x5, 3x3, 3x3) with batch-norm and LeakyReLU, max-pool
# after the first two, dropout 0.5 between stages, then Dense 512 -> 256 ->
# softmax. The Dense(256) layer has no activation.
adam = Adam(lr=1e-4, decay=1e-6)

model10 = Sequential([
    Conv2D(96, kernel_size=(7, 7), input_shape=(100, 100, 1)),
    BatchNormalization(momentum=0.99, epsilon=0.00001),
    MaxPooling2D(pool_size=(3, 3), padding='same'),
    LeakyReLU(alpha=0.1),
    Dropout(0.5),

    Conv2D(192, (5, 5)),
    BatchNormalization(momentum=0.99, epsilon=0.00001),
    MaxPooling2D(pool_size=(3, 3), padding='same'),
    LeakyReLU(alpha=0.1),
    Dropout(0.5),

    Conv2D(192, (3, 3)),
    BatchNormalization(momentum=0.99, epsilon=0.00001),
    LeakyReLU(alpha=0.1),
    Dropout(0.5),

    Conv2D(192, (3, 3)),
    BatchNormalization(momentum=0.99, epsilon=0.00001),
    LeakyReLU(alpha=0.1),

    Flatten(),
    Dropout(0.5),

    Dense(512),
    LeakyReLU(alpha=0.1),
    Dropout(0.5),

    Dense(256),

    Dense(num_classes, activation='softmax'),
])
model10.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer=adam,
    metrics=['accuracy'],
)

In [None]:
## Best model as of yet. Author's notes: about 200 epochs; 300/400 epochs
## overfit more but still give the best overall results; pre-processing with
## max_contours=2, cutoff=127.
# Three 'same'-padded conv stages (15x15, 15x15, 3x3) with batch-norm and
# LeakyReLU, max-pool and dropout after the first two, then
# Dense 512 -> 256 -> softmax. The Dense(256) layer has no activation.
adam = Adam(lr=1e-3, decay=1e-6)

modelBest = Sequential([
    Conv2D(192, kernel_size=(15, 15), input_shape=(100, 100, 1), padding='same'),
    BatchNormalization(momentum=0.99, epsilon=0.00001),
    MaxPooling2D(pool_size=(3, 3), padding='same'),
    LeakyReLU(alpha=0.1),
    Dropout(0.5),

    Conv2D(192, (15, 15), padding='same'),
    BatchNormalization(momentum=0.99, epsilon=0.00001),
    MaxPooling2D(pool_size=(3, 3), padding='same'),
    LeakyReLU(alpha=0.1),
    Dropout(0.5),

    Conv2D(192, (3, 3), padding='same'),
    BatchNormalization(momentum=0.99, epsilon=0.00001),
    LeakyReLU(alpha=0.1),

    Flatten(),
    Dropout(0.5),

    Dense(512),
    LeakyReLU(alpha=0.1),
    Dropout(0.5),

    Dense(256),

    Dense(num_classes, activation='softmax'),
])
modelBest.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer=adam,
    metrics=['accuracy'],
)

In [None]:
# Choose which of the models defined above is trained / evaluated as `model`.
# Set load = True to resume from the weights file saved under modelName.
load = False
modelName = 'modelBest'
model = modelBest

if load:
    model.load_weights('drive/My Drive/Colab Notebooks/' + modelName + 'weights.hdf5')

# Checkpoint callback writing to the same weights file; with save_best_only=True
# the file is only overwritten when the monitored quantity improves.
checkpoints = ModelCheckpoint(filepath='drive/My Drive/Colab Notebooks/' + modelName + 'weights.hdf5', verbose=1, save_best_only=True)
model.summary()

In [None]:
# Augmentation generator: random rotations, shifts and shear; no horizontal
# flip here (mirrored copies are already part of xTrain).
datagen = ImageDataGenerator(
    rotation_range=24,
    width_shift_range=0.3,
    height_shift_range=0.3,
    shear_range=0.3,
    horizontal_flip=False)

# compute quantities required for featurewise normalization
# (std, mean, and principal components if ZCA whitening is applied)
# NOTE(review): no featurewise/ZCA option is enabled in the constructor above,
# so this call is presumably unnecessary -- confirm against the Keras docs.
datagen.fit(xTrain)

In [None]:
# Replaces the checkpoint callback created earlier: same weights file, but with
# save_best_only=False the file is written after every epoch.
checkpoints = ModelCheckpoint(filepath='drive/My Drive/Colab Notebooks/' + modelName + 'weights.hdf5', verbose=1, save_best_only=False)


In [None]:

# fits the model on batches with real-time data augmentation:
# - the batch size comes from the shared `batch_size` setting instead of a
#   second hard-coded 128;
# - steps_per_epoch is an integer (rounded up so the last partial batch is
#   still seen) rather than a float;
# - the returned History is kept so the loss/accuracy curves can be plotted.
history = model.fit_generator(datagen.flow(xTrain, yTrain, batch_size=batch_size),
                    steps_per_epoch=int(np.ceil(len(xTrain) / batch_size)), epochs=1000,
                    verbose=1, validation_data=(xValid, yValid),
                    callbacks=[checkpoints])

In [None]:
## Without augmentation
# Trains `model` directly on the pre-processed arrays using the shared
# batch_size / epochs settings; the History object feeds the curve plots below.
history = model.fit(xTrain, 
                    yTrain, 
                    batch_size=batch_size,
                    epochs=epochs, verbose=1, 
                    validation_data=(xValid, yValid),
                    callbacks=[checkpoints])

In [None]:
# Evaluate on the validation set; score[0] is the loss and score[1] the
# accuracy (the only metric the models are compiled with).
score = model.evaluate(xValid, yValid, verbose=0)
print('Validation loss:', score[0])
print('Validation accuracy:', score[1])

In [None]:
# Loss Curves: training (red) vs validation (blue) loss per epoch.
plt.figure(figsize=[8,6])
plt.plot(history.history['loss'],'r',linewidth=3.0)
plt.plot(history.history['val_loss'],'b',linewidth=3.0)
plt.legend(['Training loss', 'Validation Loss'],fontsize=18)
plt.xlabel('Epochs ',fontsize=16)
plt.ylabel('Loss',fontsize=16)
plt.title('Loss Curves',fontsize=16)

# Accuracy Curves
# Older Keras releases log the metric as 'acc' / 'val_acc', newer ones as
# 'accuracy' / 'val_accuracy'; use whichever key this History actually has.
accKey = 'acc' if 'acc' in history.history else 'accuracy'
plt.figure(figsize=[8,6])
plt.plot(history.history[accKey],'r',linewidth=3.0)
plt.plot(history.history['val_' + accKey],'b',linewidth=3.0)
plt.legend(['Training Accuracy', 'Validation Accuracy'],fontsize=18)
plt.xlabel('Epochs ',fontsize=16)
plt.ylabel('Accuracy',fontsize=16)
plt.title('Accuracy Curves',fontsize=16)


In [None]:
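## TODO: train dedicated classifiers for the following groups of classes:
## Paintbrush vs pencil vs screwdriver,
## Pool vs mouth,
## Moustache vs empty vs squiggle,
## Skateboard vs rifle.
## Also check pencils and other small classes for the contour area thing
## (presumably the areaCutoff filter in preProcessImage).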

In [None]:
## Paintbrush Pencil screwdriver discriminator
# Three conv stages (7x7, 5x5, 3x3; 192 filters each) with batch-norm and
# LeakyReLU, max-pool and dropout after the first two, then
# Dense 512 -> 256 -> softmax over all num_classes categories.
adam = Adam(lr=1e-4, decay=1e-6)

paintbrushPencilScrewdriver = Sequential([
    Conv2D(192, kernel_size=(7, 7), input_shape=(100, 100, 1)),
    BatchNormalization(momentum=0.99, epsilon=0.00001),
    MaxPooling2D(pool_size=(3, 3), padding='same'),
    LeakyReLU(alpha=0.1),
    Dropout(0.5),

    Conv2D(192, (5, 5)),
    BatchNormalization(momentum=0.99, epsilon=0.00001),
    MaxPooling2D(pool_size=(3, 3), padding='same'),
    LeakyReLU(alpha=0.1),
    Dropout(0.5),

    Conv2D(192, (3, 3)),
    BatchNormalization(momentum=0.99, epsilon=0.00001),
    LeakyReLU(alpha=0.1),

    Flatten(),
    Dropout(0.5),

    Dense(512),
    LeakyReLU(alpha=0.1),
    Dropout(0.5),

    Dense(256),

    Dense(num_classes, activation='softmax'),
])
paintbrushPencilScrewdriver.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer=adam,
    metrics=['accuracy'],
)


In [None]:
# Checkpoint setup for the paintbrush/pencil/screwdriver discriminator.
# Set load = True to resume from the weights file saved under discName.
load = False
discName = 'paintbrushPencilScrewdriver'

if load:
    paintbrushPencilScrewdriver.load_weights('drive/My Drive/Colab Notebooks/' + discName + 'weights.hdf5')

# Note: this rebinds the global `checkpoints` used by the other fit calls.
checkpoints = ModelCheckpoint(filepath='drive/My Drive/Colab Notebooks/' + discName + 'weights.hdf5', verbose=1, save_best_only=True)
paintbrushPencilScrewdriver.summary()

In [None]:
# Train the discriminator only on the rows of its three classes. The row
# positions are gathered once per split and reused for images and labels.
ppsTrainInds = np.concatenate([trainIndsPerClass[name] for name in ('pencil', 'paintbrush', 'screwdriver')])
ppsValidInds = np.concatenate([validIndsPerClass[name] for name in ('pencil', 'paintbrush', 'screwdriver')])

paintbrushPencilScrewdriver.fit(
    xTrain[ppsTrainInds],
    yTrain[ppsTrainInds],
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(xValid[ppsValidInds], yValid[ppsValidInds]),
    callbacks=[checkpoints],
)

In [None]:
## Squiggle / moustache / empty discriminator
# Three conv stages (7x7 with 96 filters, then 5x5 and 3x3 with 192) with
# batch-norm and LeakyReLU, max-pool and dropout after the first two, then
# Dense 512 -> 256 -> softmax over all num_classes categories.
adam = Adam(lr=1e-4, decay=1e-6)

squiggleMoustacheEmpty = Sequential([
    Conv2D(96, kernel_size=(7, 7), input_shape=(100, 100, 1)),
    BatchNormalization(momentum=0.99, epsilon=0.00001),
    MaxPooling2D(pool_size=(3, 3), padding='same'),
    LeakyReLU(alpha=0.1),
    Dropout(0.5),

    Conv2D(192, (5, 5)),
    BatchNormalization(momentum=0.99, epsilon=0.00001),
    MaxPooling2D(pool_size=(3, 3), padding='same'),
    LeakyReLU(alpha=0.1),
    Dropout(0.5),

    Conv2D(192, (3, 3)),
    BatchNormalization(momentum=0.99, epsilon=0.00001),
    LeakyReLU(alpha=0.1),

    Flatten(),
    Dropout(0.5),

    Dense(512),
    LeakyReLU(alpha=0.1),
    Dropout(0.5),

    Dense(256),

    Dense(num_classes, activation='softmax'),
])
squiggleMoustacheEmpty.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer=adam,
    metrics=['accuracy'],
)


In [None]:
# Checkpoint setup for the squiggle/moustache/empty discriminator.
# load = True: previously saved weights are restored, so the weights file must
# already exist at the path below.
load = True
discName = 'squiggleMoustacheEmpty'

if load:
    squiggleMoustacheEmpty.load_weights('drive/My Drive/Colab Notebooks/' + discName + 'weights.hdf5')

# Note: this rebinds the global `checkpoints` used by the other fit calls.
checkpoints = ModelCheckpoint(filepath='drive/My Drive/Colab Notebooks/' + discName + 'weights.hdf5', verbose=1, save_best_only=True)
squiggleMoustacheEmpty.summary()

In [None]:
# Train the discriminator (a single epoch) only on the rows of its three
# classes. The row positions are gathered once per split and reused for images
# and labels.
smeTrainInds = np.concatenate([trainIndsPerClass[name] for name in ('squiggle', 'moustache', 'empty')])
smeValidInds = np.concatenate([validIndsPerClass[name] for name in ('squiggle', 'moustache', 'empty')])

squiggleMoustacheEmpty.fit(
    xTrain[smeTrainInds],
    yTrain[smeTrainInds],
    batch_size=batch_size,
    epochs=1,
    verbose=1,
    validation_data=(xValid[smeValidInds], yValid[smeValidInds]),
    callbacks=[checkpoints],
)

In [None]:

# Load the test images. `encoding` only matters for pickled payloads, so the
# file presumably stores a pickled object array; NumPy >= 1.16.3 refuses to
# unpickle unless allow_pickle=True is passed explicitly.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
testImages = np.load(dataPath + '/test_images.npy', encoding="bytes", allow_pickle=True)
testImages.shape


In [None]:
# Visual sanity check on one test image: the first figure shows the mirrored
# image after contour filtering (largest contour only), the second the original.
num = 5160
image = testImages[num][1].reshape(100, 100)
betterImage = preProcessImage(np.fliplr(image), maxContours=1)
plt.imshow(betterImage)
plt.figure()
plt.imshow(image)

In [None]:
# Same layout as allData: one row per test image, with an empty-string `label`
# column as placeholder.
testData = pd.DataFrame(np.array(list(testImages[:,1]))).assign(label='')

In [None]:
# Drop the placeholder label column and reshape the pixel rows to (n, 100, 100, 1).
xTestRaw = testData.drop('label', axis=1).values.reshape((testData.shape[0], 100, 100, 1))


In [None]:
# Contour-filter every test image in a thread pool (default preProcessImage
# settings, no flipping). The context manager releases the worker threads even
# if one of the calls raises.
with ThreadPool(multiprocessing.cpu_count()) as pool:
    xTest = pool.map(preProcessImage, [xTestRaw[i] for i in range(xTestRaw.shape[0])])
xTest = np.array(xTest)

In [None]:
# Class probabilities for every test image, then the index of the most likely
# class per image (an index into lb.classes_).
yTestProbs = model.predict(xTest)
yPreds = yTestProbs.argmax(axis=-1)
print(yPreds)

In [None]:
# Build the submission table: sequential Id plus the predicted category name.
# The Id range follows the number of predictions instead of a hard-coded
# 10000, which silently truncated the table (zip stops at the shorter input)
# whenever there were more predictions than that.
sub = pd.DataFrame(list(zip(range(len(yPreds)), [lb.classes_[x] for x in yPreds])), columns=['Id', 'Category'])
now=datetime.datetime.now()

# File name is 'submission<day><hour>.csv'.
# NOTE(review): with localPath = '' this path starts with '/', i.e. the
# filesystem root -- confirm that is intended.
sub.to_csv(localPath + '/submission' + str(now.day) + str(now.hour) + '.csv', index=False, header=True)

In [None]:
## Confusion matrix
# Rows are true classes, columns predicted classes, both in lb.classes_ order.

yValidProbs = model.predict(xValid)
yValidPreds = yValidProbs.argmax(axis=-1)
print(yValidPreds)
print(yValid.argmax(axis=-1))
# Pass the full class list explicitly: without `labels`, confusion_matrix only
# covers classes that occur in the inputs, so a class missing from both the
# truth and the predictions would shrink the matrix and break the DataFrame
# construction with its fixed index/columns.
confMat = pd.DataFrame(
    confusion_matrix(
        [lb.classes_[x] for x in yValid.argmax(axis=-1)],
        [lb.classes_[x] for x in yValidPreds],
        labels=list(lb.classes_),
    ),
    index=['true' + cl for cl in lb.classes_],
    columns=['pred' + cl for cl in lb.classes_],
)
confMat.to_csv(localPath + '/confusion.csv')
print(confMat)

In [None]:
# Reload a previously written submission file for post-processing.
# NOTE(review): the file name is hard-coded to one specific day/hour stamp
# ('2715'); it must match a file produced by an earlier run.
sub = pd.read_csv(localPath + '/submission2715.csv')
sub

In [None]:
def predSquig(i, cat):
    """Re-classify test image `i` if its current category is ambiguous.

    When `cat` is one of 'squiggle', 'moustache' or 'empty', the
    squiggleMoustacheEmpty model predicts on xTest[i] and the class name with
    the highest probability (looked up in lb.classes_) is returned. Any other
    category is returned unchanged.
    """
    if cat not in ('squiggle', 'moustache', 'empty'):
        return cat

    sample = xTest[i].reshape((1, 100, 100, 1))
    winner = squiggleMoustacheEmpty.predict(sample).argmax(axis=-1)
    return lb.classes_[winner][0]

# Replace every category in the submission by predSquig's verdict; the row
# position in `sub` is used as the index into xTest.
sub['Category'] = pd.Series([predSquig(i, cat) for i, cat in enumerate(sub['Category'])])

In [None]:
# Write the refined submission as 'submission<day><hour>.csv'.
# NOTE(review): with localPath = '' this path starts with '/', i.e. the
# filesystem root -- confirm that is intended.
now=datetime.datetime.now()

sub.to_csv(localPath + '/submission' + str(now.day) + str(now.hour) + '.csv', index=False, header=True)