In [None]:
import os
import random
import keras
import numpy as np
from numpy.random import seed
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Seed every random number generator this notebook has in scope so that a
# fresh "Restart & Run All" starts from the same random state each time.
SEED = 101
random.seed(SEED)         # Python's built-in RNG (imported above but previously never seeded)
seed(SEED)                # NumPy's global RNG (numpy.random.seed)
tf.random.set_seed(SEED)  # TensorFlow's global seed

In [None]:
from google.colab import drive

# Directory under which Google Drive is mounted; the archives and the label
# file read by later cells are addressed relative to it.
DRIVE_MOUNT_POINT = "/content/drive"
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
import zipfile
import cv2

# Unpack the training archive into the working directory. The context manager
# closes the archive handle even if extraction fails.
with zipfile.ZipFile("/content/drive/My Drive/Colab Notebooks/train.zip", 'r') as train_zip:
  train_zip.extractall()

# Order the files numerically by their stem so that e.g. 2.jpg comes before
# 10.jpg. The file name is the tie-breaker, which reproduces the result of a
# plain sort followed by a stable numeric sort. splitext copes with extensions
# of any length (".jpg", ".jpeg", ...).
image_files = sorted(
  os.listdir("./train/"),
  key=lambda name: (int(os.path.splitext(name)[0]), name),
)

x_train = []

for file_name in image_files:
  train_image = cv2.imread(os.path.join("./train/", file_name))
  if train_image is None:
    # cv2.imread reports an unreadable file by returning None rather than
    # raising; fail here with the offending name instead of much later.
    raise IOError("Could not read training image: " + file_name)
  x_train.append(train_image)

In [None]:
# Read the whole label file; `with` closes it even if reading fails.
with open("/content/drive/My Drive/Colab Notebooks/train.txt", "r") as f:
    content = f.read()
lines = content.splitlines()

# The label is the second space-separated field of each line. Blank lines are
# skipped instead of raising an IndexError.
# NOTE(review): assumes line k of train.txt labels the k-th image in numeric
# file-name order - confirm against the dataset description.
y_train = [int(line.split(' ')[1]) for line in lines if line.strip()]

# Images and labels are paired purely by position, so a length mismatch would
# silently mislabel the data further down.
assert len(y_train) == len(x_train), (
    "Label/image count mismatch: %d labels for %d images" % (len(y_train), len(x_train))
)

In [None]:
def preprocess(image, size=(100, 100)):
  """Resize an image and scale its pixel values by 1/255.

  Args:
    image: Image array accepted by ``cv2.resize``.
    size: Target size handed to ``cv2.resize`` as ``(width, height)``.
      Defaults to 100x100 pixels.

  Returns:
    The resized image as a ``float32`` array (values in [0, 1] for 8-bit
    input).
  """
  image = cv2.resize(image, size)
  # Cast before dividing: `image / 255.0` on an integer array produces float64,
  # which takes twice the memory of float32 for every stored image.
  return image.astype(np.float32) / 255.0

In [None]:
# Show the first training image before and after preprocessing.
# cv2.imread stores colour channels in BGR order while matplotlib interprets
# the last axis as RGB, so the channel axis is reversed ([..., ::-1]) for
# display only; the data in x_train is left untouched.
plt.figure()
plt.imshow(x_train[0][..., ::-1])
plt.figure()
plt.imshow(preprocess(x_train[0])[..., ::-1])

In [None]:
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Settings of the random augmentation pipeline, kept in one place so they are
# easy to read and tweak.
augmentation_settings = dict(
  rotation_range=10,
  width_shift_range=0.1,
  height_shift_range=0.1,
  shear_range=0.15,
  zoom_range=0.1,
  channel_shift_range=10.,
  horizontal_flip=True,
)

gen = ImageDataGenerator(**augmentation_settings)

In [None]:
def plotAugmented(original, augmented):
  """Show an image next to its augmented variants in a single row.

  Args:
    original: Image drawn in the first panel (passed to ``plt.imshow``).
    augmented: Sequence of images drawn in the following panels, captioned
      "Augmented image 1", "Augmented image 2", ...
  """
  # Keep the 10-panel row for up to 9 augmented images, and widen the grid when
  # more are passed; with a fixed 1x10 grid the 10th augmented image would ask
  # for subplot index 11 and raise.
  num_columns = max(10, len(augmented) + 1)

  # 3 inches per panel, i.e. the familiar 30x2 figure for the 10-panel case.
  plt.figure(figsize = (3 * num_columns, 2))

  panels = [(original, "Original image")]
  panels += [
    (image, "Augmented image " + str(number))
    for number, image in enumerate(augmented, start=1)
  ]

  for position, (image, caption) in enumerate(panels, start=1):
    plt.subplot(1, num_columns, position)
    plt.imshow(image)
    plt.xticks([])
    plt.yticks([])
    plt.xlabel(caption)

  plt.show()

In [None]:
# Preview three random augmentations of one sample training image. This has to
# run while x_train still holds the raw images: the uint8 cast below would
# flatten already-normalised values.
aug_iter = gen.flow(np.expand_dims(x_train[62],0))
aug_images = [next(aug_iter)[0].astype(np.uint8) for _ in range(3)]

# The images come from cv2.imread and are therefore in BGR channel order;
# reverse the channel axis so matplotlib (RGB) shows the true colours.
plotAugmented(x_train[62][..., ::-1], [image[..., ::-1] for image in aug_images])

In [None]:
NUM_AUGMENTED_PER_IMAGE = 3 # Number of augmented images generated per original

num_originals = len(x_train)
if len(y_train) < num_originals:
  raise ValueError(
    "Fewer labels than training images: %d < %d" % (len(y_train), num_originals)
  )

# Only the first `num_originals` labels belong to the original images. Slicing
# them off (instead of appending to y_train while reading from it) means that
# re-running this cell does not pile up duplicate labels.
original_labels = list(y_train[:num_originals])

augmented_x_train = []
augmented_y_train = []

for original_image, label in zip(x_train, original_labels):
  aug_iter = gen.flow(np.expand_dims(original_image, 0))

  for _ in range(NUM_AUGMENTED_PER_IMAGE):
    aug_image = next(aug_iter)[0].astype(np.uint8)
    augmented_x_train.append(preprocess(aug_image))
    # Every augmented copy inherits the label of the image it was made from.
    augmented_y_train.append(label)

# Same layout as before: original labels first, then the labels of the
# augmented copies in generation order.
y_train = original_labels + augmented_y_train

In [None]:
# Swap each raw image for its preprocessed version, one slot at a time, then
# append the augmented images (which were preprocessed when they were created).
for slot, raw_image in enumerate(x_train):
  x_train[slot] = preprocess(raw_image)

x_train += augmented_x_train

In [None]:
# Drop the now-redundant list. Its elements were appended to x_train and stay
# alive through it, so this releases only the list container itself, not the
# image arrays.
del augmented_x_train

In [None]:
# Stack the per-image arrays into one array. float32 is requested explicitly:
# it needs half the memory of float64 and matches Keras' default float type,
# so no precision that the model would actually use is lost.
x_train = np.array(x_train, dtype=np.float32)
y_train = np.array(y_train)

# Images and labels are paired by position; catch a mismatch before training.
assert len(x_train) == len(y_train), (
  "Image/label count mismatch: %d images for %d labels" % (len(x_train), len(y_train))
)
print(x_train.shape, y_train.shape)

In [None]:
from keras.utils import to_categorical

# One-hot encode the integer labels over 23 classes and store the result as
# 32-bit integers.
y_train = to_categorical(y_train, num_classes=23).astype(np.int32)

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, MaxPooling2D, Dropout, BatchNormalization

def create_model(input_shape=(100, 100, 3), num_classes=23):
    """Build the (uncompiled) convolutional classifier.

    The network consists of five convolutional stages (32, 64, 128, 256 and
    512 filters), each made of Conv2D -> MaxPooling2D -> Dropout ->
    BatchNormalization, followed by Flatten + Dropout, six fully connected
    stages (1024 down to 32 units, each Dense -> Dropout ->
    BatchNormalization) and a softmax output layer.

    Args:
        input_shape: Shape of one input image, passed as ``input_shape`` to
            the first layer. Defaults to ``(100, 100, 3)``.
        num_classes: Number of units in the softmax output layer.
            Defaults to 23.

    Returns:
        The assembled ``Sequential`` model; the caller is expected to compile it.
    """
    model = Sequential()

    # Convolutional feature extractor; layers are added in the same order as
    # if each stage were written out by hand.
    for stage, filters in enumerate((32, 64, 128, 256, 512)):
        # Only the very first layer declares the input shape.
        first_layer_args = {"input_shape": input_shape} if stage == 0 else {}
        model.add(Conv2D(filters, kernel_size=3, padding="same", activation="relu", **first_layer_args))
        model.add(MaxPooling2D(pool_size=2))
        model.add(Dropout(0.2))
        model.add(BatchNormalization())

    model.add(Flatten())
    model.add(Dropout(0.2))

    # Fully connected classifier head, halving the width at every stage.
    for units in (1024, 512, 256, 128, 64, 32):
        model.add(Dense(units, activation="relu"))
        model.add(Dropout(0.2))
        model.add(BatchNormalization())

    model.add(Dense(num_classes, activation="softmax"))

    return model

In [None]:
#!pip install keras_sequential_ascii

In [None]:
# Build the network. The two commented-out lines optionally render it as an
# ASCII diagram; they need the keras_sequential_ascii package from the
# (commented-out) install cell above.
#from keras_sequential_ascii import keras2ascii
model = create_model()
#keras2ascii(model)

In [None]:
# Print Keras' summary of the model built above as a sanity check before training.
model.summary()

In [None]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Keras carves the validation_split off the END of the arrays, before any
# shuffling. x_train is laid out as "all originals, then all augmented copies
# in file order", so without this step the validation set would consist solely
# of augmented copies of the highest-numbered files. Shuffle images and labels
# with one shared permutation so the held-out 20% is a random sample.
shuffle_order = np.random.RandomState(101).permutation(len(x_train))
x_train = x_train[shuffle_order]
y_train = y_train[shuffle_order]

# NOTE(review): augmented copies of one source image can still land on both
# sides of the split, so validation accuracy remains optimistic. A leak-free
# evaluation needs the train/validation split to happen before augmentation.
history = model.fit(x_train, y_train, validation_split=0.2, epochs=100, batch_size=64)

In [None]:
# Draw the training and validation curves recorded by model.fit, one figure
# per metric.
for metric_key, axis_title in (('accuracy', 'Accuracy'), ('loss', 'Loss')):
  plt.plot(history.history[metric_key])
  plt.plot(history.history['val_' + metric_key])
  plt.title(axis_title)
  plt.ylabel(axis_title)
  plt.xlabel('Epoch')
  plt.legend(['train', 'validation'], loc='upper right')
  plt.show()

In [None]:
# Training is finished; release the training arrays to make room for the test set.
del x_train, y_train

In [None]:
# Unpack the test archive into the working directory. The context manager
# closes the archive handle even if extraction fails.
with zipfile.ZipFile("/content/drive/My Drive/Colab Notebooks/test.zip", 'r') as test_zip:
  test_zip.extractall()

# Order the files numerically by their stem so that e.g. 2.jpg comes before
# 10.jpg (the file name breaks ties). The row order of the submission follows
# this order. splitext copes with extensions of any length.
image_files = sorted(
  os.listdir("./test/"),
  key=lambda name: (int(os.path.splitext(name)[0]), name),
)

x_test = []

for file_name in image_files:
  test_image = cv2.imread(os.path.join("./test/", file_name))
  if test_image is None:
    # cv2.imread reports an unreadable file by returning None rather than
    # raising. Skipping it would shift every later prediction by one row, so
    # stop with the offending name instead.
    raise IOError("Could not read test image: " + file_name)
  x_test.append(preprocess(test_image))

In [None]:
# Spot-check one preprocessed test image. The images were read with cv2.imread
# (BGR channel order), so the channel axis is reversed for matplotlib (RGB).
plt.imshow(x_test[7504][..., ::-1])

In [None]:
# Stack the test images into one array. float32 is requested explicitly: it
# needs half the memory of float64 and matches Keras' default float type.
x_test = np.array(x_test, dtype=np.float32)
print(x_test.shape)

In [None]:
# Predict per-class scores for every test image, then keep the index of the
# highest-scoring class as the predicted label.
class_scores = model.predict(x_test)
y_pred = np.argmax(class_scores, axis=1)

In [None]:
import pandas as pd

# One row per test image: the row index (named 'id') is the image's position
# in x_test, the 'label' column holds the predicted class.
pred_results = pd.DataFrame({'label': y_pred}).rename_axis('id')
pred_results.to_csv('submission.csv')