In [1]:
import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout
from keras.optimizers import Adam
from keras.callbacks import TensorBoard
from sklearn.model_selection import train_test_split as tts
import matplotlib.pyplot as plt
import tensorflow as tf
import pandas as pd
import numpy as np
import psutil
import glob
import os

Using TensorFlow backend.


In [2]:
def getMinData(dir):
    """Return the smallest first-axis length among the arrays stored in ``dir``.

    Every entry returned by ``os.listdir(dir)`` is opened with ``np.load`` and
    the first element of its ``shape`` is taken as that file's sample count.

    Parameters
    ----------
    dir : str
        Directory holding the array files. A trailing path separator is
        optional because paths are built with ``os.path.join``. (The name
        shadows the ``dir`` builtin; it is kept so keyword callers still work.)

    Returns
    -------
    int
        The minimum ``shape[0]`` over all files, or ``0`` when the directory
        contains no entries.
    """
    # mmap_mode="r" lets NumPy read only the header to learn the shape instead
    # of pulling the whole array into memory.
    sample_counts = [
        np.load(os.path.join(dir, file_name), mmap_mode="r").shape[0]
        for file_name in os.listdir(dir)
    ]
    return min(sample_counts, default=0)

# Defining global variables

In [3]:
# Every image is IMAGE_ROWS x IMAGE_COLS x 1 = 28 * 28 * 1 = 784 values;
# the single channel is because the images are grayscale.
IMAGE_ROWS = 28
IMAGE_COLS = 28
IMAGE_SIZE = IMAGE_ROWS * IMAGE_COLS

# Path to data
IMAGE_DIR = "./data/"

# Base names of the .npy files in IMAGE_DIR. Sorted so that the class count
# and the label ids below come from the same, reproducible list
# (os.listdir / glob return entries in arbitrary order).
NPY_FILES = sorted(
    os.path.basename(path)
    for path in glob.glob(os.path.join(IMAGE_DIR, "*.npy"))
)

# Total number of numpy files (one file per class)
NUM_IMAGES = len(NPY_FILES)

# How many rows are taken from each file per training iteration
TOTAL_ELEMENTS_PER_IMAGE = 500

# Smallest row count found among the files in IMAGE_DIR
MAX_NUM_ELEMENTS = getMinData(IMAGE_DIR)

# Number of full TOTAL_ELEMENTS_PER_IMAGE-sized chunks available in every file
NUM_ITERATON = MAX_NUM_ELEMENTS // TOTAL_ELEMENTS_PER_IMAGE

BATCH_SIZE = 512
IMAGE_SHAPE = (IMAGE_ROWS, IMAGE_COLS, 1)

# Give every label (file name without its extension) a unique integer id in
# the range [0, NUM_IMAGES), matching the width of the softmax output.
label_map = {
    os.path.splitext(file_name)[0]: label_id
    for label_id, file_name in enumerate(NPY_FILES)
}

# Creating the Model
## Defining the model

In [4]:
# Stack of 3x3 convolutions followed by three fully connected layers; the
# final softmax has one output per class (NUM_IMAGES).
model = Sequential()

# First stage: the only one that declares the input shape and pools.
model.add(Conv2D(filters=32, kernel_size=3,
                 activation="relu", input_shape=IMAGE_SHAPE))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.2))

# Remaining stages: the filter count doubles each time while the dropout
# rate grows by 0.05.
for n_filters, drop_rate in ((64, 0.25), (128, 0.3), (256, 0.35), (512, 0.4)):
    model.add(Conv2D(filters=n_filters, kernel_size=3,
                     activation="relu"))
    model.add(Dropout(drop_rate))

model.add(Flatten())
# NOTE(review): the hidden Dense layers are also NUM_IMAGES wide, i.e. their
# size is tied to the number of classes - confirm this is intentional.
model.add(Dense(NUM_IMAGES, activation="relu"))
model.add(Dense(NUM_IMAGES, activation="relu"))
model.add(Dense(NUM_IMAGES, activation="softmax"))

# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" to the output.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 13, 13, 32)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 13, 13, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 11, 11, 64)        18496     
_________________________________________________________________
dropout_2 (Dropout)          (None, 11, 11, 64)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 9, 9, 128)         73856     
_________________________________________________________________
dropout_3 (Dropout)          (None, 9, 9, 128)         0         
__________

## Compiling the model

In [5]:
# The targets fed to fit() are integer class ids rather than one-hot vectors,
# which is what the "sparse" variant of categorical cross-entropy expects.
adam_optimizer = Adam(lr=0.001)

model.compile(
    optimizer=adam_optimizer,
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

# Loading the data and training the model

In [None]:
# Number of passes made over each chunk before moving on to the next one.
EPOCHS_PER_CHUNK = 128


def _load_chunk(image_dir, labels_by_name, min_bound, max_bound):
    """Load rows ``[min_bound, max_bound)`` of every file in ``image_dir``.

    Parameters
    ----------
    image_dir : str
        Directory whose entries are all opened with ``np.load``.
    labels_by_name : dict
        Maps a file name without its last four characters (the ``.npy``
        suffix) to its integer class id.
    min_bound, max_bound : int
        Row slice taken from each file.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``float32`` features divided by 255 and the matching ``int32`` labels,
        concatenated over all files in ``os.listdir`` order.
    """
    feature_parts = []
    label_parts = []
    for file_name in os.listdir(image_dir):
        # mmap_mode="r" reads only the requested rows from disk instead of
        # loading the complete file on every iteration.
        rows = np.load(os.path.join(image_dir, file_name),
                       mmap_mode="r")[min_bound:max_bound]
        # Convert before dividing so the result stays float32 (dividing the
        # raw array by 255 silently produced float64).
        # Presumably the files hold 0-255 pixel values - TODO confirm.
        feature_parts.append(np.asarray(rows, dtype=np.float32) / 255)
        label_parts.append(
            np.full(rows.shape[0], labels_by_name[file_name[:-4]], dtype=np.int32))
    # Concatenate once at the end rather than re-copying a growing array for
    # every file.
    return np.concatenate(feature_parts), np.concatenate(label_parts)


for i in range(0, NUM_ITERATON):
    print("{} of {}".format(i, NUM_ITERATON))
    minBound = i * TOTAL_ELEMENTS_PER_IMAGE
    maxBound = minBound + TOTAL_ELEMENTS_PER_IMAGE

    x_chunk, y_chunk = _load_chunk(IMAGE_DIR, label_map, minBound, maxBound)

    # Split the loaded chunk into training (80%) and validation (20%) subsets.
    # NOTE(review): no random_state is passed, so the split differs per run.
    x_train, x_validate, y_train, y_validate = tts(x_chunk, y_chunk, test_size=.2)

    # Reshape the flat rows into (samples, rows, cols, channels) for Conv2D
    x_train = x_train.reshape(x_train.shape[0], *IMAGE_SHAPE)
    x_validate = x_validate.reshape(x_validate.shape[0], *IMAGE_SHAPE)

    # Fit the model on this chunk
    model.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=EPOCHS_PER_CHUNK,
              verbose=1, validation_data=(x_validate, y_validate))

    # Save the model after every fit so progress survives an interruption
    model.save('model_first.h5')