In [1]:
from keras.models import Sequential, load_model
from keras.layers import (Conv2D, MaxPooling2D, Dense, Flatten,
                          GlobalAveragePooling2D, BatchNormalization,
                          Dropout, GaussianNoise, AveragePooling2D)
from keras.preprocessing.image import ImageDataGenerator
from keras.regularizers import l2
import numpy as np
import cv2
import matplotlib.pyplot as plt
import json
import os
import pickle

Using TensorFlow backend.


In [2]:
def to_onehot(idx, size):
    '''Return a one-hot list of length ``size`` with a 1 at position ``idx``.

    Args:
        idx: Zero-based index of the class to mark.
        size: Number of classes, i.e. the length of the returned list.

    Returns:
        A list of ``size`` integers that are all 0 except for a single 1
        at position ``idx``.

    Raises:
        IndexError: If ``idx`` is outside ``[0, size)``. Negative indices
            are rejected explicitly because plain list indexing would
            silently wrap them around and mark the wrong class.
    '''
    if not 0 <= idx < size:
        raise IndexError('one-hot index %r out of range for size %r' % (idx, size))
    res = [0] * size
    res[idx] = 1
    return res


# JSON-lines file: every line is one JSON object with a 'fname' and a 'label' key.
labels_fname = './labels.json'

with open(labels_fname) as f:
    label_rows = [json.loads(line) for line in f]

# Map each image file name to its one-hot label over 10 classes.
# A falsy label (null or 0) is stored as class 0.
fname_to_label = {
    entry['fname']: to_onehot(entry['label'] or 0, size=10)
    for entry in label_rows
}

In [3]:
digits_dir = './digits/'

images = []
labels = []

# os.listdir returns entries in arbitrary order, so sort first and shuffle
# with a fixed-seed generator: the sample order is then the same on every run.
fnames = sorted(os.listdir(digits_dir))
np.random.RandomState(0).shuffle(fnames)
for fname in fnames:
    path = os.path.join(digits_dir, fname)
    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread reports an unreadable file by returning None instead of
        # raising, which would otherwise only blow up in a later cell.
        raise IOError('could not read image %r' % path)
    images.append(img)
    # every file in the directory must have an entry in the labels file
    labels.append(fname_to_label[fname])

print('Loaded %d digits' % len(images))

Loaded 567 digits


In [4]:
# Largest extent along each array axis over all loaded crops; the padding
# step places every crop on a 28x28 canvas, so neither may exceed 28.
dim0_sizes = [im.shape[0] for im in images]
dim1_sizes = [im.shape[1] for im in images]
max(dim0_sizes), max(dim1_sizes)

(28, 28)

In [5]:
def preprocess_image(img, size=28):
    '''Centre a grayscale image on a square canvas and standardise it.

    Args:
        img: 2-D array (rows x columns) that is no larger than ``size``
            along either axis.
        size: Side length of the square output canvas. Defaults to 28.

    Returns:
        Float array of shape ``(size, size, 1)`` holding ``img`` centred on
        a zero background, shifted to zero mean and scaled to unit standard
        deviation. A constant canvas (e.g. an all-black image) has zero
        standard deviation and is returned as all zeros instead of NaNs.

    Raises:
        ValueError: If ``img`` does not fit on the canvas.
    '''
    # numpy shapes are (rows, columns), i.e. (height, width)
    rows, cols = img.shape
    if rows > size or cols > size:
        raise ValueError(
            'image of shape %r does not fit a %dx%d canvas' % (img.shape, size, size)
        )
    top = (size - rows) // 2
    left = (size - cols) // 2

    res = np.zeros((size, size, 1))
    res[top:top + rows, left:left + cols, 0] = img

    mean, std = res.mean(), res.std()
    res = res - mean
    if std > 0:
        # skip the division for constant canvases: 0 / 0 would produce NaNs
        res = res / std

    return res


# Stack the padded, standardised crops and their one-hot labels into arrays.
X_all = np.array([preprocess_image(img) for img in images])
y_all = np.array(labels)

shape_report = 'Data shape: %s - Labels shape: %s' % (X_all.shape, y_all.shape)
print(shape_report)

Data shape: (567, 28, 28, 1) - Labels shape: (567, 10)


Check class (im)balance: count the number of samples per class.

In [6]:
# Labels are one-hot rows, so the column sums are the per-class sample counts.
np.sum(y_all, axis=0)

array([122,  53,  45,  47,  51,  48,  45,  55,  55,  46])

Class 0 (no digit) is over-represented, so only keep a random subset (about 40%) of the no-digit images.

In [7]:
# Keep every sample that is not class 0, plus roughly 40% of the class-0 ones.
# The draw comes from a dedicated fixed-seed generator so that the same random
# numbers are used after a kernel restart; an unseeded draw would select a
# different subset on every run.
# NOTE: X_all / y_all are overwritten, so re-running this cell without
# re-running the cells above subsamples class 0 a second time.
subsample_rng = np.random.RandomState(0)
mask = (y_all[:, 0] == 0) | (subsample_rng.random_sample(len(y_all)) < 0.4)
X_all, y_all = X_all[mask], y_all[mask]
y_all.sum(axis=0)

array([52, 53, 45, 47, 51, 48, 45, 55, 55, 46])

The next two cells are a temporary workaround: I apparently have two versions of OpenMP installed, and the kernel dies when `model.fit` is called after the `ImageDataGenerator` has been used. Therefore, generate the augmented dataset once, save it to a file, and then restart the kernel. The next time the cell is executed, the dataset is loaded from that file without creating the `ImageDataGenerator`.

In [8]:
# Cache file for the augmented training set.
# NOTE: despite the '.mat' extension this is a pickle file, not a MATLAB file.
augmented_fname = 'augmented.mat'
if os.path.exists(augmented_fname):
    # SECURITY: pickle.load can execute arbitrary code, so only load a cache
    # file that this notebook wrote itself.
    with open(augmented_fname, 'rb') as f:
        X_train, y_train = pickle.load(f)
    print('loaded from', augmented_fname)
else:
    idg = ImageDataGenerator(
        samplewise_center=True,
        samplewise_std_normalization=True,
        rotation_range=10,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.1,
        zoom_range=0.1,
    )

    # NOTE(review): per the Keras docs, fit() is only required for the
    # feature-wise statistics / ZCA options, none of which are enabled here.
    idg.fit(X_all)

    # Show one batch of augmented samples as a 4x6 grid.
    example = next(idg.flow(X_all, batch_size=24))
    plt.figure(figsize=(10, 5))
    for i, img in enumerate(example):
        # every sample is normalized to have zero mean, so binarize around 0
        # for display: positive pixels become 1, negative pixels become 0
        img[img > 0] = 1
        img[img < 0] = 0

        plt.subplot(4, 6, i + 1)
        plt.imshow(img[:, :, 0])
        plt.xticks([])
        plt.yticks([])

    plt.tight_layout()
    plt.show()

    # Collect augmented batches until more than 10000 samples are gathered;
    # the loop is ended explicitly with `break`.
    augmented_dataset_x = []
    augmented_dataset_y = []
    for batch_x, batch_y in idg.flow(X_all, y_all, batch_size=24):
        augmented_dataset_x.extend(batch_x)
        augmented_dataset_y.extend(batch_y)
        if len(augmented_dataset_x) > 10000:
            break

    X_train, y_train = np.array(augmented_dataset_x), np.array(augmented_dataset_y)
    # a bare expression inside a branch is not displayed, so print the shapes
    print('Augmented data shape: %s - Labels shape: %s' % (X_train.shape, y_train.shape))

    with open(augmented_fname, 'wb') as f:
        pickle.dump((X_train, y_train), f)

    print('dumped to %s, please restart the kernel' % augmented_fname)

loaded from augmented.mat


In [9]:
# Per-class sample counts of the augmented training set (column sums of the one-hot labels).
np.sum(y_train, axis=0)

array([ 883, 1089,  925,  968, 1052,  978,  922, 1129, 1133,  945])

In [10]:
l2_strength = 0.0001
# Settings shared by every convolution layer.
conv_kwargs = dict(
    kernel_regularizer=l2(l2_strength),
    activation='relu',
    padding='same',
)
input_shape = X_all[0].shape


def _add_conv_bn(net, filters, **extra):
    '''Append a 3x3 convolution (using conv_kwargs) followed by batch normalization.'''
    net.add(Conv2D(filters, (3, 3), **extra, **conv_kwargs))
    net.add(BatchNormalization())


def _add_pool_bn_dropout(net, rate):
    '''Append 2x2 max pooling, batch normalization and dropout with the given rate.'''
    net.add(MaxPooling2D((2, 2)))
    net.add(BatchNormalization())
    net.add(Dropout(rate))


model = Sequential()
model.add(GaussianNoise(0.025, input_shape=input_shape))

# stage 1: two 8-filter convolutions, then downsample
_add_conv_bn(model, 8, input_shape=input_shape)
_add_conv_bn(model, 8)
_add_pool_bn_dropout(model, 0.25)

# stage 2: two 16-filter convolutions, then downsample
_add_conv_bn(model, 16)
_add_conv_bn(model, 16)
_add_pool_bn_dropout(model, 0.25)

# stage 3: two 32-filter convolutions, then the classification head
_add_conv_bn(model, 32)
_add_conv_bn(model, 32)
model.add(Dropout(0.5))
model.add(GlobalAveragePooling2D())
model.add(Dense(10, activation='softmax', kernel_regularizer=l2(l2_strength)))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gaussian_noise_1 (GaussianNo (None, 28, 28, 1)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 28, 28, 8)         80        
_________________________________________________________________
batch_normalization_1 (Batch (None, 28, 28, 8)         32        
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 28, 28, 8)         584       
_________________________________________________________________
batch_normalization_2 (Batch (None, 28, 28, 8)         32        
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 14, 14, 8)         0         
_________________________________________________________________
batch_normalization_3 (Batch (None, 14, 14, 8)         32        
__________

In [11]:
# Train on the augmented set. Validation uses X_all / y_all, the un-augmented
# images that the augmented set is generated from, so this is not a held-out
# set and the validation metrics are optimistic.
validation_set = (X_all, y_all)
hist = model.fit(
    x=X_train,
    y=y_train,
    epochs=10,
    batch_size=32,
    validation_data=validation_set,
)

Train on 10024 samples, validate on 497 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [12]:
# Class scores of the trained model for every un-augmented image.
preds = model.predict(x=X_all)

In [13]:
# Most likely class per sample, for the predictions and for the one-hot labels.
pred_cls = preds.argmax(axis=1)
true_cls = y_all.argmax(axis=1)

# Confusion counts: rows index the predicted class, columns the true class.
confusion = np.zeros((10, 10))
# np.add.at accumulates repeated (row, column) pairs, like a `+= 1` per sample
np.add.at(confusion, (pred_cls, true_cls), 1)

confusion

array([[45.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 1., 53.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0., 45.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 3.,  0.,  0., 47.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0., 51.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0., 48.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0., 45.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0., 55.,  0.,  0.],
       [ 3.,  0.,  0.,  0.,  0.,  0.,  0.,  0., 55.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., 46.]])

In [14]:
# Persist the trained model; DigitPredictor loads this file by default.
model.save(filepath='model.h5')

In [50]:
class DigitPredictor:
    '''Classify digit crops with a saved Keras model.'''

    def __init__(self, model_fname='model.h5'):
        '''Load the trained model from ``model_fname``.'''
        self.model = load_model(model_fname)

    def predict_digits(self, images):
        '''Predict the digit shown in each image.

        Being trained for sudoku, this does not predict zeros, and can
        tell if the image is not a digit.

        Args:
            images: Iterable of 2-D grayscale arrays accepted by
                ``preprocess_image``.

        Returns:
            A list with one entry per image: the predicted class index when
            it is greater than 0, otherwise ``None`` (class 0 means "not a
            digit"). An empty input yields an empty list.
        '''
        images = list(images)
        if not images:
            # np.array([]) carries no image dimensions, so the model cannot
            # be run on an empty batch
            return []
        batch = np.array([preprocess_image(img) for img in images])
        preds = self.model.predict(batch)
        preds_cls = np.argmax(preds, axis=1)
        return [
            c if c > 0 else None
            for c in preds_cls
        ]

In [51]:
# Reload the saved model and classify every loaded crop; `pp` holds one
# prediction (a class index or None) per image.
predictor = DigitPredictor()
pp = predictor.predict_digits(images)