Trains a simple convnet on a small subset (the first 1,000 images) of the MNIST dataset using kerasy.

In [1]:
import numpy as np

from kerasy.datasets import mnist
from kerasy.models import Sequential
from kerasy.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, Input
from kerasy.utils import CategoricalEncoder

In [2]:
# Tunable settings for this notebook, gathered in one place.

# Data: number of target classes, and how many samples are kept from each of
# the train and test splits further down.
num_classes = 10
n_samples = 1000

# Optimisation: mini-batch size and number of passes handed to `model.fit`.
batch_size, epochs = 16, 12

# Regularisation: `keep_prob` arguments for the first and second Dropout
# layers of the model, respectively.
keep_prob1, keep_prob2 = 0.75, 0.5

In [3]:
# Geometry of one input image: square, 28 pixels per side, with a single
# trailing channel axis (height, width, channels).
img_rows = img_cols = 28
input_shape = (img_rows, img_cols, 1)

In [4]:
# Load MNIST; `load_data()` yields a (train, test) pair, each of which is an
# (images, labels) pair.
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [5]:
# Give every image an explicit trailing channel axis so it matches `input_shape`:
# (n_images, img_rows, img_cols) -> (n_images, img_rows, img_cols, 1)
#
# `np.reshape` with the target shape is used instead of `np.expand_dims` so the
# cell is idempotent: re-running it on an already 4-D array leaves the shape
# unchanged rather than appending yet another axis.  It also fails loudly if
# the images do not have the expected `input_shape` number of pixels.
# `astype` then returns a float32 copy.
x_train = np.reshape(x_train, (-1, *input_shape)).astype('float32')
x_test  = np.reshape(x_test,  (-1, *input_shape)).astype('float32')

In [6]:
# Shrink both splits to their first `n_samples` examples; images and labels
# are cut with the same slice so they stay aligned.
head = slice(n_samples)
x_train, y_train = x_train[head], y_train[head]
x_test, y_test = x_test[head], y_test[head]

In [7]:
# Rescale pixel intensities by 1/255 (to [0, 1], assuming the raw values are
# 0-255 -- TODO confirm).  This is a plain rescaling, not a zero-mean /
# unit-variance standardization.
# NOTE: the division is in place, so re-running this cell scales the data again.
for images in (x_train, x_test):
    images /= 255

In [8]:
# Turn the class-label vectors into one-hot matrices with `num_classes`
# columns, using one shared encoder for both splits (train first, then test).
encoder = CategoricalEncoder()
y_train, y_test = [
    encoder.to_onehot(labels, num_classes) for labels in (y_train, y_test)
]

Dictionaly for Encoder is already made.


In [9]:
# Sanity check: report array shapes and the number of examples per split.
print(
    f'x_train shape: {x_train.shape}',
    f'y_train shape: {y_train.shape}',
    f'{x_train.shape[0]} train samples',
    f'{x_test.shape[0]} test samples',
    sep='\n',
)

x_train shape: (1000, 28, 28, 1)
y_train shape: (1000, 10)
1000 train samples
1000 test samples


In [10]:
# Assemble the convnet: two 3x3 convolutions, 2x2 max-pooling, then a dense
# classifier head, with dropout after the pooling and after the hidden layer.
# Layers are created and added in exactly this order.
model = Sequential()
for layer in (
    Input(input_shape=input_shape),
    # Feature extractor.
    Conv2D(32, kernel_size=(3, 3), activation='relu'),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(keep_prob=keep_prob1),
    # Classifier head.
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(keep_prob=keep_prob2),
    Dense(num_classes, activation='softmax'),  # one output per class
):
    model.add(layer)

In [11]:
# Configure training: SGD optimizer, categorical cross-entropy loss, and
# categorical accuracy as the reported metric.
# NOTE: per the message kerasy prints for this cell, combining this loss with a
# softmax output makes it use SoftmaxCategoricalCrossentropy instead.
model.compile(
    loss='categorical_crossentropy',
    optimizer='sgd',
    metrics=['categorical_accuracy'],
)

------------------------------------------------------------
When calculating the [34mCategoricalCrossentropy[0m loss and the derivative of the [34mSoftmax[0m layer, the gradient disappears when backpropagating the actual value, so the [34mSoftmaxCategoricalCrossentropy[0m is implemented instead.
------------------------------------------------------------
  "so the \033[34mSoftmaxCategoricalCrossentropy\033[0m is implemented instead.\n" + '-'*60)


In [12]:
# Print the per-layer table of output shapes and parameter counts.
model.summary()

-----------------------------------------------------------------
Layer (type)                 Output Shape              Param #   
input_1 (Input)              (None, 28, 28, 1)         0         
-----------------------------------------------------------------
conv2d_1 (Conv2D)            (None, 26, 26, 32)        320       
-----------------------------------------------------------------
conv2d_2 (Conv2D)            (None, 24, 24, 64)        18496     
-----------------------------------------------------------------
maxpooling2d_1 (MaxPooling2D (None, 12, 12, 64)        0         
-----------------------------------------------------------------
dropout_1 (Dropout)          (None, 12, 12, 64)        0         
-----------------------------------------------------------------
flatten_1 (Flatten)          (None, 9216)              0         
-----------------------------------------------------------------
dense_1 (Dense)              (None, 128)               1179776   
----------

In [13]:
# Train for `epochs` passes in mini-batches of `batch_size`, evaluating on the
# (subsampled) test split after each epoch; verbose=1 shows per-epoch progress.
# NOTE: in the recorded run each epoch took roughly 1000-1450 s.
model.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)

Epoch 01/12 | 63/63[####################]100.00% - 1441.164[s]   [01mcategorical_accuracy[0m: [34m10.2%[0m, [01mcategorical_crossentropy[0m: [34m2302.703[0m, [01mval_categorical_accuracy[0m: [34m16.3%[0m, [01mval_categorical_crossentropy[0m: [34m2298.746[0m
Epoch 02/12 | 63/63[####################]100.00% - 1051.074[s]   [01mcategorical_accuracy[0m: [34m12.9%[0m, [01mcategorical_crossentropy[0m: [34m2298.568[0m, [01mval_categorical_accuracy[0m: [34m18.4%[0m, [01mval_categorical_crossentropy[0m: [34m2293.045[0m
Epoch 03/12 | 63/63[####################]100.00% - 1026.383[s]   [01mcategorical_accuracy[0m: [34m17.4%[0m, [01mcategorical_crossentropy[0m: [34m2295.509[0m, [01mval_categorical_accuracy[0m: [34m18.4%[0m, [01mval_categorical_crossentropy[0m: [34m2288.278[0m
Epoch 04/12 | 63/63[####################]100.00% - 1016.532[s]   [01mcategorical_accuracy[0m: [34m15.5%[0m, [01mcategorical_crossentropy[0m: [34m2292.625[0m, [01mval_cat