
# CNN number detector for CompVis assignment 1
Student: adrian.willi@hslu.ch


In [1]:
from numpy import mean
from numpy import std
from matplotlib import pyplot as plt
from sklearn.model_selection import KFold
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import load_model
from numpy import argmax


In [3]:
def load_dataset() -> tuple:
  """
  Loads the MNIST dataset and prepares it for a CNN.

  The images are reshaped to (samples, 28, 28, 1) so that they carry an
  explicit single channel axis, and the labels are one-hot encoded with
  to_categorical. Pixel values are not rescaled here.

  :return: trainX, trainY, testX, testY
  """
  (trainX, trainY), (testX, testY) = mnist.load_data()
  # add the trailing channel axis expected by Conv2D (input_shape=(28, 28, 1))
  trainX = trainX.reshape((trainX.shape[0], 28, 28, 1))
  testX = testX.reshape((testX.shape[0], 28, 28, 1))
  # one-hot encode the labels to match the categorical_crossentropy loss
  trainY = to_categorical(trainY)
  testY = to_categorical(testY)
  return trainX, trainY, testX, testY

In [4]:
def scale_pixels(train, test):
  """
  Converts both image sets to float32 and divides every value by 255.

  :param train: training images
  :param test: testing images
  :return: rescaled training images, rescaled testing images
  """
  scaled_train, scaled_test = (
      images.astype('float32') / 255.0 for images in (train, test)
  )
  return scaled_train, scaled_test

In [10]:
def augment_images(rotation_range=20, zoom_range=0.15, horizontal_flip=False, vertical_flip=False):
  """
  Creates the image data generator used to augment the training images.

  Flips are disabled by default: a mirrored digit keeps its original label
  although it no longer shows that digit (some flipped digits even resemble
  other digits), so flipping feeds mislabelled samples into training.

  :param rotation_range: range for random rotations, passed to ImageDataGenerator
  :param zoom_range: range for random zoom, passed to ImageDataGenerator
  :param horizontal_flip: whether to randomly flip images horizontally
  :param vertical_flip: whether to randomly flip images vertically
  :return: configured ImageDataGenerator
  """
  img_data_generator = ImageDataGenerator(
      rotation_range=rotation_range,
      zoom_range=zoom_range,
      horizontal_flip=horizontal_flip,
      vertical_flip=vertical_flip
  )
  return img_data_generator

In [7]:
def define_model():
  """
  Builds and compiles the CNN.

  Architecture: one 32-filter 3x3 convolution with max pooling, two
  64-filter 3x3 convolutions with max pooling, a 100-unit dense layer and
  a 10-way softmax output. The model is compiled with SGD
  (learning rate 0.01, momentum 0.9), categorical cross-entropy loss and
  the accuracy metric.

  :return: compiled keras model
  """
  layers = [
      Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', input_shape=(28, 28, 1)),
      MaxPooling2D((2, 2)),
      Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform'),
      Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform'),
      MaxPooling2D((2, 2)),
      Flatten(),
      Dense(100, activation='relu', kernel_initializer='he_uniform'),
      Dense(10, activation='softmax'),
  ]
  cnn = Sequential(layers)

  cnn.compile(
      optimizer=SGD(learning_rate=0.01, momentum=0.9),
      loss='categorical_crossentropy',
      metrics=['accuracy'],
  )
  return cnn

In [8]:
# load MNIST as image arrays with a channel axis plus one-hot encoded labels
trainX, trainY, testX, testY = load_dataset()
# convert the images to float32 and divide by 255 (labels are left as they are)
trainX, testX = scale_pixels(trainX, testX)
# build and compile the CNN, then print its layer overview
model = define_model()
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 26, 26, 32)        320       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 13, 13, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 11, 11, 64)        18496     
                                                                 
 conv2d_2 (Conv2D)           (None, 9, 9, 64)          36928     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 4, 4, 64)         0         
 2D)                                                             
                                                                 
 flatten (Flatten)           (None, 1024)              0

In [15]:
batch_size = 32
epochs = 10

# build the augmenting generator and wrap the training data in a batch iterator
img_data_generator = augment_images()
train_batches = img_data_generator.flow(trainX, trainY, batch_size=batch_size)

# NOTE(review): the test split doubles as validation data here, so the
# reported validation metrics are not independent of the test set
model.fit(x=train_batches, epochs=epochs, validation_data=(testX, testY))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f3986149210>

In [16]:
# persist the trained model to "final_model.h5" so it can be reloaded with load_model
model.save("final_model.h5")

## Make a prediction on a new image

In [17]:
def load_image(filename):
  """
  Loads an image file and preprocesses it for the CNN.

  The image is read as single-channel grayscale, resized to 28x28,
  reshaped into a batch of one sample with shape (1, 28, 28, 1),
  converted to float32 and divided by 255 (the same scaling that
  scale_pixels applies to the training data).

  NOTE(review): presumably the digit must be light on a dark background,
  like the training images -- confirm for the images passed in.

  :param filename: path of the image to load
  :return: float32 array of shape (1, 28, 28, 1)
  """
  # color_mode replaces the deprecated `grayscale=True` flag, which newer
  # Keras releases no longer accept
  img = tf.keras.preprocessing.image.load_img(filename, color_mode='grayscale', target_size=(28, 28))
  img = tf.keras.preprocessing.image.img_to_array(img)
  # add the batch axis expected by model.predict
  img = img.reshape(1, 28, 28, 1)
  img = img.astype('float32')
  img = img / 255.0
  return img

In [22]:
# load and preprocess the image to classify
img = load_image('test.png')
# reload the trained model from disk (rebinds the global `model`)
model = load_model('final_model.h5')
# predict the class scores; argmax is called without an axis, so it indexes
# the flattened output -- this equals the class index only for a single image
predict_value = model.predict(img)
digit = argmax(predict_value)
print(f"Predicted digit: {digit}")



Predicted digit: 3
