In [0]:
from __future__ import absolute_import, division, print_function, unicode_literals
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 1.x
except Exception:
  pass
import tensorflow as tf

from tensorflow.keras import datasets, layers, models

In [0]:
from sklearn.model_selection import train_test_split
import cv2
import numpy as np
from sklearn.metrics import accuracy_score
from keras.layers import Dense, Dropout, Flatten
import keras

Using TensorFlow backend.


In [0]:
# Mount Google Drive at /content/drive so the dataset files stored under
# "My Drive" can be read by path. Mounting is interactive (it asks for an
# authorization code).
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
# Human-readable class names: the ten decimal digits as strings.
digit_labels = [str(digit) for digit in range(10)]

# Processing images

In [0]:
def load(images_filename, labels_filename, header_bytes=128, image_shape=(50, 50)):
    """Read image and label files as raw ``uint8`` bytes, skipping a header.

    Both files are read in full with ``np.fromfile`` as flat ``uint8`` arrays.
    The first ``header_bytes`` bytes of each file are discarded and the rest
    is treated as the payload.

    NOTE(review): the callers pass ``.npy`` files, so the skipped bytes are
    presumably the ``.npy`` header; its size is not guaranteed to be 128 for
    every file -- confirm, or pass ``header_bytes`` explicitly.

    Args:
        images_filename: Path of the file holding the image bytes.
        labels_filename: Path of the file holding one label byte per image.
        header_bytes: Number of leading bytes to drop from each file.
            Defaults to 128, the value that used to be hard-coded.
        image_shape: ``(height, width)`` of a single image. Defaults to
            ``(50, 50)``, the value that used to be hard-coded.

    Returns:
        Tuple ``(images, labels)``: ``images`` is a ``uint8`` array of shape
        ``(-1, height, width)`` and ``labels`` is a flat ``uint8`` array.

    Raises:
        ValueError: If the image payload is not a whole number of images
            (raised by ``np.reshape``).
    """
    images = np.fromfile(images_filename, dtype=np.uint8)[header_bytes:]
    labels = np.fromfile(labels_filename, dtype=np.uint8)[header_bytes:]
    return np.reshape(images, (-1,) + tuple(image_shape)), labels

In [0]:
print("Loading images and labels ...")
images, labels = load(
    '/content/drive/My Drive/Colab Notebooks/digit_images.npy',
    '/content/drive/My Drive/Colab Notebooks/digit_labels.npy',
)
# Append a trailing channel axis: (N, 50, 50) -> (N, 50, 50, 1).
images = images[..., np.newaxis]
print("Images and labels loaded!!! Done! \n")

Loading images and labels ...
Images and labels loaded!!! Done! 



# Data augmentation

In [0]:
def augment_data(dataset, dataset_labels, augementation_factor=1, use_random_rotation=True,
                 use_random_shear=True, use_random_shift=True, use_random_zoom=True):
    """Build an augmented copy of an image set together with matching labels.

    For every image, ``augementation_factor`` rounds are performed. Each round
    appends the unmodified image followed by one output of every enabled
    transform, in this order: rotation (argument 20), shear (0.2), shift
    (0.2, 0.2) and zoom (range ``(0.9, 0.9)``). Every appended sample carries
    the label of the image it came from.

    Notes:
        * The unmodified image is appended once per round, so it appears
          ``augementation_factor`` times in the result.
        * Both bounds of the zoom range are 0.9.
        * The image index is printed every 1000 images as a progress marker.

    Args:
        dataset: Array of images indexed along axis 0; each image is passed to
            the transforms with rows on axis 0, columns on axis 1 and channels
            on axis 2.
        dataset_labels: Labels indexable by the same positions as ``dataset``.
        augementation_factor: Number of rounds per image.
        use_random_rotation: Enable the rotation transform.
        use_random_shear: Enable the shear transform.
        use_random_shift: Enable the shift transform.
        use_random_zoom: Enable the zoom transform.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays built from the collected
        samples and their labels.
    """
    axes = {'row_axis': 0, 'col_axis': 1, 'channel_axis': 2}

    # Enabled transforms in application order. The lambdas defer the lookup of
    # ``tf`` until a transform is actually applied.
    transforms = []
    if use_random_rotation:
        transforms.append(
            lambda img: tf.keras.preprocessing.image.random_rotation(img, 20, **axes))
    if use_random_shear:
        transforms.append(
            lambda img: tf.keras.preprocessing.image.random_shear(img, 0.2, **axes))
    if use_random_shift:
        transforms.append(
            lambda img: tf.keras.preprocessing.image.random_shift(img, 0.2, 0.2, **axes))
    if use_random_zoom:
        transforms.append(
            lambda img: tf.keras.preprocessing.image.random_zoom(img, (0.9, 0.9), **axes))

    samples = []
    sample_labels = []
    for index in range(dataset.shape[0]):
        # Progress marker.
        if index % 1000 == 0:
            print(index)

        for _ in range(augementation_factor):
            image = dataset[index]
            label = dataset_labels[index]

            # The untouched image goes in first ...
            samples.append(image)
            sample_labels.append(label)

            # ... followed by one sample per enabled transform.
            for transform in transforms:
                samples.append(transform(image))
                sample_labels.append(label)

    return np.array(samples), np.array(sample_labels)

In [0]:
# Sanity check: images should be (N, 50, 50, 1) after the channel axis was added.
print(images.shape)

(25410, 50, 50, 1)


In [0]:
# Sanity check: labels should be a flat array with one entry per image.
print(labels.shape)

(25410,)


# Train / validation / test split

In [0]:
# The number of images and the number of labels must match before splitting.
print(len(images),len(labels))

25410 25410


In [0]:
# Hold out 20% of all samples as the final test set (fixed seed for repeatability).
(train_images1, test_images,
 train_labels1, test_labels) = train_test_split(
    images, labels, test_size=0.2, random_state=42)

# Split another 20% off the remainder to serve as the validation set.
(train_images, X_val,
 train_labels, y_val) = train_test_split(
    train_images1, train_labels1, test_size=0.2, random_state=42)

In [0]:
# Sizes of the training and test partitions (images and labels should agree).
print(len(train_images), len(train_labels), len(test_images), len(test_labels))

16262 16262 5082 5082


In [0]:
# Rebuild each label set as its own NumPy array.
train_labels, test_labels, y_val = (
    np.array(label_set) for label_set in (train_labels, test_labels, y_val)
)

In [0]:
%%time
# Augment the training partition only; validation and test data stay untouched.
# With augementation_factor=5 and all four transforms enabled, every input
# image yields 5 * (1 + 4) = 25 samples.
# NOTE(review): this cell overwrites train_images/train_labels with their
# augmented versions, so running it again without re-running the split cell
# augments already-augmented data.
train_images, train_labels = augment_data(train_images, train_labels, augementation_factor=5)

0
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
14000
15000
16000
CPU times: user 1min 22s, sys: 5.88 s, total: 1min 28s
Wall time: 1min 28s


In [0]:
# Scale the pixel values by 1/255. The result is written directly as float32:
# dividing the uint8 data by the Python float 255.0 would produce float64,
# which doubles the memory needed for the augmented training set.
# NOTE(review): this cell overwrites its inputs, so running it twice divides
# by 255 twice; re-run the split/augmentation cells before re-running it.
train_images = np.divide(train_images, 255.0, dtype=np.float32)
test_images = np.divide(test_images, 255.0, dtype=np.float32)
X_val = np.divide(X_val, 255.0, dtype=np.float32)

In [0]:
# Number of training samples after augmentation.
print(len(train_images))

406550


# Train CNN

In [0]:
# Convolutional feature extractor for 50x50 single-channel inputs: three 3x3
# ReLU convolutions (32, 64, 64 filters) with 2x2 max-pooling after the first two.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(50, 50, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
])

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


In [0]:
# Classifier head: flatten the feature maps, one hidden dense layer, then a
# softmax output layer.
# NOTE(review): the output layer has 34 units while digit_labels lists only
# 10 classes -- confirm whether 34 is intentional.
for head_layer in (
    layers.Flatten(),
    layers.Dense(80, activation='relu'),
    layers.Dense(34, activation='softmax'),
):
    model.add(head_layer)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 48, 48, 32)        320       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 24, 24, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 22, 22, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 11, 11, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 9, 9, 64)          36928     
_________________________________________________________________
flatten (Flatten)            (None, 5184)              0         
_________________________________________________________________
dense (Dense)                (None, 80)                4

In [0]:
# Integer class labels are used directly, hence the sparse categorical loss.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [0]:
%%time
# Train for 5 epochs on the augmented training set, validating on X_val/y_val
# after every epoch.
# NOTE(review): batch_size=8 means a very large number of steps per epoch for
# this training-set size; a larger batch would presumably shorten the run --
# confirm the small batch is intentional.
model.fit(train_images, train_labels, batch_size=8, epochs=5, validation_data=(X_val, y_val))

Train on 406550 samples, validate on 4066 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
CPU times: user 3h 49min 45s, sys: 21min 43s, total: 4h 11min 29s
Wall time: 2h 26min 41s


<tensorflow.python.keras.callbacks.History at 0x7fd5fd3eca20>

In [0]:
%%time
# Evaluate on the held-out test set; with metrics=['accuracy'] the call
# returns the loss followed by the accuracy.
test_loss, test_acc = model.evaluate(test_images, test_labels)
print(test_acc)

0.9866194
CPU times: user 8.57 s, sys: 184 ms, total: 8.75 s
Wall time: 4.89 s


In [0]:
# Persist the trained model to Google Drive as an HDF5 file.
model.save('/content/drive/My Drive/Colab Notebooks/digitsFF.h5')
print("Model saved")

Model saved


In [27]:
%%time
# Continue training the same model for one additional epoch.
# NOTE(review): this runs after the model.save(...) cell, so the file on Drive
# does not contain this extra epoch unless the model is saved again.
model.fit(train_images, train_labels, batch_size=8, epochs=1, validation_data=(X_val, y_val))

Train on 406550 samples, validate on 4066 samples
CPU times: user 47min 2s, sys: 6min 50s, total: 53min 52s
Wall time: 31min 50s


<tensorflow.python.keras.callbacks.History at 0x7fd5f24e6d30>

In [28]:
%%time
# Re-evaluate on the test set after the additional training epoch.
test_loss, test_acc = model.evaluate(test_images, test_labels)
print(test_acc)

0.985242
CPU times: user 8.53 s, sys: 153 ms, total: 8.68 s
Wall time: 4.87 s
