## MNIST Handwritten Image Classification Dataset

In [3]:
# Load MNIST dataset
from keras.datasets import mnist

# Load dataset
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz


In [7]:
print(train_images.shape)
print(train_labels.shape)
print(test_images.shape)
print(test_labels.shape)

(60000, 28, 28)
(60000,)
(10000, 28, 28)
(10000,)


In [8]:
# Summarize Train pixel values
print('Train min', train_images.min())
print('Train max', train_images.max())
print('Train mean', train_images.mean())
print('Train std', train_images.std())

('Train min', 0)
('Train max', 255)
('Train mean', 33.318421449829934)
('Train std', 78.56748998339798)


In [9]:
# Summarize Test Pixel values
print('Test min', test_images.min())
print('Test max', test_images.max())
print('Test mean', test_images.mean())
print('Test std', test_images.std())

('Test min', 0)
('Test max', 255)
('Test mean', 33.791224489795916)
('Test std', 79.17246322228644)


## Normalise pixel value on MNIST dataset

In [32]:
from keras.preprocessing.image import ImageDataGenerator

# load dataset
(train_x, train_y), (test_x, test_y) = mnist.load_data()
width, height, channel = train_x.shape[1], train_x.shape[2], 1
print('before reshaping', train_x.shape)
print('width', width)
print('height', height)
print('channel', channel)
# reshape the channels to  have a single channel
train_x = train_x.reshape(train_x.shape[0], width, height, channel)
print('after reshaping', train_x.shape)
test_x = test_x.reshape(test_x.shape[0], width, height, channel)
print('test shape', test_x.shape)

# confirm the scale
print("train min is {} and train max is {}".format(train_x.min(), train_x.max()))
print("test min is {} and test max is {}".format(test_x.min(), test_x.max()))

# create a generator to rescale the values in between 0 and 1
datagen = ImageDataGenerator(rescale=1.0/255.0)

# generate batches ofaugmented data using iterators
train_iterator = datagen.flow(train_x, train_y, batch_size=64)
test_iterator = datagen.flow(test_x, test_y, batch_size=64)

print('length of train iterator', len(train_iterator))
print('length of test iterator', len(test_iterator))

# check if scling works
batchX, batchY = train_iterator.next()
print('shape of batchX is {} and min of batchX is {} and max of batchX is {}'.format(batchX.shape, batchX.min(), batchX.max()))

('before reshaping', (60000, 28, 28))
('width', 28)
('height', 28)
('channel', 1)
('after reshaping', (60000, 28, 28, 1))
('test shape', (10000, 28, 28, 1))
train min is 0 and train max is 255
test min is 0 and test max is 255
('length of train iterator', 938)
('length of test iterator', 157)
shape of batchX is (64, 28, 28, 1) and min of batchX is 0.0 and max of batchX is 1.0


## Center Images with ImageDataGenerator

In [45]:
from keras.preprocessing.image import ImageDataGenerator

# Load data
(train_x, train_y),(test_x, test_y) = mnist.load_data()
width, height, channel = train_x.shape[1], train_x.shape[2], 1

#reshape to have a single channel
train_x = train_x.reshape(train_x.shape[0], width, height, channel)
test_x = test_x.reshape(test_x.shape[0], width, height, channel)

# per-image mean
print("train per image mean", train_x.mean())
print('test per image mean', test_x.mean())

# Feature-wise will check for the entire dataset
datagen = ImageDataGenerator(featurewise_center=True)

# calculate the mean on the training datasets
datagen.fit(train_x)

print('mean of dataset', datagen.mean)

# generate the effect on single batch
train_iterator = datagen.flow(train_x, train_y, batch_size=64)

# get a batch
batchX, batchY = train_iterator.next()
print('batchX shape is {} and batchX mean is {}'.format(batchX.shape, batchX.mean()))

# generate the effect on the entire dataset
train_iterator = datagen.flow(train_x, train_y, batch_size=len(train_x), shuffle=False)

# get a batch
batchX, batchY = train_iterator.next()
print('batchX shape after effect on entire dataset is {} and batchX mean is {}'.format(batchX.shape, batchX.mean()))

('train per image mean', 33.318421449829934)
('test per image mean', 33.791224489795916)
('mean of dataset', array([[[33.318447]]], dtype=float32))
batchX shape is (64, 28, 28, 1) and batchX mean is 0.192991495132
batchX shape after effect on entire dataset is (60000, 28, 28, 1) and batchX mean is -1.95129177882e-05


## Standardize Image with ImageDataGenerator

In [51]:
# Load dataset
(train_x, train_y), (test_x, test_y) = mnist.load_data()

width, height, channel = train_x.shape[1], train_x.shape[2], 1

# reshape train datasets
train_x = train_x.reshape(train_x.shape[0], width, height, channel)

# reshape test  datasets
test_x = test_x.reshape(test_x.shape[0], width, height, channel)

print('train mean is {} and train std is {}'.format(train_x.mean(), train_x.std()))

# Create generator
datagen = ImageDataGenerator(featurewise_center=True, featurewise_std_normalization=True)

# calculate the mean of training datasets
datagen.fit(test_x)

print('datagen mean is {} and datagen std is {}'.format(datagen.mean, datagen.std))

# generate the effect on single batch
train_iterator = datagen.flow(train_x, train_y, batch_size=64)

# get a batch
batchX, batchY = train_iterator.next()
print('batchX shape is {} and batchX mean is {} and batchX std is {}'.format(batchX.shape, batchX.mean(), batchX.std()))

# effect on entire dataset
train_iterator = datagen.flow(train_x, train_y, batch_size=len(train_x), shuffle=False)
batchX, batchY = train_iterator.next()
print('for entire dataset batchX shape is {} and batchX mean is {} and batchX std is {}'.format(batchX.shape, batchX.mean(), batchX.std()))


train mean is 33.3184214498 and train std is 78.5674899834
datagen mean is [[[33.79124]]] and datagen std is [[[79.172455]]]
batchX shape is (64, 28, 28, 1) and batchX mean is 0.00768111879006 and batchX std is 1.00811982155
for entire dataset batchX shape is (60000, 28, 28, 1) and batchX mean is -0.0059720184654 and batchX std is 0.992359101772
