### Improving fashion MNIST with convolutions (convolutional neural networks)

In [1]:
import tensorflow as tf
import matplotlib.pyplot as plt

In [2]:
class myCallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs = {}):
        if (logs.get('acc') > 0.998):
            print('Accuracy is above 99.8%, training will be stopped')
            self.model.stop_training = True
            
callbacks = myCallback()

In [3]:
mnist = tf.keras.datasets.mnist
(training_images, training_labels), (test_images, test_labels) = mnist.load_data()

training_images, test_images = training_images.reshape(60000, 28, 28, 1), test_images.reshape(10000, 28, 28, 1)
training_images, training_labels = training_images/255.0, training_labels/255.0

# CNN
### Conv2D is basically the first convolution, 32 filters, each 3 by 3, relu = ignore negative values, input shape is 28 by 28 as reshaped earlier. The additional 1 is just using a single byte for color depth (grayscale)

### maxpooling means taking the maximum (vs average pooling) 2 by 2 (means 4 pixels, take max), reduced size, then flatten to fit into nn

In [4]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, (3,3), activation = 'relu', input_shape = (28, 28, 1)),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation = 'relu'),
    tf.keras.layers.Dense(10, activation = 'softmax')
])

Instructions for updating:
Colocations handled automatically by placer.


### Noted the shape of 26, 26 in conv, filter is 3 by 3, so theres a difference in 2 (since no neighbours to use filter) = cannot use margin around the picture. resolution is havled into 13x13

In [5]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 13, 13, 32)        0         
_________________________________________________________________
flatten (Flatten)            (None, 5408)              0         
_________________________________________________________________
dense (Dense)                (None, 128)               692352    
_________________________________________________________________
dense_1 (Dense)              (None, 10)                1290      
Total params: 693,962
Trainable params: 693,962
Non-trainable params: 0
_________________________________________________________________


In [6]:
model.compile(optimizer = 'adam', loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'])

#### noted that if i double the filter (64 x 64), time taken to train data is longer, almost twice as long

#### too many epochs may cause overfitting of training sets

In [7]:
model.fit(training_images, training_labels, epochs = 10, callbacks = [callbacks])

Epoch 1/10


<tensorflow.python.keras.callbacks.History at 0x1b58784a358>

In [8]:
model.evaluate(test_images, test_labels)



[14.538523742675782, 0.098]