In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import math

In [None]:
# Load the Fashion-MNIST dataset through TensorFlow Datasets.

import tensorflow_datasets as tfds
tfds.disable_progress_bar()

# as_supervised=True makes each element an (image, label) pair;
# with_info=True additionally returns the dataset metadata object.
dataset, metadata = tfds.load(
    'fashion_mnist',
    as_supervised=True,
    with_info=True,
)
train_dataset = dataset['train']
test_dataset = dataset['test']

# Human-readable class names, looked up from the metadata of the 'label' feature.
class_names = metadata.features['label'].names

In [None]:
# Rescale pixel values from the [0, 255] range to the [0, 1] range.

def normalize(image, labels):
    """Cast `image` to float32 and divide by 255; `labels` are returned unchanged."""
    images = tf.cast(image, tf.float32)
    images /= 255
    return images, labels

# Dataset.map() returns a NEW dataset instead of modifying the existing one,
# so the result must be assigned back for the normalization to take effect.
# NOTE: because the names are reassigned, re-running this cell on its own
# would apply the scaling a second time; re-run from the loading cell instead.
train_dataset = train_dataset.map(normalize)
test_dataset = test_dataset.map(normalize)

# Cache the datasets so that later passes do not fetch from disk again and again.
train_dataset = train_dataset.cache()
test_dataset = test_dataset.cache()

In [None]:
# Take a single (image, label) example from the test set for inspection.
for image, label in test_dataset.take(1):
    break

# Drop the channel axis so imshow receives a plain 2-D (28, 28) array.
image = image.numpy().reshape((28, 28))

plt.figure()  # start a new, empty figure; imshow below draws the axes into it
plt.imshow(image, cmap=plt.cm.binary)  # the colormap keyword is `cmap` (`map` is not an imshow argument)
plt.colorbar()
plt.grid(False)
plt.show()

In [None]:
# Sanity check: draw the first 25 training images in a 5x5 grid, each
# captioned with its class name.
#
# Every image is reshaped to a 2-D 28x28 array (dropping the channel axis)
# so that it can be rendered as a grayscale picture.

plt.figure(figsize=(10, 10))
samples = train_dataset.take(25)
for slot, (image, label) in enumerate(samples, start=1):
    image = image.numpy().reshape((28, 28))
    plt.subplot(5, 5, slot)  # subplot positions are 1-based
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(image, cmap=plt.cm.binary)
    plt.xlabel(class_names[label])
plt.show()

In [None]:
# Network architecture
#   * Flatten        - turns each (28, 28, 1) input into a flat vector
#                      (flattening is explained in readme.md).
#   * Dense(128)     - hidden layer; ReLU is used because it is a non-linear function.
#   * Dense(10)      - output layer; softmax is used because we need a
#                      probability distribution over the outputs.

flatten_layer = tf.keras.layers.Flatten(input_shape=(28, 28, 1))
hidden_layer = tf.keras.layers.Dense(128, activation=tf.nn.relu)
output_layer = tf.keras.layers.Dense(10, activation=tf.nn.softmax)

model = tf.keras.Sequential([flatten_layer, hidden_layer, output_layer])

model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)

In [None]:
# Input pipeline and training.
#
# * shuffle(): the dataset is shuffled so the model cannot learn from the
#   order of the samples. It is applied BEFORE repeat() so that every example
#   of one epoch is seen before the next epoch starts.
# * repeat(): makes the dataset loop forever, so steps_per_epoch tells fit()
#   where each epoch ends and `epochs` bounds the total amount of training.
# * epochs=5: five full passes over the training set
#   (60,000 * 5 examples according to the original note).
# * batch(): examples are processed in batches of BATCH_SIZE.
#
# The datasets were already cached earlier in the notebook, so cache() is not
# repeated here.
# NOTE: this cell reassigns train_dataset / test_dataset; re-running it on its
# own would batch the already-batched datasets a second time.

BATCH_SIZE = 32

# Size of the training split, read from the dataset metadata. It is used for
# the shuffle buffer (a full-dataset shuffle) and for steps_per_epoch.
num_train_examples = metadata.splits['train'].num_examples

train_dataset = train_dataset.shuffle(num_train_examples).repeat().batch(BATCH_SIZE)
test_dataset = test_dataset.batch(BATCH_SIZE)

model.fit(train_dataset, epochs=5, steps_per_epoch=math.ceil(num_train_examples / BATCH_SIZE))

In [None]:
# test_dataset is finite and already batched, so evaluate() can simply run
# until the dataset is exhausted; no explicit `steps` count (and therefore no
# example count or hard-coded batch size) is needed.
test_loss, test_accuracy = model.evaluate(test_dataset)
print('Accuracy on test dataset:', test_accuracy)

In [None]:
# Pull the first batch out of the test set and run the model on it.
first_batch = next(iter(test_dataset.take(1)))
test_images = first_batch[0].numpy()
test_labels = first_batch[1].numpy()
predictions = model.predict(test_images)

print(predictions.shape)            # one row of scores per image in the batch
print(predictions[0])               # scores for the first image
print(np.argmax(predictions[0]))    # index of the highest score = predicted class
print(test_labels[0])               # label stored in the dataset for the same image