In [1]:
import tensorflow as tf
from tensorflow import keras

In [2]:
print(tf.__version__)

2.0.0-rc1


In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [4]:
# Alias for the Keras dataset module. Despite the short name, this is Fashion-MNIST
# (clothing images), not the original handwritten-digit MNIST.
mnist = keras.datasets.fashion_mnist

In [5]:
type(mnist)

module

In [6]:
# Load the predefined train/test split: x_* hold the images, y_* hold the integer class labels.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [7]:
x_train.shape, y_train.shape

((60000, 28, 28), (60000,))

# Input Data
Pixel values are integers between 0 and 255 because each image is 8-bit grayscale (a single intensity channel, not RGB). Each image is 28x28, so the input shape is (28, 28).

[Further information about dataset](https://github.com/zalandoresearch/fashion-mnist)

In [8]:
# Human-readable name for each class; the position in the list is meant to match
# the dataset's integer label (0-9), following the order given in the Fashion-MNIST README.
class_names = [
    'top',         # 0
    'trouser',     # 1
    'pullover',    # 2
    'dress',       # 3
    'coat',        # 4
    'sandal',      # 5
    'shirt',       # 6
    'sneaker',     # 7
    'bag',         # 8
    'ankle boot',  # 9
]

The neural network we train later on works best when its inputs are small, similarly scaled values, so we rescale the pixel values from the range 0-255 to the range 0-1.

In [9]:
# Scale pixel intensities from [0, 255] down to [0, 1].
# The max() guards make this cell idempotent: without them, re-running the cell
# would divide already-scaled data by 255 a second time and silently shrink the inputs.
if x_train.max() > 1.0:
    x_train = x_train / 255.0
if x_test.max() > 1.0:
    x_test = x_test / 255.0

# Building Model

In [10]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Flatten, Dense

### Model Description

- flatten layer: transforms data into 1-dimension so we can feed it into the next layer
- dense layer-1: units(neurons)=128. 
- dense layer-2: this is the output layer, thus the number of units must equal the number of possible classes(10).

TODO: understand when to use which activation function

In [11]:
# Fully-connected classifier: 28x28 image -> flat 784-vector -> 128 ReLU units -> 10-way softmax.
model = Sequential([
    Flatten(input_shape=(28, 28)),
    Dense(128, activation='relu'),
    Dense(10, activation='softmax'),
])

In [12]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 128)               100480    
_________________________________________________________________
dense_1 (Dense)              (None, 10)                1290      
Total params: 101,770
Trainable params: 101,770
Non-trainable params: 0
_________________________________________________________________


### model compilation
- optimizer: how the NN updates its weights based on the value returned from the loss function
- loss function: calculates the difference between the predicted and target values
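- metrics: quantities reported during training and evaluation. They are only monitored, not optimized (the optimizer minimizes the loss). In our case we track the accuracy of predictions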

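epochs: the number of full passes over the training data. With 10 epochs the NN sees every training sample 10 times; the weights are updated after every mini-batch, i.e. many times per epoch.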

In [18]:
# Configure training: Adam optimizer, cross-entropy loss for integer (non one-hot) labels,
# and accuracy reported as the monitored metric.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [19]:
# Train for 10 full passes over the training set.
model.fit(x=x_train, y=y_train, epochs=10)

Train on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x14a25e9b0>

In [20]:
# Measure loss and accuracy on the held-out test set (data not used for training).
test_loss, test_acc = model.evaluate(x=x_test, y=y_test)



In [21]:
print(test_acc)

0.882


# Evaluation result
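This NN has an 88% accuracy with the given test data. Note that the figures quoted on the Github page (about 99% with a CNN, and 97% easily with classical ML algorithms) refer to the original MNIST digits dataset; Fashion-MNIST was created as a harder drop-in replacement, so those numbers are not directly comparable to this result.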

In [23]:
from sklearn.metrics import accuracy_score

In [25]:
# Predicted class index for every test image.
# `Sequential.predict_classes` was deprecated and then removed in TensorFlow 2.6;
# taking the argmax over the softmax probabilities is the equivalent, version-independent form.
y_pred = np.argmax(model.predict(x_test), axis=-1)

### Same result as model.evaluate

In [27]:
accuracy_score(y_test, y_pred)

0.882

In [33]:
y_pred

array([9, 2, 1, ..., 8, 1, 5])

In [34]:
y_pred.shape

(10000,)

# Difference between .predict and .predict_classes

### .predict_classes
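returns a 1-D array with one entry per input sample. Each element is the predicted class index for the corresponding input. Note: `.predict_classes` was deprecated and removed in TensorFlow 2.6; the equivalent is `np.argmax(model.predict(x), axis=-1)`.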

### .predict
returns a NumPy array of shape (num_samples, num_classes) where each row holds the predicted probabilities (the softmax output) that the given input belongs to each class. The index with the highest probability is the predicted class returned by .predict_classes.

In [35]:
# Raw output of the softmax layer: one row of 10 class probabilities per test image.
pred = model.predict(x_test)

In [36]:
pred.shape

(10000, 10)

In [37]:
pred[0]

array([7.1728257e-13, 1.6329113e-14, 1.1007589e-09, 1.9908847e-12,
       2.5530683e-15, 8.6576118e-05, 1.3613730e-09, 7.0085312e-04,
       3.2584606e-12, 9.9921250e-01], dtype=float32)

In [38]:
np.argmax(pred[0])

9