In [1]:
import tensorflow as tf
from tensorflow import keras

print(tf.__version__, keras.__version__)

2.12.0 2.12.0


### Downloading Fashion mnist data

In [2]:
fashion_mnist = keras.datasets.fashion_mnist
(X_train_full, y_train_full), (X_test, y_test) = fashion_mnist.load_data()

print(X_train_full.shape, X_test.shape)

(60000, 28, 28) (10000, 28, 28)


### Create Validation Data Set

Also in addition to that do a very basic scaling (ie. dividing data by 255)

In [3]:
X_train, X_valid  = X_train_full[5000:] / 255.0, X_train_full[:5000] / 255.0
y_train, y_valid  = y_train_full[5000:], y_train_full[:5000]

print(X_train.shape, X_valid.shape)

(55000, 28, 28) (5000, 28, 28)


In [4]:
class_names = ["T-shirt/pop", "Trouser", "Pullover", "Dresss", "Coat",
               "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot"]

## Building Keras Model

In [5]:
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
model.add(keras.layers.Dense(300, activation = "relu"))
model.add(keras.layers.Dense(100, activation = "relu"))
model.add(keras.layers.Dense(10, activation = "softmax"))

In [6]:
## Same Effect as:
model = keras.models.Sequential([
        keras.layers.Flatten(input_shape=[28, 28]),
        keras.layers.Dense(300, activation = "relu"),
        keras.layers.Dense(100, activation = "relu"),
        keras.layers.Dense(10, activation = "softmax")
])

# Building PyTorch Model

In [36]:
import torch
import torch.nn as nn
import torch.optim as optim
from pytorchsummary import summary as pt_summary
print(torch.__version__)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(28*28, 300)
        self.fc2 = nn.Linear(300, 100)
        self.fc3 = nn.Linear(100, 10)

    def forward(self, x):
        x = self.flatten(x)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = nn.functional.softmax(self.fc3(x))
        return x

p_model = Net()
optimiser = optim.SGD(p_model.parameters(), lr = 0.01)
criterion = nn.CrossEntropyLoss()

num_epochs = 10
label = torch.tensor(y_train)
input = torch.tensor(X_train).to(torch.float32)


for epoch in range(num_epochs):
    # Forward pass
    outputs = p_model(input)
    loss = criterion(outputs, label)

    # Backward and optimize
    optimiser.zero_grad()
    loss.backward()
    optimiser.step()
    print(loss)
    
pt_summary((1, 28, 28), p_model)

2.0.0+cpu


  x = nn.functional.softmax(self.fc3(x))


tensor(2.3024, grad_fn=<NllLossBackward0>)
tensor(2.3024, grad_fn=<NllLossBackward0>)
tensor(2.3024, grad_fn=<NllLossBackward0>)
tensor(2.3024, grad_fn=<NllLossBackward0>)
tensor(2.3024, grad_fn=<NllLossBackward0>)
tensor(2.3024, grad_fn=<NllLossBackward0>)
tensor(2.3024, grad_fn=<NllLossBackward0>)
tensor(2.3024, grad_fn=<NllLossBackward0>)
tensor(2.3024, grad_fn=<NllLossBackward0>)
tensor(2.3023, grad_fn=<NllLossBackward0>)
               Layer	Output Shape        	    Kernal Shape    	#params             	#(weights + bias)   	requires_grad
------------------------------------------------------------------------------------------------------------------------------------------------------
           Flatten-1	[1, 784]            	                    	                    	                    	          
            Linear-2	[1, 300]            	     [300, 784]     	235500              	(235200 + 300)      	True True 
            Linear-3	[1, 100]            	     [100, 300]     	30100

(266610, 266610, 0)

### Print Model Summary
The model summary can be easily viewed by calling **summary()** function. The number of params in Dense Layer is:

- Flatten is basically doing np.reshape(-1, 28*28). Changing the dimention of the input observation to 2 dimensions (square to a line)
- Dense is a layer of fully connected neurons
- The final layer consists of 10 class as output. Placing a softmax at the end. (using argmax to find the predicted class.

#Neurons from previous layer times #Neurons of the current layer + #Neurons of the current layer (biases)

Additionally each layer can be accessed via **layers** property.

Weights can be accessed via **get_weights()** function for each of the layer



In [47]:
a = [1,2,3]
b = [4,5,6]

x = zip(a,b)
for a,b in x:
    print(a,b)

1 4
2 5
3 6


In [62]:
weights, bias = model.layers[1].get_weights()
print(weights.shape, bias.shape)

(784, 300) (300,)


### Compiling Model

Before model can be trained, it needs to be compiled. The key arguments are:

- loss functions: for multiclass use sparse_categorical_crossentropy, for binar class, we can use catagorical_crossentroy. (https://keras.io/loss for more details (equivalent to tf.keras.loss.sparse_categorical_crossentropy)
- optimizer: for example use SGD, Adam (https://keras.io/optimizers
- merics: can specify "accuracy" which is equivalent to tf.keras.metrics.sparse_categorical_accuracy

Also note the dense layers are automatically initialised with random weight (0 bias), there maybe different initialisation should there be problem with converging (due to vanishing gradients) https://keras.io/initializers

In [63]:
# both statements are the same

model.compile(optimizer='sgd', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
#model.compile(optimizer=tf.keras.optimizers.SGD(lr = 0.3), 
#              loss = tf.keras.losses.sparse_categorical_crossentropy, 
#              metrics = [tf.keras.metrics.sparse_top_k_categorical_accuracy])

### Training

**Validation**
Instead passing a validation dataset, we can also pass in a **validation_split** argument. (For example, validation_split = 0.1 tells Keras to use at least 10% of data for validation)

**Class Weight**
If the classes are skewed, we can use class_weight argument which will give larger weight to underrepresented classes and a lower weight to overrepresented classes. These weigts are used to caculate loss. 

Can use also sample_weight for per-instance weights. This could be useful if some instances were labeled by *experts* and other classes are labeled via *crowd sourcing* in this case more weight should be in the former. 

If both weights are specified, they will be mulitiplied.

**batch_size**
batch_side by default is 32.

**evaluate**
The model can simply evaluated via a evaluate call.

In [65]:
NUM_EPOCHS = 10
print(X_train.shape)
history = model.fit(X_train, y_train, epochs=NUM_EPOCHS, validation_data=(X_valid, y_valid), batch_size = 32)
#history = model.fit(X_train_full, y_train_full, epochs=NUM_EPOCHS, validation_split = 0.2)

(55000, 28, 28)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


### History Object

Fit function returns a history object

In [12]:
import pandas as pd
import matplotlib.pyplot as plt
pd.DataFrame(history.history).plot(figsize=(8,5))
plt.grid(True)
plt.gca().set_ylim(0,1)
plt.show()

ModuleNotFoundError: No module named 'matplotlib'

In [13]:
model.evaluate(X_test, y_test)



[91.00779724121094, 0.7904000282287598]

## Making Predictions


In [14]:
X_new = X_test[:3]
y_proba = model.predict(X_new)
y_proba.round(2)



array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)

In [15]:
import numpy as np

#model.predict_classes(X_new)
y_pred = np.argmax(model.predict(X_test[:3]), axis = -1)
np.array(class_names)[y_pred]



array(['Ankle boot', 'Pullover', 'Trouser'], dtype='<U11')

In [16]:
import pandas as pd
pd.DataFrame(history.history)

Unnamed: 0,loss,accuracy,val_loss,val_accuracy
0,0.725055,0.764273,0.498144,0.835
1,0.489142,0.830491,0.451548,0.8436
2,0.444989,0.844291,0.43319,0.8492
