<a href="https://colab.research.google.com/github/carranza96/DLSeminar/blob/master/notebooks/IntroDL_Mnist.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
%pip install tensorflow==2.1
import random

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import backend as K
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from tensorflow.keras.models import Sequential

Collecting tensorflow==2.1
[?25l  Downloading https://files.pythonhosted.org/packages/85/d4/c0cd1057b331bc38b65478302114194bd8e1b9c2bbc06e300935c0e93d90/tensorflow-2.1.0-cp36-cp36m-manylinux2010_x86_64.whl (421.8MB)
[K     |████████████████████████████████| 421.8MB 40kB/s 
Collecting tensorboard<2.2.0,>=2.1.0
[?25l  Downloading https://files.pythonhosted.org/packages/40/23/53ffe290341cd0855d595b0a2e7485932f473798af173bbe3a584b99bb06/tensorboard-2.1.0-py3-none-any.whl (3.8MB)
[K     |████████████████████████████████| 3.8MB 48.9MB/s 
Collecting tensorflow-estimator<2.2.0,>=2.1.0rc0
[?25l  Downloading https://files.pythonhosted.org/packages/18/90/b77c328a1304437ab1310b463e533fa7689f4bfc41549593056d812fab8e/tensorflow_estimator-2.1.0-py2.py3-none-any.whl (448kB)
[K     |████████████████████████████████| 450kB 66.2MB/s 
Collecting google-auth<2,>=1.6.3
[?25l  Downloading https://files.pythonhosted.org/packages/1c/6d/7aae38a9022f982cf8167775c7fc299f203417b698c27080ce09060bba07/google_

In [0]:
# Fix random seed to allow reproducible experiments
# Seed every random number generator the notebook touches: Python's `random`,
# NumPy (np) and TensorFlow (tf, used for weight initialisation, shuffling, dropout).
# NOTE(review): some GPU kernels may still be non-deterministic — confirm if exact
# run-to-run equality is required.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)



# Load and inspect MNIST data

In [0]:
# Load MNIST: two (images, labels) pairs, one for the training split and one for the test split
(x_train, y_train), (x_test, y_test) = mnist.load_data()


In [0]:
# Report the dimensions of every array returned by mnist.load_data()
for split_name, split_array in (("X_train", x_train), ("y_train", y_train),
                                ("X_test", x_test), ("y_test", y_test)):
    print("Shape " + split_name + ": ", split_array.shape)

Values of one instance

In [0]:
# Show the label and the raw pixel matrix of the first training example
for tag, split in (("y:", y_train), ("x:", x_train)):
    print(tag, split[0])

Visualize some examples

In [0]:
# Draw the first ten training digits side by side, each tagged with its label
plt.figure(figsize=(16, 8))
for position, (image, label) in enumerate(zip(x_train[:10], y_train[:10]), start=1):
    plt.subplot(1, 10, position)
    plt.imshow(image)
    plt.text(0, 0, label, color='black', bbox=dict(facecolor='white', alpha=1))
    plt.axis('off')

Class frequency

In [0]:
from collections import Counter
# Count how many training examples carry each label value
Counter(y_train)

In [0]:
# Number of target classes; used below for the one-hot encoding width and the output layer size
NUM_CLASSES = 10

# Multi-Layer Perceptron

![texto alternativo](https://miro.medium.com/proxy/1*eloYEyFrblGHVZhU345PJw.jpeg)

## Data preprocessing

In [0]:
# Convert matrices to vectors (images are 28x28 pixels, so 28 * 28 = 784 features).
# The number of examples is read from the array itself instead of being hard-coded,
# so the cell keeps working if a subset of the data is used.
X_train = x_train.reshape(x_train.shape[0], 784)
X_test = x_test.reshape(x_test.shape[0], 784)

In [0]:
# Normalize data
# Cast to 32-bit floats and rescale pixel values from [0., 255.] to [0., 1.]
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

In [0]:
# Transform y to one-hot encoding vectors
# Only the tf.keras utilities (imported at the top as `keras`) are needed here; the
# unused import from the stand-alone `keras` package was dropped because it fails on
# keras versions that no longer ship `np_utils`.
Y_train = keras.utils.to_categorical(y_train, NUM_CLASSES)
Y_test = keras.utils.to_categorical(y_test, NUM_CLASSES)

In [0]:
# Compare the integer label of the first example with its one-hot encoded vector.
# The list of class indices is printed right above the vector so positions line up.
print("Class of first example:", y_train[0])
print("One-hot enconding:")
print([class_index for class_index in range(NUM_CLASSES)])
print(Y_train[0])

NOTE: One-hot encoding is not strictly necessary; it depends on the selected loss function.

If `categorical_crossentropy` -> one-hot encoded labels

If `sparse_categorical_crossentropy` -> integer class labels

## Create model

In [0]:
# Multi-layer perceptron made of three dense layers: 784 inputs -> 64 -> 128 -> NUM_CLASSES
model = Sequential([
    # Input vector shape (28x28=784)
    Dense(64, input_shape=(784,), activation='relu', use_bias=True),
    Dense(128, activation='relu', use_bias=True),
    # One output neuron per digit, softmax activation
    Dense(NUM_CLASSES, activation='softmax'),
])

# Plain SGD on the categorical cross-entropy loss, tracking accuracy
model.compile(
    optimizer=keras.optimizers.SGD(learning_rate=0.01),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)


## Train model

In [0]:
# Train for 20 epochs in mini-batches of 64; the test split is passed as validation data
history = model.fit(
    x=X_train,
    y=Y_train,
    validation_data=(X_test, Y_test),
    epochs=20,
    batch_size=64,
    verbose=1,
)

In [0]:
## Visualize training stats
# One figure per metric, each with the training and validation curve of `history`
for metric_key, axis_label, legend_options in (
        ('loss', 'Loss', {}),
        ('accuracy', 'Accuracy', {'loc': 'lower right'})):
    plt.figure()
    plt.xlabel('Epochs')
    plt.ylabel(axis_label)
    plt.plot(history.history[metric_key])
    plt.plot(history.history['val_' + metric_key])
    plt.legend(['Training', 'Validation'], **legend_options)
plt.show()

## Evaluate model


In [0]:
# Loss and accuracy of the trained model on the test split
score = model.evaluate(X_test, Y_test, verbose=0)
for caption, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(caption, value)

### Visualize predictions

In [0]:
# Run the trained model on the preprocessed test inputs; the result is inspected in the next cells
predictions = model.predict(X_test)

In [0]:
# Example of predictions for the first test image
# The output is a vector of size 10, one value for each digit (produced by the softmax output layer).
# The position of the maximum value is the predicted digit.
predictions[0]

In [0]:
from sklearn.metrics import confusion_matrix
import pandas as pd
import seaborn as sns


# np.argmax turns the one-hot labels / per-class scores back into integer digits.
# confusion_matrix(y_true, y_pred) puts the true class on the rows and the predicted
# class on the columns, so the axes are labelled accordingly.
df = pd.DataFrame(confusion_matrix(np.argmax(Y_test,axis=1), np.argmax(predictions,axis=1)), columns=list(range(10)))
plt.figure(figsize=(16,8))
ax = sns.heatmap(df, annot=True, fmt='g', cmap="Blues")
ax.set_xlabel('Predicted digit')
ax.set_ylabel('True digit')
plt.show()

In [0]:
## Visualize some wrong predictions
# Convert one-hot labels / per-class scores back to integer digits
integer_predictions = np.argmax(predictions,axis=1)
integer_labels = np.argmax(Y_test,axis=1)
# Positions (in the TEST set) where the predicted digit differs from the true digit
wrong_predictions_indices = np.where( integer_predictions !=  integer_labels)[0]
plt.figure(figsize=(16,8))
# Slicing (instead of range(10)) avoids an IndexError when there are fewer than 10 mistakes
for i, index in enumerate(wrong_predictions_indices[:10]):
    plt.subplot(1, 10, i+1)
    # The indices refer to the test set, so the image must come from x_test (raw 28x28 pixels)
    plt.imshow(x_test[index])
    # Show the prediction made for THIS image (look up by test index, not by loop counter)
    plt.text(0, 0, integer_predictions[index], color='black', bbox=dict(facecolor='white', alpha=1))
    plt.axis('off')

### MLP with Dropout

In [0]:
# Same three-layer MLP, with a Dropout(0.2) layer after each hidden layer
model = Sequential([
    # Input vector shape (28x28=784)
    Dense(64, input_shape=(784,), activation='relu', use_bias=True),
    Dropout(0.2),
    Dense(128, activation='relu', use_bias=True),
    Dropout(0.2),
    # One output neuron per digit, softmax activation
    Dense(NUM_CLASSES, activation='softmax'),
])

model.compile(
    optimizer=keras.optimizers.SGD(learning_rate=0.01),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Short 5-epoch run; the test split is passed as validation data
history = model.fit(
    x=X_train,
    y=Y_train,
    validation_data=(X_test, Y_test),
    epochs=5,
    batch_size=64,
    verbose=1,
)

### How to improve results?

*   Adjust parameters in dense layers (number of neurons, activation function)
*   Add more dense layers
*   Dropout
*   Change optimizer: SGD, Adam, Adagrad, RMSProp
*   Change architecture: CNN, LSTM





# Convolutional Neural Network

![texto alternativo](https://miro.medium.com/max/3744/1*SGPGG7oeSvVlV5sOSQ2iZw.png)

## Preprocessing

In [0]:
# Reload the raw images and labels so this section starts from untouched data.
# (Previously the freshly loaded arrays were bound to X_train/X_test and then immediately
# overwritten from x_train/x_test left over from an earlier cell.)
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Conv2D expects a 4-dimensional input (num_examples, 28, 28, 1): each image becomes a
# 3-dimensional tensor whose last axis is the number of channels (1 here)
X_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
X_test = x_test.reshape(x_test.shape[0], 28, 28, 1)
input_shape = (28, 28, 1)

# Normalization: cast to float32 and rescale from [0, 255] to [0, 1]
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

# One-hot encoding
Y_train = keras.utils.to_categorical(y_train, 10)
Y_test = keras.utils.to_categorical(y_test, 10)

## Create CNN model

In [0]:
# Minimal CNN: one convolutional layer followed directly by the softmax classifier
model = Sequential([
    # Conv2D with 32 convolutional filters with kernel size 3x3
    Conv2D(32, (3, 3), padding='same', input_shape=input_shape, activation='relu'),
    # Convert from matrices to a vector before the fully connected layer
    Flatten(),
    Dense(10, activation='softmax'),
])

model.compile(
    optimizer=keras.optimizers.SGD(learning_rate=0.01),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Print model architecture
model.summary()

## Train CNN

In [0]:
# Train the CNN for 5 epochs in mini-batches of 64; the test split is passed as validation data
hist = model.fit(
    x=X_train,
    y=Y_train,
    validation_data=(X_test, Y_test),
    epochs=5,
    batch_size=64,
    verbose=1,
)

In [0]:
## Visualize training stats
# Plot the CNN run stored in `hist`; `history` still holds the earlier MLP-with-dropout
# run, so using it here would show the wrong curves.
plt.figure()
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.plot(hist.history['loss'])
plt.plot(hist.history['val_loss'])
plt.legend(['Training', 'Validation'])

plt.figure()
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.plot(hist.history['accuracy'])
plt.plot(hist.history['val_accuracy'])
plt.legend(['Training', 'Validation'], loc='lower right')
plt.show()

## Evaluate model

In [0]:
# Loss and accuracy of the trained CNN on the test split
score = model.evaluate(X_test, Y_test, verbose=0)
for caption, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(caption, value)

## Visualize predictions

In [0]:
# Run the trained CNN on the preprocessed test inputs; the result is used by the next cell
predictions = model.predict(X_test)

In [0]:
## Visualize some wrong predictions
# Convert one-hot labels / per-class scores back to integer digits
integer_predictions = np.argmax(predictions,axis=1)
integer_labels = np.argmax(Y_test,axis=1)
# Positions (in the TEST set) where the predicted digit differs from the true digit
wrong_predictions_indices = np.where( integer_predictions !=  integer_labels)[0]
plt.figure(figsize=(16,8))
# Slicing (instead of range(10)) avoids an IndexError when there are fewer than 10 mistakes
for i, index in enumerate(wrong_predictions_indices[:10]):
    plt.subplot(1, 10, i+1)
    # The indices refer to the test set, so the image must come from x_test (raw 28x28 pixels)
    plt.imshow(x_test[index])
    # Show the prediction made for THIS image (look up by test index, not by loop counter)
    plt.text(0, 0, integer_predictions[index], color='black', bbox=dict(facecolor='white', alpha=1))
    plt.axis('off')

## Exercise: Create CNN model with the following characteristics

2 convolution+pooling blocks with increasing number of filters and relu activation function

Dropout after convolution

Fully connected block at the end

Change optimizer (search in Keras docs for available optimizers)






In [0]:
# model = Sequential()

# model.add(....)


# model.compile(...)


        
#  hist = model.fit(X_train, Y_train, batch_size=64, 
#                  epochs=5, verbose=1, 
#                  validation_data=(X_test, Y_test))