<a href="https://colab.research.google.com/github/carranza96/DLSeminar/blob/master/IntroDL_Mnist_%2B_CIFAR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Input, Dense, Dropout, Flatten, Conv2D, MaxPool2D
from tensorflow.keras import backend as K
import matplotlib.pyplot as plt
import numpy as np



In [None]:
# Seed the NumPy and TensorFlow global random generators with one fixed value
# so that experiments can be reproduced.
SEED = 1
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Load and inspect MNIST data

In [None]:
# Load MNIST through the Keras dataset helper. It returns two (images, labels)
# pairs: the training split first, then the test split.
mnist_train, mnist_test = mnist.load_data()
x_train, y_train = mnist_train
x_test, y_test = mnist_test


In [None]:
# Report the array shape of every split to confirm what was loaded.
for split_name, split_array in (
    ("X_train", x_train),
    ("y_train", y_train),
    ("X_test", x_test),
    ("y_test", y_test),
):
    print(f"Shape {split_name}: ", split_array.shape)

Values of one instance

In [None]:
# Inspect the first training example: its label, then its raw pixel values.
first_label = y_train[0]
first_image = x_train[0]
print("y:", first_label)
print("x:", first_image)

Visualize some examples

In [None]:
# Show the first ten training images in a single row. Each image is tagged
# with its label in a white box placed at data coordinates (0, 0).
plt.figure(figsize=(16, 8))
for position, (image, label) in enumerate(zip(x_train[:10], y_train[:10]), start=1):
    plt.subplot(1, 10, position)
    plt.imshow(image)
    plt.text(0, 0, label, color='black', bbox=dict(facecolor='white', alpha=1))
    plt.axis('off')

Class frequency

In [None]:
from collections import Counter

# Tally how many training examples carry each label. Leaving the Counter as
# the last expression of the cell displays it.
label_counts = Counter(y_train)
label_counts

In [None]:
# Number of target classes (one per digit). It sets the width of the one-hot
# label vectors and the number of units in each model's softmax output layer.
NUM_CLASSES = 10

# Multi-Layer Perceptron

![texto alternativo](https://miro.medium.com/proxy/1*eloYEyFrblGHVZhU345PJw.jpeg)

## Data preprocessing

In [None]:
# Flatten every 28x28 image into a single 784-element vector (28 * 28 = 784),
# the layout fed to the dense layers of the MLP.
# The number of examples is read from each array instead of being hard-coded
# (60000 / 10000), and -1 lets NumPy infer the flattened length, so the cell
# also works on a subset of the data.
X_train = x_train.reshape(x_train.shape[0], -1)
X_test = x_test.reshape(x_test.shape[0], -1)

In [None]:
# Normalize data: cast to float32, then rescale pixel intensities
# from the range [0., 255.] to the range [0., 1.].
PIXEL_MAX = 255
X_train = X_train.astype('float32') / PIXEL_MAX
X_test = X_test.astype('float32') / PIXEL_MAX

In [None]:
# Turn the integer labels of both splits into one-hot encoded vectors
# of length NUM_CLASSES.
Y_train, Y_test = (
    keras.utils.to_categorical(labels, NUM_CLASSES)
    for labels in (y_train, y_test)
)

In [None]:
# Show an example of a one-hot encoded vector: the integer label of the first
# training example, the row of class indices, and the one-hot vector itself.
class_indices = list(range(NUM_CLASSES))
print("Class of first example:", y_train[0])
print("One-hot enconding:")
print(class_indices)
print(Y_train[0])

NOTE: One-hot encoding is not strictly necessary; whether it is needed depends on the selected loss function.

If `categorical_crossentropy` -> one-hot encoded labels

If `sparse_categorical_crossentropy` -> integer class labels

## Create model

In [None]:
# Multi-layer perceptron with three dense layers: two hidden ReLU layers
# followed by a softmax output layer.
inp = Input(shape=(784,))  # input vector shape (28x28 = 784)

hidden_1 = Dense(64, activation='relu', use_bias=True)(inp)
hidden_2 = Dense(128, activation='relu', use_bias=True)(hidden_1)
# 10 output neurons, one associated with each digit, with softmax activation.
x = Dense(NUM_CLASSES, activation='softmax')(hidden_2)

model = keras.Model(inputs=inp, outputs=x)

# Plain SGD on the categorical cross-entropy, the loss that goes with the
# one-hot encoded labels.
sgd = keras.optimizers.SGD(learning_rate=0.01)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])


## Train model

In [None]:
# Train for 20 epochs in mini-batches of 64 examples. The test split is passed
# as validation data, so `history` also records val_loss / val_accuracy.
fit_options = dict(batch_size=64, epochs=20, verbose=1)
history = model.fit(X_train, Y_train, validation_data=(X_test, Y_test), **fit_options)

In [None]:
## Visualize training stats

# One figure per metric. In each figure the training curve is drawn first and
# the validation curve second, matching the order of the legend entries.
curve_specs = (
    ('loss', 'Loss', {}),
    ('accuracy', 'Accuracy', {'loc': 'lower right'}),
)
for metric, axis_label, legend_kwargs in curve_specs:
    plt.figure()
    plt.xlabel('Epochs')
    plt.ylabel(axis_label)
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.legend(['Training', 'Validation'], **legend_kwargs)
plt.show()

## Evaluate model


In [None]:
# Evaluate on the test split. The first returned value is reported as the
# loss and the second as the accuracy.
score = model.evaluate(X_test, Y_test, verbose=0)
test_loss, test_accuracy = score[0], score[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

### Visualize predictions

In [None]:
# Run the trained model on every test image and keep its outputs.
predictions = model.predict(x=X_test)

In [None]:
# Example: the prediction for the first test image.
# The output is a vector of size 10 with one value per digit (it comes from
# the softmax output layer); the position of the maximum value is the
# predicted digit.
predictions[0]

In [None]:
from sklearn.metrics import confusion_matrix
import pandas as pd
import seaborn as sns

# Confusion matrix of the test set. scikit-learn puts the true classes on the
# rows and the predicted classes on the columns.
true_labels = np.argmax(Y_test, axis=1)            # undo the one-hot encoding
predicted_labels = np.argmax(predictions, axis=1)  # highest-scoring class per image
class_ids = list(range(NUM_CLASSES))

# Passing `labels` fixes the matrix at NUM_CLASSES x NUM_CLASSES even when a
# class never occurs, so it always matches the index/column labels below.
df = pd.DataFrame(
    confusion_matrix(true_labels, predicted_labels, labels=class_ids),
    index=class_ids,
    columns=class_ids,
)

plt.figure(figsize=(16, 8))
ax = sns.heatmap(df, annot=True, fmt='g', cmap="Blues")
# Label both axes so the reader can tell true classes from predicted ones.
ax.set_xlabel('Predicted class')
ax.set_ylabel('True class')
plt.show()

In [None]:
## Visualize some wrong predictions
integer_predictions = np.argmax(predictions, axis=1)
integer_labels = np.argmax(Y_test, axis=1)
wrong_predictions_indices = np.where(integer_predictions != integer_labels)[0]

# Show at most ten mistakes. Slicing (instead of indexing positions 0..9)
# keeps the cell from raising IndexError when there are fewer than ten errors.
MAX_SHOWN = 10
plt.figure(figsize=(16, 8))
for position, index in enumerate(wrong_predictions_indices[:MAX_SHOWN], start=1):
    plt.subplot(1, MAX_SHOWN, position)
    plt.imshow(x_test[index])  # x_test holds the images as loaded, before reshaping/scaling
    # The boxed number is the (wrong) digit predicted by the model.
    plt.text(0, 0, integer_predictions[index], color='black', bbox=dict(facecolor='white', alpha=1))
    plt.axis('off')

### MLP with Dropout



In [None]:
# MLP with the same dense layers as the previous model, plus a Dropout(0.2)
# layer after each hidden layer.
inp = Input(shape=(784,))

hidden = Dense(64, activation='relu', use_bias=True)(inp)
hidden = Dropout(0.2)(hidden)
hidden = Dense(128, activation='relu', use_bias=True)(hidden)
hidden = Dropout(0.2)(hidden)
# 10 output neurons, one associated with each digit, with softmax activation.
x = Dense(NUM_CLASSES, activation='softmax')(hidden)

model = keras.Model(inputs=inp, outputs=x)

model.compile(
    loss='categorical_crossentropy',
    optimizer=keras.optimizers.SGD(learning_rate=0.01),
    metrics=['accuracy'],
)

# Same loss, optimizer, batch size and epoch count as the model without dropout.
history = model.fit(
    X_train,
    Y_train,
    batch_size=64,
    epochs=20,
    verbose=1,
    validation_data=(X_test, Y_test),
)

### How to improve results?

*   Adjust parameters in dense layers (number of neurons, activation function)
*   Add more dense layers
*   Dropout
*   Change optimizer: SGD, Adam, Adagrad, RMSProp
*   Change architecture: CNN, LSTM





# Convolutional Neural Network

![texto alternativo](https://miro.medium.com/max/3744/1*SGPGG7oeSvVlV5sOSQ2iZw.png)

## Preprocessing

In [None]:
# Reload MNIST into x_train / x_test (lower case) so this cell starts from the
# images as loaded and does not depend on arrays left over from earlier cells.
# x_test is also what the later "wrong predictions" plot displays.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# The convolutional model takes 4-dimensional input
# (num_examples, height, width, channels), so reshape to add an explicit
# channel axis of size 1.
input_shape = (28, 28, 1)
X_train = x_train.reshape(x_train.shape[0], *input_shape)
X_test = x_test.reshape(x_test.shape[0], *input_shape)

# Normalization: cast to float32 and rescale from [0, 255] to [0, 1]
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

# One-hot encoding (NUM_CLASSES instead of a literal 10, as in the MLP section)
Y_train = keras.utils.to_categorical(y_train, NUM_CLASSES)
Y_test = keras.utils.to_categorical(y_test, NUM_CLASSES)

## Create CNN model

In [None]:
# Minimal CNN: one Conv2D layer with 32 convolutional filters of kernel size
# 3x3 ('same' padding, ReLU), flattened and fed to a softmax classifier.
inp = Input(shape=input_shape)
feature_maps = Conv2D(32, (3, 3), padding='same', activation='relu')(inp)
flat = Flatten()(feature_maps)  # convert from matrices to a vector before the fully connected layer
x = Dense(NUM_CLASSES, activation='softmax')(flat)

model = keras.Model(inputs=inp, outputs=x)

sgd = keras.optimizers.SGD(learning_rate=0.01)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

# Print model architecture
model.summary()

## Train CNN

In [None]:
# Train the CNN for 20 epochs in mini-batches of 64, validating on the test
# split. The per-epoch metrics are stored in `hist`.
hist = model.fit(
    X_train,
    Y_train,
    batch_size=64,
    epochs=20,
    verbose=1,
    validation_data=(X_test, Y_test),
)

In [None]:
## Visualize training stats

# The CNN run is stored in `hist`. The name `history` still refers to the
# earlier MLP-with-dropout run, so plotting it here would show the curves of
# the wrong model.
cnn_stats = hist.history

plt.figure()
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.plot(cnn_stats['loss'])
plt.plot(cnn_stats['val_loss'])
plt.legend(['Training', 'Validation'])

plt.figure()
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.plot(cnn_stats['accuracy'])
plt.plot(cnn_stats['val_accuracy'])
plt.legend(['Training', 'Validation'], loc='lower right')
plt.show()

## Evaluate model

In [None]:
# Evaluate the CNN on the test split. The first returned value is reported as
# the loss and the second as the accuracy.
score = model.evaluate(X_test, Y_test, verbose=0)
test_loss, test_accuracy = score[0], score[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

## Visualize predictions

In [None]:
# Run the trained CNN on every test image and keep its outputs.
predictions = model.predict(x=X_test)

In [None]:
## Visualize some wrong predictions
integer_predictions = np.argmax(predictions, axis=1)
integer_labels = np.argmax(Y_test, axis=1)
wrong_predictions_indices = np.where(integer_predictions != integer_labels)[0]

# Show at most ten mistakes. Slicing (instead of indexing positions 0..9)
# keeps the cell from raising IndexError when there are fewer than ten errors.
MAX_SHOWN = 10
plt.figure(figsize=(16, 8))
for position, index in enumerate(wrong_predictions_indices[:MAX_SHOWN], start=1):
    plt.subplot(1, MAX_SHOWN, position)
    plt.imshow(x_test[index])  # x_test holds the images as loaded, before reshaping/scaling
    # The boxed number is the (wrong) digit predicted by the model.
    plt.text(0, 0, integer_predictions[index], color='black', bbox=dict(facecolor='white', alpha=1))
    plt.axis('off')

## Exercise: Create CNN model with the following characteristics

2 convolution+pooling blocks with increasing number of filters and relu activation function

Dropout after convolution

Fully connected block at the end

Change optimizer (search in Keras docs for available optimizers)






In [None]:
# Exercise scaffold: uncomment the lines below and fill in the missing parts
# to build, compile and train your own CNN.
# inp =
# x =

# model = keras.Model(...)


# model.compile(...)



#  hist = model.fit(X_train, Y_train, batch_size=64,
#                  epochs=5, verbose=1,
#                  validation_data=(X_test, Y_test))

## Transfer learning with CIFAR-10 dataset

In [None]:
# Load CIFAR-10 through the Keras dataset helper (training split first, then
# the test split). This rebinds x_train / y_train / x_test / y_test.
cifar_train, cifar_test = keras.datasets.cifar10.load_data()
x_train, y_train = cifar_train
x_test, y_test = cifar_test

# Human-readable class names, indexed by the integer label.
classes = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]


In [None]:
# Report the array shape of every CIFAR-10 split.
for split_name, split_array in (
    ("X_train", x_train),
    ("y_train", y_train),
    ("X_test", x_test),
    ("y_test", y_test),
):
    print(f"Shape {split_name}: ", split_array.shape)

In [None]:
# Show the first ten training images in a single row, each tagged with its
# class name in a white box placed at data coordinates (0, 0).
plt.figure(figsize=(16, 16))
for position, (image, label_row) in enumerate(zip(x_train[:10], y_train[:10]), start=1):
    plt.subplot(1, 10, position)
    plt.imshow(image)
    # Each label is itself indexable; its first element is the class index.
    class_name = classes[label_row[0]]
    plt.text(0, 0, class_name, color='black', bbox=dict(facecolor='white', alpha=1))
    plt.axis('off')

In [None]:
input_shape = (32, 32, 3)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

# Custom normalization depending on the base model. The base model used in
# this notebook is VGG19, so use the VGG19 preprocessing function (not the
# VGG16 one) to keep the two in agreement.
# The pixels are deliberately NOT divided by 255 first: preprocess_input is
# given the images in their original value range.
# NOTE: this cell rebinds x_train / y_train in place, so run it only once
# after loading the data.
x_train = keras.applications.vgg19.preprocess_input(x_train)
x_test = keras.applications.vgg19.preprocess_input(x_test)

# One-hot encoding. The class count is passed explicitly so both splits get
# vectors of the same width instead of each inferring it from its own labels.
num_classes = len(classes)
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

In [None]:
# Report the array shapes again, now after preprocessing and one-hot encoding.
for split_name, split_array in (
    ("X_train", x_train),
    ("y_train", y_train),
    ("X_test", x_test),
    ("y_test", y_test),
):
    print(f"Shape {split_name}: ", split_array.shape)

[Keras applications API](https://keras.io/api/applications/)

In [None]:
# VGG19 with ImageNet weights and without its top classification layers
# (include_top=False); a new classifier is stacked on top of it.
# The `classes` argument was removed: Keras documents it as applying only when
# include_top=True and no pretrained weights are given, so it had no effect.
base_model = keras.applications.VGG19(
    include_top=False,
    weights='imagenet',
    input_shape=input_shape,
)
# Freeze the base model so that training only updates the layers added on top
base_model.trainable = False

In [None]:
# New classification head stacked on the frozen base model.
inp = Input(shape=input_shape)
x = base_model(inp)
x = Flatten()(x)  # convert the base model's feature maps to a vector
x = Dense(256, activation='relu')(x)
x = Dropout(0.2)(x)  # Regularize with dropout
# One softmax output per class (instead of a hard-coded 10).
outputs = Dense(len(classes), activation='softmax')(x)

# Build the model once; previously this cell constructed it twice.
# Compilation is left to the following cell, which repeats the same
# model.compile(...) call, so compiling here as well was redundant.
model = keras.Model(inputs=inp, outputs=outputs)

# Print model architecture, including which layers are trainable
model.summary(show_trainable=True)

In [None]:
# Configure training: categorical cross-entropy on the one-hot labels,
# plain SGD with learning rate 0.01, and accuracy as the reported metric.
sgd = keras.optimizers.SGD(learning_rate=0.01)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

In [None]:
# Train for 20 epochs in mini-batches of 64, validating on the test split.
# The per-epoch metrics are stored in `hist`.
hist = model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=20,
    verbose=1,
    validation_data=(x_test, y_test),
)

## Exercise:
Once the new classification layers have been trained, try unfreezing the base model and fine-tuning all layers with a low learning rate (1e-5, for example).