# Classify MNIST digits using a CNN with Keras

In [None]:
pip install -r requirements.txt

In [None]:
import os

import numpy as np
from tensorflow.keras import layers
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

### Pipeline parameters
To be used for Hyperparameter tuning with Katib

In [None]:
# Number of units in the hidden Dense layer of the CNN.
num_nodes = 100
# Learning rate exposed as a tunable pipeline parameter.
learning_rate = 0.001

### Load Data

Load MNIST handwritten digits using the Keras library

In [None]:
# Fetch MNIST through Keras (stored locally under the name "mnist.npz") and
# unpack the two splits into their image and label arrays.
train_split, test_split = mnist.load_data(path="mnist.npz")
x_train, y_train = train_split
x_test, y_test = test_split
# Drop the temporary tuples so only the four arrays remain in the namespace.
del train_split, test_split

### Preprocess the images

In [None]:
# Reshape every image to (28, 28, 1), i.e. with an explicit single channel
# axis, and rescale the pixels into the range [0, 1].
image_shape = (28, 28, 1)
x_train = x_train.reshape((len(x_train), *image_shape)) / 255.0
x_test = x_test.reshape((len(x_test), *image_shape)) / 255.0

# Turn the digit labels into one-hot vectors over the 10 classes.
num_classes = 10
y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)

### Define the CNN model
Here we are using a vanilla CNN model with the following architecture<br>
- 2D Convolution Layer 
- 2D Max Pooling Layer
- Flatten Layer
- Dense Layer 
- Dense Layer

In [None]:
# Small CNN: one convolution + max-pooling stage followed by a dense
# classifier head. The input shape is taken from the training images.
model = Sequential([
    layers.Conv2D(
        filters=32,
        kernel_size=(3, 3),
        activation='relu',
        kernel_initializer='he_uniform',
        input_shape=x_train.shape[1:],
    ),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Flatten(),
    # Hidden layer whose width is controlled by the `num_nodes` parameter.
    layers.Dense(
        units=num_nodes,
        activation='relu',
        kernel_initializer='he_uniform',
    ),
    # Output layer: a softmax over the 10 digit classes.
    layers.Dense(units=10, activation='softmax'),
])
model.summary()

### Compile the model

In [None]:
# Compile with an Adam optimizer built from the `learning_rate` pipeline
# parameter, so that tuning that parameter actually changes training.
# Loss and metric match the one-hot encoded labels.
model.compile(
    optimizer=Adam(learning_rate=learning_rate),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

### Training

In [None]:
# Train for 1 epoch when using a CPU.
epochs = 1

# The checkpoint callback writes into this directory, so make sure it exists.
os.makedirs('saved_models', exist_ok=True)
# Keep only the checkpoint with the lowest validation loss. 'val_loss' is the
# quantity ModelCheckpoint monitors by default; it is spelled out here so the
# selection criterion is visible.
checkpointer = ModelCheckpoint(filepath='saved_models/weights.best.hdf5',
                               monitor='val_loss', verbose=1,
                               save_best_only=True)
# NOTE(review): the test split is used as validation data, so checkpoint
# selection sees the test set. Consider holding out part of the training data.
model.fit(x_train, y_train,
          validation_data=(x_test, y_test),
          epochs=epochs, batch_size=32, callbacks=[checkpointer], verbose=1)

### Load the model with the best validation loss

In [None]:
# Restore the parameters from the checkpoint file written by the
# ModelCheckpoint callback during training.
best_checkpoint_path = 'saved_models/weights.best.hdf5'
model.load_weights(best_checkpoint_path)

### Test the model
Evaluate the model's performance on test data, and store the accuracy

In [None]:
# Predict digits for the whole test set in one batched call instead of
# invoking model.predict once per image, then take the most probable class.
class_probabilities = model.predict(x_test)
predictions = np.argmax(class_probabilities, axis=1)

# Calculate test accuracy (in percent) by comparing the predicted classes
# with the class indices recovered from the one-hot encoded labels.
true_labels = np.argmax(y_test, axis=1)
test_accuracy = 100 * np.sum(predictions == true_labels) / len(predictions)

print('Test accuracy: %.4f%%' % test_accuracy)

### Pipeline Metrics
Use this cell to define the pipeline metrics that KFP will produce for every pipeline run. Kale will associate each of these metrics with the step that produced it. You will also have to choose one of these metrics as the Katib search objective metric.

In [None]:
# Emit the test accuracy (a percentage) as the pipeline metric.
print(test_accuracy)