In [1]:
import numpy as np
import random as python_random
import os
import tensorflow as tf
print(tf.__version__)

# One seed value shared by every RNG seeded below, so it can be changed in a
# single place instead of three.
SEED = 123

# NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so setting
# it here only reaches processes launched afterwards — confirm whether the
# kernel itself needs to be started with the variable already exported.
os.environ["PYTHONHASHSEED"] = "0"
np.random.seed(SEED)
python_random.seed(SEED)
tf.random.set_seed(SEED)


import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from mlxtend.evaluate import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import auc, roc_auc_score, roc_curve


from tensorflow.keras.datasets import mnist
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.models import Sequential




2.3.0


In [2]:
print("[INFO] Reading MNIST...")
# load_data() yields a (train, test) pair; each half is an (images, labels) tuple.
mnist_train, mnist_test = mnist.load_data()
train_images, train_labels = mnist_train
testX, testY = mnist_test


[INFO] Reading MNIST...


In [3]:
# Display names of the classes: the digits 0 through 9 as strings.
class_types = [str(digit) for digit in range(10)]

In [4]:
def _scale_to_unit_range(images):
    """Divide integer-typed pixel arrays by 255 and return them as float32.

    Arrays whose dtype is not an integer type are returned unchanged, so
    re-running this cell on already-scaled data does not divide by 255 again.
    """
    if np.issubdtype(images.dtype, np.integer):
        return images.astype("float32") / 255.0
    return images


# scale data to the range of [0, 1]
train_images = _scale_to_unit_range(train_images)
testX = _scale_to_unit_range(testX)

In [5]:
# Hold out a stratified 20% of the training images/labels as a validation set.
trainX, valX, trainY, valY = train_test_split(
    train_images,
    train_labels,
    test_size=0.2,
    stratify=train_labels,
    random_state=42,
)

In [6]:
# Reshape every split to (n_samples, 28, 28, 1), the layout the Conv2D input expects.
image_dims = (28, 28, 1)
trainX, valX, testX = [
    split.reshape((split.shape[0],) + image_dims)
    for split in (trainX, valX, testX)
]

In [7]:
# Turn integer labels into indicator vectors. The binarizer learns its classes
# from the training labels only and is then applied to all three splits.
lb = LabelBinarizer().fit(trainY)
trainY, valY, testY = [lb.transform(labels) for labels in (trainY, valY, testY)]

In [8]:
def build_model(activation, n_classes, input_shape=(28, 28, 1)):
    """Assemble an uncompiled single-convolution classifier.

    Layer stack: Conv2D(32 filters, 3x3 kernel) -> `activation` -> Flatten ->
    Dense(n_classes) -> softmax.

    Args:
        activation: Activation passed to the Keras `Activation` layer that
            follows the convolution (e.g. "sigmoid").
        n_classes: Number of units in the final Dense layer.
        input_shape: Per-sample input shape given to the convolution.
            Defaults to (28, 28, 1), the value that was previously hard-coded.

    Returns:
        The `Sequential` model; the caller still has to compile it.
    """
    # Task 1
    return Sequential([
        Conv2D(32, (3, 3), input_shape=input_shape),
        Activation(activation),
        Flatten(),
        Dense(n_classes),
        Activation("softmax"),
    ])

In [9]:
# --- model ---
activation = "sigmoid"
n_classes = len(class_types)

# --- optimisation ---
learning_rate = 0.01
opt = SGD(learning_rate=learning_rate)
batch_size = 128
n_epochs = 5

# --- compile settings ---
loss = "categorical_crossentropy"
metrics = ["accuracy"]

In [10]:
# Show the training image and label array shapes side by side before fitting.
image_shape, label_shape = trainX.shape, trainY.shape
print(image_shape, label_shape)

(48000, 28, 28, 1) (48000, 10)


In [15]:
%timeit
model = build_model(activation, n_classes)
model.compile(loss=loss, optimizer=opt,
            metrics=metrics)
H = model.fit(trainX, trainY,
              epochs=n_epochs, batch_size=128)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [16]:
# Print each layer's output shape and parameter count for the fitted model.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
activation_2 (Activation)    (None, 26, 26, 32)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 21632)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 10)                216330    
_________________________________________________________________
activation_3 (Activation)    (None, 10)                0         
Total params: 216,650
Trainable params: 216,650
Non-trainable params: 0
_________________________________________________________________


In [17]:
# evaluate the network
print("[INFO] evaluating network...")
predictions = model.predict(testX, batch_size=128)

# Reduce indicator targets and per-class scores to integer class ids.
true_ids = testY.argmax(axis=1)
predicted_ids = predictions.argmax(axis=1)
digit_names = list(map(str, lb.classes_))

cr = classification_report(true_ids, predicted_ids, target_names=digit_names)
print(cr)

[INFO] evaluating network...
              precision    recall  f1-score   support

           0       0.90      0.98      0.94       980
           1       0.93      0.97      0.95      1135
           2       0.94      0.82      0.88      1032
           3       0.88      0.87      0.88      1010
           4       0.88      0.92      0.90       982
           5       0.79      0.89      0.83       892
           6       0.89      0.93      0.91       958
           7       0.90      0.90      0.90      1028
           8       0.91      0.78      0.84       974
           9       0.89      0.85      0.87      1009

    accuracy                           0.89     10000
   macro avg       0.89      0.89      0.89     10000
weighted avg       0.89      0.89      0.89     10000



In [18]:
# ROC AUC of the test-set predictions, requested with the one-vs-rest setting.
test_auc = roc_auc_score(testY, predictions, multi_class='ovr')
print(test_auc)

0.9908455207150713
