Léo Innocenzi, Quentin Arnaud, Victor Jacquemet and Violette Suter (2022) IPSA: A deep learning framework for image segmentation. 

In [1]:
import pickle
import numpy as np

from src.activation import relu, softmax, sigmoid
from src.cost import softmax_cross_entropy
from src.optimizer import adam, gradient_descent

from src.layers.conv import Conv
from src.layers.dense import Dense
from src.layers.pooling import Pool
from src.layers.dropout import Dropout
from src.layers.flatten import Flatten

from src.nn import NeuralNetwork

Importing and preprocessing the MNIST data

In [2]:
def load():
    with open("mnist.pkl", 'rb') as f:
        mnist = pickle.load(f)
    return mnist["training_images"], mnist["training_labels"], mnist["test_images"], mnist["test_labels"]

def one_hot(y, num_classes=10):
    y_onehot = np.zeros((y.shape[0], num_classes))
    y_onehot[np.arange(y.shape[0]), y] = 1
    return y_onehot


def preprocess(x_train, y_train, x_test, y_test):
    x_train = x_train.reshape(x_train.shape[0], 28, 28, 1).astype(np.float32)
    x_test = x_test.reshape(x_test.shape[0], 28, 28, 1).astype(np.float32)
    y_train = one_hot(y_train.reshape(y_train.shape[0], 1))
    x_train /= 255
    x_test /= 255
    return x_train, y_train, x_test, y_test

x_train, y_train, x_test, y_test = preprocess(*load())

Neural Network settings and training loop

In [3]:
cnn = NeuralNetwork(
        input_dim=(28,28,1),
        layers=[
            Conv(3, 1, 32, activation=relu),
            Pool(2, 2),
            Dropout(0.75),
            Flatten(),
            Dense(64, activation=relu),
            Dropout(0.8),
            Dense(10, activation=softmax)
            ],
        cost_function=softmax_cross_entropy,
        optimizer=adam,
)

cnn.summary()


cnn.train(x_train=x_train, y_train=y_train,
          batch_size=256,
          num_epochs=2,
          learning_rate=1e-3,
          validation_data=(x_test, y_test))

Neural network summary:
Conv with (26, 26, 32) units
Pool with (13, 13, 32) units
Dropout with (13, 13, 32) units
Flatten with 5408 units
Dense with 64 units
Dropout with 64 units
Dense with 10 units
Started training with 2 epochs and batch size 256
Epoch 1
Epoch 1, cost: 355.4070
Validation accuracy: 0.0982
Epoch 2
Epoch 2, cost: 379.4830
Validation accuracy: 0.0982
Training finished


Saving the model

In [8]:
cnn.save(r"./models/mnist_model.pkl")

Testing the saved model

In [5]:
nn = NeuralNetwork(
        input_dim=(28,28,1),
        layers=[
            Conv(3, 1, 32, activation=relu),
            Pool(2, 2),
            Dropout(0.75),
            Flatten(),
            Dense(64, activation=relu),
            Dropout(0.8),
            Dense(10, activation=softmax)
            ],
        cost_function=softmax_cross_entropy,
        optimizer=adam,
)
cnn_loaded = nn.load(r"./models/mnist_model.pkl")

Various tests for visualizing the model

In [6]:
acc = 0
pred_test = cnn_loaded.predict(x_test[0:1])
print(y_test[0:1])
print(pred_test)
print(np.argmax(pred_test, axis=1))
if np.argmax(pred_test, axis=1) == y_test[0:1]:
    print("Correct")
    acc += 1

[7]
[[0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]
[4]


In [7]:
preds = []
acc = 0
for i in range(x_test.shape[0]):
    pred = cnn.predict(x_test[i:i+1])
    preds.append(pred)
    print("\n", pred, " : ", np.argmax(pred, axis=1))
    print(y_test[i:i+1])
    if np.argmax(pred, axis=1) == y_test[i:i+1]:
        print("Correct")
        acc += 1
accuracy = acc / x_test.shape[0]
print("Accuracy: ", accuracy)
# accuracy = np.mean(np.argmax(preds, axis=1) == np.argmax(y_test, axis=1))
# print(accuracy)


 [[0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]  :  [4]
[7]

 [[0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]  :  [4]
[2]

 [[0.00000000e+000 0.00000000e+000 0.00000000e+000 0.00000000e+000
  1.00000000e+000 0.00000000e+000 0.00000000e+000 1.07681304e-259
  0.00000000e+000 0.00000000e+000]]  :  [4]
[1]

 [[0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]  :  [4]
[0]

 [[0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]  :  [4]
[4]
Correct

 [[0.00000000e+000 0.00000000e+000 0.00000000e+000 0.00000000e+000
  1.00000000e+000 0.00000000e+000 0.00000000e+000 4.31983932e-304
  0.00000000e+000 0.00000000e+000]]  :  [4]
[1]

 [[0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]  :  [4]
[4]
Correct

 [[0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]  :  [4]
[9]

 [[0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]  :  [4]
[5]

 [[0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]  :  [4]
[9]

 [[0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]  :  [4]
[0]

 [[0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]  :  [4]
[6]

 [[0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]  :  [4]
[9]

 [[0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]  :  [4]
[0]

 [[0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]  :  [4]
