In [1]:
"""
Example of neural networks using the MNIST dataset, which is composed of 70000 28 * 28 grayscale images of the handwritten
digits 0 - 9 (10 classes). Here we will be teaching a neural network to learn how to recognize digits.

This is probably the trickiest module in SeaLion to learn given how many classes 
you'll be working with - so I hope this tutorial will make it clear. 
"""
import sealion as sl  # first import this, under sl alias
from sealion.neural_networks.optimizers import SGD, Adam  # we'll use these 2, but feel free to try some more
from sealion.neural_networks.loss import CrossEntropy  # this is the loss function we'll use (classification problem)
from sealion.utils import one_hot  # one_hot function for our data

In [2]:
# Step 1 : Load and preprocess data!

# Keras ships a ready-made MNIST loader; we use it in the interest of time.
from tensorflow.keras.datasets import mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()  # unpack the (images, labels) pair of each split

# Divide the pixel values by 255 (normalization) for faster convergence.
X_train, X_test = X_train / 255.0, X_test / 255.0

# One-hot encode the labels so they work with the softmax output (10 classes).
y_train, y_test = one_hot(y_train, depth=10), one_hot(y_test, depth=10)

In [3]:
# Step 2 : Build the neural network

# Option 1 : start from an empty model and append the layers one at a time.

model = sl.neural_networks.models.NeuralNetwork()
model.add(sl.neural_networks.layers.Flatten())
model.add(
    sl.neural_networks.layers.Dense(
        784, 64, activation=sl.neural_networks.layers.LeakyReLU(leak=0.2)
    )
)
model.add(
    sl.neural_networks.layers.Dense(
        64, 32, activation=sl.neural_networks.layers.LeakyReLU(leak=0.2)
    )
)
model.add(
    sl.neural_networks.layers.Dense(
        32, 10, activation=sl.neural_networks.layers.Softmax()
    )
)

In [4]:
# Option 2 : hand the whole list of layers to the constructor (this replaces the model built above).
model = sl.neural_networks.models.NeuralNetwork([
    sl.neural_networks.layers.Flatten(),
    sl.neural_networks.layers.Dense(
        784, 64, activation=sl.neural_networks.layers.LeakyReLU(leak=0.2)
    ),
    sl.neural_networks.layers.Dense(
        64, 32, activation=sl.neural_networks.layers.LeakyReLU(leak=0.2)
    ),
    sl.neural_networks.layers.Dense(
        32, 10, activation=sl.neural_networks.layers.Softmax()
    ),
])

# Just a few notes :
# - The Flatten() layer comes first because each 28 * 28 input has to be turned into a single
#   784-element vector, which is why the input size of the first Dense layer is 784.
# - The 64 outputs of the first hidden layer and the 32 of the second are arbitrary, and based on
#   hyperparameter tuning.
# - We used the leaky relu, instead of the typical relu, to avoid the dying neurons problem.
# - The Softmax() activation at the end turns the outputs into probabilities.

In [5]:
# Lastly we'll need to finalize the model - "finalize" just means setting the loss function and the optimizer :

model.finalize(
    loss=CrossEntropy(),  # CrossEntropy must be used with softmax (and vice versa)
    optimizer=SGD(lr=0.3, momentum=0.2, nesterov=True),  # SGD optimizer, see the note below
)

# The SGD optimizer here has a learning rate of 0.3, a momentum of 0.2, and utilizes
# nesterov accelerated gradient.

In [6]:
# To see how complex our model is, ask it for its number of parameters.
# The recorded output prints it as [52650], which matches the layer sizes above :
# (784 * 64 + 64) + (64 * 32 + 32) + (32 * 10 + 10) = 52650.
num_parameters = model.num_parameters()
print("Number of parameters for the 1st model : ", num_parameters)

Number of parameters for the 1st model :  [52650]


In [7]:
# Step 3 : Training!

# The recorded run below took about 3 minutes (02:54) for the 10 epochs and its progress bar
# ended at "Acc : 91.84%".
model.train(X_train, y_train, epochs=10)  # we can then train this model for 10 epochs

Acc : 91.84%: 100%|█████████████████████████████████████████████████| 10/10 [02:54<00:00, 17.48s/it]


In [8]:
# Step 4 : Evaluate!

'''Here we will evaluate our model and see how well it did.'''
# NOTE(review): the recorded loss below is negative, which is unusual for a cross-entropy loss -
# presumably a sign convention of the library; confirm against the SeaLion docs.
print("Loss : ", model.evaluate(X_test, y_test))  # this is just the loss
print("Regression accuracy : ", model.regression_evaluate(X_test, y_test))  # regression way of calculating (a poor fit here,
# because this is classification)
print("Validation accuracy : ", model.categorical_evaluate(X_test, y_test))  # classification accuracy, this will
# report the fraction that was classified correctly (0.9315 in the recorded run)

Loss :  -3.2098036196336994
Regression accuracy :  0.8697891452159215
Validation accuracy :  0.9315


In [9]:
# It looks like we can do better. Let's build a new neural network, with a different architecture :
# wider hidden layers (128 and 64 units instead of 64 and 32), ELU activations and the Adam optimizer.

model = sl.neural_networks.models.NeuralNetwork()
model.add(sl.neural_networks.layers.Flatten())
model.add(sl.neural_networks.layers.Dense(784, 128, activation=sl.neural_networks.layers.ELU()))
model.add(sl.neural_networks.layers.Dense(128, 64, activation=sl.neural_networks.layers.ELU()))
model.add(sl.neural_networks.layers.Dense(64, 10, activation=sl.neural_networks.layers.Softmax()))

# Use the CrossEntropy and Adam names imported at the top of the notebook, the same way the
# first model was finalized (they are the very same classes as the fully-qualified ones).
model.finalize(loss=CrossEntropy(), optimizer=Adam(lr=0.01))  # use adam

model.train(X_train, y_train, epochs=20)  # train this model for 20 epochs

# This takes noticeably longer than the first model - the recorded run below needed about
# 11 minutes (I have 4 cores) - given the change from 64 to 128 units in the first hidden layer
# and the doubled number of epochs, so please be patient.

Acc : 98.29%: 100%|█████████████████████████████████████████████████| 20/20 [11:12<00:00, 33.64s/it]


In [10]:
# Same three test-set metrics as for the first model : loss, regression-style score and
# classification accuracy.
print("Loss : ", model.evaluate(X_test, y_test))
print("Regression accuracy : ", model.regression_evaluate(X_test, y_test))
print("Validation accuracy : ", model.categorical_evaluate(X_test, y_test))

# It looks like we like this model : in the recorded runs the validation accuracy went
# from 0.9315 (first model) to 0.968.

Loss :  -1.4782596297021773
Regression accuracy :  0.9316249609875145
Validation accuracy :  0.968


In [11]:
'''
Often times training a neural network will take some time. Who wants to go through the process from the start? 
Instead, we can save our weights and biases so we can reuse them and plug them back into a neural network as we please. 
'''

parameters = model.give_parameters() # get the parameters (weights + biases) as a Python object

# we can also just directly store this into a pickle file
# (the name is given without an extension; the file opened later in this notebook is MNIST_weights.pickle)
FILE_NAME = "MNIST_weights"
model.pickle_params(FILE_NAME) # now the parameters will be stored in a file known as MNIST_weights.pickle

In [12]:
# let's load in those weights using pickle
# NOTE: pickle.load can execute arbitrary code, so only unpickle files you created yourself or trust.
import pickle
with open('MNIST_weights.pickle', 'rb') as file :
    parameters = pickle.load(file) # load back the parameters in a variable

# now we can enter in the parameters into the weights of our current architecture (so we don't have to train from scratch)
# presumably the architecture must match the one the parameters were saved from - confirm in the SeaLion docs
model.enter_parameters(parameters)

In [19]:
# now let's train it some more!
# The recorded run below took about 24 minutes for the 50 epochs and its progress bar
# ended at "Acc : 100.0%".

model.train(X_train, y_train, epochs= 50) # train this model for 50 epochs

Acc : 100.0%: 100%|█████████████████████████████████████████████████| 50/50 [24:26<00:00, 29.33s/it]


In [20]:
# Evaluate once more on the test set after the extra 50 epochs
# (recorded validation accuracy : 0.9766, up from 0.968).
print("Loss : ", model.evaluate(X_test, y_test))
print("Regression accuracy : ", model.regression_evaluate(X_test, y_test))
print("Validation accuracy : ", model.categorical_evaluate(X_test, y_test))

Loss :  -1.0776098235212133
Regression accuracy :  0.9480098712648173
Validation accuracy :  0.9766


In [21]:
# and if you so desire, we can save the weights again.
# A different name is used here, so the file saved earlier as "MNIST_weights" is kept.
file_name = "MNIST_weights2"
model.pickle_params(file_name)

In [22]:

# there's a ton of stuff we didn't go over!
# specifically :
# the dropout layer (sl.neural_networks.layers.Dropout) for regularization
# other optimizers like RMSprop, Momentum, and AdaGrad
# the other activation functions like Tanh (sl.neural_networks.layers.Tanh)
# or Sigmoid (sl.neural_networks.layers.Sigmoid), Swish, etc.

# these will generally do better and make training much faster, so if you aren't seeing
# any noticeable improvement that's because MNIST is a fairly easy dataset. Either way we hope
# you enjoyed this tutorial and that neural networks make a bit more sense
# with SeaLion.