In [1]:
import os

import numpy as np
import matplotlib.pyplot as plt

# The notebooks live in a folder called "notebooks", which keeps them easy to manage.
# The core implementation, however, sits in the project root, so the working directory
# has to be the project root before the project modules can be imported.

def enter_project_root(notebook_dir_name="notebooks"):
    """Switch the working directory to the project root if we are still in the notebook folder.

    The move to the parent directory happens only while the current directory is
    named `notebook_dir_name`. This makes the cell safe to re-run (a second run no
    longer climbs one level further) and safe to run from a kernel that was already
    started in the project root.

    Parameters
    ----------
    notebook_dir_name : str
        Name of the folder that holds the notebooks.

    Returns
    -------
    str
        The working directory after the call.
    """
    current_dir = os.path.abspath(os.getcwd())
    if os.path.basename(current_dir) == notebook_dir_name:
        os.chdir(os.path.dirname(current_dir))
    return os.getcwd()


enter_project_root()

# Let's set the seed for NumPy's global random number generator
# Please feel free to change the seed to see how the results change

np.random.seed(3407)

In [2]:
# Here is our implementation of the neural network
# We use `keras` as the alias for our implementation, which could help you understand how it's similar to the `keras` library
import numpy_keras as keras

In [3]:
# The dataset is synthetic: two Gaussian blobs in 2-D drawn from NumPy's global RNG,
# so the seed set above makes the draw reproducible.
# Each row is (x0, x1, label): 100 rows centred at (+1, +1) with label +1,
# followed by 40 rows centred at (-1, -1) with label -1.

positive_xy = np.random.normal(1, 1, size=(100, 2))
y_1 = np.concatenate([positive_xy, np.full((100, 1), 1.0)], axis=1)

negative_xy = np.random.normal(-1, 1, size=(40, 2))
y_2 = np.concatenate([negative_xy, np.full((40, 1), -1.0)], axis=1)

dataset = np.concatenate([y_1, y_2], axis=0)

In [None]:
# Let's take a look at the dataset: one scatter series per class
plt.figure(figsize=(6, 6))
plt.scatter(y_1[:, 0], y_1[:, 1], label='+1')
plt.scatter(y_2[:, 0], y_2[:, 1], label='-1')
# Label the axes and title the figure so the plot can be read on its own
plt.xlabel('feature 0')
plt.ylabel('feature 1')
plt.title('Synthetic dataset by class label')
plt.grid()
plt.legend(title='label')
plt.show()

In [5]:
# Let's define a simple function to plot the decision boundary of a model
def _binary_scores(values):
    """Collapse labels or model outputs to one signed score per sample.

    A 2-D input with more than one column (e.g. per-class probabilities) is
    reduced with argmax and shifted by -0.5, so class 0 becomes negative and any
    other class positive. Anything else is flattened to 1-D unchanged. Callers
    only ever test the result with `> 0`.
    """
    arr = np.asarray(values)
    if arr.ndim == 2 and arr.shape[1] > 1:
        return np.argmax(arr, axis=1) - 0.5
    return arr.reshape(-1)


def plot_decision_boundary(model, X_train, y_train, bounds=(-2, 2), step=.02):
    """Draw the model's decision regions next to the true and predicted labels.

    Two side-by-side panels share the same background (the region where the model's
    score is positive); the left one colours the training points by `y_train`, the
    right one by the model's own predictions. A score `> 0` is drawn red, otherwise blue.

    Parameters
    ----------
    model : object with a `predict(X)` method
        Assumed to return one row per input sample, either a single value or one
        column per class -- TODO confirm against numpy_keras.
    X_train : array of shape (n_samples, 2)
        Training points; column 0 is plotted on x, column 1 on y.
    y_train : array-like
        Training labels; the sign (or the argmax for multi-column labels) picks the colour.
    bounds : (low, high)
        Extent of the square plotting window, used for both axes and the grid.
    step : float
        Grid spacing used to sample the decision regions.
    """
    low, high = bounds
    ticks = np.arange(low, high, step)
    xx, yy = np.meshgrid(ticks, ticks)

    # Score every grid point once; the same background is reused by both panels
    grid_scores = _binary_scores(model.predict(np.c_[xx.ravel(), yy.ravel()]))
    Z = grid_scores.reshape(xx.shape)

    panels = (
        ('Labels', _binary_scores(y_train)),
        ('Predictions', _binary_scores(model.predict(X_train))),
    )

    plt.figure(figsize=(15, 7))
    for position, (title, scores) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.pcolormesh(xx, yy, Z > 0, cmap='cool')
        point_colors = ['r' if score > 0 else 'b' for score in scores]
        plt.scatter(X_train[:, 0], X_train[:, 1], c=point_colors, s=100)
        plt.xlim(low, high)
        plt.ylim(low, high)
        plt.grid()
        plt.title(title)

In [None]:
# First experiment: train a small model with plain SGD
# Features are the first two columns of the dataset, the label is the third
X_train = dataset[:, 0:2]
y_train = dataset[:, 2]

# Reset NumPy's global RNG before the model is built
np.random.seed(3407)

# Two layers, not counting the input layer
layers = [
    keras.layers.Input(2),
    keras.layers.Dense(3, activation="relu", kernel_initializer='he_normal'),
    keras.layers.Dense(1, activation='tanh'),
]
model = keras.Sequential(layers)

# Loss: mean squared error. Optimizer: stochastic gradient descent, constructed with 1e-3
# (presumably the learning rate -- numpy_keras is not visible here). Metric: R^2 score.
sgd = keras.optimizers.SGD(1e-3)
model.compile(
    loss='mse',
    optimizer=sgd,
    metrics=['r2_score'],
)

# Let's witness the magic of the neural network
history = model.fit(
    X_train,
    y_train,
    batch_size=2,
    epochs=500,
    verbose=1,
)
keras.plot_history(history)

In [None]:
# Observation: the model seems to fit the training data very well
plot_decision_boundary(model=model, X_train=X_train, y_train=y_train)

In [None]:
# The model above looks pretty good.
# Second experiment: same data and architecture, but a different optimizer ('adam')
X_train = dataset[:, 0:2]
y_train = dataset[:, 2]

# Reset NumPy's global RNG before the model is built
np.random.seed(3407)

layers = [
    keras.layers.Input(2),
    keras.layers.Dense(3, activation="relu", kernel_initializer='he_normal'),
    keras.layers.Dense(1, activation='tanh'),
]
model = keras.Sequential(layers)

model.compile(
    loss='mse',
    optimizer='adam',
    metrics=['r2_score'],
)
history = model.fit(
    X_train,
    y_train,
    batch_size=2,
    epochs=500,
    verbose=1,
)
keras.plot_history(history)

In [None]:
# Observation: the decision boundary now has a `corner`, which is quite different
# from the previous model, even though the loss tends to be quite similar
plot_decision_boundary(model=model, X_train=X_train, y_train=y_train)

In [None]:
# Next, let's add different layers to the model and see how the decision boundary changes.
# Third experiment: what about a dropout layer between the two dense layers?
X_train = dataset[:, 0:2]
y_train = dataset[:, 2]

# Reset NumPy's global RNG before the model is built
np.random.seed(3407)

layers = [
    keras.layers.Input(2),
    keras.layers.Dense(3, activation="relu", kernel_initializer='he_normal'),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(1, activation='tanh'),
]
model = keras.Sequential(layers)

model.compile(
    loss='mse',
    optimizer='adam',
    metrics=['r2_score'],
)
history = model.fit(
    X_train,
    y_train,
    batch_size=2,
    epochs=500,
    verbose=1,
)
keras.plot_history(history)

In [None]:
# Observation: this time the loss seems more oscillatory,
# while the decision boundary looks smoother and the model seems to be more robust
plot_decision_boundary(model=model, X_train=X_train, y_train=y_train)

In [None]:
# Fourth experiment: add a batch normalization layer.
# The first dense layer has no activation; ReLU is applied as a separate layer
# after the normalization.
X_train = dataset[:, 0:2]
y_train = dataset[:, 2]

# Reset NumPy's global RNG before the model is built
np.random.seed(3407)

layers = [
    keras.layers.Input(2),
    keras.layers.Dense(3, activation=None),
    keras.layers.BatchNormalization(),
    keras.layers.Activation('relu'),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(1, activation='tanh'),
]
model = keras.Sequential(layers)

model.compile(
    loss='mse',
    optimizer='adam',
    metrics=['r2_score'],
)
history = model.fit(
    X_train,
    y_train,
    batch_size=2,
    epochs=500,
    verbose=1,
)
keras.plot_history(history)

In [None]:
# Oops: the loss seems to be extremely oscillatory,
# and the model doesn't seem to be able to fit the training data.
# A possible reason is that batch normalization is sensitive to the batch size:
# the batch size used here is very small, which could make training unstable
plot_decision_boundary(model=model, X_train=X_train, y_train=y_train)

In [None]:
# Fifth experiment: same batch-normalization model, but with a larger batch size (16),
# to see how the model behaves
X_train = dataset[:, 0:2]
y_train = dataset[:, 2]

# Reset NumPy's global RNG before the model is built
np.random.seed(3407)

layers = [
    keras.layers.Input(2),
    keras.layers.Dense(3, activation=None),
    keras.layers.BatchNormalization(),
    keras.layers.Activation('relu'),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(1, activation='tanh'),
]
model = keras.Sequential(layers)

model.compile(
    loss='mse',
    optimizer='adam',
    metrics=['r2_score'],
)
history = model.fit(
    X_train,
    y_train,
    batch_size=16,
    epochs=500,
    verbose=1,
)
keras.plot_history(history)

In [None]:
# It looks better now.
# The loss is still quite oscillatory (the dataset is quite small),
# but the model seems to be able to fit the training data
plot_decision_boundary(model=model, X_train=X_train, y_train=y_train)

In [None]:
# Finally, let's redefine the task as a classification task instead of a regression task
# Also, let's try the `add` method to add layers to the model instead of using a list
X_train = dataset[:, 0:2]

# The dataset stores labels as -1 / +1. A sparse categorical cross-entropy loss
# conventionally expects integer class indices in [0, n_classes), so map
# -1 -> class 0 and +1 -> class 1 to match the 2-unit softmax output.
# NOTE(review): numpy_keras's loss implementation is not visible here -- confirm it
# follows the same convention.
# Plotting code that colours points with `label > 0` still works: class 0 stays blue,
# class 1 stays red.
y_train = (dataset[:, 2] > 0).astype(int)

# Reset NumPy's global RNG before the model is built
np.random.seed(3407)

model = keras.Sequential()
model.add(keras.layers.Input(2))
model.add(keras.layers.Dense(3, activation='relu', kernel_initializer='he_normal'))
model.add(keras.layers.Dropout(0.2))
# One output unit per class
model.add(keras.layers.Dense(2, activation='softmax'))

model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
history = model.fit(X_train, y_train, batch_size=2, epochs=500, verbose=1)
keras.plot_history(history)

In [None]:
# Decision boundary of the classification model
# NOTE(review): this model ends in a 2-unit softmax, so `predict` presumably returns
# two columns per sample -- confirm plot_decision_boundary handles that shape
plot_decision_boundary(model=model, X_train=X_train, y_train=y_train)