# Neural Network From Scratch
## classification demo

In [2]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import time
import numpy as np

from andreiNet.neural_net import NeuralNetwork
from andreiNet.utils import norm_data, one_hot_encode
from andreiNet.metrics import accuracy
from andreiNet.losses import Loss, CrossEntropy, FocalLoss

$$ Cross Entropy = - \frac{1}{N} \sum_{k} \sum_{i} y_{i, k} \log(p_{i, k}) $$

$$ Focal Loss = - \frac{1}{N} \sum_{k} \sum_{i} \alpha_t (1 - p^t_{i, k})^{\gamma} y_{i, k} \log(p^t_{i, k}) $$
$$ p^t_{i, k} = y_{i, k} \times p_{i, k} $$

In [15]:
y_true = np.array([[1, 0, 0], [0, 1, 0]])
y_pred = np.array([[0.7, 0.2, 0.1], [0.2, 0.2, 0.6]])

ce = CrossEntropy().loss(y_true, y_pred)
print("cross entropy: {}".format(ce))

gamma = 0.0
fl = FocalLoss(gamma=gamma).loss(y_true, y_pred)
print("focal loss (gamma={}): {}".format(gamma, fl))

gamma = 2.0
fl = FocalLoss(gamma=gamma).loss(y_true, y_pred)
print("focal loss (gamma={}): {}".format(gamma, fl))


cross entropy: 0.9830564281864164
focal loss (gamma=0.0): 0.9830564281864164
focal loss (gamma=2.0): 0.5310705044561551


In [22]:
# Example 1: Easy Case, Correctly Classified
y_true = np.array([[1, 0, 0], ])
y_pred = np.array([[0.9, 0.05, 0.05], ])

ce = CrossEntropy().loss(y_true, y_pred)
print("cross entropy: {}".format(ce))

fl = FocalLoss(gamma=gamma).loss(y_true, y_pred)
print("focal loss (gamma={}): {}".format(gamma, fl))

print('ce / fl = {}'.format(ce / fl))

cross entropy: 0.10536051565782628
focal loss (gamma=2.0): 0.0010536051565782623
ce / fl = 100.00000000000006


In [24]:
# Example 2: Hard Case, Correctly Classified
y_true = np.array([[1, 0, 0], ])
y_pred = np.array([[0.4, 0.3, 0.3], ])

ce = CrossEntropy().loss(y_true, y_pred)
print("cross entropy: {}".format(ce))

fl = FocalLoss(gamma=gamma).loss(y_true, y_pred)
print("focal loss (gamma={}): {}".format(gamma, fl))

print('ce / fl = {}'.format(ce / fl))

cross entropy: 0.916290731874155
focal loss (gamma=2.0): 0.3298646634746958
ce / fl = 2.7777777777777777


In [25]:
# Example 3: Incorrectly Classified
y_true = np.array([[1, 0, 0], ])
y_pred = np.array([[0.3, 0.4, 0.4], ])

ce = CrossEntropy().loss(y_true, y_pred)
print("cross entropy: {}".format(ce))

fl = FocalLoss(gamma=gamma).loss(y_true, y_pred)
print("focal loss (gamma={}): {}".format(gamma, fl))

print('ce / fl = {}'.format(ce / fl))

cross entropy: 1.2039728043259361
focal loss (gamma=2.0): 0.5899466741197086
ce / fl = 2.0408163265306127


In [27]:
# Example 4: (very) Incorrectly Classified
y_true = np.array([[1, 0, 0], ])
y_pred = np.array([[0.05, 0.9, 0.05], ])

ce = CrossEntropy().loss(y_true, y_pred)
print("cross entropy: {}".format(ce))

fl = FocalLoss(gamma=gamma).loss(y_true, y_pred)
print("focal loss (gamma={}): {}".format(gamma, fl))

print('ce / fl = {}'.format(ce / fl))

cross entropy: 2.995732273553991
focal loss (gamma=2.0): 2.7036483768824766
ce / fl = 1.10803324099723


In [53]:
CrossEntropy().grad(y_true, y_pred)
FocalLoss2(gamma=2.0).grad(y_true, y_pred)

array([[-12.35810868,   0.        ,  -0.        ]])

In [38]:
# Load Iris Dataset
iris = datasets.load_iris()
X = iris.data  
y = iris.target

# We will also split the dataset into training and testing so we can evaluate the kNN classifier
X_trn, X_test, y_trn, y_test = train_test_split(X, 
                                                y, 
                                                test_size=0.50, 
                                                random_state=0,
                                                stratify=y)

print("X_trn.shape = {}, X_test.shape = {}".format(X_trn.shape, X_test.shape))
X_trn_norm, (trn_mean, trn_std) = norm_data(X_trn)
X_test_norm = (X_test - trn_mean) / trn_std

X_trn.shape = (75, 4), X_test.shape = (75, 4)


In [39]:
# Set parameters
activation = 'relu'
batch_size = 50
random_state = 0
lr = 0.001
n_epochs = 1000
metrics = ['accuracy']
weight_init = 'he_norm'
hidden_layers = (50, 60, 50)


In [54]:
loss = CrossEntropy() #'cross_entropy'


# Train model

# Initialize model
start_time = time.time()
nn = NeuralNetwork(hidden=hidden_layers, 
                   init_weights=weight_init,
                   loss=loss,
                   activation=activation,
                   shuffle=True,
                   random_state=random_state,
                   metrics=metrics,
                   verbose=False
                   )

nn.train(X_trn_norm, y_trn, 
         n_epochs=n_epochs,
         batch_size=batch_size, 
         early_stop=None, # ('accuracy', 500),
         lr=lr, 
         val_data=(X_test_norm, y_test),
         save_best=True)

# Run Inference
y_pred_trn = nn.predict(X_trn_norm).argmax(axis=1)
y_pred_test = nn.predict(X_test_norm).argmax(axis=1)

delta_1 = time.time() - start_time
print("--- %s seconds ---" % (delta_1))
print('trn acc', accuracy(y_pred_trn, y_trn))
print('test acc', accuracy(y_pred_test, y_test))

Exception: alpha (1.0) not accepted

In [41]:
loss = FocalLoss(gamma=2.0) #'cross_entropy'


# Train model

# Initialize model
start_time = time.time()
nn = NeuralNetwork(hidden=hidden_layers, 
                   init_weights=weight_init,
                   loss=loss,
                   activation=activation,
                   shuffle=True,
                   random_state=random_state,
                   metrics=metrics,
                   verbose=False
                   )

nn.train(X_trn_norm, y_trn, 
         n_epochs=n_epochs,
         batch_size=batch_size, 
         early_stop=None, # ('accuracy', 500),
         lr=lr, 
         val_data=(X_test_norm, y_test),
         save_best=True)

# Run Inference
y_pred_trn = nn.predict(X_trn_norm).argmax(axis=1)
y_pred_test = nn.predict(X_test_norm).argmax(axis=1)

delta_1 = time.time() - start_time
print("--- %s seconds ---" % (delta_1))
print('trn acc', accuracy(y_pred_trn, y_trn))
print('test acc', accuracy(y_pred_test, y_test))

TypeError: unsupported operand type(s) for *: 'NoneType' and 'float'

In [7]:
y_trn_oh = one_hot_encode(y_trn, len(set(y_trn)))
y_trn_pred_oh = nn.predict(X_trn_norm)

In [8]:
ce = loss.loss(y_trn_oh, y_trn_pred_oh)
ce_grad = loss.grad(y_trn_oh, y_trn_pred_oh)

In [9]:
print(ce)
print(ce_grad[0])


0.2420580872969582
[-0.0134586 -0.        -0.       ]


In [18]:
fl = FocalLoss(gamma=0.0, alpha='inverse_freq')
fl_loss = fl.loss(y_trn_oh, y_trn_pred_oh)
fl_grad = fl.grad(y_trn_oh, y_trn_pred_oh)


In [11]:
y_trn_oh.shape

(75, 3)

In [19]:
print(ce/25)
print(ce_grad[0]/25)


0.00968232349187833
[-0.00053834 -0.         -0.        ]


In [20]:
fl_loss, fl_grad[0]

(0.00968232349187833, array([-0.00053834, -0.        , -0.        ]))

In [None]:
# Plot neural network history
#loss = 'cross_entropy'
fig_size = (12, 5)
x_axis = np.arange(len(nn.trn_metric_hist[loss]))
metrics = nn.trn_metric_hist.keys()
fig, axs = plt.subplots(1, 2, figsize=fig_size)

for i, metric in enumerate(metrics):
    axs[i].plot(x_axis, nn.trn_metric_hist[metric], c='r', linewidth=2, label='trn')
    axs[i].plot(x_axis, nn.val_metric_hist[metric], c='b', linewidth=2, label='val')
    axs[i].set_xlabel('epoch', fontsize=18)
    axs[i].set_ylabel(metric, fontsize=18)
    if metric == loss:
        axs[i].set_ylabel(str(metric)+' (loss)', 
                          fontsize=18)
    axs[i].legend(prop={'size': 14})
    
plt.suptitle('andreiNet model history', fontsize=22)
plt.show()

In [None]:
# Train Keras model
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import SGD
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)

np.random.seed(random_state)
start_time = time.time()

model = Sequential()

for l, hidden in enumerate(hidden_layers):
    output_shape = hidden
    if l==0:
        input_shape = X_trn_norm.shape[1]
    else:
        input_shape = hidden_layers[l-1]
    model.add(Dense(output_shape, activation=str(activation).lower(), 
                input_dim=input_shape, 
                kernel_initializer='he_normal'))
    
model.add(Dense(3, activation='softmax',))

sgd = SGD(lr=lr, decay=0.0, momentum=0.00, nesterov=False)

model.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              metrics=['accuracy'])

keras_hist = model.fit(X_trn_norm, one_hot_encode(y_trn, 3),
                       epochs=n_epochs,
                       validation_data=(X_test_norm, one_hot_encode(y_test, 3)),
                       batch_size=batch_size,
                       verbose=0)
y_pred_trn = model.predict(X_trn_norm).argmax(axis=1)
y_pred_test = model.predict(X_test_norm).argmax(axis=1)
delta_2 = time.time() - start_time
print("--- %s seconds ---" % (delta_2))
print('trn acc', accuracy(y_pred_trn, y_trn))
print('test acc', accuracy(y_pred_test, y_test))

In [None]:
# Keras training history
fig_size = (12, 5)
x_axis = np.arange(n_epochs)
fig, axs = plt.subplots(1, 2, figsize=fig_size)

axs[0].plot(x_axis, keras_hist.history['acc'], c='r', linewidth=2, label='trn')
axs[0].plot(x_axis, keras_hist.history['val_acc'], c='b', linewidth=2, label='val')
axs[0].set_xlabel('epoch', fontsize=18)
axs[0].set_ylabel('accuracy', fontsize=18)
axs[0].legend(prop={'size': 14})

axs[1].plot(x_axis, keras_hist.history['loss'], c='r', linewidth=2, label='trn')
axs[1].plot(x_axis, keras_hist.history['val_loss'], c='b', linewidth=2, label='val')
axs[1].set_xlabel('epoch', fontsize=18)
axs[1].set_ylabel('categorical_crossentropy', fontsize=18)
axs[1].legend(prop={'size': 14})

plt.suptitle('Keras model history', fontsize=22)
plt.show()

In [None]:
# compare training time
plt.figure(figsize=(6, 6))
trn_times = [delta_1, delta_2]
y_pos = [0, 1]
plt.bar(y_pos, trn_times, color=['blue', 'red'])
plt.xticks(y_pos, ['andreiNet', 'Keras'], fontsize=16) 
plt.ylabel('training time (sec)', fontsize=16)
plt.show()