This note is the record of learning Deep Learning.

The forward and optimizers parts were from [Neural Networks from Scratch in Python](https://nnfs.io).

The backpropagation was from [Prof. Hung-yi Lee @ NTU](https://www.youtube.com/watch?v=ibJpTrp5mcE&list=PLJV_el3uVTsPy9oCRY30oBPNLCo89yu49&index=12).

In [126]:
import numpy as np

# Implement of Neural Network

In [184]:
class Dense_layer:
    """
    Generate a layer of neural network. Note that the outputs 
    are before an activation function (sum(input * weight) + bias).
    """

    def __init__(self, n_inputs: int, n_nodes: int) -> None:
        self.weights = 0.01 * np.random.randn(n_inputs, n_nodes)
        self.bias = np.zeros((1, n_nodes))

    def forward(self, inputs: np.array) -> None:
        # The inputs are either data or outputs from the 
        # previous hidden layer Activation(sum(input * weight) + bias).
        self.inputs = inputs
        self.forward_pass = np.dot(inputs, self.weights) + self.bias

    def backprop(self, dvalues: np.array) -> None:
        # dvalues are derivatives from the output of next hidden layer.
        self.dweights = np.dot(self.inputs.T, dvalues)
        self.dinputs = np.dot(dvalues, self.weights.T)
        self.biases = np.sum(dvalues, axis=0, keepdims=True)

In [128]:
class Activation_ReLU:
    """
    Activation function for hidden layers. 
    Output = max(input, 0)
    """

    def forward(self, inputs: np.array) -> None:
        self.inputs = inputs
        self.output = np.maximum(0, inputs)

    def backprop(self, dvalues: np.array) -> None:
        self.dinputs = dvalues.copy()
        self.dinputs[self.inputs <= 0] = 0 # dReLU == 1 if ReLU > 0

In [211]:
class Softmax_loss:
    """
    should only call forward() and backprop().
    """
    def __init__(self):
        self.softmax_probabilities = None
        self.pred_labels = None
        self.accuracy = None
        self.loss = None
        self.dvalues = None
        self.y_true = None

    def y_true_check(self, y_true: np.array) -> None:
        """make sure y_true is 1D array of labels"""
        if len(y_true.shape) == 2:
            self.y_true = np.argmax(y_true, axis=1)
        else:
            self.y_true = y_true

    def softmax(self, inputs: np.array) -> None:
        # The inputs are from the previous dense layer
        # convert inputs to negative values to 0, preventing overflow
        # the output will not change due to normalization
        # the ouputs is probabilities
        inputs_exp = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))
        self.softmax_probabilities = inputs_exp / np.sum(inputs_exp, axis=1, keepdims=True)

    def output_labels(self) -> None:
        self.pred_labels = np.argmax(self.softmax_probabilities, axis=1)

    def calculate_accuracy(self) -> None:
        self.accuracy = np.mean(self.pred_labels == self.y_true)

    def cross_entropy(self) -> None:
        """
        Loss = -sum(y_true x log(y_predict))
        This class simplifies the loss to -log(correct_class_condifence): Loss = -log(y_predict))

        y_pred: probabilities from softmax activation function
        y_true: 1D np.array
        """
        predicts_clip = np.clip(self.softmax_probabilities, 1e-7, 1 - 1e-7) # prevent log(0)
        self.confidences = predicts_clip[range(len(predicts_clip)), self.y_true]
        self.loss = np.mean(-np.log(self.confidences))

    def forward(self, inputs: np.array, y_true: np.array) -> None:
        self.y_true_check(y_true)
        self.softmax(inputs)
        self.output_labels()
        self.calculate_accuracy()
        self.cross_entropy()

    def backprop(self):
        # y_pred is the probabilities from softmax activation function
        self.dvalues = self.softmax_probabilities.copy()
        self.dvalues[range(len(self.y_true)), self.y_true] -= 1
        self.dvalues = self.dvalues / len(self.y_true)

In [188]:
class Optimizer_SGD:
    """
    Stochastic gradient descent optimizer
    """

    def __init__(self, learning_rate):
        self.learning_rate = learning_rate

    def update_param(self, layer):
        layer.weights -= self.learning_rate * layer.dweights
        layer.biases -= self.learning_rate * layer.dbiases

In [169]:
import nnfs
from nnfs.datasets import spiral_data
nnfs.init()

In [171]:
X, y = spiral_data( samples = 100 , classes = 3 )

In [215]:
dense1 = Dense_layer( 2 , 64 )
activation1 = Activation_ReLU()
dense2 = Dense_layer( 64 , 3 )
output = Softmax_loss()
optimizer = Optimizer_SGD(1)

for epoch in range (1000):
    # Perform a forward pass
    dense1.forward(X)
    activation1.forward(dense1.forward_pass)
    dense2.forward(activation1.output)
    loss = output.forward(dense2.forward_pass, y)
    # Perform a backpropgation
    output.backprop()
    dense2.backprop(output.dvalues)
    activation1.backprop(dense2.dinputs)
    dense1.backprop(activation1.dinputs)

    optimizer.update_param(dense1)
    optimizer.update_param(dense2)

    if epoch % 100 == 0:
        print(f"epoch: {epoch}, acc: {output.accuracy :.3f}, loss: {output.loss :.3f}")

TypeError: Activation_ReLU.backprop() takes 1 positional argument but 2 were given

In [None]:
class Optimizer_adam:
    """
    Adaptive momentum optimizer
    """

    def __init__(self) -> None:
        pass

In [None]:
softmax_outputs = np.array([[ 0.7 , 0.1 , 0.2 ],
[ 0.1 , 0.5 , 0.4 ],
[ 0.02 , 0.9 , 0.08 ]])
class_targets = np.array([ 0 , 1 , 1 ])


out_layer = Softmax_loss()
out_layer.softmax_probabilities = softmax_outputs
out_layer.y_true_check(class_targets)
out_layer.output_labels()
out_layer.calculate_accuracy()
out_layer.cross_entropy()
out_layer.backprop()

print(out_layer.softmax_probabilities)
print(out_layer.pred_labels)
print(out_layer.accuracy)
print(out_layer.loss)
print(out_layer.dvalues)
print(out_layer.y_true)

In [175]:
class Dummy:
    def __init__(self):
        self.a = "hello"

    def make_b(self, string):
        self.b = string

In [176]:
dummy = Dummy()
dummy.a

'hello'

In [179]:
dummy.make_b("yeap")

In [180]:
dummy.b

'yeap'

In [201]:
a = np.arange(9).reshape(3,3)
a

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [203]:
np.max(a, axis=1, keepdims=True)

array([[2],
       [5],
       [8]])