# Neural Networks From Scratch
Credits: [sentdex](https://www.youtube.com/playlist?list=PLQVvvaa0QuDcjD5BAw2DxE6OF2tius3V3), [book](https://nnfs.io/)

## A Single Neuron

In [1]:
# One neuron: four input values, one weight per input, and a single bias.
inputs  = [1,     2,    3, 2.5]
weights = [0.2, 0.8, -0.5, 1.0]
bias    = 2

# Accumulate input*weight pairs left to right, then shift by the bias.
weighted_sum = 0
for value, weight in zip(inputs, weights):
    weighted_sum += value * weight

output = weighted_sum + bias
print(output)

4.8


## Multiple Neurons

### pure python

In [2]:
inputs  = [     1,     2,    3,   2.5  ] # 1x4

# One row of weights per neuron.
weights = [ [ 0.2,   0.8,  -0.5,  1.0],
            [ 0.5,  -0.91, 0.26, -0.5],
            [-0.26, -0.27, 0.17,  0.87] ] # 3x4

# One bias per neuron.
biases  = [ 2, 3, 0.5 ] # 1x3


def _neuron_output(values, neuron_weights, neuron_bias):
    """Return the weighted sum of `values` plus `neuron_bias` for one neuron."""
    total = 0
    for value, weight in zip(values, neuron_weights):
        total += value * weight
    return total + neuron_bias


# Pair each weight row with its bias and evaluate the neurons in order.
layer_outputs = [
    _neuron_output(inputs, row, b) for row, b in zip(weights, biases)
]

print(layer_outputs)

[4.8, 1.21, 2.385]


### numpy

In [3]:
import numpy as np

# A single sample with four features.
inputs  = [     1,     2,    3,   2.5  ]

# Three neurons, each with four weights (one row per neuron).
weights = [ [ 0.2,   0.8, -0.5,   1.0],
            [ 0.5,  -0.91, 0.26, -0.5],
            [-0.26, -0.27, 0.17,  0.87] ]

biases  = [ 2, 3, 0.5 ]

# (3x4) . (4,) -> (3,): one weighted sum per neuron, then add its bias.
weighted_sums = np.dot(weights, inputs)
layer_outputs = weighted_sums + biases

print(layer_outputs)

[4.8   1.21  2.385]


## Batches

In [4]:
# multiple inputs (batch)
import numpy as np

# Three samples (rows), four features each.
inputs  = [ [ 1,   2,   3,    2.5],
            [ 2,   5,  -1,    2  ],
            [-1.5, 2.7, 3.3, -0.8] ]

# Three neurons (rows), four weights each.
weights = [ [ 0.2,   0.8, -0.5,   1.0],
            [ 0.5,  -0.91, 0.26, -0.5],
            [-0.26, -0.27, 0.17,  0.87] ]

biases  = [ 2, 3, 0.5 ]

# Row-wise outputs: (3x4) . (4x3) -> (3x3); the transpose lines up the
# feature axis of both operands, and the biases broadcast over every row.
weights_transposed = np.array(weights).T
layer_outputs = np.dot(inputs, weights_transposed) + biases
# Column-wise alternative (one column per sample instead of one row):
# layer_outputs = np.dot(weights, np.array(inputs).T) + np.reshape(biases, (3,1))

print(layer_outputs)

[[ 4.8    1.21   2.385]
 [ 8.9   -1.81   0.2  ]
 [ 1.41   1.051  0.026]]


## Layers

In [5]:
# multiple layers --> n_(l+1) x n_l
import numpy as np

inputs   = [ [ 1,   2,   3,    2.5],
            [ 2,   5,  -1,    2  ],
            [-1.5, 2.7, 3.3, -0.8] ] # m x n = 3 x 4

# First layer: three neurons, four weights each.
weights  = [ [ 0.2,   0.8, -0.5,   1.0],
            [ 0.5,  -0.91, 0.26, -0.5],
            [-0.26, -0.27, 0.17,  0.87] ] # n_l1 x n = 3 x 4

biases   = [ 2, 3, 0.5 ] # n_l1 = 3

# Second layer: two neurons, each fed by the three first-layer outputs.
weights2 = [ [ 0.1, -0.14, 0.5  ],
            [ 0.5,   0.12, -0.33] ] # n_l2 x n_l1 = 2 x 3

biases2  = [ -1, 2 ] # n_l2 = 2

# Transpose each weight matrix once so samples stay on the rows throughout.
weights_T  = np.array(weights).T   # n x n_l1 = 4 x 3
weights2_T = np.array(weights2).T  # n_l1 x n_l2 = 3 x 2

# The output of layer 1 is the input of layer 2.
layer1_outputs = np.dot(inputs, weights_T) + biases            # m x n_l1 = 3 x 3
layer2_outputs = np.dot(layer1_outputs, weights2_T) + biases2  # m x n_l2 = 3 x 2

print(layer2_outputs)

[[ 0.5031   3.75815]
 [ 0.2434   6.1668 ]
 [-0.99314  2.82254]]


## A Dense Layer Object

In [6]:
# using objects for generality
import numpy as np

# Fixed seed so the randomly initialised weights are reproducible.
np.random.seed(0)

# Three samples (rows) with four features each.
X = [ [ 1,   2,   3,    2.5],
      [ 2,   5,  -1,    2  ],
      [-1.5, 2.7, 3.3, -0.8] ]


class Layer_Dense:
    """Fully connected layer that stores its result in ``self.output``.

    Attributes:
        weights: ``(n_inputs, n_neurons)`` array of ``np.random.randn``
            samples scaled by 0.1.
        biases: ``(1, n_neurons)`` array of zeros.
    """

    def __init__(self, n_inputs, n_neurons):
        # Stored as (inputs, neurons) so forward() needs no transpose.
        random_weights = np.random.randn(n_inputs, n_neurons)
        self.weights = 0.1 * random_weights
        self.biases  = np.zeros((1, n_neurons))

    def forward(self, inputs):
        """Compute ``inputs . weights + biases`` and keep it on ``self.output``."""
        weighted_inputs = np.dot(inputs, self.weights)
        self.output = weighted_inputs + self.biases


# 4 features -> 5 neurons -> 2 neurons (layer1 is created first, so it
# consumes the first random draws).
layer1 = Layer_Dense(4, 5)
layer2 = Layer_Dense(5, 2)

# Feed X through the layers in order, printing each layer's output.
activations = X
for layer in (layer1, layer2):
    layer.forward(activations)
    print(layer.output)
    activations = layer.output

[[ 0.10758131  1.03983522  0.24462411  0.31821498  0.18851053]
 [-0.08349796  0.70846411  0.00293357  0.44701525  0.36360538]
 [-0.50763245  0.55688422  0.07987797 -0.34889573  0.04553042]]
[[ 0.148296   -0.08397602]
 [ 0.14100315 -0.01340469]
 [ 0.20124979 -0.07290616]]


## Activation Functions

### Dataset

In [7]:
!pip install nnfs



In [8]:
# load the spiral dataset used by the activation-function examples below

import numpy as np
import nnfs # for data set

nnfs.init()
from nnfs.datasets import spiral_data

# Spiral dataset: 3 classes with 100 samples each
X, y = spiral_data(classes=3, samples=100)
(X.shape, y.shape)

((300, 2), (300,))

### ReLU for hidden layers and Softmax for output layer
- Avoid possible overflow as exponentiation may lead to large values:

    $z_i \leftarrow z_i - \max(z)$ for each row $z$ of the input

    $\Rightarrow Z_{i,j} \le 0$

    $\Rightarrow e^{Z_{i,j}} \in (0, 1]$

In [9]:
class Layer_Dense:
    """Fully connected layer; ``forward`` stores its result in ``self.output``.

    ``weights`` has shape ``(n_inputs, n_neurons)`` and holds
    ``np.random.randn`` samples scaled by 0.1; ``biases`` is a
    ``(1, n_neurons)`` array of zeros.
    """

    def __init__(self, n_inputs, n_neurons):
        random_weights = np.random.randn(n_inputs, n_neurons)
        self.weights = 0.1 * random_weights
        self.biases  = np.zeros((1, n_neurons))

    def forward(self, inputs):
        """Compute ``inputs . weights + biases`` for a batch of samples."""
        weighted_inputs = np.dot(inputs, self.weights)
        self.output = weighted_inputs + self.biases
    
class Activation_ReLU:
    """Rectified linear activation: negative entries become zero."""

    def forward(self, inputs):
        """Store the element-wise ``max(0, inputs)`` on ``self.output``."""
        rectified = np.maximum(0, inputs)
        self.output = rectified

class Activation_Softmax:
    """Row-wise softmax; ``forward`` stores the result in ``self.output``."""

    def forward(self, inputs):
        """Normalise each row of ``inputs`` into values that sum to 1."""
        # Shift every row by its own maximum so the largest exponent is 0,
        # which keeps np.exp from overflowing on large scores.
        row_max = np.max(inputs, axis=1, keepdims=True)
        exp_values = np.exp(inputs - row_max)
        row_totals = np.sum(exp_values, axis=1, keepdims=True)
        self.output = exp_values/row_totals
        
# Build the network up front. The dense layers are still constructed in the
# same order (hidden first, then output), so their random weights are unchanged.
denseLayer1 = Layer_Dense(2, 3)  # hidden layer: 2 features -> 3 neurons
activation1 = Activation_ReLU()
denseLayer2 = Layer_Dense(3, 3)  # output layer: 3 neurons -> 3 class scores
activation2 = Activation_Softmax()

# Forward pass: dense -> ReLU -> dense -> softmax
denseLayer1.forward(X)
activation1.forward(denseLayer1.output)
denseLayer2.forward(activation1.output)
activation2.forward(denseLayer2.output)

# One row per sample (300), one probability per class (3)
print(activation2.output.shape)

(300, 3)


## Loss with Categorical Cross-Entropy

- One way to avoid taking the log of zero in the cross-entropy loss is to clip the predictions (y_pred).
- Handling one-hot or non-one-hot (sparse) encoded y_true values
    - Not one-hot: [1,0,1,2]: use NumPy fancy indexing to get the probabilities (from y_pred) for which y_true is 1 (in the one-hot encoded vector) for each sample.
    - One-hot: [[0,1,0,0],[1,0,0,0],[0,1,0,0],[0,0,0,1]]: Use element-wise multiplication and then sum across the columns of each row (`axis=1`).

In [31]:
# Dense Layer
class Layer_Dense:
    """Dense (fully connected) layer with small random weights and zero biases."""

    def __init__(self, n_inputs, n_neurons):
        # (n_inputs, n_neurons) layout lets forward() multiply without a transpose.
        gaussian_samples = np.random.randn(n_inputs, n_neurons)
        self.weights = 0.1 * gaussian_samples
        self.biases  = np.zeros((1, n_neurons))

    def forward(self, inputs):
        """Store ``inputs . weights + biases`` on ``self.output``."""
        projected = np.dot(inputs, self.weights)
        self.output = projected + self.biases

# Activation Functions    
class Activation_ReLU:
    """ReLU activation: keeps positive values and zeroes out negatives."""

    def forward(self, inputs):
        """Store the element-wise maximum of 0 and ``inputs`` on ``self.output``."""
        clipped_below_zero = np.maximum(0, inputs)
        self.output = clipped_below_zero

class Activation_Softmax:
    """Softmax activation applied independently to every row."""

    def forward(self, inputs):
        """Store per-row probabilities (each row sums to 1) on ``self.output``."""
        # Subtracting the row maximum leaves the result unchanged but keeps
        # every exponent <= 0, so np.exp cannot overflow.
        shifted = inputs - np.max(inputs, axis=1, keepdims=True)
        exp_values = np.exp(shifted)
        normalizer = np.sum(exp_values, axis=1, keepdims=True)
        self.output = exp_values/normalizer

# Loss
class Loss:
    """Base class for losses.

    Subclasses provide ``forward(y_pred, y_true)`` returning one loss value
    per sample; ``calculate`` averages those values.
    """

    def calculate(self, output, y):
        """Return the mean of the per-sample losses for ``output`` against ``y``."""
        sample_losses = self.forward(output, y)
        data_loss = np.mean(sample_losses)
        return data_loss


class Loss_CategoricalCrossEntropy(Loss):
    """Categorical cross-entropy for sparse or one-hot encoded targets."""

    def forward(self, y_pred, y_true):
        """Return the negative log-likelihood of the true class for each sample.

        Args:
            y_pred: 2-D array of predicted probabilities, one row per sample
                and one column per class.
            y_true: either a 1-D array of class indices (sparse labels) or a
                2-D one-hot array with the same shape as ``y_pred``.

        Returns:
            1-D array with one loss value per sample.

        Raises:
            ValueError: if ``y_true`` is neither 1-D nor 2-D.
        """
        y_true = np.asarray(y_true)
        samples = len(y_pred)
        # Clip away exact 0 and 1 so the log below never sees zero.
        y_pred_clipped = np.clip(y_pred, 1e-7, 1-1e-7)
        if y_true.ndim == 1:
            # Sparse labels: pick the predicted probability of the true class.
            correct_confidences = y_pred_clipped[range(samples), y_true]
        elif y_true.ndim == 2:
            # One-hot labels: the mask zeroes every column but the true class.
            correct_confidences = np.sum(y_pred_clipped*y_true, axis=1)
        else:
            raise ValueError(
                "y_true must be 1-D (class indices) or 2-D (one-hot), "
                f"got {y_true.ndim} dimensions"
            )
        neg_log_likelihoods = -np.log(correct_confidences)
        return neg_log_likelihoods

# Hidden layer: dense projection followed by ReLU
denseLayer1 = Layer_Dense(2, 3)
activation1 = Activation_ReLU()

denseLayer1.forward(X)
activation1.forward(denseLayer1.output)

# Output layer: dense projection followed by softmax
denseLayer2 = Layer_Dense(3, 3)
activation2 = Activation_Softmax()

denseLayer2.forward(activation1.output)
activation2.forward(denseLayer2.output)

# One row per sample (300), one probability per class (3)
probabilities = activation2.output
print(probabilities.shape)

# Mean categorical cross-entropy over all samples
loss_function = Loss_CategoricalCrossEntropy()
loss = loss_function.calculate(probabilities, y)
print(loss)

# Accuracy: percentage of samples whose most probable class equals the label
predicted_classes = np.argmax(probabilities, axis=1)
accuracy = np.mean(predicted_classes == y)*100
print(f"Accuracy = {accuracy}")


(300, 3)
1.0983309
Accuracy = 33.666666666666664
