# Neural Network w/ backpropagation in Python from scratch

Lecture: https://www.youtube.com/watch?v=59Hbtz7XgjM
Post: https://cs231n.github.io/optimization-2/
Post: https://mattmazur.com/2015/03/17/a-step-by-step-backpropagation-example/
Video: https://www.youtube.com/watch?v=4shguqlkTDM
Code Inspiration: https://github.com/yacineMahdid/artificial-intelligence-and-machine-learning/blob/master/deep-learning-from-scratch-python/multi_layer_perceptron.ipynb (different data, try out different activations - sigmoid, ReLu, tanh)
Code Inspiration 2: https://machinelearningmastery.com/implement-backpropagation-algorithm-scratch-python/

### Functional Implementation

In [1]:
import numpy as np
from csv import reader

In [2]:
# Toy two-class dataset used throughout the functional implementation.
# Every row is [feature_1, feature_2, label]; the trailing label (0 or 1)
# is the class index the network is trained to predict.
train = [[2.7810836,2.550537003, 0],
           [1.465489372,2.362125076, 0],
           [3.396561688,4.400293529, 0],
           [1.38807019,1.850220317, 0],
           [3.06407232,3.005305973, 0],
           [7.627531214,2.759262235, 1],
           [5.332441248,2.088626775, 1],
           [6.922596716,1.77106367, 1],
           [8.675418651,-0.242068655, 1],
           [7.673756466,3.508563011, 1]]

In [3]:
# 1. Initialize network with weights

# Architecture of the toy network (module-level settings).
n_hidden = 1 # number of hidden layers
n_inputs = len(train[0][:-1]) # number of features (every column of a row except the trailing label)
n_hidden_neurons = [1, 1] # neurons per hidden layer; init_network reads one entry per hidden layer, so only the first is used here
n_outputs = 2 # number of possible outputs to be predicted (one output neuron per class)

def init_network(n_inputs, n_hidden, n_hidden_neurons, n_outputs):
    """Build a fully connected network with randomly initialised parameters.

    The network is a list of layers, each layer a list of neurons, and each
    neuron a dict whose 'params' entry holds one weight per incoming value
    followed by a single trailing bias.

    Parameters:
        n_inputs: number of features fed to the first layer.
        n_hidden: number of hidden layers to create.
        n_hidden_neurons: sequence with the neuron count of each hidden layer;
            entries 0 .. n_hidden-1 are read (IndexError if it is too short).
        n_outputs: number of neurons in the output layer.

    Returns:
        The list of layers: the hidden layers in order, then the output layer.
    """

    def _make_layer(n_neurons, n_incoming):
        # One weight per incoming value + 1 bias, each drawn uniformly from [0, 1).
        return [{'params': [np.random.rand() for _ in range(n_incoming + 1)]}
                for _ in range(n_neurons)]

    network = []

    # Number of values feeding the layer being built: the raw features for the
    # first layer, afterwards the neuron count of the layer just created.
    n_incoming = n_inputs
    for layer_idx in range(n_hidden):
        layer_width = n_hidden_neurons[layer_idx]
        network.append(_make_layer(layer_width, n_incoming))
        n_incoming = layer_width

    # With n_hidden == 0 the output layer is connected straight to the input
    # features rather than to a non-existent hidden layer.
    network.append(_make_layer(n_outputs, n_incoming))

    return network

def print_layers(network):
    """Print every layer of `network` under a heading.

    The last layer is printed under 'OUTPUT LAYER'; every earlier layer is
    printed under 'HIDDEN LAYER <k>' (1-based) followed by a spacer line.
    The layer roles are derived from the network itself, so the output is
    correct for any network regardless of module-level settings.

    Parameters:
        network: list of layers as produced by init_network.
    """
    output_idx = len(network) - 1
    for idx, layer in enumerate(network):
        if idx < output_idx:
            print(f'HIDDEN LAYER {idx+1}')
            print(layer)
            print(' ')
        else:
            print('OUTPUT LAYER')
            print(layer)
        
# Build the demo network from the module-level settings and show its freshly
# initialised parameters (values differ on every run: they are drawn at random).
network = init_network(n_inputs, n_hidden, n_hidden_neurons, n_outputs)

print_layers(network)

HIDDEN LAYER 1
[{'params': [0.8485114470477703, 0.17374294525312173, 0.49150058797919804]}]
 
OUTPUT LAYER
[{'params': [0.6996503849546813, 0.7626411539657336]}, {'params': [0.9528642831796602, 0.005538027976710769]}]


In [4]:
# Activation functions
def sigmoid(output):
    """Logistic activation: squash `output` into the open interval (0, 1)."""
    denominator = 1.0 + np.exp(-output)
    return 1.0 / denominator

def ReLu(output):
    """Rectified linear activation: pass positive values through, clamp the rest to 0."""
    if output > 0:
        return output
    return 0

def tanh(output):
    """Hyperbolic tangent activation: squash `output` into [-1, 1].

    Delegates to np.tanh, which stays finite for large |output|; the explicit
    (e^x - e^-x) / (e^x + e^-x) ratio overflows to inf/inf = nan there.
    """
    return np.tanh(output)

# Names of the activation functions defined above.
activation_functions = ('sigmoid', 'ReLu', 'tanh')

In [5]:
# 2. Forward propagate

# Calculates the output of a single neuron -> (weights * inputs) + bias
def calc_neuron_output(params, inputs):
    """Return the linear (pre-activation) output of one neuron.

    `params` holds the neuron's weights followed by its bias as the last
    element. The result is bias + sum(weight_k * inputs[k]); entries of
    `inputs` beyond the number of weights are ignored.
    """
    total = params[-1]  # start from the bias
    for position, weight in enumerate(params[:-1]):
        total += weight * inputs[position]
    return total

def forward_propagate(network, inputs):
    """Run `inputs` through every layer of `network` and return the final activations.

    Each neuron's sigmoid-activated output is also stored on the neuron under
    'output_activated' so later steps can reuse it.

    Parameters:
        network: list of layers, each a list of neuron dicts with a 'params' entry.
        inputs: the values fed to the first layer.

    Returns:
        List with the activated output of every neuron in the last layer
        (or `inputs` unchanged when the network has no layers).
    """
    signal = inputs

    for layer in network:
        # Activated outputs of this layer: the input of the next layer, or the
        # model's outputs when this is the last layer.
        activations = []

        for unit in layer:
            pre_activation = calc_neuron_output(unit['params'], signal)  # linear output
            activated = sigmoid(pre_activation)  # sigmoid activation of linear output
            unit['output_activated'] = activated
            activations.append(activated)

        signal = activations

    return signal

# Forward pass on the first training row, with its trailing label stripped off.
forward_propagate(network, train[0][:-1])

[0.808031516804913, 0.715922515183487]

In [6]:
# Derivatives of activation functions
def d_sigmoid(s):
    """Derivative of the sigmoid, expressed in terms of the sigmoid's output `s`."""
    complement = 1 - s
    return s * complement

def d_ReLu(r):
    """Derivative of ReLU: 1 for strictly positive `r`, otherwise 0."""
    if r > 0:
        return 1
    return 0

def d_tanh(t):
    """Derivative of tanh, expressed in terms of the tanh output `t`."""
    squared = t ** 2
    return 1 - squared

In [7]:
# After the forward pass every neuron also carries its 'output_activated' value.
print_layers(network)

HIDDEN LAYER 1
[{'params': [0.8485114470477703, 0.17374294525312173, 0.49150058797919804], 'output_activated': 0.9642368756174116}]
 
OUTPUT LAYER
[{'params': [0.6996503849546813, 0.7626411539657336], 'output_activated': 0.808031516804913}, {'params': [0.9528642831796602, 0.005538027976710769], 'output_activated': 0.715922515183487}]


In [8]:
# 3. Back propagate error

# delta (error) for neuron in output layer = (y_pred - y_expected) * d_actv
# delta (error) for neuron in hidden layers = sum(each connected weight in the layer above * that neuron's delta) * d_actv

# i is the index of the network layer currently being processed
# expected_output holds the expected outputs of the network (one per neuron in the output layer),
# one-hot encoded: [1,0] for class 0, [0,1] for class 1 (the class is the index where the 1 is)

def backpropagate(network, i, expected_output):
    """Compute the error term ('delta') of every neuron from layer `i` to the output layer.

    Requires a preceding forward pass: every neuron must already hold its
    'output_activated' value. Results are stored in place under
    neuron['delta']; nothing is returned.

    The output layer is taken to be the last layer of `network`, so the
    function works for a network of any depth.

    Parameters:
        network: list of layers, each a list of neuron dicts.
        i: index of the first layer to process; pass 0 to cover the whole network.
        expected_output: expected value of each output neuron, in order.
    """
    output_idx = len(network) - 1

    # Base case -- backpropagation starts in the output layer
    if i == output_idx:
        for n, neuron in enumerate(network[i]):
            # error of output neuron n = prediction - expectation
            error = neuron['output_activated'] - expected_output[n]
            neuron['delta'] = error * d_sigmoid(neuron['output_activated'])
        return

    # The deltas of the layer above are needed first, so recurse before
    # handling layer i.
    backpropagate(network, i + 1, expected_output)

    for n, neuron in enumerate(network[i]):  # each neuron of hidden layer i
        error = 0.0
        for top_neuron in network[i + 1]:  # each neuron in the layer above
            # (weight the layer above applied to this neuron's output) * (that neuron's delta)
            error += top_neuron['params'][n] * top_neuron['delta']
        neuron['delta'] = error * d_sigmoid(neuron['output_activated'])
            
# Backpropagate through the whole network (starting at layer 0) for expected output [1,0].
backpropagate(network, 0, [1,0])

In [9]:
# After backpropagation every neuron also carries its 'delta' value.
print_layers(network)

HIDDEN LAYER 1
[{'params': [0.8485114470477703, 0.17374294525312173, 0.49150058797919804], 'output_activated': 0.9642368756174116, 'delta': 0.004065871017771854}]
 
OUTPUT LAYER
[{'params': [0.6996503849546813, 0.7626411539657336], 'output_activated': 0.808031516804913, 'delta': -0.029777495474596658}, {'params': [0.9528642831796602, 0.005538027976710769], 'output_activated': 0.715922515183487, 'delta': 0.14560250801902794}]


In [10]:
# 4. Train network

def update_weights(network, row, lr):
    """Apply one gradient-descent step to every neuron's parameters, in place.

    Requires each neuron to hold 'delta', and each neuron of every layer but
    the last to hold 'output_activated'.

    Parameters:
        network: list of layers, each a list of neuron dicts.
        row: one training row; all entries except the last are fed to the first layer.
        lr: learning rate scaling each update.
    """
    for depth, layer in enumerate(network):
        if depth == 0:
            # the first layer is fed by the training inputs
            layer_inputs = row[:-1]
        else:
            # later layers are fed by the activated outputs of the previous layer
            layer_inputs = [prev['output_activated'] for prev in network[depth - 1]]

        for unit in layer:
            step = lr * unit['delta']
            weights = unit['params']
            for position, value in enumerate(layer_inputs):
                weights[position] -= step * value  # weight update
            weights[-1] -= step  # bias update

def train_network(network, training_data, lr, n_epochs):
    """Train `network` in place with per-row (stochastic) gradient descent.

    For every row: forward pass, one-hot encode the label, accumulate the
    squared error, backpropagate and update the weights. The summed squared
    error of an epoch is printed every 10th epoch.

    Parameters:
        network: list of layers as produced by init_network.
        training_data: iterable of rows shaped [feature, ..., label], where the
            label is the index of the expected class.
        lr: learning rate.
        n_epochs: number of passes over `training_data`.
    """
    for n_epoch in range(n_epochs):
        sse = 0.0

        for row in training_data:
            # Feed only the features; the trailing label is not a network input.
            output = forward_propagate(network, row[:-1])

            # One-hot target: label 1 -> [0, 1], label 0 -> [1, 0]. int() lets
            # labels stored as e.g. 1.0 be used as an index as well.
            expected = [0] * len(output)
            expected[int(row[-1])] = 1

            sse += sum([(target - predicted) ** 2
                        for target, predicted in zip(expected, output)])
            backpropagate(network, 0, expected)
            update_weights(network, row, lr)

        if n_epoch % 10 == 0:
            print('>epoch=%d, error=%.3f' % (n_epoch, sse))

In [11]:
def backprop(training_data, lr, n_epochs, n_inputs, n_hidden, n_hidden_neurons, n_outputs):
    """Create a freshly initialised network, train it on `training_data`, and return it.

    The first three arguments are forwarded to train_network and the last
    four to init_network.
    """
    fresh_network = init_network(
        n_inputs=n_inputs,
        n_hidden=n_hidden,
        n_hidden_neurons=n_hidden_neurons,
        n_outputs=n_outputs,
    )
    train_network(
        network=fresh_network,
        training_data=training_data,
        lr=lr,
        n_epochs=n_epochs,
    )
    return fresh_network
    
# Hyper-parameters for the toy dataset run.
lr = 0.2 # learning rate
training_data = train
n_epochs = 500 # passes over the training data

# Build and train a network using the architecture settings defined earlier.
model = backprop(training_data, lr, n_epochs, n_inputs, n_hidden, n_hidden_neurons, n_outputs)

>epoch=0, error=6.361
>epoch=10, error=5.136
>epoch=20, error=5.118
>epoch=30, error=5.117
>epoch=40, error=5.116
>epoch=50, error=5.114
>epoch=60, error=5.112
>epoch=70, error=5.109
>epoch=80, error=5.105
>epoch=90, error=5.099
>epoch=100, error=5.090
>epoch=110, error=5.071
>epoch=120, error=5.012
>epoch=130, error=4.658
>epoch=140, error=3.685
>epoch=150, error=2.620
>epoch=160, error=1.858
>epoch=170, error=1.371
>epoch=180, error=1.056
>epoch=190, error=0.845
>epoch=200, error=0.697
>epoch=210, error=0.589
>epoch=220, error=0.507
>epoch=230, error=0.444
>epoch=240, error=0.394
>epoch=250, error=0.353
>epoch=260, error=0.319
>epoch=270, error=0.291
>epoch=280, error=0.267
>epoch=290, error=0.247
>epoch=300, error=0.229
>epoch=310, error=0.214
>epoch=320, error=0.200
>epoch=330, error=0.188
>epoch=340, error=0.177
>epoch=350, error=0.167
>epoch=360, error=0.159
>epoch=370, error=0.151
>epoch=380, error=0.144
>epoch=390, error=0.137
>epoch=400, error=0.131
>epoch=410, error=0.126
>ep

In [12]:
# 5. Predict
def predict(model, train):
    """Return the predicted class index for every row of `train`.

    Parameters:
        model: trained network (list of layers).
        train: iterable of rows shaped [feature, ..., label]; the trailing
            label is stripped before the forward pass and otherwise ignored.

    Returns:
        List with one predicted class per row, in row order. The predicted
        class is the index of the output neuron with the highest activation.
    """
    preds = []

    # Iterate over the rows that were passed in, not over a module-level dataset.
    for row in train:
        outputs = forward_propagate(model, row[:-1])
        preds.append(outputs.index(max(outputs)))

    return preds

# Predict the class of every row of the toy dataset with the trained model.
predict(model, train)

[0, 0, 0, 0, 0, 1, 1, 1, 1, 1]

In [13]:
# Load a csv file
def load_csv(filename):
    """Read a CSV file and return its non-empty rows.

    Parameters:
        filename: path of the CSV file to read.

    Returns:
        List of rows, each a list of the row's fields as strings. Blank lines
        are skipped.
    """
    # newline='' is what the csv module requires of files handed to reader():
    # it keeps newlines embedded in quoted fields intact.
    with open(filename, 'r', newline='') as csv_file:
        return [row for row in reader(csv_file) if row]

In [14]:
# 6. Predict on fraud detection dataset - https://www.kaggle.com/datasets/whenamancodes/fraud-detection?resource=download

# Load the raw CSV; load_csv returns every field as a string.
ccard_data = load_csv('creditcard.csv')
ccard_data_mod = []
i = 0
for row in ccard_data: # keep columns 1 and 2 as features plus column 30 as label, convert from string to float/integer
    if i != 0: # row 0 is skipped -- presumably the header row; verify against the file
        new_row = row[1:3]
        for j in range(len(new_row)):
            new_row[j] = float(new_row[j])
        new_row.append(int(row[30])) # class label goes last, as the training code expects

        ccard_data_mod.append(new_row)
    i += 1

# First 8000 rows for training, the following 2000 for testing.
traincc = ccard_data_mod[:8000]
testcc = ccard_data_mod[8000:10000]

# NOTE: these assignments rebind the module-level `lr` and `training_data`.
lr = 0.1
training_data = traincc
n_epochscc = 500

n_hiddencc = 1 # number of hidden layers
n_inputscc = len(traincc[0][:-1]) # number of features
n_hidden_neuronscc = [1,1] # neurons per hidden layer; with one hidden layer only the first entry is read
n_outputscc = 2 # number of possible outputs to be predicted

modelcc = backprop(traincc, lr, n_epochscc, n_inputscc, n_hiddencc, n_hidden_neuronscc, n_outputscc)

>epoch=0, error=116.986
>epoch=10, error=50.194
>epoch=20, error=49.965
>epoch=30, error=49.867
>epoch=40, error=49.701
>epoch=50, error=49.284
>epoch=60, error=49.055
>epoch=70, error=48.903
>epoch=80, error=48.730
>epoch=90, error=48.488
>epoch=100, error=48.132
>epoch=110, error=47.580
>epoch=120, error=46.655
>epoch=130, error=44.975
>epoch=140, error=41.818
>epoch=150, error=36.870
>epoch=160, error=32.243
>epoch=170, error=29.271
>epoch=180, error=27.303
>epoch=190, error=25.972
>epoch=200, error=25.044
>epoch=210, error=24.366
>epoch=220, error=23.843
>epoch=230, error=23.423
>epoch=240, error=23.076
>epoch=250, error=22.784
>epoch=260, error=22.534
>epoch=270, error=22.317
>epoch=280, error=22.129
>epoch=290, error=21.963
>epoch=300, error=21.817
>epoch=310, error=21.687
>epoch=320, error=21.571
>epoch=330, error=21.467
>epoch=340, error=21.373
>epoch=350, error=21.288
>epoch=360, error=21.212
>epoch=370, error=21.142
>epoch=380, error=21.079
>epoch=390, error=21.021
>epoch=400

In [18]:
# Check accuracy in training data
preds = predict(modelcc, traincc)
confusion_matrix = {'TPs': 0, 'TNs': 0, 'FPs': 0, 'FNs': 0}

# Maps (actual label, predicted label) to the confusion-matrix cell it counts towards.
outcome_cell = {
    (1, 1): 'TPs',  # truly predicted positive
    (0, 1): 'FPs',  # falsely predicted positive
    (1, 0): 'FNs',  # falsely predicted negative
    (0, 0): 'TNs',  # truly predicted negative
}

for i, pred in enumerate(preds):
    label = traincc[i][-1]
    cell = outcome_cell.get((label, pred))
    if cell is not None:  # any other label/prediction pair is not counted
        confusion_matrix[cell] += 1

print(confusion_matrix)

# Make class for confusion matrix. In it have accuracy, precision, recall, false positive rate, false negative rate
# Create confusion matrix for the training set and calculate measures

# Create confusion matrix for the test set and calculate measures

{'TPs': 15, 'TNs': 7975, 'FPs': 0, 'FNs': 10}


In [16]:
# 7. Predict on titanic dataset - https://www.kaggle.com/competitions/titanic/data

### OOP Implementation