# Implementing a Neural Network

This is inspired by <https://pub.towardsai.net/building-neural-networks-from-scratch-with-python-code-and-math-in-detail-i-536fae5d7bbf>'s "case study" with corrections.

In [1]:
%matplotlib inline

In [2]:
import time
import io

import numpy
import pandas
import matplotlib

import torch

In [3]:
input_csv = """
observation,input1,input2,output
1,0,0,0
2,0,1,1
3,1,0,1
4,1,1,1
"""

dataset = pandas.read_csv(io.StringIO(input_csv), index_col="observation")
inputs = dataset.iloc[:,:-1].to_numpy().astype('float32')
ground_truth = dataset.iloc[:,-1].to_numpy().reshape(-1, 1).astype('float32')

In [4]:
print(inputs)
print(ground_truth)

[[0. 0.]
 [0. 1.]
 [1. 0.]
 [1. 1.]]
[[0.]
 [1.]
 [1.]
 [1.]]


In [5]:
LEARNING_RATE = 0.05
NUM_ITERATIONS = 10000

## By Hand

In [6]:
def linear(x, weights, bias):
    return numpy.dot(x, weights) + bias

def sigmoid(x):
    return 1.0 / (1.0 + numpy.exp(-x))

def d_sigmoid(x):
    y = sigmoid(x)
    return y * (1.0 - y)

In [7]:
weights = numpy.random.rand(inputs.shape[1], 1)
bias = numpy.random.rand(1)[0]

In [8]:
t0 = time.time()
for i in range(NUM_ITERATIONS):
    y = linear(inputs, weights, bias)
    f = sigmoid(y)
    
    error = numpy.abs(f - ground_truth)
    
    # calculate out partial derivatives for each input
    dE_df = error/(f - ground_truth)
    df_dy = d_sigmoid(y)
    dE_dy = dE_df * df_dy
    dE_dw = numpy.dot(inputs.T, dE_dy)  # dy_dw = x

    # update weights and biases - the error is the sum of error over each input
    weights -= LEARNING_RATE * dE_dw
    bias -= LEARNING_RATE * dE_dy.sum()

    if i % (NUM_ITERATIONS / 10) == 0:
        print("error at step {:5d}: {:10.2e}".format(i, error.sum()))

print("Final weights: {}".format(weights.flatten()))
print("Final bias:    {}".format(bias))
print("{:d} iterations took {:.1f} seconds".format(NUM_ITERATIONS, time.time() - t0))

error at step     0:   1.13e+00
error at step  1000:   2.64e-01
error at step  2000:   1.16e-01
error at step  3000:   7.28e-02
error at step  4000:   5.26e-02
error at step  5000:   4.10e-02
error at step  6000:   3.36e-02
error at step  7000:   2.84e-02
error at step  8000:   2.46e-02
error at step  9000:   2.17e-02
Final weights: [9.9616462 9.9616605]
Final bias:    -4.515347139703512
10000 iterations took 1.6 seconds


In [9]:
predicted_output = sigmoid(linear(inputs, weights, bias))
predicted_output = pandas.DataFrame(
    predicted_output,
    columns=["prediction"],
    index=dataset.index)

output = pandas.concat(
    (dataset, predicted_output),
    axis=1)
output['error'] = output['output'] - output['prediction']
output

Unnamed: 0_level_0,input1,input2,output,prediction,error
observation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,0,0,0,0.010821,-0.01082142
2,0,1,1,0.995706,0.004293658
3,1,0,1,0.995706,0.004293719
4,1,1,1,1.0,2.034268e-07


## PyTorch

In [10]:
# torch.manual_seed(0)

model = torch.nn.Sequential(
    torch.nn.Linear(inputs.shape[1], 1),
    torch.nn.Sigmoid())

print("Starting weights: {}".format(model[0].weight.flatten()))
print("Starting bias: {}".format(model[0].bias.flatten()))

Starting weights: tensor([-0.0930, -0.5388], grad_fn=<ViewBackward>)
Starting bias: Parameter containing:
tensor([-0.1633], requires_grad=True)


In [11]:
inputs_tensor = torch.from_numpy(inputs)
truth_tensor = torch.from_numpy(ground_truth.reshape(-1, 1))

loss = torch.nn.L1Loss(reduction='sum')

optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE)

model.train()
t0 = time.time()
for i in range(NUM_ITERATIONS):
    f = model(inputs_tensor)

    error = loss(f, truth_tensor)

    optimizer.zero_grad()

    error.backward()

    optimizer.step()

    if i % (NUM_ITERATIONS / 10) == 0:
        print("error at step {:5d}: {:10.2e}".format(i, error.sum()))

print("Final weights: {}".format(next(model.parameters()).detach().numpy().flatten()))
print("Final bias:    {}".format(list(model.parameters())[-1].item()))
print("{:d} iterations took {:.1f} seconds".format(NUM_ITERATIONS, time.time() - t0))

error at step     0:   2.38e+00
error at step  1000:   2.71e-01
error at step  2000:   1.18e-01
error at step  3000:   7.34e-02
error at step  4000:   5.29e-02
error at step  5000:   4.12e-02
error at step  6000:   3.37e-02
error at step  7000:   2.85e-02
error at step  8000:   2.47e-02
error at step  9000:   2.18e-02
Final weights: [9.957234 9.957111]
Final bias:    -4.513090133666992
10000 iterations took 8.1 seconds


In [12]:
model.eval()

predicted_output = model(inputs_tensor).detach().numpy()
predicted_output = pandas.DataFrame(
    predicted_output,
    columns=["prediction"],
    index=dataset.index)

output = pandas.concat(
    (dataset, predicted_output),
    axis=1)
output['error'] = output['output'] - output['prediction']
output

Unnamed: 0_level_0,input1,input2,output,prediction,error
observation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,0,0,0,0.010846,-0.01084561
2,0,1,1,0.995697,0.004303455
3,1,0,1,0.995697,0.004302979
4,1,1,1,1.0,2.384186e-07
