# History of ANNs
Let's start with perceptrons.

First, I'm going to way overcomplicate things by training my own single layer perceptron.
Then I will do the same thing using Scikit Learn.

In [133]:
import numpy as np

def heaviside_step_function(x):
    return 0 if x < 0 else 1

def sign_step_function(x):
    return -1 if x < 0 else 1 if x > 0 else 0

class SkylersPerceptron:
    def __init__(self, input_count, output_count):
        self.input_count = input_count
        self.output_count = output_count

        # vectorize the activation function so I can use it over the matrix
        self.activation_fn = np.vectorize(heaviside_step_function)

        # Initialize a [input_count x output_count] matrix with weights [-1, 1)
        # (Row_i, Col_j) is the weight from input i to output j.
        self.weights = np.random.rand(input_count, output_count)*2 - 1

        # Initialize a [1 x output_count] matrix with bias weights.
        self.bias = 1
        self.bias_weights = np.random.rand(output_count)*2 - 1

    def run(self, inputs: np.ndarray) -> np.ndarray:
        # Evaluate the perceptron for a single case
        # Inputs must be a [input_count] matrix.
        # Returns a [output_count] matrix of results.
        if (inputs.shape[0] != self.input_count):
            raise Exception(f"Input vector columns must match input count. Expected: {self.input_count} Received: {inputs.shape[0]}")
        
        activation = inputs.dot(self.weights) + (self.bias * self.bias_weights)
        output = self.activation_fn(activation)
        return output

    def train(self, inputs: np.ndarray, expected_outputs: np.ndarray, iters=1000, train_rate=0.01, debug=False)  -> None:
        # My first naive attempt at perceptron training.
        # Run a set number of iterations using Hebbian learning.
        # "Cells that fire together, wire together"
        def hebbian_next_weight(weight, input, activation, expected_activation, train_rate):
            if debug:
                print(f"   {weight=}; {input=}; {activation=}; {expected_activation=}; (delta={(input * (activation - expected_activation) * train_rate)})")
            return weight + (input * (expected_activation - activation) * train_rate)

        test_cases = inputs.shape[0]
        test_order = [x for x in range(test_cases)]
        np.random.shuffle(test_order)
        
        for epoch in range(iters):
            epoch_test_idx = test_order[epoch % test_cases]
            epoch_inputs = inputs.take(epoch_test_idx, axis=0)

            epoch_outputs = self.run(epoch_inputs)
            epoch_expected = expected_outputs.take(epoch_test_idx, axis=0)

            if debug:
                print(f"Epoch {epoch} ---------------------------------------------------")

            next_weights = np.array([[hebbian_next_weight(self.weights[i, j], epoch_inputs[i], epoch_outputs[j], epoch_expected[j], train_rate)
                            for j in range(self.output_count)]
                            for i in range(self.input_count)])
            
            next_bias_weights = np.array([hebbian_next_weight(self.bias_weights[j], self.bias, epoch_outputs[j], epoch_expected[j], train_rate)
                            for j in range(self.output_count)])
            
            if debug:
                print(f"[Test: {epoch_test_idx}; Inputs: {epoch_inputs}; Outputs: {epoch_outputs}; Expected: {epoch_expected}]\nNext Weights:\n{next_weights}\nNext Bias:\n{next_bias_weights}")

            self.weights = next_weights
            self.bias_weights = next_bias_weights

## Training on binary operators
For simple training I will use binary operators like AND / OR.

In [180]:
def train_and_evaluate_perceptron(bunch, iters=1000, train_rate=0.01, debug=False):
    name = bunch['name']
    data = bunch['data']
    target = bunch['target']

    test_cases = data.shape[0]
    input_count = data.shape[1]
    output_count = target.shape[1]
    perceptron = SkylersPerceptron(input_count, output_count)

    print(f"Training and Evaluating on \"{name}\"")
    print(f"Starting Weights:\n{perceptron.weights}\n{perceptron.bias_weights}")
    perceptron.train(data, target, iters, train_rate, debug)
    print(f"Ending Weights:\n{perceptron.weights}\n{perceptron.bias_weights}")
    print("")
    print("=== RESULTS ===")
    success_count = 0
    total_count = 0
    for test_case in range(test_cases):
        input = data.take(test_case, axis=0)
        expected_output = target.take(test_case, axis=0)
        output = perceptron.run(input)
        success = np.all(expected_output == output)
        total_count += 1
        success_count += 1 if success else 0
        print(f"[{test_case}] pass={success}; output={output}; target={expected_output}; input={input}")
    print(f"{success_count}/{total_count} passed")

In [181]:
bin_AND_data = {
    'name': 'Binary AND',
    'data': np.array([
        [0, 0],
        [0, 1],
        [1, 0],
        [1, 1],
        [1, 1],
        [1, 1],
    ]),
    'target': np.array([
        [0,],
        [0,],
        [0,],
        [1,],
        [1,],
        [1,],
    ]),
}

train_and_evaluate_perceptron(bin_AND_data)

Training and Evaluating on "Binary AND"
Starting Weights:
[[0.32987437]
 [0.40996836]]
[-0.55287398]
Ending Weights:
[[0.32987437]
 [0.40996836]]
[-0.55287398]

=== RESULTS ===
[0] pass=True; output=[0]; target=[0]; input=[0 0]
[1] pass=True; output=[0]; target=[0]; input=[0 1]
[2] pass=True; output=[0]; target=[0]; input=[1 0]
[3] pass=True; output=[1]; target=[1]; input=[1 1]
[4] pass=True; output=[1]; target=[1]; input=[1 1]
[5] pass=True; output=[1]; target=[1]; input=[1 1]
6/6 passed


In [182]:
bin_OR_data = {
    'name': 'Binary OR',
    'data': np.array([
        [0, 0],
        [0, 0],
        [0, 0],
        [0, 1],
        [1, 0],
        [1, 1],
    ]),
    'target': np.array([
        [0,],
        [0,],
        [0,],
        [1,],
        [1,],
        [1,],
    ]),
}

train_and_evaluate_perceptron(bin_OR_data)

Training and Evaluating on "Binary OR"
Starting Weights:
[[-0.25602411]
 [ 0.09834614]]
[-0.75367012]
Ending Weights:
[[0.18397589]
 [0.36834614]]
[-0.18367012]

=== RESULTS ===
[0] pass=True; output=[0]; target=[0]; input=[0 0]
[1] pass=True; output=[0]; target=[0]; input=[0 0]
[2] pass=True; output=[0]; target=[0]; input=[0 0]
[3] pass=True; output=[1]; target=[1]; input=[0 1]
[4] pass=True; output=[1]; target=[1]; input=[1 0]
[5] pass=True; output=[1]; target=[1]; input=[1 1]
6/6 passed


# Training on the Iris Dataset
The Iris dataset is a famous (and easy) classification problem.

We are using the Scikit learn dataset which is pretty convenient. It uses the `Bunch` class with the following keys:
* data - ndarray in shape of (150, 4) - meaning 150 cases and 4 attributes
* target - ndarray in shape of (150) - meaning the target classification for the 150 cases
* feature_names - the names of the 4 input features
* target_names - the names of the 3 output types

In [188]:
from sklearn.datasets import load_iris
from sklearn.linear_model import Perceptron

iris = load_iris()
iris['name'] = 'Scikit Learn - Iris Dataset'

# The target data format for the iris dataset is a 1D array like [0, ..., 1, ..., 2, ...] instead of a 2D array with the expected output activations.
# Convert it to a 2D array like [ [1, 0, 0], ..., [0, 1, 0], ..., [0, 0, 1], ...].
iris['target'] = np.array([[1 if i == classification else 0 for i in range(3)] for classification in iris['target']])

train_and_evaluate_perceptron(iris, iters=100000, train_rate=0.005)

Training and Evaluating on "Scikit Learn - Iris Dataset"
Starting Weights:
[[ 0.48035811  0.95240209 -0.34267849]
 [ 0.87191074 -0.52407851  0.77433375]
 [-0.46128075  0.00776199  0.74916115]
 [ 0.24276686  0.13071044  0.24010402]]
[-0.8698913   0.83311052 -0.39215664]
Ending Weights:
[[ 0.16735811 -0.03909791 -0.33817849]
 [ 0.72941074 -0.38757851 -0.67666625]
 [-0.69478075  0.19826199  0.59916115]
 [ 0.16576686 -0.55478956  1.44560402]]
[-0.9198913   1.05811052 -1.16715664]

=== RESULTS ===
[0] pass=True; output=[1 0 0]; target=[1 0 0]; input=[5.1 3.5 1.4 0.2]
[1] pass=True; output=[1 0 0]; target=[1 0 0]; input=[4.9 3.  1.4 0.2]
[2] pass=True; output=[1 0 0]; target=[1 0 0]; input=[4.7 3.2 1.3 0.2]
[3] pass=True; output=[1 0 0]; target=[1 0 0]; input=[4.6 3.1 1.5 0.2]
[4] pass=True; output=[1 0 0]; target=[1 0 0]; input=[5.  3.6 1.4 0.2]
[5] pass=True; output=[1 0 0]; target=[1 0 0]; input=[5.4 3.9 1.7 0.4]
[6] pass=True; output=[1 0 0]; target=[1 0 0]; input=[4.6 3.4 1.4 0.3]
[7] p

In [227]:
from sklearn.datasets import load_iris
from sklearn.linear_model import Perceptron
from sklearn.model_selection import train_test_split

iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.1)

per_clf = Perceptron()
per_clf.fit(X_train, y_train)

print("=== RESULTS ===")
test_cases = X_test.shape[0]
success_count = 0
total_count = 0
for test_case in range(test_cases):
    input = X_test.take(test_case, axis=0)
    expected_output = y_test.take(test_case, axis=0)
    output = per_clf.predict(input.reshape(1, -1))
    success = np.all(expected_output == output)
    total_count += 1
    success_count += 1 if success else 0
    print(f"[{test_case}] pass={success}; output={output}; target={expected_output}; input={input}")
print(f"{success_count}/{total_count} passed")

=== RESULTS ===
[0] pass=True; output=[0]; target=0; input=[4.6 3.4 1.4 0.3]
[1] pass=True; output=[2]; target=2; input=[5.8 2.7 5.1 1.9]
[2] pass=False; output=[0]; target=1; input=[6.6 3.  4.4 1.4]
[3] pass=False; output=[0]; target=1; input=[5.5 2.3 4.  1.3]
[4] pass=False; output=[0]; target=1; input=[5.6 3.  4.1 1.3]
[5] pass=False; output=[0]; target=1; input=[6.  2.2 4.  1. ]
[6] pass=True; output=[0]; target=0; input=[4.9 3.  1.4 0.2]
[7] pass=False; output=[2]; target=1; input=[6.1 2.9 4.7 1.4]
[8] pass=True; output=[2]; target=2; input=[4.9 2.5 4.5 1.7]
[9] pass=False; output=[2]; target=1; input=[5.9 3.2 4.8 1.8]
[10] pass=True; output=[2]; target=2; input=[6.2 3.4 5.4 2.3]
[11] pass=False; output=[0]; target=1; input=[4.9 2.4 3.3 1. ]
[12] pass=True; output=[2]; target=2; input=[6.4 3.1 5.5 1.8]
[13] pass=True; output=[0]; target=0; input=[5.1 3.8 1.6 0.2]
[14] pass=True; output=[0]; target=0; input=[5.1 3.8 1.5 0.3]
8/15 passed
