## Neural Network example
Let's explore implementation of Neural Network!

In [1]:
from linear_algebra import Vector, dot

def step_function(x: float) -> float:
    return 1.0 if x >= 0 else 0.0

def perceptron_output(weights: Vector, bias: float, x: Vector) -> float:
    """Returns 1 if the perceptron 'fires', 0 if not"""
    calculation = dot(weights, x) + bias
    return step_function(calculation)

and_weights = [2., 2]
and_bias = -3.

assert perceptron_output(and_weights, and_bias, [1, 1]) == 1
assert perceptron_output(and_weights, and_bias, [0, 1]) == 0
assert perceptron_output(and_weights, and_bias, [1, 0]) == 0
assert perceptron_output(and_weights, and_bias, [0, 0]) == 0

or_weights = [2., 2]
or_bias = -1.

assert perceptron_output(or_weights, or_bias, [1, 1]) == 1
assert perceptron_output(or_weights, or_bias, [0, 1]) == 1
assert perceptron_output(or_weights, or_bias, [1, 0]) == 1
assert perceptron_output(or_weights, or_bias, [0, 0]) == 0

not_weights = [-2.]
not_bias = 1.

assert perceptron_output(not_weights, not_bias, [0]) == 1
assert perceptron_output(not_weights, not_bias, [1]) == 0

### Sigmoid function and feedforward


In [63]:
import math

def sigmoid(t: float) -> float:
    return 1 / (1 + math.exp(-t))

def neuron_output(weights: Vector, inputs: Vector) -> float:
    # weights includes the bias term, inputs includes a 1
    return sigmoid(dot(weights, inputs))

from typing import List

def feed_forward(neural_network: List[List[Vector]],
                 input_vector: Vector) -> List[Vector]:
    """
    Feeds the input vector through the neural network.
    Returns the outputs of all layers (not just the last one).
    """
    outputs: List[Vector] = []
    
    for layer in neural_network:
        input_with_bias = input_vector + [1]              # Add a constant.
        output = [neuron_output(neuron, input_with_bias)  # Compute the output
                  for neuron in layer]                    # for each neuron.
        outputs.append(output)                            # Add to results.

        # Then the input to the next layer is the output of this one
        input_vector = output

    return outputs


### Example
Build the XOR gate that we couldn’t build with a single perceptron. 

In [3]:
# and_gate = min
# or_gate = max
# xor_gate = lambda x, y: 0 if x == y else 1 

xor_network = [# hidden layer
               [[20., 20, -30],      # 'and' neuron
                [20., 20, -10]],     # 'or'  neuron
               # output layer
               [[-60., 60, -30]]]    # '2nd input but not 1st input' neuron

# feed_forward returns the outputs of all layers, so the [-1] gets the
# final output, and the [0] gets the value out of the resulting vector
assert 0.000 < feed_forward(xor_network, [0, 0])[-1][0] < 0.001
assert 0.999 < feed_forward(xor_network, [1, 0])[-1][0] < 1.000
assert 0.999 < feed_forward(xor_network, [0, 1])[-1][0] < 1.000
assert 0.000 < feed_forward(xor_network, [1, 1])[-1][0] < 0.001

In [1]:
from IPython.display import Image
from IPython.core.display import HTML 
Image(url= "https://learning.oreilly.com/library/view/data-science-from/9781492041122/assets/dsf2_1803.png")


### Backpropagation
we use data to train neural networks. The typical approach is an algorithm called backpropagation, which uses gradient descent or one of its variants.

In [4]:
def sqerror_gradients(network: List[List[Vector]],
                      input_vector: Vector,
                      target_vector: Vector) -> List[List[Vector]]:
    """
    Given a neural network, an input vector, and a target vector,
    make a prediction and compute the gradient of the squared error
    loss with respect to the neuron weights.
    """
    # forward pass
    hidden_outputs, outputs = feed_forward(network, input_vector)

    # gradients with respect to output neuron pre-activation outputs
    output_deltas = [output * (1 - output) * (output - target)
                     for output, target in zip(outputs, target_vector)]

    # gradients with respect to output neuron weights
    output_grads = [[output_deltas[i] * hidden_output
                     for hidden_output in hidden_outputs + [1]]
                    for i, output_neuron in enumerate(network[-1])]

    # gradients with respect to hidden neuron pre-activation outputs
    hidden_deltas = [hidden_output * (1 - hidden_output) *
                         dot(output_deltas, [n[i] for n in network[-1]])
                     for i, hidden_output in enumerate(hidden_outputs)]

    # gradients with respect to hidden neuron weights
    hidden_grads = [[hidden_deltas[i] * input for input in input_vector + [1]]
                    for i, hidden_neuron in enumerate(network[0])]

    return [hidden_grads, output_grads]

In [5]:
# We’ll start by generating the training data and initializing our neural network with random weights
import random
random.seed(0)

# training data
xs = [[0., 0], [0., 1], [1., 0], [1., 1]]
ys = [[0.], [1.], [1.], [0.]]

# start with random weights
network = [ # hidden layer: 2 inputs -> 2 outputs
            [[random.random() for _ in range(2 + 1)],   # 1st hidden neuron
             [random.random() for _ in range(2 + 1)]],  # 2nd hidden neuron
            # output layer: 2 inputs -> 1 output
            [[random.random() for _ in range(2 + 1)]]   # 1st output neuron
          ]


In [6]:
# As usual, we can train it using gradient descent
from gradient_descent import gradient_step
import tqdm

learning_rate = 1.0

for epoch in tqdm.trange(20000, desc="neural net for xor"):
    for x, y in zip(xs, ys):
        gradients = sqerror_gradients(network, x, y)

        # Take a gradient step for each neuron in each layer
        network = [[gradient_step(neuron, grad, -learning_rate)
                    for neuron, grad in zip(layer, layer_grad)]
                   for layer, layer_grad in zip(network, gradients)]

# check that it learned XOR
assert feed_forward(network, [0, 0])[-1][0] < 0.01
assert feed_forward(network, [0, 1])[-1][0] > 0.99
assert feed_forward(network, [1, 0])[-1][0] > 0.99
assert feed_forward(network, [1, 1])[-1][0] < 0.01

neural net for xor: 100%|█████████████████████████████████████████████████████| 20000/20000 [00:01<00:00, 13955.52it/s]


### Example: Fizz Buzz
Print the numbers 1 to 100, except that if the number is divisible
by 3, print "fizz"; if the number is divisible by 5, print "buzz";
and if the number is divisible by 15, print "fizzbuzz".

In [7]:
def fizz_buzz_encode(x: int) -> Vector:
    if x % 15 == 0:
        return [0, 0, 0, 1]
    elif x % 5 == 0:
        return [0, 0, 1, 0]
    elif x % 3 == 0:
        return [0, 1, 0, 0]
    else:
        return [1, 0, 0, 0]

assert fizz_buzz_encode(2) == [1, 0, 0, 0]
assert fizz_buzz_encode(6) == [0, 1, 0, 0]
assert fizz_buzz_encode(10) == [0, 0, 1, 0]
assert fizz_buzz_encode(30) == [0, 0, 0, 1]

In [19]:
def binary_encode(x: int) -> Vector:
    binary: List[float] = []

    for i in range(10):
        binary.append(x % 2)
        x = x // 2

    return binary

#                             1  2  4  8 16 32 64 128 256 512
assert binary_encode(0)   == [0, 0, 0, 0, 0, 0, 0, 0,  0,  0]
assert binary_encode(1)   == [1, 0, 0, 0, 0, 0, 0, 0,  0,  0]
assert binary_encode(10)  == [0, 1, 0, 1, 0, 0, 0, 0,  0,  0]
assert binary_encode(101) == [1, 0, 1, 0, 0, 1, 1, 0,  0,  0]
assert binary_encode(999) == [1, 1, 1, 0, 0, 1, 1, 1,  1,  1]

In [48]:
xs = [binary_encode(n) for n in range(101, 1024)]
ys = [fizz_buzz_encode(n) for n in range(101, 1024)]

In [8]:
# let’s create a neural network with random initial weights
NUM_HIDDEN = 25

network = [
    # hidden layer: 10 inputs -> NUM_HIDDEN outputs
    [[random.random() for _ in range(10 + 1)] for _ in range(NUM_HIDDEN)],

    # output_layer: NUM_HIDDEN inputs -> 4 outputs
    [[random.random() for _ in range(NUM_HIDDEN + 1)] for _ in range(4)]
]

In [13]:
# training
from linear_algebra import squared_distance

learning_rate = 1.0

with tqdm.trange(500) as t:
    for epoch in t:
        epoch_loss = 0.0

        for x, y in zip(xs, ys):
            predicted = feed_forward(network, x)[-1]
            epoch_loss += squared_distance(predicted, y)
            gradients = sqerror_gradients(network, x, y)

            # Take a gradient step for each neuron in each layer
            network = [[gradient_step(neuron, grad, -learning_rate)
                        for neuron, grad in zip(layer, layer_grad)]
                    for layer, layer_grad in zip(network, gradients)]

        t.set_description(f"fizz buzz (loss: {epoch_loss:.2f})")

fizz buzz (loss: 29.44): 100%|███████████████████████████████████████████████████████| 500/500 [01:53<00:00,  4.40it/s]


In [9]:
# Our network will produce a four-dimensional vector of numbers, but we want a single prediction. 
#We’ll do that by taking the argmax, which is the index of the largest value:
def argmax(xs: list) -> int:
    """Returns the index of the largest value"""
    return max(range(len(xs)), key=lambda i: xs[i])

assert argmax([0, -1]) == 0               # items[0] is largest
assert argmax([-1, 0]) == 1               # items[1] is largest
assert argmax([-1, 10, 5, 20, -3]) == 3   # items[3] is largest

In [47]:
# finally we will solve this problem now:
num_correct = 0

for n in range(1, 101):
    x = binary_encode(n)
    predicted = argmax(feed_forward(network, x)[-1])
    actual = argmax(fizz_buzz_encode(n))
    labels = [str(n), "fizz", "buzz", "fizzbuzz"]
    print(n, labels[predicted], labels[actual])

    if predicted == actual:
        num_correct += 1

#print(num_correct, "/", 100)

[0.23615812252723245, 0.6544816976593077, 0.7428466359173387, 0.8871393899936986, 0.6834174826949725, 0.847209593846943, 0.784479914051605, 0.16071613819910335, 0.043692765840927295, 0.7387786394375336, 0.525918898422267, 0.9978655049817808, 0.1648905413828592]
[0.3852699376295755, 0.28778357569091084, 0.8786973176916403, 0.4836957619564889, 0.9136495815761022, 0.7071900029051184, 0.9988061683109193, 0.5997790159723878, 0.9761591529166639, 0.17340633728780475, 0.44168006118571757, 0.5783912042361588, 0.978295910581493]
[0.5678798241127215, 0.8652649809797757, 0.6285055588490285, 0.5124010252172735, 0.39144138423523445, 0.36863404796259336, 0.29521803737147234, 0.2113699708500314, 0.9625770244136284, 0.5364612161156107, 0.8658695364577362, 0.8849637610619134, 0.9422998611296344]
[0.238166806096774, 0.33772889244543136, 0.6331398796026679, 0.32205404522698333, 0.143928094536648, 0.7598592136554477, 0.5503918988479339, 0.5365230195570996, 0.7104710384938592, 0.11474025170264657, 0.9219090

AssertionError: vectors must be same length

### Home work practice in pair
1. Please explore and understand the decision tree theory and code<br>
2. (6 points) Please work on Iris dataset using neural network you learned in this lecture, and try to make predictions for:<br>
<br>['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
<br>[6.0, 3.0, 5.0, 0.6]
<br>[6.0, 3.0, 5.0, 1.6]
<br>[6.0, 3.0, 5.0, 2.6]
3. (4 points) Please explore neural network on sklearn and train it on Iris dataset and make predictions on the three samples above.

In [10]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
data = load_iris()

In [10]:
#print(data.data)
# print(data.target)

In [11]:
def inputRecord(inputV):
    resultVector = []
    for i in range(len(inputV)):
        if inputV[i] < 3:
            resultVector+=[0,0,1]
        elif inputV[i] < 6:
            resultVector+=[0,1,0]
        else:
            resultVector+=[1,0,0]
    return resultVector
print(inputRecord([6.8,3.2,5.9,2.3]))

def outputRecord(label):
    if label == 0:
        return [0,0,1]
    elif label==1:
        return [0,1,0]
    else:
        return [1,0,0]


[1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1]


In [59]:
nn = []
for i in data.data:
    # print(inputRecord(i))
    nn.append(inputRecord(i))
print(nn)

[[0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1], [0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1], [0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1], [0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1], [0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1], [0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1], [0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1], [0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1], [0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1], [0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1], [0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1], [0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1], [0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1], [0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1], [0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1], [0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1], [0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1], [0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1], [0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1], [0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1], [0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1], [0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1], [0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1], [0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1], [0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1], [0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1], [0, 1, 0, 0

In [46]:
nny = []
for i in data.target:
    #print(outputRecord(i))
    nny.append(outputRecord(i))

In [60]:
X = nn
y = nny

X_preprocessed = X
y_preprocessed = y
print(X_preprocessed[0])

[0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1]


In [61]:
X_train, X_test, y_train, y_test = train_test_split(X_preprocessed, y_preprocessed, test_size=0.2, random_state=42)
NUM_HIDDEN = 13
network = [
    # Hidden layer: 12 inputs (4 features * 3 categories) -> NUM_HIDDEN outputs
    [[random.random() for _ in range(12 + 1)] for _ in range(NUM_HIDDEN)],

    # Output layer: NUM_HIDDEN inputs -> 3 outputs
    [[random.random() for _ in range(NUM_HIDDEN + 1)] for _ in range(3)]
]
print(len(network[0]))

13


In [38]:
def feed_forward(neural_network: List[List[Vector]],
                 input_vector: Vector) -> List[Vector]:
    """
    Feeds the input vector through the neural network.
    Returns the outputs of all layers (not just the last one).
    """
    outputs: List[Vector] = []

    for layer in neural_network:
        input_with_bias = input_vector + [1]              # Add a constant.
        print("Input vector length:", len(input_with_bias))  # Print input vector length
        output = [neuron_output(neuron, input_with_bias)  # Compute the output
                  for neuron in layer]                    # for each neuron.
        outputs.append(output)                            # Add to results.

        # Then the input to the next layer is the output of this one
        input_vector = output

    return outputs


In [67]:
from tqdm import tqdm
learning_rate = 0.1
epochs = 100

for _ in tqdm(range(epochs), desc="Training Neural Network"):
    for x, y in zip(X_train, y_train):
        gradients = sqerror_gradients(network, x, y)
        network = [[gradient_step(neuron, grad, -learning_rate) for neuron, grad in zip(layer, layer_grad)] for layer, layer_grad in zip(network, gradients)]


Training Neural Network: 100%|███████████████████████████████████████████████████████| 100/100 [00:01<00:00, 89.69it/s]


In [69]:
input_lists = [
    [6.0, 3.0, 5.0, 0.6],
    [6.0, 3.0, 5.0, 1.6],
    [3.0, 3.0, 5.0, 2.6]
]

predictions = []
for input_list in input_lists:
    input_vector = inputRecord(input_list)
    output = feed_forward(network, input_vector)[-1]
    predictions.append(output)

print("Predictions for the provided input lists:")
for input_list, prediction in zip(input_lists, predictions):
    print("Input:", input_list)
    print("Prediction:", prediction)

Predictions for the provided input lists:
Input: [6.0, 3.0, 5.0, 0.6]
Prediction: [0.659326294127269, 0.355811692958883, 0.0025162869212524705]
Input: [6.0, 3.0, 5.0, 1.6]
Prediction: [0.659326294127269, 0.355811692958883, 0.0025162869212524705]
Input: [3.0, 3.0, 5.0, 2.6]
Prediction: [0.16025740513433614, 0.8368539776889957, 0.01395730982170832]
