In [4]:
%matplotlib inline

# For interactivity
# %matplotlib widget

# All imports
from random import choice
import numpy as np
import matplotlib.pyplot as plt

# Set number of decimal places to show
np.set_printoptions(formatter={'float': '{:.4f}'.format})

In [1]:
"""
Live updating:
use display.clear_output(wait=True)
Make an array for figures
Save figure by doing plt.plot, then append, then plt.close
Then to live update, display each figure using clear_output and then display.display()
use plt.clf() to clear figure
Maybe can stream instead of saving all figs?
"""

"""
Helpful functions: np.concatenate
Can choose axis to concat along
"""

'\nHelpful functions: np.concatenate\nCan choose axis to concat along\n'

In [2]:
def sigmoid(x: float) -> float:
    """Returns the sigmoid

    Args:
        x (float): the input

    Returns:
        float: the sigmoid of the input
    """
    return 1./(1. + np.exp(-x))

def sigmoid_prime(x: float) -> float:
    """Return the derivative of the sigmoid function

    Args:
        x (float): the input

    Returns:
        float: the derivative of the sigmoid of the input
    """
    return x*(1.-x)

def tanh(x: float) -> float:
    """Returns the hyperbolic tangent

    Args:
        x (float): input

    Returns:
        float: hyperbolic tangent of x
    """
    return ((np.exp(x) - np.exp(-x))/(np.exp(x) + np.exp(-x)))

def tanh_prime(x: float) -> float:
    """Returns the derivative of the hyperbolic tangent

    Args:
        x (float): input

    Returns:
        float: derivative of the hyperbolic tangent of x
    """
    g = tanh(x)
    return (1. - g**2)

In [None]:
class NeuralNetwork:
    def __init__(self, layers: list, activation: str='sigmoid', verbose=False):
        """Cosntructor for neural network

        Args:
            layers (list): number of inputs, hidden layers, and outputs as a list
            activation (str, optional): activation function to use. Only sigmoid and tanh are available. Defaults to 'signmoid'.
        """
        self.num_layers = len(layers)
        self.num_inputs = layers[0]
        self.num_outputs = layers[-1]
        self.num_hidden_layers = len(layers) - 1
        self.layers = layers
        if verbose:
            print(f"num_inputs = {self.num_inputs}\nnum_outputs = {self.num_outputs}\nlayers = {self.layers}")
        
        # Create random weights
        self.weights = []
        for i in range(self.num_layers - 1):
            # Add bias weight for each neuron if not last layer
            if i == self.num_layers - 1:
                curr_weights = np.random.rand(layers[i], layers[i+1])
            else:
                curr_weights = np.random.rand(layers[i] + 1, layers[i+1])
            self.weights.append(curr_weights)

        if verbose:
            for layer, weights in enumerate(self.weights):
                print(f"weights[{layer}]:\n{weights}")
                print(f"weights[{layer}].shape: {weights.shape}")
                print()
        
        if activation == "sigmoid":
            self.activation = sigmoid
            self.activation_derivative = sigmoid_prime
        elif activation == "tanh":
            self.activation = tanh
            self.activation_derivative = tanh_prime
        else:
            raise Exception("Activation function must be 'sigmoid' or 'tanh'")

        # Array to store post-activations for each layer; for n layers, there are n - 1 activations, as layer n - 1, the output layer,
        # does not have an activation
        self.activations = [np.array([]) for _ in range(self.num_layers - 1)]
        # Array to store inputs for each layer. Last layer's 'input' is the activation of the last hidden layer
        self.inputs = [np.array([]) for _ in range(self.num_layers)]
        self.errors = []
        self.is_trained = False

    def forward_prop(self, X, verbose=False):
        # Add to inputs array
        X = np.atleast_2d(X) # Convert to 2D matrix
        # Add bias to first input
        X = np.concatenate((X, np.ones((1, 1))), axis=1)
        self.inputs[0] = X
        if verbose:
            print("inputs:", self.inputs[0])
        
        for layer, weight in enumerate(self.weights):
            curr_layer_input = self.inputs[layer]
            
            if verbose:
                print(f"layer {layer}, curr_input.shape: {curr_layer_input.shape}")
                print(f"Layer {layer}, curr_input: {curr_layer_input}")
                print(self.inputs)
            
            dot_product = np.dot(curr_layer_input, weight)
            z_curr_layer = np.atleast_2d(self.activation(dot_product)) # Apply activation function
            
            # Save activation of curr layer
            self.activations[layer] = z_curr_layer
            # Save input to next layer after adding bias if next layer is not output layer
            if not layer == self.num_layers - 2:
                next_layer_input = np.concatenate((z_curr_layer, np.ones((1, 1))), axis=1)
                self.inputs[layer + 1] = next_layer_input
            else:
                self.inputs[layer + 1] = z_curr_layer
        
        if verbose:
            print("activations:")
            print(self.activations)
            print("inputs:")
            print(self.inputs)
        # self.activations has length n - 1. activations[0] is the output of layer 0, so activations[n-2] is the 
        # output of the last hidden layer, aka the output of the NN. Layer n - 1 is the output layer and has no 
        # activations
        return self.activations[-1]
    
    def backprop(self, alpha: float = 0.02, verbose=False):
        """_summary_

        Args:
            alpha (float, optional): learning rate. Defaults to 0.02.
            verbose (bool, optional): whether to print out statements. Defaults to False.

        Returns:
            None
        """
        # Keep track of deltas, weight updates, and errors at each pass
        deltas = [np.array([]) for _ in range(self.num_hidden_layers)]
        weight_updates = [np.array([]) for _ in range(self.num_hidden_layers)]
        sample = np.random.choice(len(self.X))
        curr_sample = self.X[sample]
        curr_target = self.target[sample]
        if isinstance(curr_sample, np.ndarray) and not len(curr_sample) == self.num_inputs:
                raise Exception("Number of inputs must match those specified in architecture")
        if isinstance(curr_target, np.ndarray) and not len(curr_target) == self.num_outputs:
                raise Exception("Number of outputs must match those specified in architecture and be supplied as a list")

        z = self.forward_prop(curr_sample, False)
        if verbose:
            print(z)
        curr_target = np.atleast_2d(curr_target)
        error = curr_target - z
        self.errors.append(error)

        # output layer delta has diff formula
        delta_last = (curr_target - z)*self.activation_derivative(z)
        deltas[-1] = delta_last
        # self.inputs' last element is the NN output, so one before that is the input for the last hidden layer
        delta_weights_output = np.dot(self.inputs[self.num_layers - 2].T, delta_last)*alpha
        weight_updates[-1] = delta_weights_output
        
        self.weights[-1] += delta_weights_output
        if verbose:
            print(f"Backprop for layer {self.num_hidden_layers - 1} done\n")
        # Start backprop-ing starting from layer (n-3), as layer (n-2) weights have been updated.
        for curr_layer in range(self.num_layers - 3, -1, -1):
            if verbose: 
                print(f"backprop from layer {curr_layer + 1} to {curr_layer} out of {self.num_layers} layers")
            
            # Drop the bias activation value from the next layer, so do not take input of next layer, 
            # just activation of current layer. This is z^(i)
            next_layer_activations = self.activations[curr_layer]
            next_layer_weights = self.weights[curr_layer + 1]
            # Drop the weights of the bias neuron from next layer
            next_layer_weights = next_layer_weights[:-1, :]
            
            delta_curr_layer = sigmoid_prime(next_layer_activations)*np.dot(deltas[curr_layer + 1], next_layer_weights.T)
            deltas[curr_layer] = delta_curr_layer
            curr_inputs = self.inputs[curr_layer]
            weight_update_curr_layer = alpha*np.dot(curr_inputs.T, delta_curr_layer)
            weight_updates[curr_layer] = weight_update_curr_layer
            self.weights[curr_layer] += weight_update_curr_layer
            if verbose: 
                print(f"Backprop for layer {curr_layer} done")
                # print(f"Weight updates:\n{weight_update_curr_layer}")
                print()

        return 0

    def fit(self, X:np.array, y:np.array, learning_rate:float=0.2, steps:float=10**5, tolerance:float = 10**-2, verbose:bool = False):
        """_summary_

        Args:
            X (np.ndarray): _description_
            y (np.ndarray): _description_
            learning_rate (float, optional): _description_. Defaults to 0.2.
            steps (float, optional): _description_. Defaults to 10**5.
            tolerance (float, optional): _description_. Defaults to 10**-2.
            verbose (bool, optional): _description_. Defaults to False.
        """
        if not isinstance(X, np.ndarray) or not X.any():
            raise Exception("Input must be given")
        if not isinstance(y, np.ndarray) or not y.any():
            raise Exception("Target outputs must be supplied as a numpy array")
        if not len(X) == len(y):
            raise Exception(f"Training data length must match target data length {X.shape} != {y.shape}")
        self.X = X
        self.target = y
        self.is_trained = True
        old_weights = self.weights

        for iter in range(0, steps + 1):
            if (iter) % (int(steps*0.1)) == 0:
                print(f"Iteration {iter}/{steps}")
            self.backprop(learning_rate, verbose)

        return

    def find_RMS_error(self, X, y):
        """_summary_

        Args:
            X (_type_): _description_
            y (_type_): _description_
        """
        return

    def predict(self, x):
        """_summary_

        Args:
            x (_type_): _description_
        """
        return self.forward_prop(x, False)
    
    def predict_many(self, X):
        for sample in X:
            print(self.predict(sample))

    def visual_NN_boundaries(self, Nsamp=2000):
        """_summary_

        Args:
            Nsamp (int, optional): _description_. Defaults to 2000.
        """
        return
    

In [None]:
test = NeuralNetwork([2, 2, 1], activation='sigmoid', verbose=False)
# test.forward_prop([0.3, 0.4, 0.5], True)
X = np.array([[0, 0],
            [0, 1],
            [1, 0],
            [1, 1]])
y = np.array([0, 1, 1, 0])
print(f"before train: {test.predict_many(X)}")

test.fit(X, 
         y,
         steps=2*10**5,
         verbose=False)
print(f"after train: {test.predict_many(X)}")
# test = NeuralNetwork([5, 2, 2, 3, 1])

before train: [[0.7493]]
Iteration 0/200000
Iteration 100/200000
Iteration 200/200000
Iteration 300/200000
Iteration 400/200000
Iteration 500/200000
Iteration 600/200000
Iteration 700/200000
Iteration 800/200000
Iteration 900/200000
Iteration 1000/200000
Iteration 1100/200000
Iteration 1200/200000
Iteration 1300/200000
Iteration 1400/200000
Iteration 1500/200000
Iteration 1600/200000
Iteration 1700/200000
Iteration 1800/200000
Iteration 1900/200000
Iteration 2000/200000
Iteration 2100/200000
Iteration 2200/200000
Iteration 2300/200000
Iteration 2400/200000
Iteration 2500/200000
Iteration 2600/200000
Iteration 2700/200000
Iteration 2800/200000
Iteration 2900/200000
Iteration 3000/200000
Iteration 3100/200000
Iteration 3200/200000
Iteration 3300/200000
Iteration 3400/200000
Iteration 3500/200000
Iteration 3600/200000
Iteration 3700/200000
Iteration 3800/200000
Iteration 3900/200000
Iteration 4000/200000
Iteration 4100/200000
Iteration 4200/200000
Iteration 4300/200000
Iteration 4400/2000

In [30]:
# Random testing

# targ = np.array(([10], [0]))
# out = np.array(([10], [0]))
# out*(1-out)
w_test = np.arange(6).reshape((3, 2))
a_test = np.random.rand(3,1)
deltas = (10*np.ones(2)).reshape((2, 1))
deltas[1][0] *= 10
# deltas[2][0] *= 100
updates = np.dot(a_test, deltas.T)
print(a_test)
print(w_test)
print(updates)
updates + w_test

[[0.5138]
 [0.7215]
 [0.5805]]
[[0 1]
 [2 3]
 [4 5]]
[[5.1384 51.3837]
 [7.2154 72.1544]
 [5.8055 58.0549]]


array([[5.1384, 52.3837],
       [9.2154, 75.1544],
       [9.8055, 63.0549]])