In [6]:
%matplotlib inline

# For interactivity
# %matplotlib widget

# All imports
from random import choice
import numpy as np
import matplotlib.pyplot as plt

# Set number of decimal places to show
np.set_printoptions(formatter={'float': '{:.4f}'.format})

In [7]:
"""
Live updating:
use display.clear_output(wait=True)
Make an array for figures
Save figure by doing plt.plot, then append, then plt.close
Then to live update, display each figure using clear_output and then display.display()
use plt.clf() to clear figure
Maybe can stream instead of saving all figs?
"""

"""
Helpful functions: np.concatenate
Can choose axis to concat along
"""

'\nHelpful functions: np.concatenate\nCan choose axis to concat along\n'

In [8]:
def sigmoid(x: float) -> float:
    """Returns the sigmoid

    Args:
        x (float): the input

    Returns:
        float: the sigmoid of the input
    """
    return 1./(1. + np.exp(-x))

def sigmoid_prime(x: float) -> float:
    """Return the derivative of the sigmoid function

    Args:
        x (float): the input

    Returns:
        float: the derivative of the sigmoid of the input
    """
    return x*(1.-x)

def tanh(x: float) -> float:
    """Returns the hyperbolic tangent

    Args:
        x (float): input

    Returns:
        float: hyperbolic tangent of x
    """
    return ((np.exp(x) - np.exp(-x))/(np.exp(x) + np.exp(-x)))

def tanh_prime(x: float) -> float:
    """Returns the derivative of the hyperbolic tangent

    Args:
        x (float): input

    Returns:
        float: derivative of the hyperbolic tangent of x
    """
    g = tanh(x)
    return (1. - g**2)

In [78]:
class NeuralNetwork:
    def __init__(self, layers: list, activation: str='sigmoid', verbose=False):
        """Cosntructor for neural network

        Args:
            layers (list): number of inputs, hidden layers, and outputs as a list
            activation (str, optional): activation function to use. Only sigmoid and tanh are available. Defaults to 'signmoid'.
        """
        self.num_layers = len(layers)
        self.num_inputs = layers[0]
        self.num_outputs = layers[-1]
        self.num_hidden_layers = len(layers) - 2
        self.layers = layers
        if verbose:
            print(f"num_inputs = {self.num_inputs}\nnum_outputs = {self.num_outputs}\nlayers = {self.layers}")
        
        # Create random weights
        self.weights = []
        for i in range(self.num_layers - 1):
            # Add bias weight for each neuron if not last layer
            if i == self.num_layers - 1:
                curr_weights = np.random.rand(layers[i], layers[i+1])
            else:
                curr_weights = np.random.rand(layers[i] + 1, layers[i+1])
            self.weights.append(curr_weights)

        if verbose:
            for layer, weights in enumerate(self.weights):
                print(f"weights[{layer}]:\n{weights}")
                print(f"weights[{layer}].shape: {weights.shape}")
                print()
        
        if activation == "sigmoid":
            self.activation = sigmoid
        elif activation == "tanh":
            self.activation = tanh
        else:
            raise Exception("Activation function must be 'sigmoid' or 'tanh'")

        self.activations = []
        self.inputs = []
        self.errors = []

    def forward_prop(self, X, verbose=False):
        # Add bias
        X = np.concatenate((X, np.ones(1))) # Add bias
        X = np.atleast_2d(X) # Convert to 2D matrix
        # post-activations array
        a_arr = [X]
        for layer, weight in enumerate(self.weights):
            # 1x4
            curr_layer_input = a_arr[layer]
            # print(f"layer {layer}, curr_input.shape: {curr_layer_input.shape}")
            if not (layer < self.num_layers - 2): # Add bias upto layer preceding output layer
                # curr_layer_input = np.concatenate((curr_layer_input, np.ones((1, 1))), axis=1)
                pass
            if verbose:
                print(f"Layer {layer}")
                print(curr_layer_input.shape)
                print(curr_layer_input[0].shape)
            # print(f"layer {layer}, curr_input.shape: {curr_layer_input.shape}")
            # weight = 4x2
            dot_product = np.dot(curr_layer_input, weight)
            z_layer = np.atleast_2d(self.activation(dot_product))
            if layer < self.num_layers - 3:
                # print(f"layer {layer}, z_layer.shape: {z_layer.shape}")
                z_layer = np.concatenate((z_layer, np.ones((1, 1))), axis=1)
                # print(f"layer {layer}, z_layer.shape: {z_layer.shape}")
            a_arr.append(z_layer)
        if verbose: 
            print(a_arr)
        self.activations = a_arr # Save activations at each layer
        return a_arr[-1]
    
    def backprop(self, X: list, y: list, alpha: float = 0.02, verbose=False):
        # Keep track of deltas, weight updates, and errors at each pass
        deltas = []
        weight_updates = []
        
        z = self.forward_prop(X, False)
        if verbose:
            print(z)
        y = np.atleast_2d(y)
        error = y - z
        self.errors.append(error)

        # output layer delta has diff formula. np's '*' operator is element-wise multiplication
        delta_output = (y-z)*sigmoid_prime(z) # (n, 1) matrix
        deltas.append(delta_output)

        delta_weights_output = np.dot(self.activations[-2].T, delta_output)*alpha
        weight_updates.insert(0, delta_weights_output)
        # print(f"updates: {weight_updates}")
        self.weights[-1] += delta_weights_output
        # Start backprop-ing starting from layer (n-3), as layer (n-1) is the output layer, 
        # and layer (n-2) is the last hidden layer, and n-2 weights have been updated
        for i in range(self.num_layers - 3, -1, -1):
            print(f"backprop from layer {i + 1} to {i} out of {self.num_layers - 1} layers")
            next_layer_activations = self.activations[i+1]
            print(f"In layer {i};\nlayer {i+1} activations = {next_layer_activations}\nlayer {i+1} activations.shape: = {next_layer_activations.shape}")
            delta_i = sigmoid_prime(next_layer_activations)*np.dot(deltas[0], self.weights[i+1].T)
            deltas.insert(0, delta_i)
            curr_inputs = self.activations[i]
            print(f"curr inputs shape: {curr_inputs.shape}")
            delta_weights_i = alpha*np.dot(delta_i, curr_inputs.T)
            weight_updates.insert(0, delta_weights_i)
            self.weights[i] += delta_weights_i
            print(f"Backprop for layer {i} done")

        return 0

    def fit(self, X:list, y:list, learning_rate:float=0.2, steps:float=10**5, tolerance:float = 10**-2, verbose:bool = False):
        """_summary_

        Args:
            X (list): _description_
            y (list): _description_
            learning_rate (float, optional): _description_. Defaults to 0.2.
            steps (float, optional): _description_. Defaults to 10**5.
            tolerance (float, optional): _description_. Defaults to 10**-2.
            verbose (bool, optional): _description_. Defaults to False.
        """
        if not X or (len(X) != self.num_inputs):
            raise Exception("Number of inputs must match those specified in architecture")
        if not y or not isinstance(y, list) or (self.num_outputs > 1 and len(y) != self.num_outputs):
            raise Exception("Number of outputs must match those specified in architecture and be supplied as a list")
        self.backprop(X, y, learning_rate)
        return

    def find_RMS_error(self, X, y):
        """_summary_

        Args:
            X (_type_): _description_
            y (_type_): _description_
        """
        return

    def predict(self, x):
        """_summary_

        Args:
            x (_type_): _description_
        """
        return
    
    def visual_NN_boundaries(self, Nsamp=2000):
        """_summary_

        Args:
            Nsamp (int, optional): _description_. Defaults to 2000.
        """
        return
    

In [None]:
test = NeuralNetwork([3, 2, 3, 2], verbose=True)
# test.fit([0.3, 0.4, 0.5], [0.5, 1.0])
# test = NeuralNetwork([5, 2, 2, 3, 1])

backprop from layer 2 to 1 out of 3 layers
In layer 1;
layer 2 activations = [[0.8634 0.8208 0.7292]]
layer 2 activations.shape: = (1, 3)
curr inputs shape: (1, 3)
Backprop for layer 1 done
backprop from layer 1 to 0 out of 3 layers
In layer 0;
layer 1 activations = [[0.7167 0.7242 1.0000]]
layer 1 activations.shape: = (1, 3)
curr inputs shape: (1, 4)


ValueError: shapes (1,3) and (4,1) not aligned: 3 (dim 1) != 4 (dim 0)

In [None]:
test.fit([0.3, 0.4, 0.5], [0.5, 1.0])

updates: [array([[-0.0065, 0.0057],
       [-0.0073, 0.0064],
       [-0.0079, 0.0069]])]
layer 1
(1, 2)
(1, 3)
layer 0


ValueError: operands could not be broadcast together with shapes (1,2) (1,3) 

In [None]:
# targ = np.array(([10], [0]))
# out = np.array(([10], [0]))
# out*(1-out)

w_test = np.arange(6).reshape((3, 2))
a_test = np.random.rand(3,1)
deltas = (10*np.ones(2)).reshape((2, 1))
deltas[1][0] *= 10
# deltas[2][0] *= 100
updates = np.dot(a_test, deltas.T)
print(a_test)
print(w_test)
print(updates)
updates + w_test

[[0.8326]
 [0.3836]
 [0.1912]]
[[0 1]
 [2 3]
 [4 5]]
[[8.3255 83.2553]
 [3.8357 38.3566]
 [1.9123 19.1229]]


array([[8.3255, 84.2553],
       [5.8357, 41.3566],
       [5.9123, 24.1229]])