In [1]:
import numpy as np

In [2]:
# define some activation and error functions
def tanh(x, derivative=False):
    """Implements the hyperbolic tangent function element wise over an array x.

    Parameters
    ----------
    x : numpy array
        This array contains arguments for the hyperbolic tangent function.
    derivative : bool
        Indicates whether to use the hyperbolic tangent function or its derivative.

    Returns
    -------
    numpy array
        An array of equal shape to `x`.
    """

    if derivative:
        tanh_not_derivative = tanh(x)
        return 1.0 - tanh_not_derivative**2
        #return 1.0 - x**2
    else:
        return np.tanh(x)

def mean_squared_error(target_output, actual_output, derivative=False):
    try:
        assert(target_output.shape == actual_output.shape)
    except AssertionError:
        print(f"Shape of target vector: {target_output.shape} does not match shape of actual vector: {actual_output.shape}")

    if not derivative:
        error = np.sum(0.5 * np.sum((target_output-actual_output)**2, axis=1, keepdims=True))
    else:
        error = (actual_output - target_output)

    return error

In [6]:
class NeuralNet(object):
    RNG = np.random.default_rng()


    def __init__(self, topology:list[int] = []):
        self.topology    = topology
        self.size = len(topology)
        self.weight_mats = []

        self._init_matrices()
        self._init_gradients()

    def _init_matrices(self):
        #-- set up matrices
        if len(self.topology) > 1:
            j = 1
            for i in range(len(self.topology)-1):
                num_rows = self.topology[i]
                num_cols = self.topology[j]

                mat = self.RNG.random(size=(num_rows, num_cols))
                self.weight_mats.append(mat)

                j += 1
    
    
    def _init_gradients(self,):
        self.stored_gradients = [None] * len(self.weight_mats)
                
    
    def feedforward(self, input_vector):
        I = input_vector
        
        for idx, W in enumerate(self.weight_mats):
            
            I = np.dot(I, W)
            
            if idx == len(self.weight_mats) - 1:
                out_vector = np.tanh(I)  #output layer
            else:
                I          = np.tanh(I)  #hidden layers
            
        return out_vector
    
    
    def _gradient_descent(self,gradients, last_change):
        """Uses the gradients computed by the backpropagation method to update network weights.

        performs stochastic gradient descent and adjusts the weights


        Parameters
        ----------
        gradients : python iterable
            This iterable {list, tuple, etc.} contains numpy arrays.
            Each numpy array is the gradient matrix computed by backpropagation for each layer matrix.

        """

        learning_rate = 0.01 
        momentum      = 0.1
        
        for layer_idx in range(self.size):
            delta_weight = learning_rate * gradients[layer_idx]
            full_change = delta_weight + (momentum * last_change[layer_idx])
            self.weights[layer_idx] -= full_change
            last_change[layer_idx] = 1*gradients[layer_idx] #copy gradient mat


    def backprop(self, 
                 input_samples,
                 target,output,
                 error_func,
                 last_change,
                 hidden_activation=tanh,
                 output_activation=tanh):
        """Backpropagation.

        Parameters
        ----------
        input_samples : numpy array
            Contains all samples in a batch.
        target_outputs : numpy array
            Matching targets for each sample in `input_samples`.
        output : numpy array
            Actual output from feedforward propagation. It will be used to check the network's error.
        batch_mode : bool, Don't use for now.
            Indicates whether to use batch or online training.
        error_func : function object
            This is the function that computes the error of the epoch and used during backpropagation.
            It must accept parameters as: error_func(target={target numpy array},actual={actual output from network},derivative={boolean to indicate operation mode})
        hidden_activation : function object, optional
            It is the activation function for hidden layers. It must be able to accept numpy arrays.
            It must also provide a parameter to indicate operation in derivative or normal mode.
        output_activation : function object, optional
            It is the activation function for final layer. It must be able to accept numpy arrays.
            It must also provide a parameter to indicate operation in derivative or normal mode.

        """

        #placeholder variables
        #delta = None
        #gradient_mat = None

        #Compute gradients and deltas
        for i in range(self.size):
            back_index =self.size-1 -i                  # This will be used for the items to be accessed backwards
            if i!=0:
                W_trans = self.weight_mats[back_index+1].T        #we use the transpose of the weights in the current layer
                d_activ = hidden_activation(self.netIns[back_index],derivative=True)
                d_error = np.dot(delta, W_trans)
                delta = d_error * d_activ
                gradient_mat = np.dot(self.netOuts[back_index].T , delta)
                self.stored_gradients[back_index] = gradient_mat
            else:
                #Here we calculate gradients for final layer
                d_activ = output_activation(self.netIns[back_index],derivative=True)
                d_error = error_func(target,output,derivative=True)
                delta = d_error * d_activ
                gradient_mat = np.dot(self.netOuts[back_index].T , delta)
                self.stored_gradients[back_index] = gradient_mat
        # Update weights using the computed gradients
        self._gradient_descent(gradients=self.stored_gradients, last_change)

    def train(self, input_vector, target_vector, epochs=1000, error_threshold=1E-10, error_func=mean_squared_error):
        last_change = [np.zeros(mat.shape) for mat in self.weight_mats]
        for i in range(epochs):
            nnet_output = self.feedforward(input_vector)
            self.backprop(input_vector, target_vector, nnet_output, error_func=tanh)

In [7]:
# Create a neural network for the XOR logic gate, so two inputs and one output neuron. 
nnet = NeuralNet([2,3,1])

# XOR logic gate truth table (this will be our training set)
inputs  = np.array([[0,0], 
                    [0,1], 
                    [1,0], 
                    [1,1]])

targets = np.array([[0],
                    [1],
                    [1],
                    [0]])


In [8]:
nnet.train(inputs, targets, 1000)

AttributeError: 'NeuralNet' object has no attribute 'netIns'