In [428]:
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [555]:
class Neural_Network():
    
    def __init__(self,input_shape, output_shape, hidden_shape, n_layers, alpha= 0.001):
        self.input_shape = input_shape
        self.output_shape = output_shape
        self.hidden_shape = hidden_shape
        self.n_layers = n_layers
        self.w_list = self.Compile(self.input_shape, self.output_shape, self.hidden_shape, self.n_layers)
        self.alpha = alpha
        
        
    def Compile(self, input_shape, output_shape, hidden_shape, n_layers ):
        w_list, b_list = self.init_w_b_matrices(input_shape, output_shape, hidden_shape)
        w_list = self.put_bias_in_w_matrix(w_list, b_list)
        return w_list
    
    
    def predict(self,x):
        return self.feedforward(self.w_list,x)[0]
        
        
    def feedforward(self,w_list, x):
        hidden_values = []
        hidden_raw_values = []  # It is actually the summed values of neurons (z)

        hidden_values.append(x)
        hidden_raw_values.append(x)
        
        pre_hidden = self.one_adder(x)
        for i in range(len(hidden_shape)):
            raw = w_list[i] @ pre_hidden
            hidden_raw_values.append(raw)
            next_hidden =self.sigmoid(raw)
            hidden_values.append(next_hidden)
            pre_hidden = self.one_adder(next_hidden)
            
        output_value = w_list[-1] @ self.one_adder(hidden_values[-1])  #note that the activation of the output layer is linear
        hidden_values.append(output_value)
        hidden_raw_values.append(output_value)
        return output_value, hidden_values, hidden_raw_values
            
            
    def init_w_b_matrices(self,input_shape, output_shape, hidden_shape):
        w_list = []
        w_shapes = []
        b_list = []
        w_shapes.append((hidden_shape[0],input_shape))
        for i in range(len(hidden_shape)-1):
            w_shapes.append((hidden_shape[i+1], hidden_shape[i]))
        w_shapes.append((output_shape,hidden_shape[-1]))
        for i in range(len(w_shapes)):
            w = np.random.random((w_shapes[i][0], w_shapes[i][1]))
            w_list.append(w)

        for i in range(len(w_shapes)):
            b_list.append( np.random.random(w_shapes[i][0]))
        return w_list, b_list
        
    def activate(self, x):
        x[0] = 1
        x[1:] = sigmoid(x[1:])
        return x
        
    def sigmoid(self,x):
        return 1/(1+ np.exp(-x))
    
    def MSE(self, y, y_pred):
        error = ((y - y_pred) **2).sum()
        return error
    
    
    def put_bias_in_w_matrix(self,w_list, b_list):
        for i in range(len(w_list)):
            b_list[i] = b_list[i].reshape((w_list[i].shape[0],1)) 
            w_list[i] = np.hstack((b_list[i],w_list[i])) 

        return w_list
    
        
    def one_adder(self, x):
        return np.array([1] + x.tolist())
    
    
    
    def w_gradient_calculator(self,all_layers_delta, outputs, n_layers):
        w_gradients = [1 for i in range(n_layers-1)]
        for i in range(n_layers-1):
            w_gradients[i] = self.calculate_grad(all_layers_delta[i], outputs[i])

        return w_gradients



    def update_weights(self,w_gradients, n_layers):
        for i in range(n_layers-1):
            self.w_list[i] = self.w_list[i] - self.alpha*w_gradients[i]

        return self.w_list
    
    

    def fit(self,X,Y, epochs=1):
        
        n_data = len(X)
        
        for epo in range(epochs):

            for i in range(n_data):
                x = X[i]
                y = Y[i]
                results = self.feedforward(self.w_list, x)
                predict = result[0]
                outputs = result[1]
                inputs = result[2]

                all_layers_delta = self.all_layers_delta_calculator(predict, y, self.w_list, inputs, self.n_layers)
                w_grad = self.w_gradient_calculator(all_layers_delta, outputs, self.n_layers)
                self.w_list = self.update_weights(w_grad, n_layers)


                
    def delta_current_layer_calculator(self, w_next_layer, delta_next_layer, input_current_layer):
        # this function will calculate the delat of current layer
        #    pre_layer    current_layer    next_layer

        w_next_layer = np.delete(w_next_layer,0, axis=1)
        activation_prime = self.calculate_activation_prime(input_current_layer).reshape((-1,1))
        delta = (w_next_layer.T @ delta_next_layer.reshape((-1,1))) * activation_prime

        return delta.reshape((-1))
    
    
    def calculate_grad(self, delta_layer, output_pre_layer):
        grad = delta_layer.reshape((-1,1)) @ self.one_adder(output_pre_layer).reshape((1,-1))
        return grad
    
    def last_layer_delta(self,predict, y):
        return predict - y
    
    
    def calculate_activation_prime(self,input_of_the_layer):
        # the activation is considered to be sigmoid
        # if you are using other activation funciton change the derivative in this sectoin
        # Note that the derivative of the sigmoid function is sig(x)*(1-sig(x))
        derivative = self.sig_derivative(input_of_the_layer)
        #derivative = zero_adder(derivative)
        return derivative
    
    
    
    def zero_adder(self,x):
        return np.array([0] + x.tolist())
    
    
    def all_layers_delta_calculator(self,predict, y, w_list,layers_input, n_layers):
        n_list = np.array([i for i in range(n_layers)]) # 0,1,2,3,4
        n_list_descending = n_list[::-1] # 4,3,2,1,0

        all_layers_delta_list = [1 for i in range(n_layers-1)] # [1,1,1,1,1]
        all_layers_delta_list[-1] = self.last_layer_delta(predict,y) 



        for i in n_list_descending[1:-1]: # 3,2,1,0 

            w_next_layer = w_list[i]
            delta_next_layer = all_layers_delta_list[i - (n_layers-1)]  # -1,-2,-3,-4
            input_current_layer = layers_input[i - n_layers]  # -2,-3,-4,-5
            delta = self.delta_current_layer_calculator(w_next_layer, delta_next_layer, input_current_layer)
            all_layers_delta_list[i - n_layers] = delta

        return all_layers_delta_list
    
    
    def sig_derivative(self,x):
        return x*(1-x)
    

In [614]:
x = np.array([1,2,3])
y = np.array([2,3,3,1])


In [615]:
model = Neural_Network(input_shape, output_shape, hidden_shape, n_layers)


In [616]:
model.fit([x],[y], epochs=500)

In [617]:
model.predict(x)

array([1.65799657, 2.58158639, 1.03691455, 1.2186281 ])

In [207]:
def init_w_b_matrices(input_shape, output_shape, hidden_shape):
    w_list = []
    w_shapes = []
    b_list = []

    w_shapes.append((hidden_layers[0],input_shape))
    for i in range(len(hidden_shape)-1):
        w_shapes.append((hidden_shape[i+1], hidden_shape[i]))
    w_shapes.append((output_shape,hidden_shape[-1]))

    for i in range(len(w_shapes)):
        w = np.random.random((w_shapes[i][0], w_shapes[i][1]))
        w_list.append(w)

    #self.w_shapes = w_shapes


    for i in range(len(w_shapes)):
        b_list.append( np.random.random(w_shapes[i][0]))

    return w_list, b_list


def sigmoid(x):
    return 1/(1+ np.exp(-x))


def compile(self, input_shape, output_shape, hidden_shape=[4]):
        w_list, b_list = self.init_w_b_matrices(input_shape, output_shape, hidden_shape)
        return w_list, b_list
    
    
def feed_forward(self, w_list, b_list, x):
        hidden_values = []
        for i in range(len(hidden_shape)):
            hidden = sigmoid(w_list[i] @ x + b_list[i])
            hidden_values.append(hidden)
        output_value = w[-1] @ hidden_values[-1]
        
        return hidden_values
    
def MSE(y, y_pred):
    error = ((y - y_pred) **2).sum()
    return error