# In [5]:
import numpy as np

def sigmoid(Z):
    """Return the element-wise logistic sigmoid ``1 / (1 + exp(-Z))``.

    Z may be a scalar or a NumPy array; the result has the same shape.
    """
    denominator = 1 + np.exp(-Z)
    return 1 / denominator

def sigmoid_derivative(dA, Z):
    """Back-propagate ``dA`` through a sigmoid activation.

    Args:
        dA: gradient of the cost with respect to the activation output.
        Z: pre-activation values the sigmoid was applied to.

    Returns:
        ``dA * s * (1 - s)`` where ``s = sigmoid(Z)``, i.e. the gradient with
        respect to Z.
    """
    activation = sigmoid(Z)
    # Keep the left-to-right multiplication order: (dA * s) * (1 - s).
    scaled = dA * activation
    return scaled * (1 - activation)

def relu(Z):
    """Return the element-wise rectified linear unit ``max(0, Z)``."""
    floor = 0
    return np.maximum(floor, Z)

def relu_backward(dA, Z):
    """Back-propagate ``dA`` through a ReLU activation.

    Args:
        dA: gradient of the cost with respect to the activation output.
        Z: pre-activation values the ReLU was applied to.

    Returns:
        A copy of ``dA`` with entries zeroed wherever ``Z <= 0``; ``dA``
        itself is left untouched.
    """
    inactive = Z <= 0
    grad = np.array(dA, copy=True)
    grad[inactive] = 0
    return grad

# suffers from vanishing gradient problem
def tanh(x):
    """Return the element-wise hyperbolic tangent of ``x``."""
    activated = np.tanh(x)
    return activated

def tanh_derivative(dA, Z):
    """Back-propagate ``dA`` through a tanh activation.

    Args:
        dA: gradient of the cost with respect to the activation output.
        Z: pre-activation values the tanh was applied to.

    Returns:
        ``dA * (1 - tanh(Z) ** 2)``, the gradient with respect to Z.
    """
    activation = np.tanh(Z)
    # Chain rule: the upstream gradient must be scaled by the local slope,
    # exactly as the sigmoid/ReLU/linear backward functions do.
    return dA * (1 - activation * activation)

def linear(x):
    """Identity activation: hand the input back unchanged."""
    output = x
    return output

def linear_derivative(dA, Z):
    """Back-propagate ``dA`` through the identity activation.

    The identity has slope 1 everywhere, so the upstream gradient ``dA`` is
    returned as-is and ``Z`` is not used.
    """
    grad = dA
    return grad
    
    
# binary cross entropy loss
def binary_cross_entropy_loss(Y_hat, Y):
    """Return the mean binary cross-entropy of predictions against labels.

    Args:
        Y_hat: 2-D array of predicted probabilities with samples along
            axis 1 (``Y_hat.shape[1]`` is used as the sample count).
        Y: 2-D array of 0/1 labels with samples along axis 0, i.e. laid out
            as the transpose of ``Y_hat``.

    Returns:
        The result of the two dot products scaled by ``-1 / m``; for a single
        output unit this is a ``(1, 1)`` array.
    """
    m = Y_hat.shape[1]

    # A saturated sigmoid yields exactly 0.0 or 1.0, and log(0) would turn the
    # cost into inf/nan. Clip to the open interval (0, 1) using the machine
    # epsilon of the prediction dtype so that 1 - eps is still representable.
    if np.issubdtype(Y_hat.dtype, np.floating):
        eps = np.finfo(Y_hat.dtype).eps
    else:
        eps = np.finfo(float).eps
    probs = np.clip(Y_hat, eps, 1 - eps)

    positive_term = np.dot(Y.T, np.log(probs).T)
    negative_term = np.dot(1 - Y.T, np.log(1 - probs).T)
    cost = -1 / m * (positive_term + negative_term)
    return cost
    
    
# mean squared error loss
def mean_squared_error_loss(Y_hat, Y):
    """Return the mean squared error between predictions and targets.

    Args:
        Y_hat: predicted values.
        Y: target values. May have the same shape as ``Y_hat`` or, for 2-D
            inputs, be its transpose (samples along axis 0 while ``Y_hat``
            has samples along axis 1).

    Returns:
        The mean of the squared element-wise differences.
    """
    y_shape = np.shape(Y)
    y_hat_shape = np.shape(Y_hat)
    # Subtracting an (m, 1) target from a (1, m) prediction would broadcast to
    # an (m, m) matrix of every-pair differences and report a wrong cost.
    # When Y is exactly the transpose of Y_hat, line the two up first.
    if len(y_shape) == 2 and y_shape != y_hat_shape and y_shape[::-1] == y_hat_shape:
        Y = np.transpose(Y)

    differences = np.subtract(Y, Y_hat)
    squared_differences = np.square(differences)
    return np.mean(squared_differences)


# All of the neuron layers
class Model:
    """Feed-forward network trained with mini-batch gradient descent.

    ``layers`` is an ordered sequence of layer objects. The only members this
    class uses on a layer are ``forward_propagation(A_prev)``,
    ``backward_propagation(dA)`` (which must return ``(dA_prev, dW, db)``) and
    the attributes ``weights``, ``bias``, ``dW`` and ``db``.

    Data layout: training/validation inputs and targets are indexed by sample
    along axis 0; they are transposed before being fed to the layers, so the
    network itself works with samples along axis 1.

    ``model_type`` selects the task: ``"c"`` for binary classification
    (binary cross-entropy cost) or ``"r"`` for regression (mean squared
    error cost).
    """

    def __init__(self, layers, learning_rate, epochs, batch_size, model_type="c"):
        """Store the hyper-parameters and pick the cost function.

        Raises:
            ValueError: if ``model_type`` is neither ``"c"`` nor ``"r"``.
        """
        self.layers = layers
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size
        # Per-epoch histories filled in by train().
        self.cost_history_train = []  # cost of the last mini-batch of each epoch
        self.cost_history = []        # validation cost after each epoch
        self.accuracy_history = []    # validation score after each epoch
        self.model_type = model_type
        if model_type == "c":
            self.cost_function = binary_cross_entropy_loss
        elif model_type == "r":
            self.cost_function = mean_squared_error_loss
        else:
            # Fail early instead of leaving cost_function undefined and
            # crashing later inside train().
            raise ValueError(
                "model_type must be 'c' (classification) or 'r' (regression), "
                "got " + repr(model_type)
            )

    @staticmethod
    def _clip_probabilities(Y_hat):
        """Clip predictions into the open interval (0, 1) to avoid dividing by zero."""
        dtype = np.asarray(Y_hat).dtype
        if np.issubdtype(dtype, np.floating):
            eps = np.finfo(dtype).eps
        else:
            eps = np.finfo(float).eps
        return np.clip(Y_hat, eps, 1 - eps)

    def _to_prediction(self, raw_output):
        """Turn raw network output into the value returned by predict()."""
        if self.model_type == "c":
            # Round probabilities to hard 0/1 class labels.
            return np.round(raw_output).astype(int)
        elif self.model_type == "r":
            return raw_output

    def full_forward_propagation(self, input_data):
        """Run ``input_data`` through every layer in order; return the last output."""
        curr_a = input_data

        for layer in self.layers:
            curr_a = layer.forward_propagation(curr_a)

        return curr_a

    def get_accuracy(self, Y_hat, Y):
        """Score predictions ``Y_hat`` against targets ``Y`` (higher is better).

        Classification: fraction of columns in which every rounded prediction
        equals the label. Regression: the negated mean absolute error.
        """
        if self.model_type == "c":
            return (np.round(Y_hat) == Y).all(axis=0).mean()
        elif self.model_type == "r":
            return self.mean_absolute_error(Y, Y_hat)

    def mean_absolute_error(self, y_true, y_pred):
        """Return the NEGATED mean absolute error, so that larger means better."""
        return - np.mean(np.abs(y_true - y_pred))

    def get_initial_dA(self, Y_hat, Y):
        """Return the gradient of the cost with respect to the network output.

        ``Y`` must already have the same layout as ``Y_hat``.
        """
        if self.model_type == "c":
            # Saturated outputs (exactly 0 or 1) would divide by zero and
            # poison every weight with inf/nan, so clip them first.
            probs = self._clip_probabilities(Y_hat)
            return - (np.divide(Y, probs) - np.divide(1 - Y, 1 - probs))
        elif self.model_type == "r":
            return (Y_hat - Y)

    def full_backward_propagation(self, Y_hat, Y):
        """Back-propagate the cost gradient through all layers, last to first.

        ``Y`` is given with samples along axis 0 and is transposed here to
        match ``Y_hat``. The per-layer gradients are left on the layers by
        their own ``backward_propagation``; nothing is returned.
        """
        Y = Y.T

        # The derivative of the cost seeds the backward pass.
        dA_prev = self.get_initial_dA(Y_hat, Y)

        for layer in reversed(self.layers):
            dA_prev, _dW, _db = layer.backward_propagation(dA_prev)

    def update(self):
        """Apply one gradient-descent step to every layer's weights and bias."""
        for layer in self.layers:
            layer.weights = layer.weights - self.learning_rate * layer.dW
            layer.bias = layer.bias - self.learning_rate * layer.db

    def predict(self, X):
        """Predict for ``X`` (samples along axis 1).

        Returns integer class labels for classification and the raw network
        output for regression.
        """
        return self._to_prediction(self.full_forward_propagation(X))

    def predict_no_change(self, X):
        """Return the raw network output for ``X`` without any rounding."""
        return self.full_forward_propagation(X)

    def train(self, X_train, Y_train, X_val, Y_val):
        """Train for ``self.epochs`` epochs and return the validation score.

        Every epoch walks over the training set in consecutive mini-batches of
        ``self.batch_size`` samples (the final batch may be smaller), doing a
        forward pass, a backward pass and a parameter update per batch. After
        each epoch the cost of the last mini-batch, the validation cost and
        the validation score are appended to the history lists.

        Raises:
            ValueError: if training is requested with an empty training set
                or a batch size below 1.
        """
        n_samples = len(X_train)
        if self.epochs > 0:
            if self.batch_size < 1:
                raise ValueError("batch_size must be at least 1")
            if n_samples == 0:
                raise ValueError("X_train must contain at least one sample")

        for _ in range(self.epochs):
            # Step through ALL samples; the previous range(batches_num - 1)
            # silently dropped the last full batch and any remainder.
            for start in range(0, n_samples, self.batch_size):
                stop = start + self.batch_size
                x = X_train[start:stop].T
                y = Y_train[start:stop]
                Y_hat = self.full_forward_propagation(x)
                self.full_backward_propagation(Y_hat, y)
                self.update()

            # One validation forward pass serves both the score and the cost.
            val_output = self.full_forward_propagation(X_val.T)
            accuracy_val = self.get_accuracy(self._to_prediction(val_output), Y_val.T)

            self.cost_history_train.append(self.cost_function(Y_hat, y))
            self.cost_history.append(self.cost_function(val_output, Y_val))
            self.accuracy_history.append(accuracy_val)

        y_pred = self.predict(X_val.T)
        accuracy_val = self.get_accuracy(y_pred, Y_val.T)
        print("validation accuracy: " + str(accuracy_val))
        return accuracy_val