In [2]:
import numpy as np

def relu(Z):
    """Apply the rectified linear unit element-wise.

    Args:
        Z: Array-like of pre-activation values.

    Returns:
        The element-wise maximum of ``0`` and ``Z``.
    """
    floor = 0
    activated = np.maximum(floor, Z)
    return activated

def relu_backward(dA, Z):
    """Back-propagate a gradient through a ReLU activation.

    Args:
        dA: Gradient with respect to the ReLU output.
        Z: Pre-activation values that were fed to the ReLU.

    Returns:
        A copy of ``dA`` in which every position where ``Z <= 0`` is set to 0.
        ``dA`` itself is not modified.
    """
    grad = np.array(dA, copy=True)
    inactive = Z <= 0
    grad[inactive] = 0
    return grad
    
def cross_entropy_loss(y_hat, y):
    """Return the mean categorical cross-entropy of predictions against labels.

    Args:
        y_hat: Array of predicted class probabilities indexed as
            ``(classes, samples)``; it is transposed before being compared
            with the one-hot labels.
        y: Array of class indices, one per sample. It is cast to ``int`` and
            one-hot encoded with ``y_hat.shape[0]`` classes.

    Returns:
        ``-sum(one_hot * log(y_hat.T + eps)) / len(y)``.
    """
    eps = 10**-100  # guards log(0) when a probability is exactly zero
    labels = y.astype(int)
    one_hot = np.eye(y_hat.shape[0])[labels]
    # Compute in float64: in a lower-precision dtype such as float32 the tiny
    # epsilon underflows to zero when added, so log(0) = -inf would leak in and
    # turn the whole loss into NaN through 0 * -inf.
    probs = np.asarray(y_hat, dtype=np.float64).T
    return -1 / len(one_hot) * np.sum(one_hot * np.log(probs + eps))


# Classification model that chains all of the neuron layers and trains them.
class ClassificationModel():
    """Multi-class classifier built from a stack of layers.

    Each object in ``layers`` must provide ``forward_propagation(a)``,
    ``backward_propagation(dA)`` returning a 3-tuple whose first item is the
    gradient for the previous layer, and the attributes ``weights``, ``bias``,
    ``dW`` and ``db`` that ``update`` reads and writes.

    Network outputs are indexed as ``(classes, samples)``: the class axis is
    axis 0 (see ``get_accuracy`` and ``full_backward_propagation``).
    """

    def __init__(self, layers, learning_rate, epochs, batch_size):
        """Store the layers and hyper-parameters and reset the histories.

        Args:
            layers: Ordered sequence of layer objects (input side first).
            learning_rate: Step size used by ``update``.
            epochs: Number of passes over the training data in ``train``.
            batch_size: Number of samples per mini-batch in ``train``.
        """
        self.layers = layers
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size
        # One entry is appended to each history per epoch by ``train``.
        self.cost_history_train = []
        self.cost_history = []
        self.accuracy_history = []

    def get_accuracy(self, Y_hat, Y):
        """Return the fraction of samples whose arg-max class equals ``Y``.

        Args:
            Y_hat: Class scores indexed as ``(classes, samples)``.
            Y: True class indices, broadcastable against the arg-max result.
        """
        predicted_classes = np.argmax(Y_hat, axis=0)
        return (predicted_classes == Y).mean()

    def mean_absolute_error(self, y_true, y_pred):
        """Return the *negated* mean absolute difference of the two inputs.

        NOTE(review): despite the name, the result is ``-mean(|y_true - y_pred|)``
        (always <= 0). The sign is kept as-is because callers outside this
        class may rely on it - confirm whether that is intended.
        """
        return - np.mean(np.abs(y_true - y_pred))

    def get_dZ(self, y, y_true):
        """Return ``y - y_true``, the gradient fed into the last layer.

        NOTE(review): this is the gradient with respect to the output layer's
        pre-activation only if that layer is a softmax trained with
        cross-entropy - presumably the case here; confirm against the layer
        implementation.
        """
        return y - y_true

    def full_forward_propagation(self, input_data):
        """Feed ``input_data`` through every layer in order.

        Returns:
            The output of the last layer (``input_data`` itself when there
            are no layers).
        """
        activation = input_data
        for layer in self.layers:
            activation = layer.forward_propagation(activation)
        return activation

    def full_backward_propagation(self, Y_hat, Y):
        """Back-propagate the output error through the layers in reverse order.

        Args:
            Y_hat: Network output indexed as ``(classes, samples)``.
            Y: Integer class indices, one per sample.

        The per-layer gradients returned by ``backward_propagation`` are not
        used here; ``update`` reads ``dW`` / ``db`` from the layer objects.
        """
        one_hot = np.eye(Y_hat.shape[0])[Y]

        # For the last layer this is already dZ rather than dA (see get_dZ).
        grad = self.get_dZ(Y_hat, one_hot.T)

        for layer in reversed(self.layers):
            grad, _dW, _db = layer.backward_propagation(grad)

    def update(self):
        """Apply one gradient-descent step to every layer's weights and bias."""
        for layer in self.layers:
            layer.weights = layer.weights - self.learning_rate * layer.dW
            layer.bias = layer.bias - self.learning_rate * layer.db

    def predict(self, X):
        """Return the forward output for ``X`` rounded and cast to ``int``."""
        p = self.full_forward_propagation(X)
        return np.round(p).astype(int)

    def predict_no_change(self, X):
        """Return the raw (un-rounded) forward output for ``X``."""
        return self.full_forward_propagation(X)

    def train(self, X_train, Y_train, X_val, Y_val):
        """Train with mini-batch gradient descent and track validation metrics.

        Args:
            X_train: Training inputs with one sample per row (each batch is
                sliced along axis 0 and transposed before the forward pass).
            Y_train: Training class indices aligned with ``X_train`` rows.
            X_val: Validation inputs with one sample per row.
            Y_val: Validation class indices.

        Returns:
            The validation accuracy after the last epoch (also printed).

        Raises:
            ValueError: If ``X_train`` is empty or ``batch_size`` is not
                positive.

        After every epoch one value is appended to ``cost_history_train``
        (loss of the last mini-batch, measured before its update),
        ``cost_history`` (validation loss) and ``accuracy_history``.
        Samples left over after the last full batch are not used.
        """
        n_samples = len(X_train)
        if n_samples == 0:
            raise ValueError("X_train must contain at least one sample")
        if self.batch_size <= 0:
            raise ValueError("batch_size must be a positive integer")

        # With fewer samples than one batch, train on all of them as a single
        # batch instead of silently skipping training.
        batch_size = min(self.batch_size, n_samples)
        batches_num = n_samples // batch_size

        for _ in range(self.epochs):
            # Visit every full batch, including the last one.
            for batch in range(batches_num):
                start = batch * batch_size
                stop = start + batch_size
                x = X_train[start:stop].T
                y = Y_train[start:stop].astype(int)
                Y_hat = self.full_forward_propagation(x)
                self.full_backward_propagation(Y_hat, y)
                self.update()

            # Score the raw outputs: rounding them first would map every
            # sample whose top probability is <= 0.5 to class 0.
            val_output = self.predict_no_change(X_val.T)
            accuracy_val = self.get_accuracy(val_output, Y_val.T)
            train_loss = cross_entropy_loss(Y_hat, y)

            self.cost_history_train.append(train_loss)
            self.cost_history.append(cross_entropy_loss(val_output, Y_val))
            self.accuracy_history.append(accuracy_val)

        # Evaluated again so a value is returned even when epochs == 0.
        accuracy_val = self.get_accuracy(self.predict_no_change(X_val.T), Y_val.T)
        print("validation accuracy: " + str(accuracy_val))
        return accuracy_val