In [17]:
import numpy as np
import pandas as pd

### Define signature of a generic activation function class.

In [18]:
from abc import ABC, abstractmethod
class ActivationFunction(ABC):
    def __init__(self):
        pass
    @abstractmethod
    def function(self):
        pass
    @abstractmethod
    def derivative(self):
        pass
    @abstractmethod
    def __call__(self):
        pass

### Define all activation functions

In [76]:
class Sigmoid(ActivationFunction):
    def __call__(self, x):
        return self.function(x)
    def function(self, x):
        return 1/(1+np.exp(-x))
    def derivative(self, x):
        return self.function(x)*(1-self.function(x))

class Tanh(ActivationFunction):
    def __call__(self, x):
        return self.function(x)
    def function(self, x):
        expression = np.exp(2*x)
        return (expression-1)/(expression+1)
    def derivative(self, x):
        expr1, expr2 = np.exp(x), np.exp(-x)
        return 4/((expr1+expr2)**2)

class ReLU(ActivationFunction):
    def __call__(self, x):
        return self.function(x)
    def function(self, x):
        return max(0, x)
    def derivative(self, x):
        if max(0,x) == 0:
            return 0
        return 1

class LeakyReLU(ActivationFunction):
    def __call__(self, x):
        return self.function(x)
    def function(self, x):
        return max(0.01*x, x)
    def derivative(self, x):
        if max(0.01*x, x) == x:
            return 1
        return 0.01

### Import existing loss functions and code new ones

In [72]:
def huberLoss(y_true, y_pred, delta=10):
    err = y_true - y_pred
    abs_err = np.abs(err)
    delta_sq = 0.5*(delta ** 2)
    huber_loss_vectorized = np.vectorize(lambda x: (x**2)*0.5 if x <= delta else delta*x - delta)
    huber_loss_vec = huber_loss_vectorized(abs_err)
    return np.sum(huber_loss_vec)
    # return np.sum(huber_loss_vectorized(abs_err))

### Define Layer and Sequential Model

In [77]:
class Layer():
    
    __valid_activations = {'sigmoid': Sigmoid, 'tanh': Tanh, 'relu': ReLU, 'leaky_relu': LeakyReLU}
    
    def __init__(self, activation, in_dim, out_dim, learning_rate=0.01):
        if activation.lower() not in list(Layer.__valid_activations.keys()):
            raise Exception(f"Valid activations are {Layer.__valid_activations}.")
        self.activation = Layer.__valid_activations[activation.lower()]()
        self.learning_rate = learning_rate
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.weights = np.random.uniform(-1,1, size=(in_dim, out_dim))

    def forward_compute(self, X):
        output_prime = X.dot(self.weights) # of order n x out_dim
        output_val = self.activation(output_prime) # of order n x out_dim
        
        # computing stuff for eventual backpropagation
        self._activation_gradient = self.activation.derivative(output_prime) # of order n x out_dim
        self._input = X
        
        return output_val

    def backprop_compute(self, prev_grad_multipliers):
        gradient_mat = 1
        
        # To Do: find a way of multiplying pre v_grad_multipliers with this layer's gradient multiplier matrix.
        '''
        Needs: 
         1. current-layer's activation gradient matrix(as a function of this layer's input, i.e. prev layers output)
         2. current-layer's input matrix
        '''

        activ_prev_layer_output_element_wise_product = np.multiply(prev_grad_multipliers, self._activation_gradient) # of order n x out_dim
        weights_gradient = self._input.T.dot(activ_prev_layer_output_element_wise_product)
        self.weights -= self.learning_rate * weights_gradient        

        send_mat_to_prev_layer = activ_prev_layer_output_element_wise_product.dot(self.weights.T) # of order n x in_dim, 
                                                                      # which is basically n x out_dim for the previous layer.
        return send_mat_to_prev_layer

In [73]:
y1 = np.random.randint(1,10,(4,))
y2 = np.random.randint(1,10,(4,))
print(y1, "\n", y2, "\n\n", huberLoss(y1, y2, delta=4))

[4 9 5 3] 
 [3 1 7 3] 

 30.5


In [None]:
from sklearn.metrics import log_loss, mean_squared_error, mean_absolute_error, 

class ModelSequential():
    __valid_loss_functions = {
        'binary_crossentropy': BinaryCrossEntropy, 'categorical_crossentropy': CategoricalCrossEntropy, 
        'mse': MeanSquaredError
    }

    # def __init__(self, layers_arr: List, loss_function, metrics_to_track, early_stopping=False):
    def __init__(self, layers_arr: List, loss_function):
        self.__layers = layers_arr
            

### Test

In [74]:
def get_y(x):
    return x[:,0] + (x[:,1]**2) + np.abs(x[:, 2])

x_10K = np.random.randint(-100,100,size=(10000, 3))
y_10K = get_y(x_10K)

x_100K = np.random.randint(-100,100,size=(100000, 3))
y_100K = get_y(x_10K)

x_1M = np.random.randint(-100,100,size=(1000000, 3))
y_1M = get_y(x_1M)

x_10M = np.random.randint(-100,100,size=(10000000, 3))
y_10M = get_y(x_10M)

print(sys.getsizeof(x_10K), sys.getsizeof(x_100K), sys.getsizeof(x_1M), sys.getsizeof(x_10M))
print()
print(sys.getsizeof(y_10K), sys.getsizeof(y_100K), sys.getsizeof(y_1M), sys.getsizeof(x_10M))

240120 2400120 24000120 240000120

80104 80104 8000104 240000120


In [78]:
l = Layer('Tanh', 3, 1, 0.01)
y_10k_pred = l.forward_compute(x_10K)
print(f"Huber loss = {huberLoss(y_10K, y_10k_pred)}")

Huber loss = 3372444578134.9873
