In [None]:
import numpy as np

#HIDDEN LAYERS
#parent class - defines the accessors all children share (to get the function and its derivative)
class ActivationFunction:
    """Base class exposing the accessors shared by all activation objects.

    The accessors read the instance attributes ``function``,
    ``derivative`` and ``param_initializer``.  This class does not set
    them itself; an accessor whose attribute was never assigned raises
    ``AttributeError``.
    """

    def get_func(self):
        """Return the callable stored in ``self.function``."""
        func = self.function
        return func

    def get_deriv(self):
        """Return the callable stored in ``self.derivative``."""
        deriv = self.derivative
        return deriv

    def param_init_by_activ_type(self, V, M_last):
        """Rescale a parameter matrix according to the activation type.

        Args:
            V: parameter matrix, expected to be randomly initialised
                already.
            M_last: forwarded unchanged to the initializer (presumably
                the width of the previous layer -- confirm against the
                caller).

        Returns:
            Whatever ``self.param_initializer(V, M_last)`` returns.
        """
        initializer = self.param_initializer
        return initializer(V, M_last)

#activation function (for hidden layer) 
class ReLU(ActivationFunction):
    """Rectified linear unit activation, applied elementwise."""

    def __init__(self):
        self.function = self._forward
        self.derivative = self._gradient
        #custom weight initialization based on af
        self.param_initializer = self._he_scale

    @staticmethod
    def _forward(x):
        """Return ``max(0, x)`` elementwise."""
        return np.maximum(0, x)

    @staticmethod
    def _gradient(x):
        """Return 1 where ``x > 0`` and 0 elsewhere (including ``x == 0``)."""
        positive = x > 0
        # Multiplying the boolean mask by 1 turns it into integers.
        return positive * 1

    @staticmethod
    def _he_scale(V, M_last):
        """He initialization: scale ``V`` by ``sqrt(2 / M_last)``."""
        scale = np.sqrt(2/M_last)
        return V * scale

#activation function (for hidden layer) 
class tanh(ActivationFunction):
    """Hyperbolic tangent activation, applied elementwise."""

    def __init__(self):
        self.function = self._forward
        self.derivative = self._gradient
        #custom weight initialization based on af
        self.param_initializer = self._xavier_scale

    @staticmethod
    def _forward(x):
        """Return ``tanh(x)`` elementwise."""
        return np.tanh(x)

    @staticmethod
    def _gradient(x):
        """Return ``1 - tanh(x)^2`` elementwise; ``x`` is the activation's input."""
        t = np.tanh(x)
        return 1 - t * t

    @staticmethod
    def _xavier_scale(V, M_last):
        """Xavier initialization: scale ``V`` by ``sqrt(1 / M_last)``."""
        scale = np.sqrt(1/M_last)
        return V * scale

#activation function (for hidden layer)
#lambda (the slope for negative inputs) defaults to 0.2, but can be changed as a hyperparameter
class LeakyReLU(ActivationFunction):
    """Leaky ReLU activation: identity for positive inputs, slope ``lam`` otherwise."""

    def __init__(self, lam=0.2):
        """Store the negative-side slope ``lam`` and wire up the callables."""
        self.lam = lam
        self.function = self.leaky_ReLU
        self.derivative = self.der_leaky_ReLU
        #custom weight initialization based on af
        self.param_initializer = self._he_scale

    @staticmethod
    def _he_scale(V, M_last):
        """He initialization: scale ``V`` by ``sqrt(2 / M_last)``."""
        scale = np.sqrt(2/M_last)
        return V * scale

    def leaky_ReLU(self, x):
        """Return ``x`` where ``x > 0`` and ``x * lam`` elsewhere, elementwise."""
        is_positive = x > 0
        scaled = x * self.lam
        return np.where(is_positive, x, scaled)

    def der_leaky_ReLU(self, x):
        """Return 1 where ``x > 0`` and ``lam`` elsewhere, elementwise."""
        is_positive = x > 0
        return np.where(is_positive, 1, self.lam)

#FINAL LAYER: 
#activation function (for final layer) multiclass classification
class SoftMax(ActivationFunction):
    """Softmax activation for a multiclass final layer.

    Only ``function`` is set; no ``derivative`` or ``param_initializer``
    attribute is assigned by this class.
    """

    def __init__(self):
        self.function = self._softmax

    @staticmethod
    def _softmax(x):
        """Return the softmax of ``x`` normalised along axis 1.

        Args:
            x: array with at least two dimensions; each slice along
                axis 1 is turned into values that sum to 1.

        Returns:
            Array of the same shape as ``x``.
        """
        # Softmax is invariant to subtracting a per-row constant.  Shifting
        # by the row maximum keeps every exponent <= 0, so np.exp cannot
        # overflow to inf (which previously produced inf / inf = nan).
        row_max = np.max(x, axis=1, keepdims=True)
        exps = np.exp(x - row_max)
        return exps / np.sum(exps, axis=1, keepdims=True)
       
class identity(ActivationFunction):
    """Identity activation: hands its input back unchanged.

    Only ``function`` is set; no ``derivative`` or ``param_initializer``
    attribute is assigned by this class.
    """

    def __init__(self):
        self.function = self._passthrough

    @staticmethod
    def _passthrough(x):
        """Return ``x`` as-is."""
        return x

#activation function (for final or hidden layer) binary classification
class logistic(ActivationFunction):
    """Logistic (sigmoid) activation, usable as a final or hidden layer."""

    def __init__(self):
        self.function = self._sigmoid
        self.derivative = self._sigmoid_der
        #custom weight initialization based on af
        # Same Xavier scaling as tanh, so the class also works when used
        # as a hidden layer (previously no param_initializer was set and
        # param_init_by_activ_type raised AttributeError).
        self.param_initializer = self._xavier_scale

    @staticmethod
    def _sigmoid(x):
        """Return ``1 / (1 + exp(-x))`` elementwise."""
        return 1. / (1. + np.exp(-x))

    @staticmethod
    def _sigmoid_der(x):
        """Return the sigmoid's derivative evaluated at input ``x``.

        ``x`` is the activation's input (pre-activation), matching the
        convention of the tanh derivative in this file.  The previous
        implementation returned ``x * (1 - x)``, which is only correct
        when ``x`` is already the sigmoid output.
        """
        s = 1. / (1. + np.exp(-x))
        return s * (1. - s)

    @staticmethod
    def _xavier_scale(V, M_last):
        """Xavier initialization: scale ``V`` by ``sqrt(1 / M_last)``."""
        return V * np.sqrt(1/M_last)