# Activation functions.

In [1]:
import numpy as np

## Activations are represented as classes.

In [11]:
class ReLU():
    def __init__(self):
        self.out_vector = None
    def forward(self,input_vector):
        out_vec = np.where(input_vector>0,input_vector,0)
        self.out_vector = out_vec
        return out_vec
    def backward(self):
        gradient = np.where(self.out_vector>0,1,0)
        return gradient


class Sigmoid():
    def __init__(self):
        self.out_vec = None
    def forward(self,in_vec):
        in_vec = np.clip(in_vec,-500,500)
        out_vec = 1/(1+np.exp(-in_vec))
        self.out_vec = out_vec
        return out_vec
    def backward(self):
        gradient = self.out_vec*(1-self.out_vec)
        return gradient

class Tanh():
    def __init__(self):
        self.out_vec = None
    def forward(self,in_vec):
        in_vec = np.clip(in_vec,-500,500)
        out_vec = np.tanh(in_vec)
        self.out_vec = out_vec
        return out_vec
    def backward(self):
        gradient = 1-np.square(self.out_vec)
        return gradient
    
class Softmax():
    def __init__(self):
        self.out_vec = None
    
    def forward(self,x):
        #softmax is rowwise
        x = x-np.max(x,axis=1,keepdims=True)
        expX = np.exp(x)
        expSum = np.sum(expX,axis=1,keepdims=True)
        self.out_vec = expX/expSum
        return self.out_vec
    
    def backward(self):
        #Jacobians = tensor of shape batch_size,num_classes,num_classes
        Jacobians = np.zeros(shape=(self.out_vec.shape[0],self.out_vec.shape[1],self.out_vec.shape[1]))
        # mask with ones everywhere except on the diagonal. its basicall a matrix of 1s - a diagonal matrix of 1s. eye is a numpy function that creates such a matrix.
        mask = np.ones(shape=(self.out_vec.shape[1],self.out_vec.shape[1])) - np.eye(self.out_vec.shape[1],self.out_vec.shape[1])
        for x in range(self.out_vec.shape[0]):
            off_diagonal = np.outer(self.out_vec[x],-self.out_vec[x])
            off_diagonal = off_diagonal*mask
            diagonal = np.diag(self.out_vec[x]*(1-self.out_vec[x]))
            Jacobian = off_diagonal + diagonal
            Jacobians[x] = Jacobian
        return Jacobians



## Testing activation functions.

In [14]:
#checking shapes of forward and backward pass
ReLU_inst = ReLU()
Tanh_inst = Tanh()
Sigmoid_inst = Sigmoid()
Softmax_inst = Softmax()
actviations = [ReLU_inst,Tanh_inst,Sigmoid_inst,Softmax_inst]
input_data = np.array([[1,2,3],[4,5,6]])
forward_shape = (2,3)
for idx,actviation in enumerate(actviations):
    activation_shape = actviation.forward(input_data).shape
    print(activation_shape)
    if activation_shape != forward_shape:
        print(f'{idx} has incorrect shape')

(2, 3)
(2, 3)
(2, 3)
(2, 3)


In [15]:
for idx,actviation in enumerate(actviations):
    activation_shape = actviation.backward().shape
    print(activation_shape)

(2, 3)
(2, 3)
(2, 3)
(2, 3, 3)


### Expected shapes of forward and backward pass as expected.

In [16]:
#testing expected values
print('Sigmoid')
print('Expected = [[0.73105858 0.88079708 0.95257413],[0.98201379 0.99330715 0.99752738]]')
print(f'Result = {Sigmoid_inst.forward(input_data)}')

Sigmoid
Expected = [[0.73105858 0.88079708 0.95257413],[0.98201379 0.99330715 0.99752738]]
Result = [[0.73105858 0.88079708 0.95257413]
 [0.98201379 0.99330715 0.99752738]]


In [17]:
print('Tanh')
print('Expected = [[0.76159416 0.96402758 0.99505475],[0.99932930 0.99990920 0.99998771]]')
print(f'Result = {Tanh_inst.forward(input_data)}')

Tanh
Expected = [[0.76159416 0.96402758 0.99505475],[0.99932930 0.99990920 0.99998771]]
Result = [[0.76159416 0.96402758 0.99505475]
 [0.9993293  0.9999092  0.99998771]]


In [18]:
print('Relu')
print('Expected =[[1 2 3],[4 5 6]]')
print(f'Result = {ReLU_inst.forward(input_data)}')

Relu
Expected =[[1 2 3],[4 5 6]]
Result = [[1 2 3]
 [4 5 6]]


In [19]:
print('Softmax')
print('Expected = [[0.09003057 0.24472847 0.66524096],[0.09003057 0.24472847 0.66524096]]')
print(f'Result = {Softmax_inst.forward(input_data)}')

Softmax
Expected = [[0.09003057 0.24472847 0.66524096],[0.09003057 0.24472847 0.66524096]]
Result = [[0.09003057 0.24472847 0.66524096]
 [0.09003057 0.24472847 0.66524096]]


In [20]:
#Testing softmax backwards pass
print('Softmax')
print('Expected = [[ 0.08192507 -0.02204578 -0.05987929],[-0.02204578  0.18483645 -0.16279067],[-0.05987929 -0.16279067  0.22266996]]')
print(f'Result = {Softmax_inst.backward()}')

Softmax
Expected = [[ 0.08192507 -0.02204578 -0.05987929],[-0.02204578  0.18483645 -0.16279067],[-0.05987929 -0.16279067  0.22266996]]
Result = [[[ 0.08192507 -0.02203304 -0.05989202]
  [-0.02203304  0.18483645 -0.1628034 ]
  [-0.05989202 -0.1628034   0.22269543]]

 [[ 0.08192507 -0.02203304 -0.05989202]
  [-0.02203304  0.18483645 -0.1628034 ]
  [-0.05989202 -0.1628034   0.22269543]]]
