# Assignment 2

In [1]:
import numpy as np
import matplotlib.pyplot as plt

## Question 3: Implementing `Logistic` Class

In [2]:
class Logistic(object):
    '''
     act = Logistic()
     
     Creates an object that represents the logistic function.
     
     The object remembers the input of its most recent call so that
     derivative() can later be evaluated at that same input.
     
     Usage:
      act = Logistic()
      act(np.array([0., 0.5]))
     produces the numpy array
      [0.5 , 0.62245933]
    '''
    def __init__(self):
        # Input of the most recent __call__; None until the first call.
        self.input = None
        
    def __call__(self, z):
        '''
         y = act(z)
         
         Evaluates the logistic function, element-by-element, on z.
         A copy of z is stored in self.input for use by derivative().
         
         Input:
          z  is a numpy array
         Output:
          y  is a numpy array the same size as z
        '''
        # Copy so that later in-place changes to the caller's array do not
        # alter the point at which derivative() is evaluated.
        self.input = np.copy(z)
        return self.logistic_func(self.input)
    
    def logistic_func(self, z):
        '''
         Apply logistic function on ndarray.
         
         f(x) = 1 / (1 + e^-x)
         
         Evaluated in an overflow-free form: with e = e^-|x|, which always
         lies in (0, 1],
           f(x) = 1 / (1 + e)   for x >= 0
           f(x) = e / (1 + e)   for x <  0
         so large negative inputs give 0 without an overflow warning.

         Input:
          z  is a numpy array
         Output:
          y  is a numpy array the same size as z
        '''
        z = np.asarray(z)
        e = np.exp(-np.abs(z))
        return np.where(z >= 0, 1.0, e) / (1.0 + e)
    
    def derivative(self):
        '''
         act.derivative()
         
         Computes the derivative of the logistic function
         element-by-element, at the input of the most recent call.
         Note that the __call__ function must be called before this
         function can be called.
         
         Output:
           dactdz  array the same size as z when __call__ was called
           
         Raises:
           AttributeError  if __call__ has not been called yet
           
         Usage:
           
           dactdz = act.derivative()
        '''
        if self.input is None:
            raise AttributeError(
                "Logistic.derivative() requires a prior call act(z)")
        return self.logistic_derivative_func(self.input)
    
    def logistic_derivative_func(self, z):
        '''
         Compute the derivative on ndarray.
         
         f'(x) = - 1 / (1 + e^-x)^2 * (-e^-x) = e^-x / (1 + e^-x)^2
         
         f' is an even function, so it is evaluated with e^-|x| in place
         of e^-x. The value is the same, but the exponential can no longer
         overflow (the direct form yields inf/inf = nan for large
         negative x).

         Input:
          z  is a numpy array
         Output:
          y  is a numpy array the same size as z
        '''
        e = np.exp(-np.abs(np.asarray(z)))
        return e / np.square(1.0 + e)


## Demonstrate `Logistic`

In [3]:
# Input array of pre-activation values (3 rows x 2 columns)
z = np.array([[0.5, 0.1],[-0.4, 0.9], [-0.1, 0.4]])
# Build the logistic activation and apply it element-by-element to z
act = Logistic()
y = act(z)
print(y)

[[0.62245933 0.52497919]
 [0.40131234 0.7109495 ]
 [0.47502081 0.59868766]]


In [4]:
# Derivative of the logistic function, evaluated at the z last passed to act
d = act.derivative()
print(d)

[[0.23500371 0.24937604]
 [0.24026075 0.20550031]
 [0.24937604 0.24026075]]


## Question 4: Implementing `CrossEntropy` class

In [5]:
class CrossEntropy(object):
    '''
     E = CrossEntropy()
     
     Creates an object that implements the average cross-entropy loss
     
       E(y, t) = -(1/N) * sum( t * log(y + eps) )
     
     where N is the number of rows (samples) and eps is a small constant
     that keeps log and the division in the derivative finite when an
     entry of y is exactly 0.
     
     Usage:
      E = CrossEntropy()
      loss = E(y, t)
      dEdy = E.derivative()
    '''
    def __init__(self, eps=1e-9):
        '''
         Input:
           eps  small positive constant added to y before taking the log
                and before dividing (default 1e-9)
        '''
        self.eps = eps
    
    def __call__(self, y, t):
        '''
         E.__call__(y, t)  or   E(y, t)
         
         Computes the average cross-entropy between the outputs
         y and the targets t, and stores the derivative with respect
         to y for a later call to derivative().
         
         Inputs:
           y  2D array with one sample per row
           t  array the same size as y
           
         Output:
           loss  average CE loss (scalar)
        '''
        y = np.asarray(y)
        t = np.asarray(t)
        self.n_samples = np.shape(t)[0]
        # Shift once and use the same shifted value in both the loss and its
        # derivative, so the stored gradient matches the loss being returned.
        y_eps = y + self.eps
        E = -np.sum(t*np.log(y_eps))/self.n_samples
        # d/dy of -t*log(y + eps)/N is -t / (y + eps) / N.
        # The epsilon must sit inside the denominator to avoid 1/0.
        self.dE = -t/y_eps/self.n_samples
        return E

    def derivative(self):
        '''
         E.derivative()
         
         Returns the derivative of cross-entropy with respect to y, as
         computed during the most recent call.
         Note that the __call__ function must be called before this
         function can be called.
         
         Output:
           dEdy  array the same size as y when __call__ was called
        '''
        return self.dE


## Demonstrate `CrossEntropy`

In [6]:
# Predicted outputs, one sample per row (named z here, but passed to E as y)
z = np.array([[0.9, 0.1],[0.5, 0.5], [0.2, 0.8]])
# One-hot targets, same shape as the predictions
t = np.array([[1,0], [0,1], [1,0]])
E = CrossEntropy()
# Average cross-entropy over the 3 samples
loss = E(z,t)
print(loss)

0.802648533513587


In [7]:
# Derivative of the loss with respect to the predictions last passed to E
dE = E.derivative()
print(dE)

[[-0.37037037  0.        ]
 [ 0.         -0.66666667]
 [-1.66666667  0.        ]]


## Evaluate $\nabla_{\hspace{-1mm}z} E(y,t)$

In [8]:
# Pre-activation inputs z and one-hot targets t (one sample per row)
z = np.array([[0.5, 0.1],[-0.4, 0.9], [-0.1, 0.4]])
t = np.array([[1, 0], [0, 1], [1, 0]])

# Fresh activation function and loss objects
act = Logistic()
E = CrossEntropy()

# Forward pass: y = act(z), then the loss E(y, t).
# Both objects store what their derivative() methods need.
loss = E(act(z), t)

In [9]:
# Local derivatives from the forward pass:
#   d  = dy/dz, derivative of the logistic at z
#   dE = dE/dy, derivative of the loss with respect to y = act(z)
d = act.derivative()
dE = E.derivative()

print(d)
print(dE)

[[0.23500371 0.24937604]
 [0.24026075 0.20550031]
 [0.24937604 0.24026075]]
[[-0.53551022  0.        ]
 [ 0.         -0.46885655]
 [-0.70172364  0.        ]]


In [10]:
# By the chain rule: dE/dz = dE/dy * dy/dz.
# The product is element-wise because the logistic acts on each entry of z
# independently.
gradients = d * dE
print(gradients)

[[-0.12584689  0.        ]
 [ 0.         -0.09635017]
 [-0.17499306  0.        ]]
