# Assignment 2

In [246]:
import numpy as np
import matplotlib.pyplot as plt
import copy

## Question 3: Implementing `Logistic` class

In [247]:
class Logistic(object):
    '''
     act = Logistic()

     Creates an object that represents the logistic function
       y = 1 / (1 + exp(-z))

     The object remembers the most recent input so that derivative()
     can be evaluated at the same point.

     Usage:
      act = Logistic()
      act(np.array([0., 0.5]))
     produces the numpy array
      [0.5 , 0.62245933]
    '''
    def __init__(self):
        # Float copy of the most recent input to __call__ (None until then).
        self.input_array = None

    def __call__(self, z):
        '''
         y = act(z)

         Evaluates the logistic function, element-by-element, on z.

         Input:
          z  is a numpy array (anything np.array can convert to floats
             is accepted); it is copied, never modified
         Output:
          y  is a numpy array the same size as z
        '''
        # Keep a private float copy so later changes to the caller's array
        # cannot affect derivative().
        self.input_array = np.array(z, dtype=float)
        # exp(-|z|) lies in [0, 1], so it cannot overflow no matter how
        # large |z| is (the naive exp(-z) overflows for very negative z).
        decay = np.exp(-np.abs(self.input_array))
        # Two algebraically equal forms of the logistic function:
        #   z >= 0:  1 / (1 + exp(-z))
        #   z <  0:  exp(z) / (1 + exp(z))
        y = np.where(self.input_array >= 0,
                     1. / (1. + decay),
                     decay / (1. + decay))
        # np.where turns a scalar input into a 0-d array; unwrap it so that
        # a scalar input still yields a scalar output.
        return y[()] if y.ndim == 0 else y

    def derivative(self):
        '''
         act.derivative()

         Computes the derivative of the logistic function
         element-by-element, evaluated at the z given to the most
         recent __call__.
         Note that the __call__ function must be called before this
         function can be called.

         Output:
           dactdz  array the same size as z when __call__ was called

         Raises:
           AttributeError  if __call__ has not been called yet

         Usage:

           dactdz = act.derivative()
        '''
        if self.input_array is None:
            raise AttributeError(
                'Logistic.derivative() requires a prior call to __call__')
        # The derivative exp(-z) / (1 + exp(-z))**2 is an even function of z,
        # so evaluating it at |z| gives the same value without overflow
        # (the naive form yields inf/inf = nan for very negative z).
        decay = np.exp(-np.abs(self.input_array))
        dactdz = decay / (1. + decay)**2
        return dactdz

## Demonstrate `Logistic`

In [248]:
#===== YOUR CODE HERE =====
# 3x2 test input z containing zero, positive, and negative values
z = np.array([[0., 0.5],
              [1., 10.],
              [-2., -5.]])
# Evaluate the logistic function element-wise on z. The call also stores a
# copy of z inside `act`, which act.derivative() in the next cell relies on.
act = Logistic()
act(z)

array([[0.5       , 0.62245933],
       [0.73105858, 0.9999546 ],
       [0.11920292, 0.00669285]])

In [249]:
#===== YOUR CODE HERE =====
# Derivative of the logistic function, evaluated at the z passed to the most
# recent act(...) call (so the previous cell must have been run first)
dactdz = act.derivative()
dactdz

array([[2.50000000e-01, 2.35003712e-01],
       [1.96611933e-01, 4.53958077e-05],
       [1.04993585e-01, 6.64805667e-03]])

## Question 4: Implementing `CrossEntropy` class

In [250]:
class CrossEntropy(object):
    '''
     E = CrossEntropy()

     Creates an object that implements the average cross-entropy loss
       E(y, t) = -(1/N) * sum( t*log(y) + (1-t)*log(1-y) )
     where the sum runs over every element and N is the number of
     samples (rows).

     Usage:
      E = CrossEntropy()
      loss = E(y, t)
      dEdy = E.derivative()
    '''
    def __init__(self):
        # Float copies of the most recent y and t, and their common shape
        # (all None until __call__ has been used).
        self.input_y = None
        self.input_t = None
        self.shape = None

    def __call__(self, y, t):
        '''
         E.__call__(y, t)  or   E(y, t)

         Computes the average cross-entropy between the outputs
         y and the targets t.

         Inputs:
           y  2D array with one sample per row
           t  array the same size as y

         Output:
           loss  average CE loss (scalar)

         Raises:
           ValueError  if y and t differ in shape, or contain no samples
        '''
        y = np.array(y, dtype=float)
        t = np.array(t, dtype=float)
        # Validate before touching any stored state so that a failed call
        # leaves the previous y and t in place for derivative().
        if y.shape != t.shape:
            raise ValueError(
                'y and t must have the same shape, got {} and {}'.format(
                    y.shape, t.shape))
        if y.ndim < 1 or y.shape[0] == 0:
            raise ValueError('y and t must contain at least one sample')
        # np.array(...) above already made private copies.
        self.input_y = y
        self.input_t = t
        self.shape = y.shape
        # Terms whose weight (t or 1-t) is exactly zero contribute nothing.
        # Skipping them avoids 0*log(0) = nan when a prediction saturates at
        # the value its target asks for (y == 0 with t == 0, or y == 1 with
        # t == 1).
        has_on_term = t != 0
        has_off_term = t != 1
        per_element = np.zeros(self.shape)
        per_element[has_on_term] -= t[has_on_term] * np.log(y[has_on_term])
        per_element[has_off_term] -= ((1. - t[has_off_term])
                                      * np.log(1. - y[has_off_term]))
        # Divide the total by the number of samples (rows) for the average.
        loss = np.sum(per_element) / self.shape[0]
        return loss

    def derivative(self):
        '''
         E.derivative()

         Computes the derivative of the average cross-entropy with
         respect to y, using the y and t from the most recent __call__.
         Note that the __call__ function must be called before this
         function can be called.

         Output:
           dEdy  array the same size as y when __call__ was called

         Raises:
           AttributeError  if __call__ has not been called yet
        '''
        if self.shape is None:
            raise AttributeError(
                'CrossEntropy.derivative() requires a prior call to __call__')
        y = self.input_y
        t = self.input_t
        # d/dy of -t*log(y) - (1-t)*log(1-y) is (1-t)/(1-y) - t/y.
        # As in __call__, a term with zero weight is skipped so that a
        # saturated prediction gives a finite value rather than 0/0 = nan.
        has_on_term = t != 0
        has_off_term = t != 1
        dEdy = np.zeros(self.shape)
        dEdy[has_off_term] += (1. - t[has_off_term]) / (1. - y[has_off_term])
        dEdy[has_on_term] -= t[has_on_term] / y[has_on_term]
        # The loss is an average over samples, so its derivative carries the
        # same 1/N factor.
        dEdy /= self.shape[0]
        return dEdy


## Demonstrate `CrossEntropy`

In [251]:
#===== YOUR CODE HERE =====
# 3x2 arrays: y holds predictions strictly between 0 and 1, t holds the
# matching 0/1 targets (one sample per row)
y = np.array([[0.1, 0.9],
              [0.8, 0.3],
              [0.7, 0.6]])
t = np.array([[0, 1],
              [1, 0],
              [1, 1]])
# Average cross-entropy loss over the 3 samples. The call also stores y and t
# inside `E`, which E.derivative() in the next cell relies on.
E = CrossEntropy()
loss = E(y, t)
loss

0.5526800314244393

In [252]:
#===== YOUR CODE HERE =====
# Derivative of the average CE loss with respect to each element of y, using
# the y and t from the most recent E(y, t) call (run the previous cell first)
E.derivative()

array([[ 0.37037037, -0.37037037],
       [-0.41666667,  0.47619048],
       [-0.47619048, -0.55555556]])

## Evaluate $\nabla_{\hspace{-1mm}z} E(y,t)$

In [253]:
#===== YOUR CODE HERE =====
# 3x2 arrays: z is the input to the logistic function, t holds the 0/1 targets
z = np.array([[0., 0.5],
              [1., 10.],
              [-2., -5.]])
t = np.array([[0, 1],
              [1, 0],
              [1, 1]])
# Forward pass y = logistic(z), then dy/dz at that same z
# (act(z) must run before act.derivative())
act = Logistic()
y = act(z)
dactdz = act.derivative()
# Evaluate the loss so that E stores y and t, then take dE/dy
# (`loss` itself is not used below; the call is needed for E.derivative())
E = CrossEntropy()
loss = E(y, t)
E_derivative = E.derivative()
# Chain rule: dE/dz = dE/dy * dy/dz. Both factors are element-wise, so the
# product is element-wise as well; the result has the same shape as z.
np.multiply(E_derivative, dactdz)

array([[ 0.16666667, -0.12584689],
       [-0.08964714,  0.3333182 ],
       [-0.29359903, -0.33110238]])