# Week 02 Exercises: Loss & Activation Functions

In [130]:
import numpy as np
import copy
import matplotlib.pyplot as plt

## (a) Complete `Identity` and `MSE` class implementations

<div class="alert alert-block alert-info">
    Complete the <code>__call__</code> and <code>derivative</code> functions.
</div>

In [132]:
class Identity(object):
    '''
     act = Identity()

     Creates an object that represents the identity mapping.

     Usage:
      act = Identity()
      act(np.array([[1.2, 5.]]))
     produces the numpy array
      [[1.2, 5.]]
    '''
    def __init__(self):
        # Shape of the most recent input; stays None until __call__ runs.
        self.dims = None

    def __call__(self, z):
        '''
         y = act(z)

         Evaluates the identity function, element-by-element, on z, and
         records the shape of z for a later call to derivative().

         Input:
          z  is a numpy array, or anything np.asanyarray can convert
             to one (e.g. a nested list)
         Output:
          y  is a numpy array the same size as z. It is a copy, so
             modifying y does not modify z.
        '''
        # asanyarray accepts array-likes and leaves ndarray subclasses as-is
        z = np.asanyarray(z)
        self.dims = z.shape
        y = copy.deepcopy(z)
        return y

    def derivative(self):
        '''
         act.derivative()

         Computes the derivative of the identity mapping
         element-by-element.
         Note that the __call__ function must be called before this
         function can be called.

         Output:
           dactdz  array the same size as z when __call__ was called

         Raises:
           AttributeError  if __call__ has not been called yet

         Usage:

           dactdz = act.derivative()
        '''
        # getattr guards against instances whose __init__ was bypassed.
        dims = getattr(self, 'dims', None)
        if dims is None:
            # AttributeError is what the missing attribute used to raise;
            # keep the type, but say what actually went wrong.
            raise AttributeError(
                'Identity.derivative() requires a prior call to __call__ '
                '(no input shape has been recorded)')
        # d(z)/dz = 1 for every element
        return np.ones(dims)


In [133]:
class MSE(object):
    '''
     E = MSE()

     Creates an object that implements the mean squared error loss.

     Usage:
      E = MSE()
      loss = E(y, t)

     Example:
      y = np.array([[0.5, 0.1],[-0.4, 0.9], [-0.1, 0.4]])
      t = np.array([[0.6, 0.1],[-0.4, 0.7], [-0.1, 0.6]])
      loss = E(y, t)
     produces the value
      0.015  since it equals
      (0.1^2 + 0.2^2 + 0.2^2)/2 / 3
    '''
    def __init__(self):
        # Gradient cached by the last __call__; an empty list until then.
        self.dE = []

    def __call__(self, y, t):
        '''
         E.__call__(y, t)  or   E(y, t)

         Computes the mean (average) squared error between the outputs
         y and the targets t, and stores the gradient with respect to y
         for a later call to derivative().

         Inputs:
           y  array (or array-like, e.g. nested list) with one sample
              per row
           t  array (or array-like) the same size as y

         Output:
           loss  MSE loss (scalar)

         Raises:
           ValueError  if y and t do not have the same shape
        '''
        y = np.asarray(y)
        t = np.asarray(t)
        # Without this check, mismatched shapes such as (3,1) vs (3,)
        # would broadcast to (3,3) and silently give a wrong loss.
        if y.shape != t.shape:
            raise ValueError(
                'y and t must have the same shape, got %s and %s'
                % (y.shape, t.shape))

        # MSE formula: sum of squared errors, halved, averaged over samples
        diff = y - t
        self.n_samples = t.shape[0]
        E = np.sum(diff**2)/2./self.n_samples
        # The factor 1/2 in E cancels the 2 from differentiating the square
        self.dE = diff / self.n_samples
        return E

    def derivative(self):
        '''
         E.derivative()

         Computes the derivative of the MSE with respect to y.
         Note that the __call__ function must be called before this
         function can be called; until then the stored value is the
         empty list set in __init__.

         Output:
           dEdy  array the same size as y when __call__ was called
        '''
        # The gradient of MSE w.r.t. output was computed in __call__
        return self.dE


## (b) Test `Identity` class

<div class="alert alert-block alert-info">
    Add some code to the notebook that creates a 2D array <code>z</code>, and applies the identity function to it, yielding <code>y</code>. Compute the derivative of the identity function for that data. Does it make sense?
</div>

The output of the code
```
  z = np.array([[0.5, 0.1],[-0.4, 0.9], [-0.1, 0.4]])
  y = act(z)
```
in the cell below should be
```
[[ 0.5  0.1]
 [-0.4  0.9]
 [-0.1  0.4]]
 ```

In [134]:
# Sample input: three rows, two columns.
z = np.array([
    [ 0.5, 0.1],
    [-0.4, 0.9],
    [-0.1, 0.4],
])

# The identity activation should hand back the same values unchanged.
act = Identity()
y = act(z)
print(y)


[[ 0.5  0.1]
 [-0.4  0.9]
 [-0.1  0.4]]


In [135]:
# derivative() returns ones in the shape recorded by the act(z) call,
# so for the 3x2 input above this prints a 3x2 array of ones.
dactdz = act.derivative()
print(dactdz)


[[1. 1.]
 [1. 1.]
 [1. 1.]]


## (c) Test `MSE` class

<div class="alert alert-block alert-info">
    Create another 2D array <code>t</code> by adding Gaussian noise to <code>y</code> (see <code>numpy.random.normal</code>). Then compute the MSE between <code>y</code> and <code>t</code>. Also, evaluate the derivative of the MSE (with respect to <code>y</code>) at <code>(y, t)</code>. Does it make sense?
</div>

The output of the code
```
  y = np.array([[0.5, 0.1],[-0.4, 0.9], [-0.1, 0.4]])
  t = np.array([[0.6, 0.1],[-0.4, 0.7], [-0.1, 0.6]])
  E = MSE()
  loss = E(y, t)
```
should be
```
  0.015
```

In [138]:
# Loss object first, then the outputs and targets from the worked example.
E = MSE()
y = np.array([
    [ 0.5, 0.1],
    [-0.4, 0.9],
    [-0.1, 0.4],
])
t = np.array([
    [ 0.6, 0.1],
    [-0.4, 0.7],
    [-0.1, 0.6],
])

# Non-zero differences are 0.1, 0.2 and 0.2, so the loss is
# (0.01 + 0.04 + 0.04) / 2 / 3.
loss = E(y, t)
print(loss)


0.015


In [139]:
# Gradient of the loss with respect to y, as stored by the preceding
# E(y, t) call: (y - t) divided by the number of samples (rows).
print(E.derivative())


[[-0.03333333  0.        ]
 [ 0.          0.06666667]
 [ 0.         -0.06666667]]
