In [2]:
import numpy as np
import math

In [4]:

import numpy as np


def hello_do_you_copy():
    """
    This is a sample function that we will try to import and run to ensure that
    our environment is correctly set up on Google Colab.
    """
    print("Roger that from relu.py!")

class ReLU:
    """
    An implementation of rectified linear units(ReLU)
    """

    def __init__(self):
        self.cache = None
        self.dx = None

    def forward(self, x):
        '''
        The forward pass of ReLU. Save necessary variables for backward
        :param x: input data
        :return: output of the ReLU function
        '''
        out = np.maximum(0, x)
        #############################################################################
        # TODO: Implement the ReLU forward pass.                                    #
        #############################################################################

        #############################################################################
        #                              END OF YOUR CODE                             #
        #############################################################################
        self.cache = x
        return out

    def backward(self, dout):
        """
        :param dout: the upstream gradients
        :return:
        """
        dx, x = None, self.cache

        dx = np.greater(x, 0).astype(int)
        dx = dx*dout  #unsure on order
        #############################################################################
        # TODO: Implement the ReLU backward pass.                                   #
        #############################################################################

        #############################################################################
        #                              END OF YOUR CODE                             #
        #############################################################################
        self.dx = dx


def rel_error(x, y):
    """ returns relative error """
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))


def eval_numerical_gradient(f, x, verbose=True, h=0.00001):
    """
    a naive implementation of numerical gradient of f at x
    - f should be a function that takes a single argument
    - x is the point (numpy array) to evaluate the gradient at
    """

    fx = f(x)  # evaluate function value at original point
    grad = np.zeros_like(x)
    # iterate over all indexes in x
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:

        # evaluate function at x+h
        ix = it.multi_index
        oldval = x[ix]
        x[ix] = oldval + h  # increment by h
        fxph = f(x)  # evalute f(x + h)
        x[ix] = oldval - h
        fxmh = f(x)  # evaluate f(x - h)
        x[ix] = oldval  # restore

        # compute the partial derivative with centered formula
        grad[ix] = (fxph - fxmh) / (2 * h)  # the slope
        if verbose:
            print(ix, grad[ix])
        it.iternext()  # step to next dimension

    return grad


def eval_numerical_gradient_array(f, x, df, h=1e-5):
    """
    Evaluate a numeric gradient for a function that accepts a numpy
    array and returns a numpy array.
    """
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index

        oldval = x[ix]
        x[ix] = oldval + h
        pos = f(x)
        x[ix] = oldval - h
        neg = f(x)
        x[ix] = oldval

        grad[ix] = np.sum((pos - neg) * df) / (2 * h)
        it.iternext()
    return grad

In [6]:

#check ReLU

def _relu_forward(x):
    relu = ReLU()
    return relu.forward(x)

def test_forward():
    x = np.linspace(-0.5, 0.5, num=12).reshape(3, 4)
    relu = ReLU()
    out = relu.forward(x)
    correct_out = np.array([[0., 0., 0., 0., ],
                            [0., 0., 0.04545455, 0.13636364, ],
                            [0.22727273, 0.31818182, 0.40909091, 0.5, ]])
    diff = rel_error(out, correct_out)
    print(diff)

def test_backward():
    x = np.random.randn(10, 10)
    dout = np.random.randn(*x.shape)

    dx_num = eval_numerical_gradient_array(lambda x: _relu_forward(x), x, dout)

    relu = ReLU()
    out = relu.forward(x)
    relu.backward(dout)
    dx = relu.dx

    print("dx_num shape: ", dx_num.shape, 'dx shape: ', dx.shape, "out shape", out.shape)
    print(out)
    print(dx_num)

test_backward()

dx_num shape:  (10, 10) dx shape:  (10, 10) out shape (10, 10)
[[0.         0.         0.         0.48935178 1.16253407 0.29912946
  0.86840123 0.26602623 0.         0.4459056 ]
 [0.         0.49350853 0.         0.59829199 0.         0.
  0.46273842 0.32473276 0.         0.76115997]
 [0.         0.         0.         0.8710263  0.         0.
  0.         0.         0.         0.17183249]
 [0.73248082 0.         0.64344148 0.33818439 0.         0.
  1.50892657 0.         0.         0.57025269]
 [0.         0.         0.         0.         0.         1.01276412
  1.1309099  0.         0.         0.        ]
 [0.88505655 0.68887457 0.0946157  0.         1.44432825 2.04198305
  0.         0.         1.45888539 0.        ]
 [1.0331871  0.54801606 0.83363271 0.         1.63566713 1.24234057
  0.         0.         0.00760195 0.9472641 ]
 [0.         2.7575342  0.75156802 0.73110151 0.         0.
  1.74563556 0.         0.         0.87005336]
 [0.         0.3651147  0.         0.         0.0

In [7]:
#check max_pool
class MaxPooling:
    """
    Max Pooling of input
    """

    def __init__(self, kernel_size, stride):
        self.kernel_size = kernel_size
        self.stride = stride
        self.cache = None
        self.dx = None

    def forward(self, x):
        """
        Forward pass of max pooling
        :param x: input, (N, C, H, W)
        :return: The output by max pooling with kernel_size and stride
        """
        N = x.shape[0]
        C = x.shape[1]
        H = x.shape[2]
        W = x.shape[3]
        H_out = math.floor((H - self.kernel_size)/self.stride + 1)
        W_out = math.floor((W - self.kernel_size)/self.stride + 1)

        #initialize output
        out = np.zeros((N, C, H_out, W_out))

        for n in range(N):
            for c in range(C):
                for hi in range(H_out):
                    for wi in range(W_out):
                            out[n, c, hi, wi] = np.max(x[n, c, hi*self.stride:(hi*self.stride + self.kernel_size), wi*self.stride:(wi*self.stride + self.kernel_size)])

        self.cache = (x, H_out, W_out)
        return out

    def backward(self, dout):

        x, H_out, W_out = self.cache

        #dL_dx = 1 only for index positions in input array x in which value was the max value in pooling operation
        out = self.forward(x)
        self.dx = np.zeros_like(x)

        N = x.shape[0]
        C = x.shape[1]
        H = x.shape[2]
        W = x.shape[3]

        for n in range(N):
            for c in range(C):
                for hi in range(H_out):
                    for wi in range(W_out):
                            slice = x[n, c, hi*self.stride:(hi*self.stride + self.kernel_size), wi*self.stride:(wi*self.stride + self.kernel_size)]
                            # print(slice.flatten())
                            max_idx = np.argmax(slice)
                            # print(max_idx)
                            max_pos = list(np.unravel_index(max_idx, (1,1,self.kernel_size, self.kernel_size)))
                            #correct index for which area of x max pool came from - H, W increased by stride and kernel size
                            max_pos[2] = max_pos[2] + hi*self.stride   #height position
                            max_pos[3]  = max_pos[3] + wi*self.stride    #width position

                            #troubleshooting
                            # print("max idx ", max_idx)
                            print("max_pos: ", max_pos)
                            self.dx[tuple(max_pos)] = 1

#test function

x_shape = (2, 3, 4, 4)
x = np.linspace(-0.3, 0.4, num=np.prod(x_shape)).reshape(x_shape)
maxpool = MaxPooling(kernel_size=2, stride=2)
out = maxpool.forward(x)

correct_out = np.array([[[[-0.26315789, -0.24842105],
                          [-0.20421053, -0.18947368]],
                         [[-0.14526316, -0.13052632],
                          [-0.08631579, -0.07157895]],
                         [[-0.02736842, -0.01263158],
                          [0.03157895, 0.04631579]]],
                        [[[0.09052632, 0.10526316],
                          [0.14947368, 0.16421053]],
                         [[0.20842105, 0.22315789],
                          [0.26736842, 0.28210526]],
                             [[0.32631579, 0.34105263],
                              [0.38526316, 0.4]]]])
# print(correct_out.shape)
# print(out.shape)
# print(out)

In [95]:
#check backprop
# print(x)
maxpool.backward(out)
dx = maxpool.dx

dx_num = eval_numerical_gradient_array(lambda x: maxpool._pool_forward(x), x, dout)
print("correct ans: ", dx_num)
print(dx)

max_pos:  [0, 0, 1, 1]
max_pos:  [0, 0, 1, 3]
max_pos:  [0, 0, 3, 1]
max_pos:  [0, 0, 3, 3]
max_pos:  [0, 0, 1, 1]
max_pos:  [0, 0, 1, 3]
max_pos:  [0, 0, 3, 1]
max_pos:  [0, 0, 3, 3]
max_pos:  [0, 0, 1, 1]
max_pos:  [0, 0, 1, 3]
max_pos:  [0, 0, 3, 1]
max_pos:  [0, 0, 3, 3]
max_pos:  [0, 0, 1, 1]
max_pos:  [0, 0, 1, 3]
max_pos:  [0, 0, 3, 1]
max_pos:  [0, 0, 3, 3]
max_pos:  [0, 0, 1, 1]
max_pos:  [0, 0, 1, 3]
max_pos:  [0, 0, 3, 1]
max_pos:  [0, 0, 3, 3]
max_pos:  [0, 0, 1, 1]
max_pos:  [0, 0, 1, 3]
max_pos:  [0, 0, 3, 1]
max_pos:  [0, 0, 3, 3]
[[[[0. 0. 0. 0.]
   [0. 1. 0. 1.]
   [0. 0. 0. 0.]
   [0. 1. 0. 1.]]

  [[0. 0. 0. 0.]
   [0. 0. 0. 0.]
   [0. 0. 0. 0.]
   [0. 0. 0. 0.]]

  [[0. 0. 0. 0.]
   [0. 0. 0. 0.]
   [0. 0. 0. 0.]
   [0. 0. 0. 0.]]]


 [[[0. 0. 0. 0.]
   [0. 0. 0. 0.]
   [0. 0. 0. 0.]
   [0. 0. 0. 0.]]

  [[0. 0. 0. 0.]
   [0. 0. 0. 0.]
   [0. 0. 0. 0.]
   [0. 0. 0. 0.]]

  [[0. 0. 0. 0.]
   [0. 0. 0. 0.]
   [0. 0. 0. 0.]
   [0. 0. 0. 0.]]]]


In [3]:
#understand np.unravel_index - unlike sample_array.argmax(), it shows the index of the multi-dimensional array where the max is, not the index of the flattened version

arr = np.arange(20).reshape(5, 4)
print("arr: ", arr)
x = arr.argmax()
print("x: ", x)
dims = arr.shape
print("dims array: ", dims)
idx = np.unravel_index(x, dims)
print("idx using unravel_index: ", idx)
print(arr[idx] == arr.max())



arr:  [[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]
 [16 17 18 19]]
x:  19
dims array:  (5, 4)
idx using unravel_index:  (4, 3)
True


In [5]:
#for np.unravel_index, input indices for flattened version of array, get back tuple of two arrays (row positions array, col positions array) if list of inputs, or single tuple if single input
#so 22 -> (3,4), 41 -> (6,5) 37 -> (6,1)
max_indices = np.unravel_index([22, 41, 37], (7,6))
print('max indices: ', max_indices)
print('max for 22: ', max_indices[0][0], max_indices[1][0])

max indices:  (array([3, 6, 6], dtype=int64), array([4, 5, 1], dtype=int64))
max for 22:  3 4


In [9]:
max_pos = np.unravel_index(2, (2,3,2,2))

temp = out.copy()
# print("temp: ", temp)
print("max pos", max_pos)
temp[max_pos] = 1
print("temp: ", temp)

max pos (0, 0, 1, 0)
temp:  [[[[-0.26315789 -0.24842105]
   [ 1.         -0.18947368]]

  [[-0.14526316 -0.13052632]
   [-0.08631579 -0.07157895]]

  [[-0.02736842 -0.01263158]
   [ 0.03157895  0.04631579]]]


 [[[ 0.09052632  0.10526316]
   [ 0.14947368  0.16421053]]

  [[ 0.20842105  0.22315789]
   [ 0.26736842  0.28210526]]

  [[ 0.32631579  0.34105263]
   [ 0.38526316  0.4       ]]]]


In [58]:
#max_pooling back prop
np.argmax(out)

23

In [13]:
#practice reshaping
num_inputs = 2
input_shape = (4, 5, 6)
output_dim = 3

input_size = num_inputs * np.prod(input_shape)
weight_size = output_dim * np.prod(input_shape)

x = np.linspace(-0.1, 0.5, num=input_size).reshape(num_inputs, *input_shape)
w = np.linspace(-0.2, 0.3, num=weight_size).reshape(np.prod(input_shape), output_dim)
b = np.linspace(-0.3, 0.1, num=output_dim)

print(x.shape)
x = np.reshape(x, (x.shape[0], np.prod(np.array(x.shape[1:]))))
print(x.shape)

(2, 4, 5, 6)
(2, 120)
