# 2. Convolution and Pooling (10 - 10 pts)

### a) Write a function for convolution operation in the cell below. Write your own code with your own algorithm. It should have necessary error checks for matrix dimensions (consider multichannel matrices). Inputs must have the same structure as Tensorflow convolution operation and also the results must be same. Check [tf.nn.convolution](https://www.tensorflow.org/api_docs/python/tf/nn/convolution)

In [0]:
%tensorflow_version 1.x" before "import tensorflow
import tensorflow as tf
import numpy as np
from math import floor, ceil

`%tensorflow_version` only switches the major version: 1.x or 2.x.
You set: `1.x" before "import tensorflow`. This will be interpreted as: `1.x`.


TensorFlow 1.x selected.


# Convolution
This function implements the convolution operation on a multichannel image *my_input* with the filter *my_filter*. The padding type is specified with *my_padding*, which is either "VALID" (no padding) or "SAME" (zero padding so that, with a stride of 1, the output keeps the spatial dimensions of the input). The strides are specified with the *my_strides* parameter, a list with one entry per spatial dimension.

In [0]:
def my_convolution(my_input, my_filter, my_padding, my_strides):
    """Convolve a channels-last input with a filter, mirroring tf.nn.convolution.

    As in TensorFlow, the operation is a cross-correlation: the filter is
    not flipped before it is slid over the input.

    Args:
        my_input: Array of shape ``[batch] + spatial_shape + [in_channels]``
            with 1 to 3 spatial dimensions.
        my_filter: Array of shape
            ``filter_spatial_shape + [in_channels, out_channels]``.
        my_padding: ``"VALID"`` (no padding) or ``"SAME"`` (zero padding so
            that each output spatial size is ``ceil(input_size / stride)``).
        my_strides: Sequence with at least one stride per spatial dimension
            (extra entries are ignored), or ``None`` for a stride of 1
            everywhere.  Non-integer strides are floored.  The sequence is
            not modified.

    Returns:
        A float64 array of shape
        ``[batch] + output_spatial_shape + [out_channels]``.

    Raises:
        AssertionError: If the ranks, channel counts, padding name, strides
            or (for ``"VALID"``) the spatial sizes are inconsistent.
    """
    data = np.asarray(my_input)
    kernel = np.asarray(my_filter)
    num_spatial = data.ndim - 2

    # Explicit raises (rather than ``assert`` statements) keep the checks
    # active under ``python -O`` while preserving the exception type.
    if not 1 <= num_spatial <= 3:
        raise AssertionError(
            "input must have 1 to 3 spatial dimensions, got shape %s"
            % (data.shape,))
    if kernel.ndim != data.ndim:
        raise AssertionError(
            "filter rank %d does not match input rank %d"
            % (kernel.ndim, data.ndim))
    if data.shape[-1] != kernel.shape[-2]:
        raise AssertionError(
            "input has %d channels but filter expects %d"
            % (data.shape[-1], kernel.shape[-2]))
    # Compare by value: ``is`` only works for strings that happen to be
    # interned.
    if my_padding not in ("VALID", "SAME"):
        raise AssertionError('padding must be "VALID" or "SAME"')

    if my_strides is None:
        strides = [1] * num_spatial
    else:
        if len(my_strides) < num_spatial:
            raise AssertionError(
                "need %d strides, got %d" % (num_spatial, len(my_strides)))
        # Build a new list so the caller's sequence is left untouched.
        strides = [int(floor(s)) for s in my_strides[:num_spatial]]
    if min(strides) < 1:
        raise AssertionError("strides must be >= 1")

    in_spatial = data.shape[1:-1]
    k_spatial = kernel.shape[:num_spatial]

    # Per-dimension output size and padding, following TensorFlow's rule.
    out_spatial = []
    pad_before = []
    padded_spatial = []
    for size, k, s in zip(in_spatial, k_spatial, strides):
        if my_padding == "VALID":
            if size < k:
                raise AssertionError(
                    "filter size %d exceeds input size %d with VALID padding"
                    % (k, size))
            out = (size - k) // s + 1
            total = 0
        else:
            out = -(-size // s)  # integer ceil(size / s)
            total = max((out - 1) * s + k - size, 0)
        out_spatial.append(out)
        # When the total padding is odd the extra cell goes at the end.
        pad_before.append(total // 2)
        padded_spatial.append(size + total)

    padded = np.zeros((data.shape[0], *padded_spatial, data.shape[-1]))
    interior = tuple(
        slice(start, start + size)
        for start, size in zip(pad_before, in_spatial))
    padded[(slice(None), *interior, slice(None))] = data

    result = np.zeros((data.shape[0], *out_spatial, kernel.shape[-1]))
    for out_pos in np.ndindex(*out_spatial):
        window = tuple(
            slice(o * s, o * s + k)
            for o, s, k in zip(out_pos, strides, k_spatial))
        # patch: (batch, *filter_spatial, in_channels)
        patch = padded[(slice(None), *window, slice(None))]
        # Contract the spatial and input-channel axes against the filter,
        # leaving (batch, out_channels) for this output position.
        result[(slice(None), *out_pos, slice(None))] = np.tensordot(
            patch, kernel, axes=num_spatial + 1)

    return result

From the tf.nn.convolution documentation:


1.   input: An (N+2)-D Tensor of type T, of shape [batch_size] + input_spatial_shape + [in_channels] if data_format does not start with "NC" (default), or [batch_size, in_channels] + input_spatial_shape if data_format starts with "NC".
2.   filter: An (N+2)-D Tensor with the same type as input and shape spatial_filter_shape + [in_channels, out_channels].
3. padding: A string, either "VALID" or "SAME". The padding algorithm.
4. strides: Optional. Sequence of N ints >= 1. Specifies the output stride. Defaults to [1]*N. If any value of strides is > 1, then all values of dilation_rate must be 1.


In [0]:
def tf_convolution(tf_input, tf_filter, tf_padding, tf_strides):
    """Evaluate tf.nn.convolution in a TensorFlow 1.x session.

    Args:
        tf_input: Passed as ``input`` to ``tf.nn.convolution``.
        tf_filter: Passed as ``filter`` to ``tf.nn.convolution``.
        tf_padding: Passed as ``padding`` ("VALID" or "SAME").
        tf_strides: Passed as ``strides``.

    Returns:
        The value returned by ``sess.run`` for the convolution op.
    """
    # The context manager closes the session even when building or running
    # the op raises (for example on mismatched shapes).
    with tf.Session() as sess:
        tf_function = tf.nn.convolution(input=tf_input, filter=tf_filter, padding=tf_padding, strides=tf_strides)
        tf_output = sess.run(tf_function)

    return tf_output

In [0]:
# Demo: run the TensorFlow reference and the hand-written convolution on the
# same random data.
# Shapes follow the tf.nn.convolution layout: the input is
# [batch, height, width, in_channels] and the filter is
# [filter_height, filter_width, in_channels, out_channels].
sample_input = np.random.random([1,5,5,1])
sample_filter = np.random.random([3,3,1,1])
sample_padding = "SAME"
sample_strides = [2,2]

tf_output = tf_convolution(sample_input,sample_filter,sample_padding,sample_strides)
my_output = my_convolution(sample_input,sample_filter,sample_padding,sample_strides)

# NOTE: only the TensorFlow result is printed below; my_output is computed
# but never displayed or compared in this cell.
print("input:")
print(sample_input)
print("filter:")
print(sample_filter)
print("output:")
print(tf_output)

input:
[[[[0.71950311]
   [0.86032599]
   [0.65654004]
   [0.99101177]
   [0.00723444]]

  [[0.11298596]
   [0.74380132]
   [0.16618082]
   [0.7534613 ]
   [0.25576091]]

  [[0.16052137]
   [0.52499305]
   [0.7855823 ]
   [0.23870289]
   [0.61803179]]

  [[0.30325429]
   [0.09959388]
   [0.43617282]
   [0.98500698]
   [0.7024627 ]]

  [[0.86552284]
   [0.87392352]
   [0.03369949]
   [0.41557934]
   [0.11142732]]]]
filter:
[[[[0.0095794 ]]

  [[0.16400217]]

  [[0.60847611]]]


 [[[0.8487276 ]]

  [[0.01185965]]

  [[0.68977959]]]


 [[[0.28972102]]

  [[0.61696787]]

  [[0.44197069]]]]
output:
[[[[1.00041545]
   [2.07257936]
   [1.21727469]]

  [[1.06626423]
   [1.84569087]
   [0.97786057]]

  [[0.72341426]
   [1.70062138]
   [0.47867633]]]]


### b) Write a function for pooling operation in the cell below. Write your own code with your own algorithm. It should have necessary error checks for matrix dimensions (consider multichannel matrices). Inputs must have the same structure as Tensorflow pooling operation and also the results must be same. Check [tf.nn.pool](https://www.tensorflow.org/api_docs/python/tf/nn/pool)

# Pooling
This function performs a pooling operation with a striding window over an image with multiple channels. The parameter "my_input" takes the input image as a numpy array. The shape of the pooling window is specified with "my_window_shape" and the stride is given with "my_strides". The pooling type is given with the "my_pooling_type" parameter, which is either "MAX" (for max pooling) or "AVG" (for average pooling). The padding type is given with the "my_padding" parameter, which is either "VALID" (no padding) or "SAME".

In [0]:
def my_pooling(my_input, my_window_shape, my_pooling_type, my_padding, my_strides):
    """Max- or average-pool a channels-last input, mirroring tf.nn.pool.

    Padding cells never contribute to a result: with ``"SAME"`` padding each
    window is clipped to the part that overlaps the real input, so ``"AVG"``
    divides by the number of real cells and ``"MAX"`` ignores the border.

    Args:
        my_input: Array of shape ``[batch] + spatial_shape + [channels]``
            with 1 to 3 spatial dimensions.
        my_window_shape: Sequence with one window size per spatial
            dimension.  Non-integer sizes are floored.
        my_pooling_type: ``"MAX"`` or ``"AVG"``.
        my_padding: ``"VALID"`` (no padding) or ``"SAME"`` (each output
            spatial size is ``ceil(input_size / stride)``).
        my_strides: Sequence with at least one stride per spatial dimension
            (extra entries are ignored), or ``None`` for a stride of 1
            everywhere.  Non-integer strides are floored.  The sequence is
            not modified.  Strides larger than the window are allowed.

    Returns:
        A float64 array of shape
        ``[batch] + output_spatial_shape + [channels]``.

    Raises:
        AssertionError: If the rank, window shape, pooling type, padding
            name, strides or (for ``"VALID"``) the spatial sizes are invalid.
    """
    data = np.asarray(my_input)
    num_spatial = data.ndim - 2

    # Explicit raises (rather than ``assert`` statements) keep the checks
    # active under ``python -O`` while preserving the exception type.
    if not 1 <= num_spatial <= 3:
        raise AssertionError(
            "input must have 1 to 3 spatial dimensions, got shape %s"
            % (data.shape,))
    if len(my_window_shape) != num_spatial:
        raise AssertionError(
            "window shape needs %d entries, got %d"
            % (num_spatial, len(my_window_shape)))
    # Compare by value: ``is`` only works for strings that happen to be
    # interned.
    if my_padding not in ("VALID", "SAME"):
        raise AssertionError('padding must be "VALID" or "SAME"')
    if my_pooling_type not in ("MAX", "AVG"):
        raise AssertionError('pooling type must be "MAX" or "AVG"')

    if my_strides is None:
        strides = [1] * num_spatial
    else:
        if len(my_strides) < num_spatial:
            raise AssertionError(
                "need %d strides, got %d" % (num_spatial, len(my_strides)))
        # Build a new list so the caller's sequence is left untouched.
        strides = [int(floor(s)) for s in my_strides[:num_spatial]]
    window = [int(floor(w)) for w in my_window_shape]
    if min(strides) < 1 or min(window) < 1:
        raise AssertionError("strides and window sizes must be >= 1")

    in_spatial = data.shape[1:-1]

    # Per-dimension output size and leading padding, following TensorFlow's
    # rule; when the total padding is odd the extra cell goes at the end.
    out_spatial = []
    pad_before = []
    for size, k, s in zip(in_spatial, window, strides):
        if my_padding == "VALID":
            if size < k:
                raise AssertionError(
                    "window size %d exceeds input size %d with VALID padding"
                    % (k, size))
            out_spatial.append((size - k) // s + 1)
            pad_before.append(0)
        else:
            out = -(-size // s)  # integer ceil(size / s)
            total = max((out - 1) * s + k - size, 0)
            out_spatial.append(out)
            pad_before.append(total // 2)

    reduce_fn = np.max if my_pooling_type == "MAX" else np.mean
    spatial_axes = tuple(range(1, num_spatial + 1))

    result = np.zeros((data.shape[0], *out_spatial, data.shape[-1]))
    for out_pos in np.ndindex(*out_spatial):
        # Window bounds in input coordinates, clipped to the real input so
        # that padding is excluded from the reduction.
        region = tuple(
            slice(max(o * s - before, 0), min(o * s - before + k, size))
            for o, s, before, k, size
            in zip(out_pos, strides, pad_before, window, in_spatial))
        patch = data[(slice(None), *region, slice(None))]
        result[(slice(None), *out_pos, slice(None))] = reduce_fn(
            patch, axis=spatial_axes)

    return result

In [0]:
def tf_pooling(tf_input, tf_window_shape, tf_pooling_type, tf_padding, tf_strides):
    """Evaluate tf.nn.pool in a TensorFlow 1.x session.

    Args:
        tf_input: Passed as ``input`` to ``tf.nn.pool``.
        tf_window_shape: Passed as ``window_shape``.
        tf_pooling_type: Passed as ``pooling_type`` ("MAX" or "AVG").
        tf_padding: Passed as ``padding`` ("VALID" or "SAME").
        tf_strides: Passed as ``strides``.

    Returns:
        The value returned by ``sess.run`` for the pooling op.
    """
    # The context manager closes the session even when building or running
    # the op raises (for example on an invalid window shape).
    with tf.Session() as sess:
        tf_function = tf.nn.pool(input=tf_input, window_shape=tf_window_shape, pooling_type=tf_pooling_type, padding=tf_padding, strides=tf_strides)
        tf_output = sess.run(tf_function)

    return tf_output

In [0]:
# Demo: run the TensorFlow reference and the hand-written pooling on the same
# random data and print both results for comparison.
# The input follows the tf.nn.pool layout [batch, height, width, channels];
# the width (3) is not a multiple of the stride (2), so "SAME" padding has to
# extend the last window past the input.
sample_input = np.random.random([1,4,3,1])
sample_window_shape = [2,2]
sample_pooling_type = "AVG"
sample_padding = "SAME"
sample_strides = [2,2]

tf_output = tf_pooling(sample_input,sample_window_shape,sample_pooling_type,sample_padding,sample_strides)
my_output = my_pooling(sample_input,sample_window_shape,sample_pooling_type,sample_padding,sample_strides)

print("input:")
print(sample_input)
print("output:")
print(tf_output)
print("my output:")
print(my_output)

input:
[[[[0.96993453]
   [0.17925985]
   [0.54353287]]

  [[0.52305583]
   [0.88540399]
   [0.17589982]]

  [[0.16149899]
   [0.79599133]
   [0.01808049]]

  [[0.29914766]
   [0.83458964]
   [0.07202726]]]]
output:
[[[[0.63941355]
   [0.35971635]]

  [[0.52280691]
   [0.04505388]]]]
my output:
[[[[0.63941355]
   [0.35971635]]

  [[0.52280691]
   [0.04505388]]]]
