# LAB: Implementing convolutions

In [1]:
import init
# Run the course's init helper without forcing a re-download, then display
# whatever get_weblink() returns as the cell output.
# NOTE(review): `init` is a project-local module; its exact effects are not visible here.
init.init(force_download=False)
init.get_weblink()

username: dl_test_student

password: nada

In [3]:
from local.lib.rlxmoocapi import submit, session
import inspect
# Log in to the course endpoint for this lab; `student` is used by the
# submit_task cells further down.
student = session.Session(init.endpoint).login(
    course_id=init.course_id,
    session_id="UDEA",
    lab_id="LAB_U4.02",
)

In [5]:
import sys
# Only when running inside Google Colab: select TensorFlow 2.x and load the
# TensorBoard extension before TensorFlow is imported below.
if 'google.colab' in sys.modules:
    print ("setting tensorflow version in colab")
    %tensorflow_version 2.x
    %load_ext tensorboard
import tensorflow as tf
# displayed as the cell output
tf.__version__

In [6]:
import tensorflow as tf
from time import time
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from local.lib import mlutils
%matplotlib inline

## observe how we create a `tf.keras.layers.Conv2D` layer and assign it some weights

In [7]:
# Two 3x3 filters with a sigmoid activation, computed in float64.
c = tf.keras.layers.Conv2D(
    filters=2,
    kernel_size=(3, 3),
    activation="sigmoid",
    dtype=tf.float64,
)
c.get_config()

In [44]:
# Shapes of the layer's weight tensors at this point (the layer has not been called yet).
[w.shape for w in c.weights]

initialize its weights (filters) by forcing an evaluation 

In [45]:
# Call the layer once on a random 7x7 single-channel image; only the call
# matters here, the random values themselves are not used afterwards.
c(np.random.randint(10, size=(1, 7, 7, 1)).astype(float))
[w.shape for w in c.weights]

### let's set by hand some filters

In [46]:
# shape of the first weight tensor; the hand-made filters below are built to match it
c.weights[0].shape

In [47]:
# f1 has +1 down its middle column, f2 has +1 along its middle row.
f1 = np.array([[-1,  1, -1],
               [-1,  1, -1],
               [-1,  1, -1]])
f2 = np.array([[-1, -1, -1],
               [ 1,  1,  1],
               [-1, -1, -1]])

# Pack both filters into one array shaped like the layer's kernel:
# the last axis indexes the filter, axis 2 is the single input channel.
f = np.zeros(c.weights[0].shape)
f[:, :, 0, :] = np.stack([f1, f2], axis=-1)
print (f[:, :, 0, 0], f[:, :, 0, 1], sep="\n")

In [48]:
# install the hand-made filters and keep both biases at zero
c.set_weights([f, np.array([0, 0])])

check they are set

In [49]:
# display the layer's weights to confirm the values assigned with set_weights
c.weights

## and apply the layer to some image

In [50]:
from skimage import io
img = io.imread("local/imgs/sample_img.png")
# add a leading batch axis and a trailing channel axis
img = img[np.newaxis, ..., np.newaxis]
# min-max scale the pixel values to the [0, 1] range
img = (img - img.min()) / (img.max() - img.min())
plt.imshow(img[0, :, :, 0], cmap=plt.cm.Greys_r);

we are using **valid** padding so activations are smaller than original image

In [51]:
# apply the layer (with the hand-set filters) to the image; the resulting
# shape is displayed as the cell output
activations = c(img)
activations.shape

In [52]:
# activation map produced by filter 0 (f1)
plt.imshow(activations[0,:,:,0], cmap=plt.cm.Greys_r)

In [53]:
# activation map produced by filter 1 (f2)
plt.imshow(activations[0,:,:,1], cmap=plt.cm.Greys_r)

## observe how activations pixels are computed

In [54]:
# top-left 5x5 corner of the activation map of filter 1
activations[0,:5,:5,1]

the first three pixels in the first row, computed by hand

In [55]:
def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)); works on scalars and numpy arrays."""
    return 1.0 / (1.0 + np.exp(-x))

def linear(x):
    """Identity activation: return the input unchanged."""
    return x
# Slide a 3-column window one pixel at a time along the top three image rows,
# multiply it elementwise by filter 1, sum, and apply the sigmoid.
print (*[sigmoid((img[0, :3, x0:x0 + 3, :] * f[..., 1]).sum()) for x0 in range(3)], sep="\n")


observe the shapes of filters and activations to make sense of the indices

In [56]:
# shapes of the image, the filters and the layer output, side by side
img.shape, f.shape, activations.shape

# Step 1: Do the convolution by hand in a set of for loops

## Task 1

complete the following function. its parameters:

- `img`: the images, an array of size [1,y,x,k], where:
    - `1`: you will be receiving only one image
    - `x`, `y`: the size of the image
    - `k` : the number of channels
    
- `f`: the filters, an array of size [fy,fx,k,n], where:
    - `fx`, `fy`: the size of the filters
    - `k` : the number of channels (**must be the same as in images**)
    - `n`: the number of filters
- `activation`: the activation function to use (such as `sigmoid` or `linear` above)

In [22]:
def convolution_byhand(img, f, activation=sigmoid):
    """Convolve one image with a set of filters using explicit loops (Task 1).

    Parameters
    ----------
    img : array of size [1, y, x, k]
        A single image of height y, width x and k channels.
    f : array of size [fy, fx, k, n]
        n filters of height fy and width fx with k channels.
    activation : callable
        Function applied to each filter response (e.g. `sigmoid` or `linear`).

    Returns
    -------
    r : array of size [1, y-fy+1, x-fx+1, n]
        Allocated here filled with zeros; the student code is expected to fill it.
    """
    # filters and image must have the same number of channels
    assert f.shape[2]==img.shape[3]
    fy = f.shape[0]
    fx = f.shape[1]
    # output is smaller than the input by the filter size minus one in each direction
    r = np.zeros( (1, img.shape[1]-fy+1, img.shape[2]-fx+1, f.shape[3] ))
    ... # YOUR CODE HERE
    return r

In [58]:
# run the loop-based convolution with its default (sigmoid) activation
r = convolution_byhand(img, f)

check your answer

In [59]:
# Filter 0: hand-written result on the left, Keras output on the right.
plt.figure(figsize=(7, 3))
plt.subplot(121)
plt.imshow(r[0, :, :, 0], cmap=plt.cm.Greys_r)
plt.title("your convolution")
plt.subplot(122)
plt.imshow(activations[0, :, :, 0], cmap=plt.cm.Greys_r)
plt.title("keras convolution")

In [60]:
# Filter 1: hand-written result on the left, Keras output on the right.
plt.figure(figsize=(7, 3))
plt.subplot(121)
plt.imshow(r[0, :, :, 1], cmap=plt.cm.Greys_r)
plt.title("your convolution")
plt.subplot(122)
plt.imshow(activations[0, :, :, 1], cmap=plt.cm.Greys_r)
plt.title("keras convolution")

**Submit your solution**

In [65]:
# submit task_01, passing the notebook's global namespace to the submission helper
student.submit_task(namespace=globals(), course_id=init.course_id, lab_id='LAB_U4.02', task_id='task_01')

# Step 2: Do the convolution by hand in 'one shot'

We will prepare images to do the convolution with one dot product operation for each filter and each image. This will use more memory but will increase performance.

For instance, assume we have the following 1x8x6x1 images (only one image, one channel) and 2x3x1x2 filters (one channel, two filters)

In [66]:
# Toy image, written one image row (6 pixels) per line: 1 image, 8x6, 1 channel.
img = np.array([9, 4, 9, 6, 7, 1,
                2, 2, 8, 0, 8, 6,
                8, 6, 5, 5, 1, 4,
                3, 4, 4, 4, 3, 6,
                5, 1, 7, 9, 1, 4,
                0, 3, 1, 4, 3, 5,
                1, 5, 5, 4, 9, 6,
                3, 2, 8, 9, 0, 6]).reshape(1, 8, 6, 1)
# Two 2x3 filters over a single channel.
f = np.array([6, 7, 8, 5, 2, 9, 6, 4, 9, 7, 9, 7]).reshape(2, 3, 1, 2)

print ("images", img.shape)
print (img[0, :, :, 0])
print ("--")
print ("filters", f.shape)
print (f[:, :, 0, 0])
print (f[:, :, 0, 1])

### Task 2: complete the following function to prepare the images

where:

- `img` is the images array (assume we only have one image)
- `fy` and `fx` are the filter dimensions (2,3 in the example just above)

if called with `img` and `f` above you should get the following output

    > pimg = prepare_img(img, *f.shape[:2])
    > pimg
    
    array([[[[9., 4., 9., 2., 2., 8.],
             [4., 9., 6., 2., 8., 0.],
             [9., 6., 7., 8., 0., 8.],
             [6., 7., 1., 0., 8., 6.]],

            [[2., 2., 8., 8., 6., 5.],
             [2., 8., 0., 6., 5., 5.],
             [8., 0., 8., 5., 5., 1.],
             [0., 8., 6., 5., 1., 4.]],

            [[8., 6., 5., 3., 4., 4.],
             [6., 5., 5., 4., 4., 4.],
             [5., 5., 1., 4., 4., 3.],
             [5., 1., 4., 4., 3., 6.]],

            [[3., 4., 4., 5., 1., 7.],
             [4., 4., 4., 1., 7., 9.],
             [4., 4., 3., 7., 9., 1.],
             [4., 3., 6., 9., 1., 4.]],

            [[5., 1., 7., 0., 3., 1.],
             [1., 7., 9., 3., 1., 4.],
             [7., 9., 1., 1., 4., 3.],
             [9., 1., 4., 4., 3., 5.]],

            [[0., 3., 1., 1., 5., 5.],
             [3., 1., 4., 5., 5., 4.],
             [1., 4., 3., 5., 4., 9.],
             [4., 3., 5., 4., 9., 6.]],

            [[1., 5., 5., 3., 2., 8.],
             [5., 5., 4., 2., 8., 9.],
             [5., 4., 9., 8., 9., 0.],
             [4., 9., 6., 9., 0., 6.]]]])
             
    > pimg.shape
    
    (1, 7, 4, 6)
    
    
observe that:

- resulting images after convolution with any filter will have size 7x4
- the resulting structure `pimg` has at each pixel (in the 7x4 grid) a vector of six elements associated with it.
- this vector is the flattened contents of the 2x3x1 image fragment located at that pixel, which would be multiplied element by element by any filter placed at that pixel during the convolution.
- the first row in `pimg` corresponds to the flattened 2x3 fragment located at the top left corner of `img`
- the second row contains the 2x3 fragment after shifting one pixel to the right.
- the [`np.ndarray.flatten`](https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.flatten.html) method will surely be useful for you.

In [67]:
## Teacher
def prepare_img(img, fy, fx):
    """Unroll every fy x fx window of a single image into a flat vector.

    Parameters
    ----------
    img : array of size [1, y, x, k]
        One image with k channels (only image 0 is read).
    fy, fx : int
        Filter height and width.

    Returns
    -------
    array of size [1, y-fy+1, x-fx+1, fy*fx*k]
        Entry [0, py, px, :] holds the flattened fragment
        img[0, py:py+fy, px:px+fx, :], so that a convolution at that
        position becomes a dot product with a flattened filter.
    """
    out_h = img.shape[1] - fy + 1
    out_w = img.shape[2] - fx + 1
    r = np.zeros((1, out_h, out_w, fy * fx * img.shape[3]))
    # Visit every output position, including the last row and the last
    # column (iterating only up to shape-fy / shape-fx would leave them at zero).
    for y in range(out_h):
        for x in range(out_w):
            r[0, y, x, :] = img[0, y:y + fy, x:x + fx, :].flatten()
    return r

In [53]:
def prepare_img(img, fy, fx):
    """Prepare a single image for one-shot convolution (Task 2).

    Parameters
    ----------
    img : array of size [1, y, x, k]
        The images array (assume only one image).
    fy, fx : int
        Filter height and width.

    Returns
    -------
    r : array of size [1, y-fy+1, x-fx+1, fy*fx*k]
        Allocated here filled with zeros; the student code is expected to
        store at each position the flattened fy x fx x k image fragment
        located at that position.
    """
    r = np.zeros( (1, img.shape[1]-fy+1, img.shape[2]-fx+1, fy*fx*img.shape[3] ))
    ... # YOUR CODE HERE
    return r

In [68]:
# f.shape[:2] unpacks into (fy, fx), the filter height and width
pimg = prepare_img(img, *f.shape[:2])
print (pimg.shape)
pimg

**Submit your solution**

In [73]:
# submit task_02, passing the notebook's global namespace to the submission helper
student.submit_task(namespace=globals(), course_id=init.course_id, lab_id='LAB_U4.02', task_id='task_02')

## Task 3: use the prepared images to do the convolution  in a single `.dot` operation


complete the following function so that:

- you do the convolution of one image (`i`) and one filter (`j`) with one `.dot` operation in **one single line of code**.
- you apply the corresponding activation function

In [76]:
## Teacher
def oneshot_convolution(pimg, f, activation=sigmoid):
    """Convolve prepared images with each filter using one dot product per pair.

    pimg is the 4-D array of flattened image fragments (one vector per output
    pixel); f holds the filters along its last axis. For every image i and
    filter j, the flattened filter is dotted with every fragment vector and
    `activation` is applied to the result.

    Returns an array shaped like pimg, except that the last axis has one
    entry per filter.
    """
    n_filters = f.shape[3]
    # flatten each filter once so it lines up with the fragment vectors
    flat_filters = [f[..., j].ravel() for j in range(n_filters)]
    r = np.zeros(pimg.shape[:-1] + (n_filters,))
    for i, fragments in enumerate(pimg):
        for j, weights in enumerate(flat_filters):
            r[i, :, :, j] = activation(fragments.dot(weights))
    return r

In [75]:
def oneshot_convolution(pimg, f, activation=sigmoid):
    """Convolve prepared images with each filter in a single `.dot` per pair (Task 3).

    Parameters
    ----------
    pimg : 4-D array
        Prepared images; the last axis holds the flattened image fragments.
    f : array of size [fy, fx, k, n]
        The n filters.
    activation : callable
        Activation function applied to each filter response.

    Returns
    -------
    r : array shaped like `pimg` with the last axis replaced by the number of filters.
    """
    r = np.zeros((*pimg.shape[:-1], f.shape[3]))
    # one image (i) and one filter (j) at a time
    for i in range(pimg.shape[0]):
        for j in range(f.shape[3]):
            r[i,:,:,j] = ... # YOUR CODE HERE
    return r

check your solution against your previous implementation

In [77]:
# both implementations should agree when using the sigmoid activation
k1 = oneshot_convolution(pimg, f, activation=sigmoid)
k2 = convolution_byhand(img, f, activation=sigmoid)
np.allclose(k1, k2)

In [78]:
# same check with the linear (identity) activation; k2 is reused below
# for the comparison against keras
k1 = oneshot_convolution(pimg, f, activation=linear)
k2 = convolution_byhand(img, f, activation=linear)
np.allclose(k1, k2)

check your solution against keras

In [79]:
# Keras layer with the same number and size of filters as `f`, linear activation.
c = tf.keras.layers.Conv2D(
    filters=f.shape[3],
    kernel_size=f.shape[:2],
    activation="linear",
    dtype=tf.float64,
)
# evaluate once before assigning weights, as done for the first layer above
c(img.astype(float))
# our filters as the kernel, both biases at zero
c.set_weights((f, np.array([0, 0])))

In [80]:
# np.alltrue is deprecated and was removed in NumPy 2.0. np.allclose is the
# same check used for the by-hand comparisons and tolerates floating-point
# rounding differences between the two implementations.
np.allclose(c(img.astype(float)).numpy(), k2)

**Submit your solution**

In [85]:
# submit task_03, passing the notebook's global namespace to the submission helper
student.submit_task(namespace=globals(), course_id=init.course_id, lab_id='LAB_U4.02', task_id='task_03')