In [None]:
#importing everything required for the lab
import numpy as np
import time
from tensorflow import keras
from tqdm import tqdm
from sklearn import metrics

# Copying functions from previous labs

## Compute padding

In [None]:
import math

def compute_pad_filt(input_size, output_size):
  """Pick the smallest padding and the matching kernel size per axis.

  For each axis the relation ``kernel = in - out + 2 * pad + 1`` is evaluated
  with a padding of 1. The padding is enlarged only when the resulting kernel
  size would be smaller than 1.

  Arguments:
    input_size: pair (height, width) of the convolution input
    output_size: pair (height, width) of the desired convolution output

  Returns:
    ((pad_h, pad_w), (kernel_h, kernel_w))
  """
  def _solve_axis(in_len, out_len):
    # start from the minimal padding of 1: kernel = in - out + 2 * 1 + 1
    pad_len = 1
    kernel_len = in_len - out_len + 3
    if kernel_len < 1:
      # add just enough padding for the kernel size to reach at least 1
      pad_len = 1 + math.ceil((1 - kernel_len) / 2)
      kernel_len = kernel_len + (pad_len - 1) * 2
    return pad_len, kernel_len

  in_h, in_w = input_size
  out_h, out_w = output_size

  pad_h, kernel_h = _solve_axis(in_h, out_h)
  pad_w, kernel_w = _solve_axis(in_w, out_w)

  return (pad_h, pad_w), (kernel_h, kernel_w)

## Convolution

In [None]:
# forward propagation for a single-filter convolution
# (zero padding is applied first when requested)
def conv2d_forward(matrix, filter, pad=(0,0)):
  """Slide one filter over a 3-D input and return the 2-D response map.

  Arguments:
    matrix: array of shape (height, width, depth)
    filter: array of shape (kernel_height, kernel_width, depth); it is
      multiplied element-wise with every window of ``matrix``
    pad: (pad_h, pad_w) zeros added on both sides of the first two axes

  Returns:
    array of shape (height - kernel_height + 1, width - kernel_width + 1),
    computed on the padded input
  """
  if pad != (0,0):
    pad_h, pad_w = pad[0], pad[1]
    matrix = np.pad(matrix, ((pad_h, pad_h), (pad_w, pad_w), (0, 0)))
  in_h, in_w, _ = matrix.shape
  k_h, k_w, _ = filter.shape
  output = np.zeros((in_h - k_h + 1, in_w - k_w + 1))
  out_rows, out_cols = output.shape
  for row in range(out_rows):
    for col in range(out_cols):
      # window of the input currently covered by the filter
      window = matrix[row:row + k_h, col:col + k_w, :]
      # sum of the Hadamard product between the window and the filter
      output[row, col] = np.sum(window * filter)
  return output

In [None]:
# backward propagation for convolution (dL/dZ) through a single filter
def conv2d_backward_pad(upstream, filter, pad=(0,0)):
  """Propagate an upstream gradient back through one convolution filter.

  The filter is rotated by 180 degrees in its first two axes, and each of its
  depth slices is then run over ``upstream`` with ``conv2d_forward``.

  Arguments:
    upstream: gradient array of shape (height, width, 1)
    filter: array of shape (kernel_height, kernel_width, depth)
    pad: padding forwarded unchanged to ``conv2d_forward``

  Returns:
    array whose first axis indexes the filter's depth channels
  """
  _, _, depth = filter.shape
  quarter_turn = np.rot90(filter)
  flipped = np.rot90(quarter_turn) # two quarter turns = rotation by 180 degrees
  per_channel = [
    conv2d_forward(upstream, flipped[:, :, ch, np.newaxis], pad)
    for ch in range(depth)
  ]
  return np.array(per_channel)

In [None]:
# backward propagation for convolution (dL/dW) for a single filter
def conv2d_backward_weights(weights, upstream, pad=(0,0)):
  """Compute the gradient of one filter from an upstream gradient.

  Despite its name, ``weights`` is the array that gets scanned (it is passed
  as the ``matrix`` argument of ``conv2d_forward``), while ``upstream`` is
  used as the filter.

  Arguments:
    weights: array of shape (height, width, depth), padded here when
      ``pad`` is not (0, 0)
    upstream: gradient array of shape (height, width, 1)
    pad: (pad_h, pad_w) zeros added on both sides of the first two axes

  Returns:
    array with the depth channel as its last axis
  """
  if pad != (0,0):
    pad_h, pad_w = pad[0], pad[1]
    weights = np.pad(weights, ((pad_h, pad_h), (pad_w, pad_w), (0, 0)))
  _, _, depth = weights.shape
  channel_grads = [
    conv2d_forward(weights[:, :, ch, np.newaxis], upstream)
    for ch in range(depth)
  ]
  # move the channel axis from the front to the back
  return np.transpose(np.array(channel_grads), (1, 2, 0))

## ReLU

In [None]:
#function for RelU forward and backward propagation
#taken from previous assignments
def RelU_jacobian(input):
  """Element-wise derivative of ReLU: 1 where ``input > 0``, otherwise 0."""
  is_positive = input > 0
  return 1 * is_positive

def RelU_forward_prop(input):
  """Forward pass of ReLU: element-wise maximum of ``input`` and 0."""
  rectified = np.maximum(input, 0)
  return rectified

def RelU_backward_prop(input, loss):
  """Backward pass of ReLU.

  Arguments:
    input: the values that were fed into ReLU during the forward pass
    loss: upstream gradient, converted to a NumPy array

  Returns:
    the upstream gradient multiplied element-wise by the ReLU derivative
  """
  gate = RelU_jacobian(input) # 1 where the input was positive, 0 elsewhere
  upstream = np.array(loss)
  return gate * upstream

## Matmul

In [None]:
#functions for matmul backward and forward propagation from previous assignments
def MatMul_forward_prop(matrix, input):
  """Forward pass of a matrix multiplication: returns ``matrix @ input``."""
  left, right = np.array(matrix), np.array(input)
  return left @ right

#function that finds dL/dx
def MatMul_backward_prop(matrix, loss):
  """Gradient with respect to the input: returns ``matrix.T @ loss``."""
  transposed = np.array(matrix).T
  upstream = np.array(loss)
  return transposed @ upstream

#function that finds dL/dW
def MatMul_matrix_backward_prop(X, loss):
  """Gradient with respect to the matrix: returns ``loss @ X.T``."""
  upstream = np.array(loss)
  transposed_input = np.array(X).T
  return upstream @ transposed_input

## Labels vectorization

In [None]:
#function taken from previous assignments
#it translates label numbers into one-hot vectors of 0s and 1s
def label_vec_func(labels, num_classes=None):
  """Convert a sequence of integer labels into a one-hot matrix.

  Arguments:
    labels: sequence of integer class indices
    num_classes: number of columns of the result. When omitted, the
      notebook-level constant ``c`` is used, which keeps the old call
      ``label_vec_func(labels)`` working; ``c`` must then already be defined.

  Returns:
    array of shape (len(labels), num_classes) with a single 1 per row
  """
  if num_classes is None:
    num_classes = c # backward-compatible fallback to the global constant
  labels_matrix = np.zeros([len(labels), num_classes])
  for row, label in enumerate(labels):
    labels_matrix[row, label] = 1
  return labels_matrix

## Convolution for many filters

In [None]:
# forward propagation for a convolution layer with several filters
# filters is an array of size: (height, width, depth, number of filters)
# the padding is forwarded to every single-filter call
def conv2d_forward_many(matrix, filters, pad=(0,0)):
  """Apply every filter to the input and stack the response maps.

  Arguments:
    matrix: array of shape (height, width, depth)
    filters: array of shape (kernel_height, kernel_width, depth, n_filters)
    pad: (pad_h, pad_w) passed on to ``conv2d_forward``

  Returns:
    array of shape (height - kernel_height + 1 + 2 * pad_h,
    width - kernel_width + 1 + 2 * pad_w, n_filters)
  """
  in_h, in_w, _ = matrix.shape
  k_h, k_w, _, n_filters = filters.shape
  out_h = in_h - k_h + 1 + 2 *pad[0]
  out_w = in_w - k_w + 1 + 2 * pad[1]
  output = np.zeros((out_h, out_w, n_filters))
  for idx in range(n_filters):
    # one response map per filter, stored along the last axis
    output[:, :, idx] = conv2d_forward(matrix, filters[:, :, :, idx], pad)
  return output

In [None]:
# backward propagation for a convolution layer with several filters (dL/dZ)
# filters is an array of size: (height, width, depth, number of filters)
# the padding is forwarded to every single-filter call
def conv2d_backward_many(upstream, filters, pad=(0,0)):
  """Propagate the upstream gradient back to the layer input.

  Arguments:
    upstream: gradient array of shape (height, width, n_filters)
    filters: array of shape (kernel_height, kernel_width, depth, n_filters)
    pad: padding passed on to ``conv2d_backward_pad``

  Returns:
    the per-filter contributions summed together, with the depth channel
    moved to the last axis
  """
  _, _, _, n_filters = filters.shape
  contributions = [
    conv2d_backward_pad(upstream[:, :, idx, np.newaxis], filters[:, :, : , idx], pad)
    for idx in range(n_filters)
  ]
  summed = np.sum(contributions, 0) # add up the contributions of all filters
  return np.transpose(summed, (1, 2, 0))

In [None]:
# backward propagation for a convolution layer with several filters (dL/dW)
# the result has size: (height, width, depth, number of filters)
# the padding is forwarded to every single-filter call
def conv2d_backward_weights_many(weight, upstream, pad=(0,0)):
  """Compute the gradient of every filter of a convolution layer.

  Arguments:
    weight: array handed to ``conv2d_backward_weights`` as its first
      argument (the training loop passes the layer input here)
    upstream: gradient array of shape (height, width, n_filters)
    pad: padding passed on to ``conv2d_backward_weights``

  Returns:
    the per-filter gradients stacked along the last axis
  """
  _, _, n_filters = upstream.shape
  filter_grads = [
    conv2d_backward_weights(weight, upstream[:, :, idx, np.newaxis], pad)
    for idx in range(n_filters)
  ]
  # move the filter axis from the front to the back
  return np.transpose(np.array(filter_grads), (1, 2, 3, 0))

## Resize

In [None]:
# for rescaling I used Nearest-neighbour interpolation
def nearest(input, width, height):
  """Rescale an image with nearest-neighbour interpolation.

    Reference to the source that I used: https://kwojcicki.github.io/blog/NEAREST-NEIGHBOUR

    Note on the argument names: the output is allocated as
    ``(width, height, 3)``, so ``width`` is the size of the first (row) axis
    and ``height`` the size of the second (column) axis. The names are kept
    unchanged for backward compatibility.

    Keyword arguments:
      input (np.array): input image, indexed as input[row][column]
      width (int) -- size of the first axis (rows) of the rescaled image
      height (int) -- size of the second axis (columns) of the rescaled image

    Returns:
      output (np.array): rescaled image of shape (width, height, 3), dtype int
  """
  # initialization of output variable
  output = np.zeros((width, height, 3), dtype=int)
  # One scale per axis. Rows must be scaled with the row ratio and columns
  # with the column ratio; mixing them up reads the wrong pixels (or runs
  # out of bounds) as soon as the input or the output is not square.
  row_scale = input.shape[0] / output.shape[0]
  col_scale = input.shape[1] / output.shape[1]
  # the source column of an output column does not depend on the row,
  # so it is computed once up front
  src_cols = [math.floor(col * col_scale) for col in range(output.shape[1])]
  for row in range(output.shape[0]):
    # nearest input row for the current output row
    src_row = input[math.floor(row * row_scale)]
    for col, src_col in enumerate(src_cols):
      # copy the nearest input pixel into the output pixel
      output[row][col] = src_row[src_col]

  return output

# Implementing functions required for this lab

In [None]:
def SE_forward(y_pred, y_true):
  """Squared-error loss: sum of the squared differences between prediction and target."""
  residual = y_pred - y_true
  squared = residual ** 2
  return np.sum(squared)

In [None]:
def SE_backward(y_pred, y_true):
  """Gradient of the squared-error loss with respect to the prediction: 2 * (y_pred - y_true)."""
  return 2 * (y_pred - y_true)

# Downloading and preprocessing dataset

In [None]:
import requests, zipfile

# Download the dataset archive and unpack it into the current directory.
link = 'https://www.dropbox.com/scl/fi/ha6f9n5rcae6h79dhdykb/pikachu.zip?rlkey=b1z9xqub0z4acglomt9r983q0&dl=1'
file_path = 'dataset.zip'
# timeout: do not hang forever on a stalled connection
response = requests.get(link, timeout=60)
# fail here with a clear HTTP error instead of saving an error page as the
# archive and getting a confusing zip error further down
response.raise_for_status()

with open(file_path, 'wb') as local_file:
    local_file.write(response.content)

with zipfile.ZipFile(file_path, 'r') as zip_ref:
    zip_ref.extractall('.')


Function for reading the dataset from a directory

In [None]:
import cv2

def read_dataset(directory, h, w):
  """Load the images and bounding boxes of one dataset split.

  Reads ``<directory>/annotations.json``, and for every entry loads
  ``<directory>/images/<entry['image']>`` and rescales it with
  ``nearest(image, h, w)``.

  Arguments:
    directory: path of the split; must contain ``annotations.json`` and an
      ``images`` sub-directory
    h, w: target size handed to ``nearest``

  Returns:
    (x, y): array of rescaled images and array of the entries' ``'loc'`` values

  Raises:
    FileNotFoundError: when an image listed in the annotations cannot be read
  """
  import json # local import so this cell does not depend on another cell's imports

  # Parse the annotations as data. The file is closed right after reading,
  # and its contents are never executed as Python code.
  with open(directory + '/annotations.json') as annotation_file:
    annotation = json.load(annotation_file)
  x = []
  y = []
  for data in annotation.values():
    path = data['image']
    box  = data['loc']
    image_path = directory +'/images/' + path
    image = cv2.imread(image_path)
    if image is None:
      # report the missing or unreadable file here rather than failing
      # later inside nearest()
      raise FileNotFoundError('Could not read image: ' + image_path)
    y.append(np.array(box))
    x.append(nearest(image, h, w))
  return np.array(x), np.array(y)

In [None]:
# size every image is rescaled to; passed to read_dataset and used for the bias and reshape dimensions below
h, w = 32, 32
n = h * w #number of pixels for one picture

In [None]:
# load the training and validation splits; every image is rescaled with nearest(image, h, w)
# NOTE(review): absolute paths, presumably a Colab working directory -- confirm before running elsewhere
x_train, y_train = read_dataset('/content/train', h, w)
x_test, y_test = read_dataset('/content/val', h, w)

In [None]:
c = 4 # number of outputs for last linear layer; each target box is reshaped to (c, 1)

In [None]:
# number of samples in the training and test splits
N_train, N_test = len(x_train), len(x_test)

Let's reduce the number of samples in the dataset in order to speed up the training process.

In [None]:
# Initialise the network parameters, the hyperparameters and the gradient accumulators.
# All weights and biases are drawn from a uniform distribution on [-1, 1);
# no random seed is set, so the initial values differ between runs.
pad, kernel = compute_pad_filt((h, w), (h, w)) # we want the output to have the same dimensions as the input

hw1, Ww1  = kernel # initialize kernel sizes for first layer
d, D1 = 3, 2 # initialize depth and number of filters for first layer

hw2, Ww2, D2 = hw1, Ww1, 2 # initialize kernel sizes, number of filters for second layer

Conv1 =  np.random.uniform(-1, 1, (hw1, Ww1, d, D1)) # initial convolution filters for layer 1
b1 = np.random.uniform(-1, 1 , (h, w, D1)) # b1 - initial bias of convolution layer 1 (one value per output element)
Conv2 =  np.random.uniform(-1, 1, (hw2, Ww2, D1, D2))# initial convolution filters for layer 2
b2 = np.random.uniform(-1, 1 , (h, w, D2)) # b2 - initial bias of convolution layer 2 (one value per output element)

vectorized_len = h * w * D2 # size of the vectorized tensor after the 2 convolutions

c1 = vectorized_len // 4 # dimension of the first linear layer
W1 = np.random.uniform(-1, 1, (c1, vectorized_len)) # W1 - initial weights of the first linear layer
W2 = np.random.uniform(-1, 1, (c, c1)) # W2 - initial weights of the second linear layer
b3 = np.random.uniform(-1, 1 , (c1, 1)) # b3 - initial bias of the first linear layer
b4 = np.random.uniform(-1, 1 , (c, 1)) # b4 - initial bias of the second linear layer


nu = 0.0001 # learning rate
num_epochs = 10 # amount of epochs

N = 2 # number of images in minibatch

# initialize partial derivatives (accumulated over one minibatch)
dL_dConv1 = np.zeros((hw1, Ww1, d, D1))
dL_dConv2 = np.zeros((hw2, Ww2, D1, D2))
dL_dW1 = np.zeros((c1, vectorized_len))
dL_dW2 = np.zeros((c, c1))
# the bias accumulators start as the scalar 0 and become arrays on the first +=
dL_db1 = 0
dL_db2 = 0
dL_db3 = 0
dL_db4 = 0

In [None]:
# Training: minibatch gradient descent. Gradients are accumulated sample by
# sample and applied once per minibatch of N samples (or at the end of the dataset).
for epoch in range(num_epochs): #for each epoch (own name, so the sample index i below does not shadow it)
  total_loss = 0 #sum of losses for one epoch
  counter = 0 #number of samples accumulated in the current minibatch
  for i in tqdm(range(N_train)): #for each picture

    x = x_train[i, :, :] / 255 #normalize pixels, so they will be from 0 to 1

    y_true = y_train[i].reshape(c, 1)
    #forward propagation
    y1 = conv2d_forward_many(x, Conv1, pad=pad) #applying convolution filters from layer 1
    y2 = y1 + b1 #adding bias
    y3 = RelU_forward_prop(y2) #applying RelU
    y4 =  conv2d_forward_many(y3, Conv2, pad=pad) #applying convolution filters from layer 2
    y5 = y4 + b2 #adding bias
    y6 = RelU_forward_prop(y5) #applying RelU

    y7 = np.reshape(y6, (vectorized_len, 1)) #vectorizing the tensor for the linear layers

    y8 = MatMul_forward_prop(W1, y7) #applying matrix multiplication with the first weight matrix
    y9 = y8 + b3 #adding bias
    y10 = RelU_forward_prop(y9) #applying RelU
    y11 =  MatMul_forward_prop(W2, y10) #applying matrix multiplication with the second weight matrix
    y12 = y11 + b4 #adding bias
    # no ReLU after the last bias addition: y12 goes straight into the squared-error loss
    loss = SE_forward(y12, y_true)

    total_loss += loss #adding current loss to total loss

    #backward propagation

    back = SE_backward(y12, y_true) #gradient of the loss with respect to the network output y12

    dL_db4 += back #backpropagation from loss to the input of addition, finding dL/db4

    dL_dW2 += MatMul_matrix_backward_prop(y10, back) #finding dL/dW2

    back = MatMul_backward_prop(W2, back) #backpropagation from loss to the input of matrix multiplication with matrix W2

    back = RelU_backward_prop(y9, back) #backpropagation from loss to the input of RelU

    dL_db3 += back #backpropagation from loss to the input of addition, finding dL/db3

    dL_dW1 += MatMul_matrix_backward_prop(y7, back) #finding dL/dW1

    back = MatMul_backward_prop(W1, back) #backpropagation from addition to the input of matrix multiplication with matrix W1

    back = back.reshape((h, w, D2)) # backpropagation of reshaping

    back = RelU_backward_prop(y5, back) # backpropagation from reshaping to the input of RelU

    dL_db2 += back #backpropagation from ReLU to the input of addition, finding dL/db2

    dL_dConv2 += conv2d_backward_weights_many(y3, back, pad=pad) #finding dL/dConv2

    back = conv2d_backward_many(back, Conv2, pad=pad) # backpropagation from addition to the input of the convolution
    back = RelU_backward_prop(y2, back) # backpropagation from convolution to the input of RelU

    dL_db1 += back #backpropagation from ReLU to the input of addition, finding dL/db1
    # use the same padding as the forward pass of layer 1 instead of a hard-coded (1,1)
    dL_dConv1 += conv2d_backward_weights_many(x, back, pad=pad) #finding dL/dConv1

    counter += 1 # increasing counter

    if counter == N or i == N_train - 1: # if batch ended or dataset ended. We apply gradient descent only in batches
    #applying gradient descent for weights and biases

      # average over the samples that were really accumulated: the last
      # minibatch of an epoch can hold fewer than N samples
      step = nu / counter

      Conv1 = Conv1 - step * dL_dConv1
      Conv2 = Conv2 - step * dL_dConv2
      W1 = W1 - step * dL_dW1
      W2 = W2 - step * dL_dW2
      b1 = b1 - step * dL_db1
      b2 = b2 - step * dL_db2
      b3 = b3 - step * dL_db3
      b4 = b4 - step * dL_db4

      # setting partial derivatives to 0
      dL_dConv1 = np.zeros((hw1, Ww1, d, D1))
      dL_dConv2 = np.zeros((hw2, Ww2, D1, D2))
      dL_dW1 = np.zeros((c1, vectorized_len))
      dL_dW2 = np.zeros((c, c1))
      dL_db1 = 0
      dL_db2 = 0
      dL_db3 = 0
      dL_db4 = 0

      counter = 0

  print('\nLoss:', total_loss / N_train)

  return np.array(matrix) @ np.array(input)
  return np.array(loss) @ np.array(X).T
  return np.array(matrix).T @ np.array(loss)
  return jac * np.array(loss)
  3%|▎         | 28/900 [00:08<04:16,  3.41it/s]


KeyboardInterrupt: ignored

## Evaluation

For evaluation I will use intersection over union. The implementation is taken from this [source](https://pyimagesearch.com/2016/11/07/intersection-over-union-iou-for-object-detection/)

In [None]:
def bb_intersection_over_union(boxA, boxB):
    """Intersection over union of two boxes.

    Each box is indexed as box[0], box[1] (minimum x and y) and box[2],
    box[3] (maximum x and y). Every extent is computed as max - min + 1.

    Arguments:
      boxA: first box (the evaluation loop passes the prediction)
      boxB: second box (the evaluation loop passes the ground truth)

    Returns:
      the intersection area divided by the union area
    """
    def _box_area(box):
        # area of a single box, using the same "+ 1" extent convention
        return (box[2] - box[0] + 1) * (box[3] - box[1] + 1)

    # corners of the intersection rectangle
    left = max(boxA[0], boxB[0])
    top = max(boxA[1], boxB[1])
    right = min(boxA[2], boxB[2])
    bottom = min(boxA[3], boxB[3])

    # an empty overlap along either axis gives a zero intersection area
    overlap_w = max(0, right - left + 1)
    overlap_h = max(0, bottom - top + 1)
    interArea = overlap_w * overlap_h

    # union = both areas minus the part that was counted twice
    union = float(_box_area(boxA) + _box_area(boxB) - interArea)
    return interArea / union

In [None]:
# Evaluation on the test split: repeat the forward pass of the training loop
# for every test image and accumulate the squared-error loss and the IoU.
total_iou = 0
total_loss = 0
for i in tqdm(range(N_test)):
    x = x_test[i, :, :] / 255 #normalize pixels, so they will be from 0 to 1
    y_true = y_test[i].reshape(c, 1)


    y1 = conv2d_forward_many(x, Conv1, pad=pad) #applying convolution filters from layer 1
    y2 = y1 + b1 #adding bias
    y3 = RelU_forward_prop(y2) #applying RelU
    y4 =  conv2d_forward_many(y3, Conv2, pad=pad) #applying convolution filters from layer 2
    y5 = y4 + b2 #adding bias
    y6 = RelU_forward_prop(y5) #applying RelU

    y7 = np.reshape(y6, (vectorized_len, 1)) #vectorizing the tensor for the linear layers

    y8 = MatMul_forward_prop(W1, y7) #applying matrix multiplication with the first weight matrix
    y9 = y8 + b3 #adding bias
    y10 = RelU_forward_prop(y9) #applying RelU
    y11 =  MatMul_forward_prop(W2, y10) #applying matrix multiplication with the second weight matrix
    y_pred = y11 + b4 #adding bias
    # no ReLU after the last bias addition: y_pred goes straight into the squared-error loss
    loss = SE_forward(y_pred, y_true)

    total_loss += loss #adding current loss to total loss

    # Compare the predicted and the true box. Both are passed as (c, 1) arrays.
    # NOTE(review): bb_intersection_over_union indexes its inputs as corner
    # coordinates (x_min, y_min, x_max, y_max), not (x, y, w, h) -- confirm
    # that the annotation 'loc' values use that layout.
    iou = bb_intersection_over_union(y_pred, y_true)
    total_iou += iou

In [None]:
# total_iou can be a 1-element array, a NumPy scalar or a plain Python number
# (for example when every IoU is exactly 0), and a plain number has no .item().
# np.asarray makes the conversion work in all three cases.
print("Mean intersection over union:", np.asarray(total_iou).item() / N_test)
print("Mean loss:", total_loss / N_test)