In [None]:
#importing everything required for the lab
import getpass
import json
import os
import time

import numpy as np
from sklearn import metrics
from tensorflow import keras
from tqdm import tqdm

# Copying functions from previous labs

## Compute padding

In [None]:
import math

def compute_pad_filt(input_size, output_size):
  """Choose the padding and kernel size of a stride-1 convolution that maps
  `input_size` to `output_size`.

  Per axis the kernel size is `in - out + 2 * pad + 1`. The padding starts at 1
  and is raised only as far as needed for the kernel size to be at least 1.

  Keyword arguments:
    input_size -- (height, width) of the input
    output_size -- (height, width) of the wanted output

  Returns:
    ((pad_h, pad_w), (kernel_h, kernel_w))
  """
  def solve_axis(in_len, out_len):
    # start from the minimal padding of 1, for which kernel = in - out + 3
    axis_pad = 1
    axis_kernel = in_len - out_len + 3
    if axis_kernel < 1:
      # every extra unit of padding enlarges the kernel by 2
      extra = math.ceil((1 - axis_kernel) / 2)
      axis_pad, axis_kernel = axis_pad + extra, axis_kernel + 2 * extra
    return axis_pad, axis_kernel

  (in_h, in_w), (out_h, out_w) = input_size, output_size
  pad_h, kernel_h = solve_axis(in_h, out_h)
  pad_w, kernel_w = solve_axis(in_w, out_w)
  return (pad_h, pad_w), (kernel_h, kernel_w)

## Convolution

In [None]:
# forward propagation for convolution
# add paddings
def conv2d_forward(matrix, filter, pad=(0,0)):
  """Slide one filter over a 3-D tensor (stride 1) and sum the element-wise products.

  Keyword arguments:
    matrix -- input tensor of shape (height, width, depth)
    filter -- filter of shape (height, width, depth)
    pad -- (rows, cols) of zero padding added on both sides of the two spatial axes

  Returns:
    2-D array of shape (H + 2*pad[0] - h_w + 1, W + 2*pad[1] - w_w + 1)
  """
  if pad != (0,0):
    # zero-pad only the spatial axes, never the depth axis
    row_pad, col_pad = (pad[0], pad[0]), (pad[1], pad[1])
    matrix = np.pad(matrix, (row_pad, col_pad, (0, 0)))
  in_h, in_w, _ = matrix.shape
  k_h, k_w, _ = filter.shape
  output = np.zeros((in_h - k_h + 1, in_w - k_w + 1))
  for row, col in np.ndindex(*output.shape):
    # window of the input currently covered by the filter
    window = matrix[row:row + k_h, col:col + k_w, :]
    output[row, col] = np.sum(window * filter)
  return output

In [None]:
# backward propagation for convolution (dL/dZ)
def conv2d_backward_pad(upstream, filter, pad=(0,0)):
  """Backpropagate through a convolution to its input (dL/dZ) for one filter.

  Every depth slice of the filter, rotated by 180 degrees, is slid over the
  upstream gradient with `conv2d_forward`; `pad` is forwarded unchanged.
  The result has the spatial size of the convolution input only when the
  forward pass was a "same" convolution, i.e. pad == (kernel - 1) / 2.

  Keyword arguments:
    upstream -- upstream gradient of shape (height, width, 1)
    filter -- filter of shape (height, width, depth)
    pad -- padding handed to conv2d_forward

  Returns:
    array of shape (depth, out_height, out_width)
  """
  _, _, depth = filter.shape
  flipped = np.rot90(filter, 2)  # 180-degree rotation in the spatial plane
  per_channel = [
    conv2d_forward(upstream, flipped[:, :, channel, np.newaxis], pad)
    for channel in range(depth)
  ]
  return np.array(per_channel)

In [None]:
# backward propagation for convolution (dL/dW)
def conv2d_backward_weights(weights, upstream, pad=(0,0)):
  """Gradient of the loss with respect to one filter (dL/dW).

  Despite its name, `weights` is the tensor that was fed into the convolution:
  it is zero-padded like in the forward pass and each of its depth slices is
  slid against the upstream gradient.

  Keyword arguments:
    weights -- convolution input of shape (height, width, depth)
    upstream -- upstream gradient of shape (height, width, 1)
    pad -- padding that was used in the forward pass

  Returns:
    array of shape (kernel_height, kernel_width, depth)
  """
  if pad != (0,0):
    spatial_pad = ((pad[0], pad[0]), (pad[1], pad[1]), (0, 0))
    weights = np.pad(weights, spatial_pad)
  _, _, depth = weights.shape
  slices = [
    conv2d_forward(weights[:, :, channel, np.newaxis], upstream)
    for channel in range(depth)
  ]
  # move the channel axis from the front to the back
  return np.transpose(np.array(slices), (1, 2, 0))

## ReLU

In [None]:
#function for RelU forward and backward propagation
#taken from previous assignments
def RelU_jacobian(input):
  """Element-wise derivative of ReLU: 1 where input > 0, otherwise 0."""
  positive = input > 0
  return positive * 1

def RelU_forward_prop(input):
  """Apply ReLU element-wise: values below zero are replaced by zero."""
  lower_bound = 0
  return np.maximum(input, lower_bound)

def RelU_backward_prop(input, loss):
  """Backpropagate through ReLU.

  Keyword arguments:
    input -- the values that were fed into ReLU in the forward pass
    loss -- upstream gradient, same shape as input

  Returns:
    the upstream gradient where input > 0 and zero elsewhere
  """
  gate = 1 * (input > 0)  # element-wise ReLU derivative
  upstream = np.array(loss)
  return gate * upstream

## Matmul

In [None]:
#functions for matmul backward and forward propagation from previous assignments
def MatMul_forward_prop(matrix, input):
  """Forward pass of a linear layer: returns matrix @ input."""
  weights = np.array(matrix)
  activations = np.array(input)
  return np.matmul(weights, activations)

#function that finds dL/dx
def MatMul_backward_prop(matrix, loss):
  """Gradient with respect to the layer input (dL/dx): matrix^T @ loss."""
  weights_t = np.transpose(np.array(matrix))
  upstream = np.array(loss)
  return np.matmul(weights_t, upstream)

#function that finds dL/dW
def MatMul_matrix_backward_prop(X, loss):
  """Gradient with respect to the weight matrix (dL/dW): loss @ X^T."""
  upstream = np.array(loss)
  inputs_t = np.transpose(np.array(X))
  return np.matmul(upstream, inputs_t)

## Labels vectorization

In [None]:
#function taken from previous assignments
#it translates label number into the vector of 0s and 1s
def label_vec_func(labels, c=None):
  """Translate integer class labels into one-hot row vectors.

  The number of classes is now an explicit argument instead of being read from
  a notebook-level global, so the function works on a fresh kernel and accepts
  the same two-argument call as the definition further down in the notebook.

  Keyword arguments:
    labels -- sequence of integer class indices
    c -- number of classes (columns); inferred as max(labels) + 1 when omitted

  Returns:
    array of shape (len(labels), c) with a single 1 in every row
  """
  if c is None:
    # no class count given: use the smallest width that fits every label
    c = int(np.max(labels)) + 1 if len(labels) else 0
  labels_matrix = np.zeros([len(labels), c])
  for row, label in enumerate(labels):
    labels_matrix[row, label] = 1
  return labels_matrix

## Convolution for many filters

In [None]:
# forward propagation for convolution
# now we have filters - array of size: (height, width, depth, number of filters)
# add paddings to the function calls
def conv2d_forward_many(matrix, filters, pad=(0,0)):
  """Convolve one tensor with a bank of filters.

  Keyword arguments:
    matrix -- input tensor of shape (height, width, depth)
    filters -- filter bank of shape (height, width, depth, number of filters)
    pad -- padding forwarded to conv2d_forward

  Returns:
    array of shape (out_height, out_width, number of filters)
  """
  in_h, in_w, _ = matrix.shape
  k_h, k_w, _, n_filters = filters.shape
  out_h = in_h - k_h + 1 + 2 *pad[0]
  out_w = in_w - k_w + 1 + 2 * pad[1]
  output = np.zeros((out_h, out_w, n_filters))
  for index in range(n_filters):
    # one output channel per filter
    output[..., index] = conv2d_forward(matrix, filters[..., index], pad)
  return output

In [None]:
# backward propagation for convolution (dL/dZ)
# now we have filters - array of size: (height, width, depth, number of filters)
# add paddings to the function calls
def conv2d_backward_many(upstream, filters, pad=(0,0)):
  """Backpropagate through a multi-filter convolution to its input (dL/dZ).

  The per-filter input gradients from `conv2d_backward_pad` are summed, because
  every filter reads the same input.

  Keyword arguments:
    upstream -- upstream gradient of shape (height, width, number of filters)
    filters -- filter bank of shape (height, width, depth, number of filters)
    pad -- padding forwarded to conv2d_backward_pad

  Returns:
    array of shape (out_height, out_width, depth)
  """
  _, _, _, n_filters = filters.shape
  per_filter = [
    conv2d_backward_pad(upstream[:, :, index, np.newaxis], filters[:, :, :, index], pad)
    for index in range(n_filters)
  ]
  summed = np.sum(per_filter, 0)  # shape (depth, out_height, out_width)
  return np.transpose(summed, (1, 2, 0))

In [None]:
# backward propagation for convolution (dL/dW)
# now we have filters - array of size: (height, width, depth, number of filters)
# add paddings to the function calls
def conv2d_backward_weights_many(weight, upstream, pad=(0,0)):
  """Gradient of the loss with respect to every filter of a bank (dL/dW).

  Keyword arguments:
    weight -- tensor that was fed into the convolution, shape (height, width, depth)
    upstream -- upstream gradient of shape (height, width, number of filters)
    pad -- padding that was used in the forward pass

  Returns:
    array of shape (kernel_height, kernel_width, depth, number of filters)
  """
  _, _, n_filters = upstream.shape
  per_filter = [
    conv2d_backward_weights(weight, upstream[:, :, index, np.newaxis], pad)
    for index in range(n_filters)
  ]
  # the filter axis is first in the stacked array; move it to the end
  return np.transpose(np.array(per_filter), (1, 2, 3, 0))

## Resize

In [None]:
# for rescaling I used Nearest-neighbour interpolation
def nearest(input, width, height):
  """Rescale a picture with nearest-neighbour interpolation.

    Reference to the source that I used: https://kwojcicki.github.io/blog/NEAREST-NEIGHBOUR

    The output is allocated with shape (width, height, 3), so `width` is the
    size of the first (row) axis and `height` the size of the second (column)
    axis of the result.

    Keyword arguments:
      input (np.array): input image represented as numpy array
      width (int) -- size of the first axis of the rescaled image
      height (int) -- size of the second axis of the rescaled image

    Returns:
      output (np.array): rescaled image as an integer numpy array
  """
  output = np.zeros((width, height, 3), dtype=int)
  # Each axis gets its own scale. The previous version applied the row scale
  # to column indices and vice versa, which only worked for square resizes.
  row_scale = input.shape[0] / output.shape[0]
  col_scale = input.shape[1] / output.shape[1]
  # source row / column for every output row / column
  src_rows = [math.floor(row * row_scale) for row in range(output.shape[0])]
  src_cols = [math.floor(col * col_scale) for col in range(output.shape[1])]
  for row, src_row in enumerate(src_rows):
    for col, src_col in enumerate(src_cols):
      # copy the nearest input pixel into the output pixel
      output[row, col] = input[src_row][src_col]

  return output

## Squared Error

In [None]:
def SE_forward(y_pred, y_true):
  """Squared-error loss: the sum of squared differences between prediction and target."""
  residual = y_pred - y_true
  squared = residual ** 2
  return np.sum(squared)

In [None]:
def SE_backward(y_pred, y_true):
  """Gradient of the squared-error loss with respect to the prediction: 2 * (y_pred - y_true)."""
  residual = y_pred - y_true
  return 2 * residual

## Labels vectorization

In [None]:
#function taken from previous assignments
#it translates label number into the vector of 0s and 1s
def label_vec_func(labels, c):
  """Translate integer class labels into one-hot row vectors.

  Keyword arguments:
    labels -- sequence of integer class indices
    c -- number of classes (columns of the result)

  Returns:
    array of shape (len(labels), c) with a single 1 in every row
  """
  one_hot = np.zeros((len(labels), c))
  for row, label in enumerate(labels):
    one_hot[row, label] = 1
  return one_hot

## SoftMax

In [None]:
#function for softmax forward and backward propagation
#taken from previous assignments
def SoftMax_forward_prop(input, normalization=False):
  """Softmax over all elements of the input, computed in extended precision.

  Keyword arguments:
    input -- array-like of scores
    normalization -- when True the maximum score is subtracted before
                     exponentiation, which keeps exp() from overflowing

  Returns:
    np.longdouble array of the same shape whose elements sum to 1
  """
  scores = np.array(input, dtype=np.longdouble)
  if normalization:
    scores = scores - np.max(input)
  exponentials = np.exp(scores)
  total = np.sum(exponentials)
  return exponentials / total

def SoftMax_jacobian(input, normalization=False):
  """Jacobian of softmax at `input`.

  Entry (i, j) is s_i * (1 - s_j) on the diagonal and -s_i * s_j elsewhere,
  where s is the softmax output.

  Returns:
    array of shape (len(input), len(input))
  """
  probs = SoftMax_forward_prop(input, normalization)
  size = len(input)
  jacobian = np.zeros((size, size))
  for row in range(size):
    for col in range(size):
      jacobian[row, col] = (
        probs[row] * (1 - probs[col]) if row == col else -probs[row] * probs[col]
      )
  return jacobian

def SoftMax_backward_prop(input, loss, normalization=False):
  """Backpropagate through softmax: the Jacobian at `input` times the upstream gradient `loss`."""
  upstream = np.array(loss)
  return SoftMax_jacobian(input, normalization) @ upstream

## Log softmax

In [None]:
#functions for log_softmax forward and backward propagation
#this node applies softmax and then finds logorithm of the result
def log_softmax(x):
  """Logarithm of softmax(x), computed with the maximum subtracted first so that exp() cannot overflow."""
  shifted = x - np.max(x)
  log_total = np.log(np.sum(np.exp(shifted)))
  return shifted - log_total

def log_softmax_jacobian(input):
  """Jacobian of log-softmax at `input`.

  Entry (i, j) is 1 - s_j on the diagonal and -s_j elsewhere, where s is the
  softmax output.

  Returns:
    array of shape (len(input), len(input))
  """
  probs = SoftMax_forward_prop(input, True)
  size = len(input)
  jacobian = np.zeros((size, size))
  for row in range(size):
    for col in range(size):
      jacobian[row, col] = (1 - probs[col]) if row == col else -probs[col]
  return jacobian

def log_softmax_backward_prop(input, loss):
  """Backpropagate through log-softmax.

  Keyword arguments:
    input -- the scores that were fed into log_softmax in the forward pass
    loss -- upstream gradient dL/dy, one entry per score

  Returns:
    dL/dx, computed as J^T @ loss
  """
  jac = log_softmax_jacobian(input) # jac[i][j] = d y_i / d x_j
  # The chain rule needs the transposed Jacobian: dL/dx_j = sum_i dL/dy_i * jac[i][j].
  # Unlike softmax, the log-softmax Jacobian is not symmetric, so jac @ loss is wrong.
  return jac.T @ np.array(loss)

# Downloading and preprocessing dataset

Downloading [dataset](https://app.roboflow.com/fourcolorsgame-gmail-com/cockroachesvsbushes/1)

In [None]:
!pip install roboflow

from roboflow import Roboflow
rf = Roboflow(api_key="iwiRB6xLJgD2LBn0pWPH")
project = rf.workspace("fourcolorsgame-gmail-com").project("cockroachesvsbushes")
dataset = project.version(1).download("coco")


Function to read and preprocess dataset from downloaded folder

In [None]:
import cv2

def read_dataset(directory, h, w):
  """Read the images, bounding boxes and class ids of a COCO-format dataset folder.

  Keyword arguments:
    directory -- folder that holds the images and `_annotations.coco.json`
    h, w -- target picture size; currently unused because the resize step is disabled

  Returns:
    (images, boxes, classes) as numpy arrays with one entry per annotation;
    every box is [x1, y1, x2, y2]

  Raises:
    FileNotFoundError -- when an annotated image cannot be read
  """
  # parse the annotations as JSON instead of eval()-ing the file content
  with open(os.path.join(directory, '_annotations.coco.json'), encoding='utf-8') as annotation_file:
    annotation = json.load(annotation_file)

  # annotations reference their picture through image_id; pairing the two lists
  # by position breaks as soon as their order or length differs
  images_by_id = {image['id']: image for image in annotation['images']}

  x = []
  boxes = []
  classes = []
  for data in annotation['annotations']:
    file_name = images_by_id[data['image_id']]['file_name']
    image = cv2.imread(os.path.join(directory, file_name))
    if image is None: # cv2.imread returns None instead of raising
      raise FileNotFoundError('cannot read image ' + os.path.join(directory, file_name))
    # image = nearest(image, h, w)
    left, top, box_w, box_h = data['bbox']
    # convert from [x,y,width,height] to [x1,y1,x2,y2] notation
    boxes.append(np.array([left, top, left + box_w, top + box_h]))
    classes.append(data['category_id'])
    x.append(image)
  return np.array(x), np.array(boxes), np.array(classes)

In [None]:
# every picture is 64 x 64 pixels
h = w = 64
n = h * w # number of pixels in one picture

In [None]:
x_train, y_train, y_classes_train = read_dataset('/content/CockroachesVsBushes-1/train', h, w) # read and preprocess the train dataset
x_test, y_test, y_classes_test = read_dataset('/content/CockroachesVsBushes-1/valid', h, w) # read and preprocess the test dataset (the 'valid' split)

In [None]:
# c - number of outputs of the last linear layer of the box model (x1, y1, x2, y2)
# k - number of classes
c, k = 4, 3

In [None]:
# convert the train labels to vectors of 0s and 1s (category ids are shifted to start at 0);
# the ndim guard keeps the cell idempotent: re-running it must not encode the labels twice
if y_classes_train.ndim == 1:
  y_classes_train = label_vec_func(y_classes_train - 1, k)

In [None]:
# number of samples in the train and test datasets
N_train = len(x_train)
N_test = len(x_test)

# Training

## Scheme 2 (from lecture 12, slide 9)

### Localization part

#### Defining localization model

In [None]:
# Parameters, hyper-parameters and gradient accumulators of the localization model:
# two convolution layers followed by two linear layers that output c box coordinates.
pad, kernel = compute_pad_filt((h, w), (h, w)) # we want the output to have the same dimensions as the input

hw1, Ww1  = kernel # kernel height and width for the first layer
d, D1 = 3, 2 # input depth and number of filters for the first layer

hw2, Ww2, D2 = hw1, Ww1, 1 # kernel height, width and number of filters for the second layer

Conv1_loc =  np.random.uniform(-1, 1, (hw1, Ww1, d, D1)) # initial convolution filters for layer 1
b1_loc = np.random.uniform(-1, 1 , (h, w, D1)) # b1 - initial bias, one value per output element of layer 1
Conv2_loc =  np.random.uniform(-1, 1, (hw2, Ww2, D1, D2))# initial convolution filters for layer 2
b2_loc = np.random.uniform(-1, 1 , (h, w, D2)) # b2 - initial bias, one value per output element of layer 2

vectorized_len = h * w * D2 # length of the flattened tensor after the 2 convolutions

c1 = vectorized_len // 4 # output dimension of the first linear layer
W1_loc = np.random.uniform(-1, 1, (c1, vectorized_len)) # W1 - initial weights of the first linear layer
W2_loc = np.random.uniform(-1, 1, (c, c1)) # W2 - initial weights of the second linear layer
b3_loc = np.random.uniform(-1, 1 , (c1, 1)) # b3 - initial bias of the first linear layer
b4_loc = np.random.uniform(-1, 1 , (c, 1)) # b4 - initial bias of the second linear layer


nu = 0.001 # learning rate
num_epochs = 20 # number of epochs

N = 2 # number of images in a minibatch

# gradient accumulators, summed over one minibatch
dL_dConv1_loc = np.zeros((hw1, Ww1, d, D1))
dL_dConv2_loc = np.zeros((hw2, Ww2, D1, D2))
dL_dW1_loc = np.zeros((c1, vectorized_len))
dL_dW2_loc = np.zeros((c, c1))
# bias accumulators start as the scalar 0 and become arrays on the first +=
dL_db1_loc = 0
dL_db2_loc = 0
dL_db3_loc = 0
dL_db4_loc = 0

#### Training process

In [None]:
# Minibatch gradient descent for the localization model (squared error on the box corners).
for epoch in range(num_epochs): # for each epoch (own loop variable, so it no longer shares `i` with the sample loop)
  total_loc_loss = 0 # sum of localization losses over one epoch
  counter = 0 # number of samples accumulated in the current minibatch
  for i in tqdm(range(N_train)): # for each picture

    x = x_train[i, :, :] / 255 # normalize pixels, so they will be from 0 to 1

    y_true_loc = y_train[i].reshape(c, 1) / 64 # box corners divided by the picture side (64 pixels)

    # forward propagation through the convolutional part
    y1 = conv2d_forward_many(x, Conv1_loc, pad=pad) # applying convolution filters from layer 1
    y2 = y1 + b1_loc # adding bias
    y3 = RelU_forward_prop(y2) # applying ReLU
    y4 = conv2d_forward_many(y3, Conv2_loc, pad=pad) # applying convolution filters from layer 2
    y5 = y4 + b2_loc # adding bias
    y6 = RelU_forward_prop(y5) # applying ReLU

    # flatten to a column vector; -1 takes the length from y6 itself instead of a global
    y7 = np.reshape(y6, (-1, 1))

    # forward propagation through the localization head
    y8_loc = MatMul_forward_prop(W1_loc, y7) # multiplication with the first weight matrix
    y9_loc = y8_loc + b3_loc # adding bias
    y10_loc = RelU_forward_prop(y9_loc) # applying ReLU
    y11_loc = MatMul_forward_prop(W2_loc, y10_loc) # multiplication with the second weight matrix
    y12_loc = y11_loc + b4_loc # adding bias
    # no ReLU after the last bias: the box coordinates are regressed directly
    loc_loss = SE_forward(y12_loc, y_true_loc)

    total_loc_loss += loc_loss # adding current loss to total loss

    # backward propagation through the localization head
    back_loc = SE_backward(y12_loc, y_true_loc) # gradient of the loss with respect to y12

    dL_db4_loc += back_loc # dL/db4
    dL_dW2_loc += MatMul_matrix_backward_prop(y10_loc, back_loc) # dL/dW2

    back_loc = MatMul_backward_prop(W2_loc, back_loc) # through the multiplication with W2
    back_loc = RelU_backward_prop(y9_loc, back_loc) # through ReLU

    dL_db3_loc += back_loc # dL/db3
    dL_dW1_loc += MatMul_matrix_backward_prop(y7, back_loc) # dL/dW1

    back_loc = MatMul_backward_prop(W1_loc, back_loc) # through the multiplication with W1

    # backward propagation through the convolutional part
    back = back_loc.reshape(y6.shape) # undo the flattening
    back = RelU_backward_prop(y5, back) # through ReLU

    dL_db2_loc += back # dL/db2
    dL_dConv2_loc += conv2d_backward_weights_many(y3, back, pad=pad) # dL/dConv2

    back = conv2d_backward_many(back, Conv2_loc, pad=pad) # through the second convolution
    back = RelU_backward_prop(y2, back) # through ReLU

    dL_db1_loc += back # dL/db1
    # same padding as the forward pass (was hard-coded to (1,1))
    dL_dConv1_loc += conv2d_backward_weights_many(x, back, pad=pad) # dL/dConv1

    counter += 1 # one more sample in the current minibatch

    if counter == N or i == N_train - 1: # minibatch is full or the dataset ended
      # average over the samples actually accumulated, so a shorter last
      # minibatch is not scaled as if it were full
      step = nu / counter

      # gradient descent for filters, weights and biases
      Conv1_loc = Conv1_loc - step * dL_dConv1_loc
      Conv2_loc = Conv2_loc - step * dL_dConv2_loc

      W1_loc = W1_loc - step * dL_dW1_loc
      W2_loc = W2_loc - step * dL_dW2_loc

      b1_loc = b1_loc - step * dL_db1_loc
      b2_loc = b2_loc - step * dL_db2_loc

      b3_loc = b3_loc - step * dL_db3_loc
      b4_loc = b4_loc - step * dL_db4_loc

      # reset the gradient accumulators for the next minibatch
      dL_dConv1_loc = np.zeros((hw1, Ww1, d, D1))
      dL_dConv2_loc = np.zeros((hw2, Ww2, D1, D2))

      dL_dW1_loc = np.zeros((c1, vectorized_len))
      dL_dW2_loc = np.zeros((c, c1))

      dL_db1_loc = 0
      dL_db2_loc = 0

      dL_db3_loc = 0
      dL_db4_loc = 0

      counter = 0
  print('\nSegmentation Loss:', total_loc_loss / N_train)

100%|██████████| 80/80 [00:31<00:00,  2.54it/s]



Segmentation Loss: 153812.3568583004


100%|██████████| 80/80 [00:28<00:00,  2.80it/s]



Segmentation Loss: 0.8142951589539035


100%|██████████| 80/80 [00:28<00:00,  2.80it/s]



Segmentation Loss: 0.6959230166060532


100%|██████████| 80/80 [00:28<00:00,  2.82it/s]



Segmentation Loss: 0.5995813418137018


100%|██████████| 80/80 [00:28<00:00,  2.81it/s]



Segmentation Loss: 0.520833482027385


100%|██████████| 80/80 [00:28<00:00,  2.80it/s]



Segmentation Loss: 0.4562675502081685


100%|██████████| 80/80 [00:28<00:00,  2.81it/s]



Segmentation Loss: 0.40321683346786685


100%|██████████| 80/80 [00:28<00:00,  2.83it/s]



Segmentation Loss: 0.3589370020319063


100%|██████████| 80/80 [00:27<00:00,  2.90it/s]



Segmentation Loss: 0.3240311049343034


100%|██████████| 80/80 [00:27<00:00,  2.87it/s]



Segmentation Loss: 0.2959570734945965


100%|██████████| 80/80 [00:30<00:00,  2.64it/s]



Segmentation Loss: 0.27253646371884205


100%|██████████| 80/80 [00:28<00:00,  2.80it/s]



Segmentation Loss: 0.2530009238140344


100%|██████████| 80/80 [00:28<00:00,  2.80it/s]



Segmentation Loss: 0.23670952853649613


100%|██████████| 80/80 [00:28<00:00,  2.82it/s]



Segmentation Loss: 0.22312739214440352


100%|██████████| 80/80 [00:28<00:00,  2.80it/s]



Segmentation Loss: 0.2118076265961144


100%|██████████| 80/80 [00:28<00:00,  2.81it/s]



Segmentation Loss: 0.20237664645583808


100%|██████████| 80/80 [00:28<00:00,  2.80it/s]



Segmentation Loss: 0.19452207591086107


100%|██████████| 80/80 [00:27<00:00,  2.86it/s]



Segmentation Loss: 0.18798272699808913


100%|██████████| 80/80 [00:27<00:00,  2.89it/s]



Segmentation Loss: 0.18254025481394384


100%|██████████| 80/80 [00:34<00:00,  2.33it/s]


Segmentation Loss: 0.17801218718568007





### Classification

#### Defining classification model

In [None]:
# Parameters, hyper-parameters and gradient accumulators of the classification model:
# two convolution layers followed by two linear layers that output k class scores.
# NOTE: this cell rebinds pad, D2, vectorized_len, c1, nu, num_epochs and N,
# which the localization cells read as well.
pad, kernel = compute_pad_filt((h, w), (h, w)) # we want the output to have the same dimensions as the input

hw1, Ww1  = kernel # kernel height and width for the first layer
d, D1 = 3, 2 # input depth and number of filters for the first layer

hw2, Ww2, D2 = hw1, Ww1, 2 # kernel height, width and number of filters for the second layer

Conv1_cl =  np.random.uniform(-1, 1, (hw1, Ww1, d, D1)) # initial convolution filters for layer 1
b1_cl = np.random.uniform(-1, 1 , (h, w, D1)) # b1 - initial bias, one value per output element of layer 1
Conv2_cl =  np.random.uniform(-1, 1, (hw2, Ww2, D1, D2))# initial convolution filters for layer 2
b2_cl = np.random.uniform(-1, 1 , (h, w, D2)) # b2 - initial bias, one value per output element of layer 2

vectorized_len = h * w * D2 # length of the flattened tensor after the 2 convolutions

c1 = vectorized_len // 4 # output dimension of the first linear layer
k1 = c1

W1_cl = np.random.uniform(-1, 1, (k1, vectorized_len)) # W1 - initial weights of the first linear layer
W2_cl = np.random.uniform(-1, 1, (k, k1)) # W2 - initial weights of the second linear layer
b3_cl = np.random.uniform(-1, 1 , (k1, 1)) # b3 - initial bias of the first linear layer
b4_cl = np.random.uniform(-1, 1 , (k, 1)) # b4 - initial bias of the second linear layer


nu = 0.001 # learning rate
num_epochs = 20 # number of epochs

N = 2 # number of images in a minibatch

# gradient accumulators, summed over one minibatch
dL_dConv1_cl = np.zeros((hw1, Ww1, d, D1))
dL_dConv2_cl = np.zeros((hw2, Ww2, D1, D2))


dL_dW1_cl = np.zeros((k1, vectorized_len))
dL_dW2_cl = np.zeros((k, k1))

# bias accumulators start as the scalar 0 and become arrays on the first +=
dL_db1_cl = 0
dL_db2_cl = 0


dL_db3_cl = 0
dL_db4_cl = 0

#### Training process

In [None]:
# Minibatch gradient descent for the classification model (cross-entropy on log-softmax).
for epoch in range(num_epochs): # for each epoch (own loop variable, so it no longer shares `i` with the sample loop)
  total_cl_loss = 0 # sum of classification losses over one epoch
  counter = 0 # number of samples accumulated in the current minibatch
  for i in tqdm(range(N_train)): # for each picture

    x = x_train[i, :, :] / 255 # normalize pixels, so they will be from 0 to 1

    y_true_cl = np.array(y_classes_train[i].reshape(k,  1)) # one-hot label as a column vector

    # forward propagation through the convolutional part
    y1 = conv2d_forward_many(x, Conv1_cl, pad=pad) # applying convolution filters from layer 1
    y2 = y1 + b1_cl # adding bias
    y3 = RelU_forward_prop(y2) # applying ReLU
    y4 = conv2d_forward_many(y3, Conv2_cl, pad=pad) # applying convolution filters from layer 2
    y5 = y4 + b2_cl # adding bias
    y6 = RelU_forward_prop(y5) # applying ReLU

    # flatten to a column vector; -1 takes the length from y6 itself instead of a global
    y7 = np.reshape(y6, (-1, 1))

    # forward propagation through the classification head
    y8_cl = MatMul_forward_prop(W1_cl, y7) # multiplication with the first weight matrix
    y9_cl = y8_cl + b3_cl # adding bias
    y10_cl = RelU_forward_prop(y9_cl) # applying ReLU
    y11_cl = MatMul_forward_prop(W2_cl, y10_cl) # multiplication with the second weight matrix
    y12_cl = y11_cl + b4_cl # adding bias
    # no ReLU after the last bias, since the softmax that follows is the nonlinearity
    y13_cl = log_softmax(y12_cl) # applying log softmax
    cl_loss = y_true_cl.T @ y13_cl # log-probability of the true class

    total_cl_loss += -cl_loss.item() # the loss is the negative log-probability

    # backward propagation through the classification head
    # gradient of the cross-entropy with respect to the scores y12: softmax(y12) - y_true
    back_cl = -y_true_cl + SoftMax_forward_prop(y12_cl, True)

    dL_db4_cl += back_cl # dL/db4
    dL_dW2_cl += MatMul_matrix_backward_prop(y10_cl, back_cl) # dL/dW2

    back_cl = MatMul_backward_prop(W2_cl, back_cl) # through the multiplication with W2
    back_cl = RelU_backward_prop(y9_cl, back_cl) # through ReLU

    dL_db3_cl += back_cl # dL/db3
    dL_dW1_cl += MatMul_matrix_backward_prop(y7, back_cl) # dL/dW1

    back_cl = MatMul_backward_prop(W1_cl, back_cl) # through the multiplication with W1

    # backward propagation through the convolutional part
    back = back_cl.reshape(y6.shape) # undo the flattening
    back = RelU_backward_prop(y5, back) # through ReLU

    dL_db2_cl += back # dL/db2
    dL_dConv2_cl += conv2d_backward_weights_many(y3, back, pad=pad) # dL/dConv2

    back = conv2d_backward_many(back, Conv2_cl, pad=pad) # through the second convolution
    back = RelU_backward_prop(y2, back) # through ReLU

    dL_db1_cl += back # dL/db1
    # same padding as the forward pass (was hard-coded to (1,1))
    dL_dConv1_cl += conv2d_backward_weights_many(x, back, pad=pad) # dL/dConv1

    counter += 1 # one more sample in the current minibatch

    if counter == N or i == N_train - 1: # minibatch is full or the dataset ended
      # average over the samples actually accumulated, so a shorter last
      # minibatch is not scaled as if it were full
      step = nu / counter

      # gradient descent for filters, weights and biases
      Conv1_cl = Conv1_cl - step * dL_dConv1_cl
      Conv2_cl = Conv2_cl - step * dL_dConv2_cl

      W1_cl = W1_cl - step * dL_dW1_cl
      W2_cl = W2_cl - step * dL_dW2_cl

      b1_cl = b1_cl - step * dL_db1_cl
      b2_cl = b2_cl - step * dL_db2_cl

      b3_cl = b3_cl - step * dL_db3_cl
      b4_cl = b4_cl - step * dL_db4_cl

      # reset the gradient accumulators for the next minibatch
      dL_dConv1_cl = np.zeros((hw1, Ww1, d, D1))
      dL_dConv2_cl = np.zeros((hw2, Ww2, D1, D2))

      dL_dW1_cl = np.zeros((k1, vectorized_len))
      dL_dW2_cl = np.zeros((k, k1))

      dL_db1_cl = 0
      dL_db2_cl = 0

      dL_db3_cl = 0
      dL_db4_cl = 0

      counter = 0
  print('\nClassification Loss:', total_cl_loss / N_train)

100%|██████████| 80/80 [02:03<00:00,  1.54s/it]



Classification Loss: 53.93980977544809937


100%|██████████| 80/80 [01:58<00:00,  1.48s/it]



Classification Loss: 3.8029473681674140883


100%|██████████| 80/80 [01:58<00:00,  1.48s/it]



Classification Loss: 2.2444955191707742258


100%|██████████| 80/80 [02:00<00:00,  1.51s/it]



Classification Loss: 1.6391668648615694483


100%|██████████| 80/80 [01:58<00:00,  1.48s/it]



Classification Loss: 1.2088372106614907326


100%|██████████| 80/80 [01:58<00:00,  1.48s/it]



Classification Loss: 0.8202005505109583461


100%|██████████| 80/80 [02:00<00:00,  1.50s/it]



Classification Loss: 0.7011958847908114399


100%|██████████| 80/80 [01:58<00:00,  1.48s/it]



Classification Loss: 0.6247908421174317648


100%|██████████| 80/80 [02:00<00:00,  1.50s/it]



Classification Loss: 0.58067801295048149924


100%|██████████| 80/80 [01:59<00:00,  1.49s/it]



Classification Loss: 0.5475409978445247157


100%|██████████| 80/80 [01:58<00:00,  1.48s/it]



Classification Loss: 0.51869862547589010176


100%|██████████| 80/80 [02:00<00:00,  1.51s/it]



Classification Loss: 0.52228665173767035084


100%|██████████| 80/80 [01:58<00:00,  1.49s/it]



Classification Loss: 0.48732316769358207512


100%|██████████| 80/80 [02:00<00:00,  1.51s/it]



Classification Loss: 0.47731770120562610877


100%|██████████| 80/80 [01:58<00:00,  1.49s/it]



Classification Loss: 0.46947975644692576156


100%|██████████| 80/80 [01:58<00:00,  1.48s/it]



Classification Loss: 0.6247400328700462134


100%|██████████| 80/80 [02:01<00:00,  1.52s/it]



Classification Loss: 0.45871415396062884245


100%|██████████| 80/80 [01:58<00:00,  1.49s/it]



Classification Loss: 0.42412431966032972485


100%|██████████| 80/80 [02:01<00:00,  1.51s/it]



Classification Loss: 0.41494146877228135846


100%|██████████| 80/80 [01:59<00:00,  1.49s/it]


Classification Loss: 0.40115462276645235475





# Evaluation

## Classification model

In [None]:
# Run the trained classification model on every test picture and report the accuracy.
y_pred = np.zeros((N_test, 1), int) # predicted class index for every test picture
for i in tqdm(range(N_test)):
    x = x_test[i, :, : ] / 255 #normalize pixels, so they will be from 0 to 1

    # same padding as in training (was hard-coded to (1,1))
    y1 = conv2d_forward_many(x, Conv1_cl, pad=pad) # applying convolution filters from layer 1
    y2 = y1 + b1_cl # adding bias
    y3 = RelU_forward_prop(y2) # applying ReLU
    y4 = conv2d_forward_many(y3, Conv2_cl, pad=pad) # applying convolution filters from layer 2
    y5 = y4 + b2_cl # adding bias
    y6 = RelU_forward_prop(y5) # applying ReLU

    # flatten to a column vector; -1 takes the length from y6 itself instead of a global
    y7 = np.reshape(y6, (-1, 1))

    y8 = MatMul_forward_prop(W1_cl, y7) # multiplication with the first weight matrix
    y9 = y8 + b3_cl # adding bias
    y10 = RelU_forward_prop(y9) # applying ReLU
    y11 = MatMul_forward_prop(W2_cl, y10) # multiplication with the second weight matrix
    y12 = y11 + b4_cl # adding bias
    out = SoftMax_forward_prop(y12, True) # applying softmax to find the class probabilities

    y_pred[i] = np.argmax(out) # the prediction is the most probable class
# test category ids are shifted by one so they are 0-based like the predictions
print("\nAccuracy using L1 distance:", metrics.accuracy_score(y_true=(y_classes_test - 1), y_pred=y_pred))

100%|██████████| 15/15 [00:08<00:00,  1.85it/s]


Accuracy using L1 distance: 0.6





## Localization model

For evaluation I will use intersection over union. The implementation is taken from this [source](https://pyimagesearch.com/2016/11/07/intersection-over-union-iou-for-object-detection/)

In [None]:
def bb_intersection_over_union(boxA, boxB):
  """Intersection over union of two boxes given as [x1, y1, x2, y2].

  Widths and heights are computed as `x2 - x1 + 1` and `y2 - y1 + 1`, i.e. both
  corners are counted as part of the box.

  Returns:
    intersection area divided by (area A + area B - intersection area)
  """
  # corners of the intersection rectangle
  left, top = max(boxA[0], boxB[0]), max(boxA[1], boxB[1])
  right, bottom = min(boxA[2], boxB[2]), min(boxA[3], boxB[3])
  # area of the intersection, clamped to zero when the boxes do not overlap
  overlap = max(0, right - left + 1) * max(0, bottom - top + 1)
  # areas of the two boxes
  area_a = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
  area_b = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)
  # the union counts the intersection only once
  return overlap / float(area_a + area_b - overlap)

In [None]:
# Run the trained localization model on every test picture and accumulate the
# squared-error loss and the intersection over union of predicted and true boxes.
box_scale = 64 # the boxes were divided by the 64-pixel picture side for training
total_iou = 0
total_loss = 0
for i in tqdm(range(N_test)):
    x = x_test[i, :, :] / 255 #normalize pixels, so they will be from 0 to 1
    y_true = y_test[i].reshape(c, 1) / box_scale


    y1 = conv2d_forward_many(x, Conv1_loc, pad=pad) # applying convolution filters from layer 1
    y2 = y1 + b1_loc # adding bias
    y3 = RelU_forward_prop(y2) # applying ReLU
    y4 = conv2d_forward_many(y3, Conv2_loc, pad=pad) # applying convolution filters from layer 2
    y5 = y4 + b2_loc # adding bias
    y6 = RelU_forward_prop(y5) # applying ReLU

    # flatten to a column vector; -1 takes the length from y6 itself, because the
    # global vectorized_len is rebound by the classification model cell
    y7 = np.reshape(y6, (-1, 1))

    y8 = MatMul_forward_prop(W1_loc, y7) # multiplication with the first weight matrix
    y9 = y8 + b3_loc # adding bias
    y10 = RelU_forward_prop(y9) # applying ReLU
    y11 = MatMul_forward_prop(W2_loc, y10) # multiplication with the second weight matrix
    y_pred = y11 + b4_loc # adding bias
    # no ReLU after the last bias: the box coordinates are regressed directly
    loss = SE_forward(y_pred, y_true)

    total_loss += loss #adding current loss to total loss

    # Both boxes are (x1, y1, x2, y2). The IoU helper adds 1 to every side length,
    # which assumes pixel coordinates, so the boxes are scaled back to pixels;
    # on [0, 1] coordinates the +1 dominates and inflates the score.
    iou = bb_intersection_over_union(y_pred * box_scale, y_true * box_scale)
    total_iou += iou

100%|██████████| 15/15 [00:04<00:00,  3.35it/s]


In [None]:
# total_iou is a one-element array (the boxes passed to the IoU function are
# (c, 1) column vectors), so .item() extracts the plain number before averaging
print("Mean intersection over union:", total_iou.item() / N_test)
print("Mean loss:", total_loss / N_test)

Mean intersection over union: 0.6390189333963597
Mean loss: 0.17675812375175765
