In [None]:
#importing everything required for the lab
import numpy as np
import time
from tensorflow import keras
from tqdm import tqdm
from sklearn import metrics

In [None]:
# Loading the MNIST dataset (the loader used below is keras.datasets.mnist, not CIFAR-10).
dataset=keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = dataset.load_data() #loading data: a (train, test) pair of (x, y) tuples
c = 10 # number of labels

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [None]:
#function taken from previous assignments
#it translates label number into the vector of 0s and 1s
def label_vec_func(labels, num_classes=None):
  """Convert integer class labels into one-hot row vectors.

  Args:
    labels: sequence or array of integer class indices.
    num_classes: width of the one-hot vectors. When omitted, the
      notebook-level constant `c` is used, as before.

  Returns:
    A float array of shape (len(labels), num_classes) with a single 1 per
    row at the column given by the label and 0 elsewhere.
  """
  if num_classes is None:
    num_classes = c # fall back to the global number of labels
  # Flatten and force an integer index dtype so uint8 labels and empty inputs both work.
  label_idx = np.asarray(labels, dtype=np.intp).reshape(-1)
  labels_matrix = np.zeros([len(label_idx), num_classes])
  # One fancy-indexing assignment sets row i, column labels[i] for every i.
  labels_matrix[np.arange(len(label_idx)), label_idx] = 1
  return labels_matrix

In [None]:
n = 28 * 28 #number of pixels for one picture
# Take the dataset sizes from the loaded arrays instead of hard-coding them
# (60000 and 10000 for this dataset), so a subsample still works.
N_train = x_train.shape[0] #number of pictures in the train dataset
N_test = x_test.shape[0] #number of pictures in the test dataset
# Flatten every picture into an (n, 1) column vector so it can be multiplied by a weight matrix.
x_train = x_train.reshape(N_train, n, 1)
x_test = x_test.reshape(N_test, n, 1)

In [None]:
# Converting labels to vectors of 0s and 1s. The ndim guard keeps this cell
# idempotent: re-running it must not feed the one-hot matrix back into label_vec_func.
if y_train.ndim == 1:
  y_train = label_vec_func(y_train)

In [None]:
#functions for matmul backward and forward propagation from previous assignments
def MatMul_forward_prop(matrix, input):
  """Forward pass of the matrix-multiplication node: returns matrix @ input."""
  weights = np.array(matrix)
  values = np.array(input)
  return np.matmul(weights, values)

def MatMul_backward_prop(matrix, loss):
  """Backward pass w.r.t. the node input: returns dL/dx = matrix^T @ loss.

  `loss` is the gradient arriving from the next node (dL/dy for y = matrix @ x).
  """
  weights_t = np.transpose(np.array(matrix))
  upstream = np.array(loss)
  return np.matmul(weights_t, upstream)

def MatMul_matrix_backward_prop(X, loss):
  """Backward pass w.r.t. the weight matrix: returns dL/dW = loss @ X^T.

  `X` is the input that was fed to the node and `loss` is the gradient
  arriving from the next node (dL/dy for y = W @ X).
  """
  upstream = np.array(loss)
  inputs_t = np.transpose(np.array(X))
  return np.matmul(upstream, inputs_t)

In [None]:
#function for softmax forward and backward propagation
#taken from previous assignments
def SoftMax_forward_prop(input, normalization=False):
  """Softmax over all elements of `input`, computed in np.longdouble.

  When `normalization` is true, the maximum of `input` is subtracted before
  exponentiating so that large values do not overflow in exp.
  The result has the same shape as `input` and sums to 1.
  """
  exponents = np.array(input, dtype=np.longdouble)
  if normalization:
    exponents -= np.max(input) # shift so the largest exponent is 0
  weights = np.exp(exponents)
  return weights / weights.sum()

def SoftMax_jacobian(input, normalization=False): # function for calculating jacobian of SoftMax according to input
  """Jacobian of softmax at `input`.

  Entry [i][j] is d softmax_i / d input_j:
    s_i * (1 - s_i) on the diagonal and -s_i * s_j elsewhere,
  where s = SoftMax_forward_prop(input, normalization).

  `input` may be a 1-D vector or an (n, 1) column; the softmax output is
  flattened first so every entry is computed from scalars. The result is an
  (n, n) float64 array, as before.
  """
  probs = np.ravel(SoftMax_forward_prop(input, normalization))
  jacobian = -np.outer(probs, probs) # off-diagonal part: -s_i * s_j
  np.fill_diagonal(jacobian, probs * (1 - probs)) # diagonal part: s_i * (1 - s_i)
  return jacobian.astype(np.float64)

def SoftMax_backward_prop(input, loss, normalization=False): # backpropagation
  """Backward pass of the softmax node: multiplies its Jacobian by `loss`.

  `loss` is the gradient arriving from the next node; `input` is what was fed
  to softmax in the forward pass.
  """
  upstream = np.array(loss)
  return np.matmul(SoftMax_jacobian(input, normalization), upstream)

In [None]:
#functions for log_softmax forward and backward propagation
#this node applies softmax and then finds logorithm of the result
def log_softmax(x):
  """Logarithm of softmax(x), computed with the max-shift trick.

  Subtracting max(x) before exponentiating keeps exp from overflowing; the
  result has the same shape as `x`.
  """
  shifted = x - np.max(x)
  log_norm = np.log(np.sum(np.exp(shifted))) # log of the softmax denominator
  return shifted - log_norm

def log_softmax_jacobian(input): # function for calculating jacobian of log-softmax according to input
  """Jacobian of log_softmax at `input`.

  Entry [i][j] is d log_softmax_i / d input_j = (1 if i == j else 0) - s_j,
  where s = SoftMax_forward_prop(input, True). Note that this matrix is NOT
  symmetric (every row is "identity row minus s").

  `input` may be a 1-D vector or an (n, 1) column; the softmax output is
  flattened first. The result is an (n, n) float64 array, as before.
  """
  probs = np.ravel(SoftMax_forward_prop(input, True))
  # Broadcasting subtracts s_j from every entry of column j of the identity.
  jacobian = np.eye(probs.size) - probs[np.newaxis, :]
  return jacobian.astype(np.float64)

def log_softmax_backward_prop(input, loss): # backpropagation
  """Backward pass of the log-softmax node.

  `loss` is the gradient arriving from the next node (dL/dy for
  y = log_softmax(input)). The chain rule gives dL/dinput = J^T @ dL/dy with
  J[i][j] = dy_i/dinput_j. Unlike the softmax Jacobian, the log-softmax
  Jacobian is not symmetric, so the transpose is required here.
  """
  jac = log_softmax_jacobian(input) # calculating jacobian
  return jac.T @ np.array(loss)

In [None]:
#function for RelU forward and backward propagation
#taken from previous assignments
def RelU_jacobian(input):
  """Jacobian of ReLU at `input`: a diagonal matrix with 1 where input > 0.

  Returns a (len(input), len(input)) float array; all other entries are 0.
  """
  size = len(input)
  jac = np.zeros((size, size))
  for idx, value in enumerate(input):
    if value > 0:
      jac[idx, idx] = 1
  return jac

def RelU_forward_prop(input):
  """Forward pass of ReLU: max(0, x) applied to every element.

  `input` is flattened, so a 1-D vector and an (n, 1) column are both
  accepted. The result is always an (n, 1) column with the dtype of the
  input (the previous element-by-element version returned an integer array
  when every element was non-positive).
  """
  values = np.asarray(input).reshape(-1)
  return np.maximum(values, 0).reshape(-1, 1)

def RelU_backward_prop(input, loss):
  """Backward pass of ReLU.

  `input` is what was fed to ReLU in the forward pass and `loss` is the
  gradient arriving from the next node. The ReLU Jacobian is diagonal with 1
  where input > 0 and 0 elsewhere, so multiplying by it is the same as
  zeroing the rows of `loss` whose input was not positive. That is done
  elementwise here instead of building the full n x n matrix.

  Returns an array with the shape of `loss`.

  Raises:
    ValueError: if `loss` does not have one row per element of `input`.
  """
  grad = np.array(loss)
  mask = (np.asarray(input).reshape(-1) > 0).astype(np.float64)
  if grad.ndim == 0 or grad.shape[0] != mask.shape[0]:
    raise ValueError("loss must have one row per element of input")
  # Shape the mask as (n, 1, ..., 1) so it scales whole rows of `loss`.
  mask = mask.reshape((-1,) + (1,) * (grad.ndim - 1))
  return mask * grad

# Crucial formula for backward propagation
This formula is used by both neural networks during backward propagation:

$$\frac{\partial L}{\partial s_{in}} = s_{out} - y_{true}$$

where $s_{in}$ is the input of the softmax layer, $s_{out}$ is the output of the softmax layer, $y_{true}$ is the one-hot vector of the true label (1 at the index equal to the label number and 0 at all other indices), and $L = -y_{true}^T \log s_{out}$ is the cross-entropy loss.

## Source
https://www.mldawn.com/back-propagation-with-cross-entropy-and-softmax/

# One-layer neural network

## Training

In [None]:
W = np.random.uniform(0, 1, (c, n)) #W_0 - initial weights
nu = 0.18 # learning rate
num_epochs = 10 # amount of epochs

# Stochastic gradient descent: one weight update per training picture.
for epoch in range(num_epochs): #for each epoch (own name, so the sample index below does not shadow it)
  total_loss = 0.0 #sum of y_true^T log_softmax over one epoch (negated when printed)
  correct = 0 #number of correct predictions
  for i in tqdm(range(N_train)): #for each picture

    x = x_train[i] / 255 #normalize pixels, so they will be from 0 to 1

    y_true = np.array(y_train[i].reshape(c,  1)) #true value of y

    #forward propagation

    y1 = MatMul_forward_prop(W, x) #applying matrix multiplication
    y2 = log_softmax(y1) #applying log softmax

    if np.argmax(y2) == np.argmax(y_true): #check if predictions are correct
      correct += 1

    loss = y_true.T @ y2 #log-probability of the true class, a (1, 1) array
    total_loss += float(loss.item()) #accumulate a plain float so the print below never needs .item()

    #backward propagation

    back = -y_true + SoftMax_forward_prop(y1, True) #dL/dy1 = softmax(y1) - y_true

    dL_dW = MatMul_matrix_backward_prop(x, back) #finding dL/dW

    #applying gradient descent

    W = W - nu * dL_dW

  print('\nAccuracy:', correct / N_train)
  print('Loss:', -total_loss / N_train)


  0%|          | 0/60000 [00:00<?, ?it/s]


(784, 1)

Accuracy: 0.0


AttributeError: ignored

## Testing

In [None]:
y_pred = np.zeros((N_test, 1), int) #predictions, one row per test picture
for i in tqdm(range(N_test)):
    x = x_test[i] / 255 #same [0, 1] pixel scaling that was applied during training

    y1 = MatMul_forward_prop(W, x) #applying matrix multiplication
    out = SoftMax_forward_prop(y1, True) #finding predictions

    y_pred[i] = np.argmax(out) #setting prediction (the greatest number among outputs)

In [None]:
# Share of test pictures whose predicted label equals the true label.
# (The previous label mentioned an L1 distance, which this network does not use.)
print("Test accuracy:", metrics.accuracy_score(y_true=y_test, y_pred=y_pred))

# Two-layer neural network

## Training

In [None]:
c1 = 14 * 14 # dimension of the first layer
W1 = np.random.uniform(-1, 1, (c1, n)) #W1 - initial weights
W2 = np.random.uniform(-1, 1, (c, c1)) #W2 - initial weights
b1 = np.random.uniform(-1, 1 , (c1, 1)) #b1 - initial bias
b2 = np.random.uniform(-1, 1 , (c, 1)) #b2 - initial bias
nu = 0.001 # learning rate
num_epochs = 10 # amount of epochs

# Stochastic gradient descent: one update of W1, W2, b1, b2 per training picture.
for epoch in range(num_epochs): #for each epoch (own name, so the sample index below does not shadow it)
  total_loss = 0.0 #sum of y_true^T log_softmax over one epoch (negated when printed)
  correct = 0 #number of correct predictions
  for i in tqdm(range(N_train)): #for each picture

    x = x_train[i] / 255 #normalize pixels, so they will be from 0 to 1

    y_true = np.array(y_train[i].reshape(c,  1))  #true value of y

    #forward propagation

    y1 = MatMul_forward_prop(W1, x) #applying matrix multiplication with the first weight matrix
    y2 = y1 + b1 #adding bias
    y3 = RelU_forward_prop(y2) #applying RelU
    y4 =  MatMul_forward_prop(W2, y3) #applying matrix multiplication with the second weight matrix
    y5 = y4 + b2 #adding bias
    y6 = log_softmax(y5) #applying log softmax

    if np.argmax(y6) == np.argmax(y_true): #check if predictions are correct
      correct += 1

    loss = y_true.T @ y6 #log-probability of the true class, a (1, 1) array
    total_loss += float(loss.item()) #accumulate a plain float so the print below never needs .item()

    #backward propagation

    back = -y_true + SoftMax_forward_prop(y5, True) #dL/dy5 = softmax(y5) - y_true, shape (c, 1)

    dL_db2 = back #addition passes the gradient through unchanged: dL/db2 = dL/dy4 = dL/dy5

    # dL/dW2 must use the gradient at the OUTPUT of the W2 multiplication (dL/dy4),
    # so it is computed before `back` is propagated through W2. Using the propagated
    # gradient would give a (c1, c1) matrix that cannot be subtracted from the (c, c1) W2.
    dL_dW2 = MatMul_matrix_backward_prop(y3, back) #finding dL/dW2, shape (c, c1)

    back = MatMul_backward_prop(W2, back) #dL/dy3: backpropagation through the multiplication with W2

    back = RelU_backward_prop(y2, back) #dL/dy2: backpropagation through RelU

    dL_db1 = back #addition passes the gradient through unchanged: dL/db1 = dL/dy1 = dL/dy2

    dL_dW1 = MatMul_matrix_backward_prop(x, back) #finding dL/dW1, shape (c1, n)

    #applying gradient descent for weights and biases

    W1 = W1 - nu * dL_dW1
    W2 = W2 - nu * dL_dW2
    b1 = b1 - nu * dL_db1
    b2 = b2 - nu * dL_db2


  print('\nAccuracy:', correct / N_train)
  print('Loss:', -total_loss / N_train)


## Testing

In [None]:
y_pred = np.zeros((N_test, 1), int) #predictions, one row per test picture
for i in tqdm(range(N_test)):
    # Apply the same [0, 1] pixel scaling as in training. The biases b1 and b2 were
    # learned for that scale, so feeding raw 0..255 pixels would change the predictions.
    x = x_test[i] / 255

    y1 = MatMul_forward_prop(W1, x) #applying matrix multiplication with the first weight matrix
    y2 = y1 + b1 #adding bias
    y3 = RelU_forward_prop(y2) #applying RelU
    y4 =  MatMul_forward_prop(W2, y3) #applying matrix multiplication with the second weight matrix
    y5 = y4 + b2 #adding bias
    out = SoftMax_forward_prop(y5, True) #applying softmax to find outputs

    y_pred[i] = np.argmax(out) #setting prediction (the greatest number among outputs)

In [None]:
# Share of test pictures whose predicted label equals the true label.
# (The previous label mentioned an L1 distance, which this network does not use.)
print("Test accuracy:", metrics.accuracy_score(y_true=y_test, y_pred=y_pred))