# Hacettepe University
### BBM 409 4th ASSIGNMENT


#### Name and Surname: Fatih Pehlivan – Göktuğ Ocaklıoğlu
#### Identity Number: 21946529 - 2200356841
#### Course:  BBM 409

#### Advisor: Burçak Asal

In [472]:
import os


import pandas as pd
import numpy as np
from PIL import Image
from sklearn.utils import shuffle
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix, recall_score, precision_score


#### PART 1-2
We were asked to implement neural network to classify given images.
We were asked to compare our implementations with different functions (sigmoid, relu, tanh), different learning (0.005, 0.01, 0.02),
different batch sizes (16,32,64), different hidden numbers (0,1,2) and comment their results according to accuracy, recall, precision and f1_score.

In [473]:
# epsilon is a small value to prevent overflow in some operations
epsilon = 10 ** -12

# define pixel number
px = 60
# Read images
folders = np.array(["Bean", "Bitter_Gourd", "Bottle_Gourd", "Brinjal", "Broccoli", "Cabbage", "Capsicum", "Carrot",
                    "Cauliflower", "Cucumber", "Papaya", "Potato", "Pumpkin", "Radish", "Tomato"])

In [474]:
def load_images_from_folder(images, folder, count):
    """
    load images into an array
    :param images: array of images
    :param folder: folder path
    :param count: load images in order
    :return: images array and count value
    """
    for filename in os.listdir(folder):
        if any([filename.endswith(".jpg")]):
            # reshape, grayscala and normalization
            img = np.asarray(Image.open(os.path.join(folder, filename)).convert('L').resize((px, px))) / 255
            if img is not None:
                images[count] = img.flatten()
                count += 1
    return images, count

In [475]:
# calculate sigmoid
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [476]:
# calculate softmax
def softmax(x):
    return np.exp(x) / np.exp(x).sum()

In [477]:
# calculate tanh
def tanh(x):
    return np.tanh(x)

In [478]:
# calculate derivative of tanh
def derivative_tanh(x):
    return 1 - np.square(tanh(x))

In [479]:
# calculate ReLU
def relu(x):
    x[x <= 0] = 0
    return x

In [480]:
# calculate derivative of ReLU
def derivative_relu(x):
    x[x <= 0] = 0
    x[x > 0] = 1
    return x

In [481]:
def initialize_weight(layers):
    """
    initialize weight and bias
    :param layers: [px*px, ......, 15]
    :return: weight and bias
    """
    w = dict()
    b = dict()
    for i in range(len(layers) - 1):
        w[i] = np.random.uniform(-0.2, 0.2, (layers[i], layers[i + 1]))
        b[i] = np.zeros(layers[i + 1])
    return w, b

In [482]:
def back_prop(bias, weight, activation, y_batch, function_name, batch, alpha, z, gradient_array):
    """
    makes back propagation
    :param bias: bias dict
    :param weight: weight dict
    :param activation: activation dict
    :param y_batch:
    :param function_name: derivative of functions are different, get the function name
    :param batch: batch size
    :param alpha: learning rate
    :param z:
    :param gradient_array: for debugging not necessary
    :return: updated bias and weight dict
    """
    # first step is the same for all because last functions are the same
    #
    if function_name == "sigmoid":
        index1 = list(bias.keys())[-1]
        index2 = index1
        # take first derivate is the same for all
        dBias = (activation[index1 + 1] - y_batch) / batch
        # print(f"dbias {index1}", np.sum(dBias) - gradient_array[0][index1])
        bias[index1] = bias[index1] - alpha * np.sum(dBias, axis=0)

        while index1 > 0:
            index1 -= 1
            # activation[index1 + 1] * (1 - activation[index1 + 1] is derivative of sigmoid
            dBias = np.dot(dBias, weight[index1 + 1].T) * activation[index1 + 1] * (1 - activation[index1 + 1])
            # print(f"dbias {index1}", np.sum(dBias) - gradient_array[0][index1])
            bias[index1] = bias[index1] - alpha * np.sum(dBias, axis=0)

        weight_copy = weight.copy()
        cons = (activation[index2 + 1] - y_batch) / batch
        dweight = np.dot(activation[index2].T, cons)
        # print(f"dweight {index2}", np.sum(dweight) - gradient_array[1][index2])
        weight[index2] = weight[index2] - alpha * dweight
        while index2 > 0:
            index2 -= 1
            cons = np.dot(cons, weight_copy[index2 + 1].T) * activation[index2 + 1] * (1 - activation[index2 + 1])
            dweight = np.dot(activation[index2].T, cons)
            # print(f"dweight {index2}", np.sum(dweight) - gradient_array[1][index2])
            weight[index2] = weight[index2] - alpha * dweight


    elif function_name == "relu":
        index1 = list(bias.keys())[-1]
        index2 = index1
        dBias = (activation[index1 + 1] - y_batch) / batch
        bias[index1] = bias[index1] - alpha * np.sum(dBias, axis=0)

        while index1 > 0:
            index1 -= 1
            dBias = np.dot(dBias, weight[index1 + 1].T) * derivative_relu(z[index1])
            bias[index1] = bias[index1] - alpha * np.sum(dBias, axis=0)

        weight_copy = weight.copy()
        cons = (activation[index2 + 1] - y_batch) / batch
        dweight = np.dot(activation[index2].T, cons)
        # print(f"dweight {index2}", np.sum(dweight) - gradient_array[1][index2])
        weight[index2] = weight[index2] - alpha * dweight
        while index2 > 0:
            index2 -= 1
            cons = np.dot(cons, weight_copy[index2 + 1].T) * derivative_relu(z[index2])
            dweight = np.dot(activation[index2].T, cons)
            # print(f"dweight {index2}", np.sum(dweight) - gradient_array[1][index2])
            weight[index2] = weight[index2] - alpha * dweight
    else:
        index1 = list(bias.keys())[-1]
        index2 = index1
        dBias = (activation[index1 + 1] - y_batch) / batch
        bias[index1] = bias[index1] - alpha * np.sum(dBias, axis=0)

        while index1 > 0:
            index1 -= 1
            dBias = np.dot(dBias, weight[index1 + 1].T) * derivative_tanh(z[index1])
            bias[index1] = bias[index1] - alpha * np.sum(dBias, axis=0)

        weight_copy = weight.copy()
        cons = (activation[index2 + 1] - y_batch) / batch
        dweight = np.dot(activation[index2].T, cons)
        # print(f"dweight {index2}", np.sum(dweight) - gradient_array[1][index2])
        weight[index2] = weight[index2] - alpha * dweight
        while index2 > 0:
            index2 -= 1
            cons = np.dot(cons, weight_copy[index2 + 1].T) * derivative_tanh(z[index2])
            dweight = np.dot(activation[index2].T, cons)
            # print(f"dweight {index2}", np.sum(dweight) - gradient_array[1][index2])
            weight[index2] = weight[index2] - alpha * dweight
    return bias, weight

In [483]:
def forward_prog(layers, z, activation, weight, bias, func, hidden_numbers):
    """
    makes forward propagation
    :param layers: [px*px,..........,15]
    :param z:
    :param activation: activation dict
    :param weight: weight dict
    :param bias: bias dict
    :param func: the function such as relu, sigmoid
    :param hidden_numbers: number of hidden layers
    :return:
    """
    k = 0
    for j in range(len(layers) - 1):
        z[j] = np.dot(activation[j], weight[j]) + bias[j]
        activation[j + 1] = func(z[j])
        k = j
    activation[k + 1] = sigmoid(z[k])
    return activation[hidden_numbers + 1], z, activation

In [484]:
def gradient_check(loss, hidden_numbers, weight, bias, batch, y_batch, layers, z, activation, func):
    """
    use for only debugging
    :param loss: loss function value
    :param hidden_numbers: number of hidden layers
    :param weight: weight dict
    :param bias: bias dict
    :param batch: batch size
    :param y_batch:
    :param layers: [px*px,..........,15]
    :param z:
    :param activation: activation dict
    :param func: the function such as relu, sigmoid
    :return: an array with graident check value
    """
    result = np.zeros((2, hidden_numbers + 1), dtype='float64')
    for i in range(hidden_numbers + 1):
        temp = weight[i]
        weight[i] = weight[i] + epsilon
        y_hat = forward_prog(layers, z, activation, weight, bias, func, hidden_numbers)[0]
        loss1 = -1 / batch * (np.sum(y_batch * np.log(y_hat) + (1 - y_batch) * np.log(1 - y_hat)))
        result[1][i] = (loss1 - loss) / (epsilon * 2)
        weight[i] = temp
        temp = bias[i]
        bias[i] += epsilon
        y_hat = forward_prog(layers, z, activation, weight, bias, func, hidden_numbers)[0]
        loss1 = -1 / batch * (np.sum(y_batch * np.log(y_hat) + (1 - y_batch) * np.log(1 - y_hat)))
        result[0][i] = (loss1 - loss) / epsilon
        bias[i] = temp
    return result

In [485]:
def listToStr(liste):
    """
    make list to str
    :param liste: is a list
    :return: string
    """
    s = ""
    for i in liste:
        s += str(i) + " "
    return s

In [486]:
# initialize NN
def neural_network(func, layers, alpha, batch, hidden_numbers=0):
    """
    make NN according to given params
    :param func: such as sigmoid, relu, tanh
    :param layers: hidden layers
    :param alpha: learning rate
    :param batch: batch size
    :param hidden_numbers: number of hidden layers
    :return:
    """
    if hidden_numbers == 0:
        layers = []
    layers.insert(0, px * px)
    layers.append(15)
    weight, bias = initialize_weight(layers)
    z = dict()
    activation = dict()
    los_ = 99999

    while True:
        for e in range(10):
            counter = 0
            for i in range(0, 15000 - batch, batch):
                counter += batch
                activation[0] = images[i: i + batch]
                y_batch = imagesY[i: i + batch]

                # forward prog
                y_hat, z, activation = forward_prog(layers, z, activation, weight, bias, func, hidden_numbers)

                # loss function
                loss = -1 / batch * (
                    np.sum(y_batch * np.log(y_hat) + (1 - y_batch) * np.log(1 - y_hat)))

                # gradient_array = gradient_check(loss, hidden_numbers, weight, bias, batch, y_batch, layers, z, activation,
                # func)
                # back prog
                bias, weight = back_prop(bias, weight, activation, y_batch, func.__name__, batch, alpha, z,
                                         gradient_array=[])

        # Validation part
        activation[0] = images_validation
        y_batch = images_validation_Y
        y_hat, z, activation = forward_prog(layers, z, activation, weight, bias, func, hidden_numbers)
        new_loss = (-1 / batch * (
            np.sum(y_batch * np.log(y_hat) + (1 - y_batch) * np.log(1 - y_hat))))

        # calculate accuracy
        y_hat = (y_hat == y_hat.max(axis=1)[:,None]).astype(int)
        acc = accuracy_score(y_batch, y_hat)
        pre = precision_score(y_batch, y_hat, average=None, zero_division=0)
        recall = recall_score(y_batch, y_hat, average=None, zero_division=0)
        f1 = f1_score(y_batch, y_hat, average=None, zero_division=0)
        cnf = confusion_matrix(y_batch.argmax(axis=1), y_hat.argmax(axis=1))
        if new_loss >= los_:
            break
        los_ = new_loss
    return func.__name__, listToStr(layers), alpha, batch, hidden_numbers, acc, pre, recall,f1, cnf, weight, bias, layers, func

In [487]:
images = np.zeros((15000, px * px), dtype='float64')
imagesY = np.zeros((15000, 15), dtype='int8')

countY = 0
count = 0
back_count = 0

# init train set
for folder in folders:
    path = 'Vegetable Images/train/' + folder
    images, count = load_images_from_folder(images, path, count)
    imagesY[back_count: count, countY:countY + 1] = 1
    countY += 1
    back_count = count
images, imagesY = shuffle(images, imagesY, random_state=0)

countY = 0
count = 0
back_count = 0

# init validation set
images_validation = np.zeros((3000, px * px), dtype='float64')
images_validation_Y = np.zeros((3000, 15), dtype='int8')
for folder in folders:
    path = 'Vegetable Images/validation/' + folder
    images_validation, count = load_images_from_folder(images_validation, path, count)
    images_validation_Y[back_count: count, countY:countY + 1] = 1
    countY += 1
    back_count = count

In [488]:
# for loop makes the code very very slow (only one parameter takes more than 8 hours)
results = []
hidden1 = 25
hidden2 = 20
for i in range(81):
    results.append([])
results[0] = neural_network(sigmoid, [], 0.005, 16, 0)
results[1] = neural_network(sigmoid, [], 0.005, 32, 0)
results[2] = neural_network(sigmoid, [], 0.005, 64, 0)
results[3] = neural_network(sigmoid, [], 0.01, 16, 0)
results[4] = neural_network(sigmoid, [], 0.01, 32, 0)
results[5] = neural_network(sigmoid, [], 0.01, 64, 0)
results[6] = neural_network(sigmoid, [], 0.02, 16, 0)
results[7] = neural_network(sigmoid, [], 0.02, 32, 0)
results[8] = neural_network(sigmoid, [], 0.02, 64, 0)
results[9] = neural_network(sigmoid, [hidden1], 0.005, 16, 1)
results[10] = neural_network(sigmoid, [hidden1], 0.005, 32, 1)
results[11] = neural_network(sigmoid, [hidden1], 0.005, 64, 1)
results[12] = neural_network(sigmoid, [hidden1], 0.01, 16, 1)
results[13] = neural_network(sigmoid, [hidden1], 0.01, 32, 1)
results[14] = neural_network(sigmoid, [hidden1], 0.01, 64, 1)
results[15] = neural_network(sigmoid, [hidden1], 0.02, 16, 1)
results[16] = neural_network(sigmoid, [hidden1], 0.02, 32, 1)
results[17] = neural_network(sigmoid, [hidden1], 0.02, 64, 1)
results[18] = neural_network(sigmoid, [hidden1, hidden2], 0.005, 16, 2)
results[19] = neural_network(sigmoid, [hidden1, hidden2], 0.005, 32, 2)
results[20] = neural_network(sigmoid, [hidden1, hidden2], 0.005, 64, 2)
results[21] = neural_network(sigmoid, [hidden1, hidden2], 0.01, 16, 2)
results[22] = neural_network(sigmoid, [hidden1, hidden2], 0.01, 32, 2)
results[23] = neural_network(sigmoid, [hidden1, hidden2], 0.01, 64, 2)
results[24] = neural_network(sigmoid, [hidden1, hidden2], 0.02, 16, 2)
results[25] = neural_network(sigmoid, [hidden1, hidden2], 0.02, 32, 2)
results[26] = neural_network(sigmoid, [hidden1, hidden2], 0.02, 64, 2)

In [489]:
results[27] = neural_network(relu, [], 0.005, 16, 0)
results[28] = neural_network(relu, [], 0.005, 32, 0)
results[29] = neural_network(relu, [], 0.005, 64, 0)
results[30] = neural_network(relu, [], 0.01, 16, 0)
results[31] = neural_network(relu, [], 0.01, 32, 0)
results[32] = neural_network(relu, [], 0.01, 64, 0)
results[33] = neural_network(relu, [], 0.02, 16, 0)
results[34] = neural_network(relu, [], 0.02, 32, 0)
results[35] = neural_network(relu, [], 0.02, 64, 0)
results[36] = neural_network(relu, [hidden1], 0.005, 16, 1)
results[37] = neural_network(relu, [hidden1], 0.005, 32, 1)
results[38] = neural_network(relu, [hidden1], 0.005, 64, 1)
results[39] = neural_network(relu, [hidden1], 0.01, 16, 1)
results[40] = neural_network(relu, [hidden1], 0.01, 32, 1)
results[41] = neural_network(relu, [hidden1], 0.01, 64, 1)
results[42] = neural_network(relu, [hidden1], 0.02, 16, 1)
results[43] = neural_network(relu, [hidden1], 0.02, 32, 1)
results[44] = neural_network(relu, [hidden1], 0.02, 64, 1)
results[45] = neural_network(relu, [hidden1, hidden2], 0.005, 16, 2)
results[46] = neural_network(relu, [hidden1, hidden2], 0.005, 32, 2)
results[47] = neural_network(relu, [hidden1, hidden2], 0.005, 64, 2)
results[48] = neural_network(relu, [hidden1, hidden2], 0.01, 16, 2)
results[49] = neural_network(relu, [hidden1, hidden2], 0.01, 32, 2)
results[50] = neural_network(relu, [hidden1, hidden2], 0.01, 64, 2)
results[51] = neural_network(relu, [hidden1, hidden2], 0.02, 16, 2)
results[52] = neural_network(relu, [hidden1, hidden2], 0.02, 32, 2)
results[53] = neural_network(relu, [hidden1, hidden2], 0.02, 64, 2)

In [490]:
results[54] = neural_network(tanh, [], 0.005, 16, 0)
results[55] = neural_network(tanh, [], 0.005, 32, 0)
results[56] = neural_network(tanh, [], 0.005, 64, 0)
results[57] = neural_network(tanh, [], 0.01, 16, 0)
results[58] = neural_network(tanh, [], 0.01, 32, 0)
results[59] = neural_network(tanh, [], 0.01, 64, 0)
results[60] = neural_network(tanh, [], 0.02, 16, 0)
results[61] = neural_network(tanh, [], 0.02, 32, 0)
results[62] = neural_network(tanh, [], 0.02, 64, 0)
results[63] = neural_network(tanh, [hidden1], 0.005, 16, 1)
results[64] = neural_network(tanh, [hidden1], 0.005, 32, 1)
results[65] = neural_network(tanh, [hidden1], 0.005, 64, 1)
results[66] = neural_network(tanh, [hidden1], 0.01, 16, 1)
results[67] = neural_network(tanh, [hidden1], 0.01, 32, 1)
results[68] = neural_network(tanh, [hidden1], 0.01, 64, 1)
results[69] = neural_network(tanh, [hidden1], 0.02, 16, 1)
results[70] = neural_network(tanh, [hidden1], 0.02, 32, 1)
results[71] = neural_network(tanh, [hidden1], 0.02, 64, 1)
results[72] = neural_network(tanh, [hidden1, hidden2], 0.005, 16, 2)
results[73] = neural_network(tanh, [hidden1, hidden2], 0.005, 32, 2)
results[74] = neural_network(tanh, [hidden1, hidden2], 0.005, 64, 2)
results[75] = neural_network(tanh, [hidden1, hidden2], 0.01, 16, 2)
results[76] = neural_network(tanh, [hidden1, hidden2], 0.01, 32, 2)
results[77] = neural_network(tanh, [hidden1, hidden2], 0.01, 64, 2)
results[78] = neural_network(tanh, [hidden1, hidden2], 0.02, 16, 2)
results[79] = neural_network(tanh, [hidden1, hidden2], 0.02, 32, 2)
results[80] = neural_network(tanh, [hidden1, hidden2], 0.02, 64, 2)

In [491]:
result = np.array(results)
results = sorted(results,key=lambda l:l[5], reverse=True)
results

  result = np.array(results)


[('sigmoid',
  '3600 25 15 ',
  0.005,
  64,
  1,
  0.448,
  array([0.4       , 0.50649351, 0.50230415, 0.41293532, 0.38461538,
         0.30097087, 0.44537815, 0.51396648, 0.41621622, 0.65294118,
         0.53888889, 0.41577061, 0.51798561, 0.43459916, 0.32467532]),
  array([0.25 , 0.585, 0.545, 0.415, 0.4  , 0.155, 0.53 , 0.46 , 0.385,
         0.555, 0.485, 0.58 , 0.36 , 0.515, 0.5  ]),
  array([0.30769231, 0.54292343, 0.52278177, 0.41396509, 0.39215686,
         0.20462046, 0.48401826, 0.48548813, 0.4       , 0.6       ,
         0.51052632, 0.48434238, 0.42477876, 0.47139588, 0.39370079]),
  array([[ 50,  12,   5,   4,  20,   3,   5,   5,   2,   9,  13,  31,   3,
           12,  26],
         [  7, 117,   7,   9,   8,   1,   3,  10,   2,  10,   6,   5,   8,
            2,   5],
         [  4,   3, 109,  15,   8,   5,   6,   5,  11,   1,   8,   6,   2,
            4,  13],
         [  3,  14,  13,  83,   2,  12,  14,   9,   4,   6,   6,  16,   5,
            7,   6],
         [  9,

In [492]:
result[:, 0:6]

array([['sigmoid', '3600 15 ', 0.005, 16, 0, 0.38333333333333336],
       ['sigmoid', '3600 15 ', 0.005, 32, 0, 0.3943333333333333],
       ['sigmoid', '3600 15 ', 0.005, 64, 0, 0.387],
       ['sigmoid', '3600 15 ', 0.01, 16, 0, 0.37166666666666665],
       ['sigmoid', '3600 15 ', 0.01, 32, 0, 0.39266666666666666],
       ['sigmoid', '3600 15 ', 0.01, 64, 0, 0.398],
       ['sigmoid', '3600 15 ', 0.02, 16, 0, 0.3536666666666667],
       ['sigmoid', '3600 15 ', 0.02, 32, 0, 0.372],
       ['sigmoid', '3600 15 ', 0.02, 64, 0, 0.4036666666666667],
       ['sigmoid', '3600 25 15 ', 0.005, 16, 1, 0.4206666666666667],
       ['sigmoid', '3600 25 15 ', 0.005, 32, 1, 0.44033333333333335],
       ['sigmoid', '3600 25 15 ', 0.005, 64, 1, 0.448],
       ['sigmoid', '3600 25 15 ', 0.01, 16, 1, 0.417],
       ['sigmoid', '3600 25 15 ', 0.01, 32, 1, 0.44066666666666665],
       ['sigmoid', '3600 25 15 ', 0.01, 64, 1, 0.43633333333333335],
       ['sigmoid', '3600 25 15 ', 0.02, 16, 1, 0.42066666666

In [554]:
layers = results[0][-2]
func = results[0][-1]
w = results[0][-4]
b = results[0][-3]
acc = results[0][5]
btch = results[0][3]
cnf = results[0][10]
layers = [x for x in layers if x != 3600 and x!= 15]

In [494]:
countY = 0
count = 0
back_count = 0

# init test set
images_test = np.zeros((3000, px * px), dtype='float64')
images_test_Y = np.zeros((3000, 15), dtype='int8')
for folder in folders:
    path = 'Vegetable Images/test/' + folder
    images_test, count = load_images_from_folder(images_test, path, count)
    images_test_Y[back_count: count, countY:countY + 1] = 1
    countY += 1
    back_count = count

In [555]:
w

{0: array([[-0.1229478 ,  0.46977128,  0.1050115 , ...,  0.14654102,
         -0.3720055 , -0.1617666 ],
        [ 0.03822914,  0.46801735,  0.14677575, ...,  0.04311331,
         -0.30635177,  0.02329265],
        [ 0.08068023,  0.02338151,  0.14093648, ..., -0.13560126,
          0.15964695, -0.16676253],
        ...,
        [ 0.02044797,  0.17372573,  0.14410571, ...,  0.10466211,
         -0.37557994,  0.09311727],
        [-0.18374935,  0.34220643,  0.1278131 , ..., -0.13434632,
         -0.37491345,  0.03288985],
        [ 0.02641285,  0.25617182, -0.08650849, ..., -0.13559411,
         -0.31848268,  0.0668531 ]]),
 1: array([[-9.11187245e-01, -5.88997614e-01,  2.54108444e+00,
          2.66043862e+00,  1.38329536e+00,  4.62250154e-01,
         -1.72334620e+00, -2.48290538e-01,  3.49637388e-01,
         -3.40865089e+00,  1.62383352e+00,  5.30611343e-01,
         -2.53365860e+00,  1.65741040e+00,  7.49523769e-01],
        [-1.81838691e-01, -7.45498002e-01, -1.24339820e+00,
      

In [556]:
b

{0: array([-2.8513343 ,  0.40463416,  0.01425268, -0.24123919,  0.02545594,
        -0.42030862, -1.30368613,  1.01410615,  0.01671434,  0.02342883,
         0.01902306, -0.04170786, -1.39292651,  0.01990649, -0.38703747,
        -2.07554797, -1.41412101,  0.00866756, -0.94826831,  1.15115114,
         0.01530207,  1.40249503,  0.0222656 , -1.29909715,  0.02209893]),
 1: array([-0.48977481, -0.34216012, -0.13857165, -0.39006362, -0.75702866,
        -0.55501732,  0.37324124, -0.1568283 , -0.44275364, -0.54452536,
        -0.05389132, -0.88046585,  0.10695783, -1.22805201, -0.72562754])}

In [557]:
cnf

{0: array([[-0.1229478 ,  0.46977128,  0.1050115 , ...,  0.14654102,
         -0.3720055 , -0.1617666 ],
        [ 0.03822914,  0.46801735,  0.14677575, ...,  0.04311331,
         -0.30635177,  0.02329265],
        [ 0.08068023,  0.02338151,  0.14093648, ..., -0.13560126,
          0.15964695, -0.16676253],
        ...,
        [ 0.02044797,  0.17372573,  0.14410571, ...,  0.10466211,
         -0.37557994,  0.09311727],
        [-0.18374935,  0.34220643,  0.1278131 , ..., -0.13434632,
         -0.37491345,  0.03288985],
        [ 0.02641285,  0.25617182, -0.08650849, ..., -0.13559411,
         -0.31848268,  0.0668531 ]]),
 1: array([[-9.11187245e-01, -5.88997614e-01,  2.54108444e+00,
          2.66043862e+00,  1.38329536e+00,  4.62250154e-01,
         -1.72334620e+00, -2.48290538e-01,  3.49637388e-01,
         -3.40865089e+00,  1.62383352e+00,  5.30611343e-01,
         -2.53365860e+00,  1.65741040e+00,  7.49523769e-01],
        [-1.81838691e-01, -7.45498002e-01, -1.24339820e+00,
      

In [558]:
layers

[25]

In [498]:
layers.insert(0, px * px)
layers.append(15)
activation = dict()
z = dict()
weight = w.copy()
bias = b.copy()
activation[0] = images_test
y_batch = images_test_Y
y_hat, z, activation = forward_prog(layers, z, activation, weight, bias, func, len(layers) - 2)
new_loss = (-1 / btch * (np.sum(y_batch * np.log(y_hat) + (1 - y_batch) * np.log(1 - y_hat))))
count = 0
for i in range(3000):
    if np.argmax(images_test_Y[i]) == np.argmax(y_hat[i]):
        count += 1
acc_test = count / 3000
acc_test
# test accuaracy

0.4126666666666667

In [499]:
new_loss
# test loss

133.58627023125396

In [500]:
# Visualize
for i in weight.keys():
    weight[i] = weight[i] - np.min(weight[i])
    weight[i] = weight[i] / np.max(weight[i])

    img = Image.fromarray(np.uint8(weight[i] * 255), 'L').resize((px, px))
    img.show()

In [545]:
table = pd.DataFrame(np.row_stack(result[:, :9]),columns=["Function Name", "Layers", "Learning Rate", "Batch", "Number Of Hidden Layers", "Accuracy", 'Precision', 'Recall', 'F1_score'])
table['Precision'] = table['Precision'].map(lambda x : x.mean())
table['Recall'] = table['Recall'].map(lambda x : x.mean())
table['F1_score'] = table['F1_score'].map(lambda x : x.mean())
table

Unnamed: 0,Function Name,Layers,Learning Rate,Batch,Number Of Hidden Layers,Accuracy,Precision,Recall,F1_score
0,sigmoid,3600 15,0.005,16,0,0.383333,0.402639,0.383333,0.380847
1,sigmoid,3600 15,0.005,32,0,0.394333,0.402654,0.394333,0.392017
2,sigmoid,3600 15,0.005,64,0,0.387,0.393464,0.387000,0.386148
3,sigmoid,3600 15,0.01,16,0,0.371667,0.431839,0.371667,0.372429
4,sigmoid,3600 15,0.01,32,0,0.392667,0.406698,0.392667,0.385497
...,...,...,...,...,...,...,...,...,...
76,tanh,3600 25 20 15,0.01,32,2,0.352667,0.371174,0.352667,0.330364
77,tanh,3600 25 20 15,0.01,64,2,0.367333,0.377207,0.367333,0.357489
78,tanh,3600 25 20 15,0.02,16,2,0.363,0.402322,0.363000,0.364709
79,tanh,3600 25 20 15,0.02,32,2,0.347333,0.359784,0.347333,0.336129


In [546]:
func_table = table[['Function Name', 'Precision', 'Recall', 'F1_score']]
func_table = func_table.groupby(['Function Name']).mean()
func_table

Unnamed: 0_level_0,Precision,Recall,F1_score
Function Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
relu,0.066667,1.0,0.125
sigmoid,0.428047,0.415086,0.409659
tanh,0.390413,0.370519,0.364834


In [547]:
func_table = table[['Function Name', 'Accuracy']]
func_table = func_table.groupby(['Function Name']).mean()
func_table

Unnamed: 0_level_0,Accuracy
Function Name,Unnamed: 1_level_1
relu,0.0
sigmoid,0.415086
tanh,0.370519


Relu function predicts only one class this make the precision 0.0667. Even though recall value is high, others are low we shouldn't use this.
In Relu function is f(x) = x for x >= 0. This makes the numbers very high. And make the others 0. Therefore, Relu function is unreliable to use.

Sigmoid and tanh functions are better than relu, Because their range are between (0,1) and (-1,1). So that, values cannot be high that gives better scores.
Accuracy is 0 for relu because of accuracy_metric (it gives only one class)

In [548]:
learning_rate_table = table[['Learning Rate', 'Accuracy', 'Precision', 'Recall', 'F1_score']]
learning_rate_table = learning_rate_table.groupby(['Learning Rate']).mean()
learning_rate_table

Unnamed: 0_level_0,Precision,Recall,F1_score
Learning Rate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0.005,0.292269,0.599951,0.304679
0.01,0.296171,0.596012,0.300752
0.02,0.296687,0.589642,0.294061


In [549]:
learning_rate_table = table[['Learning Rate', 'Accuracy']]
learning_rate_table = learning_rate_table.groupby(['Learning Rate']).mean()
learning_rate_table

Unnamed: 0_level_0,Accuracy
Learning Rate,Unnamed: 1_level_1
0.005,0.266617
0.01,0.262679
0.02,0.256309


When learning rate = 0.005 it gives better scores for general. However, there is trade off between better scores and power (time). I think we should use 0.01 for learning rate because it gives better time than 0.005 and better recall and gives better scores than 0.02

In [550]:
batch_table = table[['Batch', 'Accuracy', 'Precision', 'Recall', 'F1_score']]
batch_table = batch_table.groupby(['Batch']).mean()
batch_table

Unnamed: 0_level_0,Precision,Recall,F1_score
Batch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
16,0.2984,0.590309,0.296363
32,0.291762,0.594074,0.297191
64,0.294965,0.601222,0.305938


In [551]:
batch_table = table[['Batch', 'Accuracy']]
batch_table = batch_table.groupby(['Batch']).mean()
batch_table

Unnamed: 0_level_0,Accuracy
Batch,Unnamed: 1_level_1
16,0.256975
32,0.260741
64,0.267889


With batch we can divide inputs, if you have low ram choose lower batch size, but it gives you worse scores.
If you have enough hardware choose max batch size.

In [552]:
hidden_layer_number_table = table[['Number Of Hidden Layers', 'Accuracy', 'Precision', 'Recall', 'F1_score']]
hidden_layer_number_table = hidden_layer_number_table.groupby(['Number Of Hidden Layers']).mean()
hidden_layer_number_table

Unnamed: 0_level_0,Precision,Recall,F1_score
Number Of Hidden Layers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.29791,0.588728,0.294748
1,0.29742,0.599444,0.305531
2,0.289797,0.597432,0.299214


In [553]:
hidden_layer_number_table = table[['Number Of Hidden Layers', 'Accuracy']]
hidden_layer_number_table = hidden_layer_number_table.groupby(['Number Of Hidden Layers']).mean()
hidden_layer_number_table

Unnamed: 0_level_0,Accuracy
Number Of Hidden Layers,Unnamed: 1_level_1
0,0.255395
1,0.266111
2,0.264099


If we have more data increasing number of hidden layer may better idea. However, for this data set it causes overflow (for hidden layer = 2)
With 1 hidden layer gives us better scores.