In [3]:
import pickle, gzip
import numpy as np


def sigmoid(z):
    """Apply the logistic function ``1 / (1 + e^(-z))`` to ``z``.

    ``z`` may be a scalar or a NumPy array; arrays are processed
    element-wise and the result has the same shape as the input.
    """
    decay = np.exp(-z)
    return 1 / (1 + decay)


def softmax(z):
    """Return the softmax of ``z``: ``e^z / sum(e^z)``.

    The normalisation runs over all elements of ``z`` (there is no axis
    argument), so the returned values sum to 1 across the whole input.

    Args:
        z: array-like of scores (logits).

    Returns:
        A NumPy array with the same shape as ``z``. An empty input yields
        an empty array.
    """
    z = np.asarray(z)
    if z.size == 0:
        # Nothing to normalise, and np.max would raise on an empty array.
        return np.exp(z)
    # Subtracting the maximum leaves the result mathematically unchanged but
    # keeps every exponent <= 0, so np.exp cannot overflow to inf and turn
    # the division into inf/inf = nan for large scores.
    e_z = np.exp(z - np.max(z))
    return e_z / np.sum(e_z)


def init_weights(layers_sizes):
    """Create one random weight matrix per pair of consecutive layers.

    For a layer with ``fan_in`` inputs and ``fan_out`` neurons the matrix has
    shape ``(fan_out, fan_in)``. Values are drawn from a normal distribution
    with mean 0 and standard deviation ``1 / sqrt(fan_in)``; the small spread
    is meant to keep the neurons from saturating at the start of training.

    Returns a list with ``len(layers_sizes) - 1`` matrices (empty when fewer
    than two layer sizes are given).
    """
    matrices = []
    for fan_in, fan_out in zip(layers_sizes[:-1], layers_sizes[1:]):
        # Standard deviation shrinks with the number of incoming connections.
        spread = np.power(np.sqrt(fan_in), -1)
        matrices.append(np.random.normal(0, spread, (fan_out, fan_in)))
    return matrices


def init_biases(layers_sizes):
    """Create one random bias vector for every layer after the input layer.

    Each vector has as many entries as its layer has neurons and is drawn
    from the standard normal distribution (mean 0, standard deviation 1).
    """
    # The first entry is the input layer, which has no biases.
    return [np.random.standard_normal(width) for width in layers_sizes[1:]]


def update_weights(error, previous, learning_rate):
    """Return the step to subtract from a weight matrix.

    The gradient is the broadcast product ``error * previous``; with a
    column-shaped ``error`` of shape ``(n, 1)`` and a flat ``previous`` of
    length ``m`` this yields an ``(n, m)`` matrix. The gradient is then
    scaled by ``learning_rate``.
    """
    return learning_rate * (error * previous)


def update_biases(error, learning_rate):
    """Return the step to subtract from a bias vector.

    The bias gradient is the error itself, so the error is flattened to one
    dimension and scaled by ``learning_rate``.
    """
    return learning_rate * np.ravel(error)


def train(train_set, epochs=1, learning_rate=0.08, layers_sizes=(784, 100, 10)):
    """Train a one-hidden-layer network with per-sample gradient descent.

    The hidden layer uses a sigmoid activation and the output layer a
    softmax. Weights and biases are updated after every single sample.

    Args:
        train_set: sequence whose first item holds the images and whose
            second item holds the labels. Each image must be a flat vector
            with ``layers_sizes[0]`` values and each label an integer class
            index below ``layers_sizes[2]``.
        epochs: number of full passes over the training set.
        learning_rate: factor applied to every gradient step.
        layers_sizes: ``(input, hidden, output)`` layer widths.

    Returns:
        A ``(weights, biases)`` tuple of two-element lists: index 0 belongs
        to the hidden layer and index 1 to the output layer.

    Raises:
        ValueError: if ``layers_sizes`` does not contain exactly three sizes;
            the backpropagation below is written for a single hidden layer.
    """
    if len(layers_sizes) != 3:
        raise ValueError("layers_sizes must be (input, hidden, output)")

    images = train_set[0]
    labels = train_set[1]
    output_size = layers_sizes[-1]

    weights = init_weights(layers_sizes)
    biases = init_biases(layers_sizes)

    for _ in range(epochs):
        # Go through every image of the training set.
        for image, label in zip(images, labels):
            # Forward pass: input -> hidden layer (sigmoid activation).
            hidden = sigmoid(np.dot(weights[0], image) + biases[0])
            # Hidden -> output layer (softmax activation).
            output = softmax(np.dot(weights[1], hidden) + biases[1])

            # One-hot target: zeros everywhere except a 1 at the label index.
            target = np.zeros(output_size)
            target[label] = 1

            # Output error for softmax with cross-entropy: prediction - target.
            # Stored as a column so it broadcasts against the output weights.
            output_error = (output - target).reshape((-1, 1))

            # Hidden error: sigmoid derivative (out * (1 - out)) times the
            # output errors weighted by the connections that carried them.
            # Summing the products over axis 0 gives, for each hidden neuron,
            # the total over all output neurons. This must use the output
            # weights from before their update below.
            hidden_error = (hidden * (1 - hidden)) * np.sum(
                weights[1] * output_error, axis=0
            )
            # Column shape so it broadcasts against the flat input image.
            hidden_error = hidden_error.reshape((-1, 1))

            # Gradient step. The weight gradient is the layer error times
            # that layer's input (hidden output, or the image for the first
            # layer); the bias gradient is the error itself.
            weights[1] = weights[1] - update_weights(output_error, hidden, learning_rate)
            weights[0] = weights[0] - update_weights(hidden_error, image, learning_rate)

            biases[1] = biases[1] - update_biases(output_error, learning_rate)
            biases[0] = biases[0] - update_biases(hidden_error, learning_rate)

    return weights, biases


def acuratete(weights, biases, test_set):
    """Print the network's classification accuracy, in percent, on ``test_set``.

    ``test_set`` holds the images at index 0 and the matching labels at
    index 1. A sample counts as correct when the index of the largest
    softmax output equals its label. Nothing is returned.
    """
    images = test_set[0]
    labels = test_set[1]

    def predict(image):
        # Same forward pass as in training: sigmoid hidden layer, softmax output.
        hidden = sigmoid(np.add(np.dot(weights[0], image), biases[0]))
        scores = softmax(np.add(np.dot(weights[1], hidden), biases[1]))
        return np.argmax(scores)

    hits = sum(
        1 for idx, image in enumerate(images) if predict(image) == labels[idx]
    )
    print("Accuracy:")
    print(hits / len(images) * 100)


if __name__ == '__main__':
    # Load the gzipped, pickled dataset; it unpacks into three splits
    # (training, validation, test).
    # NOTE(review): pickle.load can execute arbitrary code while loading, so
    # 'mnist.pkl.gz' must come from a trusted source.
    # NOTE(review): encoding='latin1' presumably because the pickle was
    # written by Python 2 - confirm against the dataset's origin.
    with gzip.open('mnist.pkl.gz', 'rb') as fin:
        train_set, valid_set, test_set = pickle.load(fin, encoding='latin1')
    # Train on the training split, then print the accuracy on the test split.
    # valid_set is loaded but not used in this block.
    weights, biases = train(train_set)
    acuratete(weights, biases, test_set)


Accuracy:
95.71
