 Load MNIST dataset.

In [21]:
import numpy as np
from torchvision.datasets import MNIST

def download_mnist(is_train: bool):
    """Download one MNIST split and return it as flat NumPy arrays.

    Args:
        is_train: Forwarded to ``MNIST(train=...)`` to select which split
            is loaded.

    Returns:
        A ``(images, labels)`` pair of NumPy arrays. Every image has been
        passed through ``np.array(...).flatten()``, so each row is a 1D
        vector (784 values for a 28x28 picture).
    """
    def to_flat_vector(picture):
        # Flatten the 2D image (28x28 pixels) into a 1D array with 784 elements.
        return np.array(picture).flatten()

    # Materialise every (image, label) pair once, then split the pairs apart.
    samples = list(MNIST(root='./data',
                         transform=to_flat_vector,
                         download=True,
                         train=is_train))
    flat_images = [pixels for pixels, _ in samples]
    digit_labels = [digit for _, digit in samples]

    return np.array(flat_images), np.array(digit_labels)

# Load both splits; each image arrives as a flat vector of 784 pixels.
train_X, train_Y = download_mnist(is_train=True)
test_X, test_Y = download_mnist(is_train=False)

# Report the array shapes as a quick sanity check on what was loaded.
print(f"Training data shape: {train_X.shape}, Training labels shape: {train_Y.shape}")
print(f"Test data shape: {test_X.shape}, Test labels shape: {test_Y.shape}")

Training data shape: (60000, 784), Training labels shape: (60000,)
Test data shape: (10000, 784), Test labels shape: (10000,)


Normalize data and convert labels to one-hot encoding.

In [22]:
from sklearn.preprocessing import OneHotEncoder

# Cast the pixels to float32 and rescale them from [0, 255] into [0, 1].
train_X, test_X = (split.astype(np.float32) / 255.0 for split in (train_X, test_X))

# sparse_output=False makes the encoder return a dense NumPy array.
encoder = OneHotEncoder(sparse_output=False)
# Labels are reshaped into a single column before encoding. The encoder is
# fitted on the training labels and then reused, unchanged, for the test labels.
train_Y_one_hot = encoder.fit_transform(train_Y.reshape(-1, 1))
test_Y_one_hot = encoder.transform(test_Y.reshape(-1, 1))

print(f"Normalized Training data shape: {train_X.shape}, One-hot Training labels shape: {train_Y_one_hot.shape}")
print(f"Normalized Test data shape: {test_X.shape}, One-hot Test labels shape: {test_Y_one_hot.shape}")


Normalized Training data shape: (60000, 784), One-hot Training labels shape: (60000, 10)
Normalized Test data shape: (10000, 784), One-hot Test labels shape: (10000, 10)


Initialize Weights and Biases.

In [23]:
input_size = 784  # number of input features per sample
output_size = 10  # number of output classes

# Seed NumPy's global generator so every run draws the same initial weights.
np.random.seed(42)
# (784, 10) standard-normal draws scaled by 0.01 so the initial weights stay small.
W = 0.01 * np.random.randn(input_size, output_size)
# Biases start at zero so that no class is favoured initially.
b = np.zeros((output_size,))

Implement Softmax Function.

In [24]:
def softmax(z):
    """Apply a row-wise softmax to a 2D score matrix.

    Args:
        z: Array of shape (batch_size, num_classes); one row per example.

    Returns:
        Array of the same shape in which every row is non-negative and
        sums to 1.
    """
    # Shift each row by its maximum so the largest exponent is 0; this keeps
    # np.exp from overflowing without changing the result.
    row_peak = np.max(z, axis=1, keepdims=True)
    unnormalized = np.exp(z - row_peak)
    # Dividing by the row totals turns each row into a probability distribution.
    row_total = np.sum(unnormalized, axis=1, keepdims=True)
    return unnormalized / row_total

Implement Forward Propagation.


In [25]:
def forward_propagation(X, W, b):
    """Compute class probabilities for a batch with a single linear layer.

    Args:
        X: Input matrix with one example per row, shape (batch_size, 784).
        W: Weight matrix mapping features to classes, shape (784, 10).
        b: Bias vector with one entry per class, shape (10,).

    Returns:
        Matrix of shape (batch_size, 10) produced by ``softmax``; the
        values on each row add up to 1.
    """
    # Linear scores: matrix product plus the bias broadcast over every row.
    logits = np.dot(X, W) + b
    probabilities = softmax(logits)
    return probabilities

 Implement Backward Propagation - Gradient Descent Algorithm.

In [26]:
def backward_propagation(X, Y, output, W, b, learning_rate=0.1):
    """Perform one gradient-descent step on the weights and biases.

    Args:
        X: Input batch, one example per row.
        Y: One-hot targets for the batch.
        output: Probabilities returned by the forward pass for ``X``.
        W: Weight matrix; updated in place.
        b: Bias vector; updated in place.
        learning_rate: Step size applied to both gradients.

    Returns:
        The same ``W`` and ``b`` objects after the update.
    """
    batch_count = X.shape[0]  # number of rows (examples) in the batch
    residual = output - Y
    # Batch-averaged gradients: X^T @ residual for the weights,
    # column sums of the residual for the biases.
    grad_W = np.dot(X.T, residual) / batch_count
    grad_b = residual.sum(axis=0) / batch_count
    # In-place updates: the caller's arrays are modified as well as returned.
    W -= learning_rate * grad_W
    b -= learning_rate * grad_b
    return W, b

Implement Cross-Entropy Loss.

In [27]:
def cross_entropy_loss(Y, output):
    """Mean cross-entropy between one-hot targets and predicted probabilities.

    Args:
        Y: One-hot target matrix, one example per row.
        output: Predicted probabilities with the same shape as ``Y``.

    Returns:
        The negated average, over rows, of ``sum(Y * log(output))``.
    """
    # The 1e-12 offset keeps the logarithm finite when a probability is exactly 0.
    log_probs = np.log(output + 1e-12)
    per_example = np.sum(Y * log_probs, axis=1)
    return -np.mean(per_example)

Train model.

In [28]:
def accuracy(X, Y):
    """Return the percentage of examples in ``X`` that are classified correctly.

    The prediction uses the notebook-level ``W`` and ``b`` as they are at
    call time.

    Args:
        X: Input matrix, one example per row.
        Y: One-hot target matrix aligned with ``X``.

    Returns:
        Accuracy as a percentage in [0, 100].
    """
    probabilities = forward_propagation(X, W, b)
    # The index of the largest entry on each row is the class: predicted on
    # the left, true (from the one-hot row) on the right.
    is_correct = np.argmax(probabilities, axis=1) == np.argmax(Y, axis=1)
    return np.mean(is_correct) * 100

# Baseline: accuracy of the freshly initialised parameters on the test set.
initial_accuracy = accuracy(test_X, test_Y_one_hot)
print(f"Initial accuracy: {initial_accuracy:.2f}%")

epochs = 500  # number of full passes over the training data
batch_size = 100  # number of examples used for each gradient step
learning_rate = 0.1

for epoch in range(epochs):
    # Draw a fresh random order each epoch and apply it to inputs and labels
    # alike, so every image stays paired with its own label.
    indices = np.arange(train_X.shape[0])
    np.random.shuffle(indices)
    train_X_shuffled = train_X[indices]
    train_Y_shuffled = train_Y_one_hot[indices]

    epoch_loss = 0
    num_batches = 0
    for start in range(0, train_X.shape[0], batch_size):
        end = start + batch_size
        # Slicing past the end is safe: the last batch is simply shorter when
        # the sample count is not a multiple of batch_size.
        X_batch = train_X_shuffled[start:end]
        Y_batch = train_Y_shuffled[start:end]

        # Forward pass, then one gradient-descent step on W and b.
        output = forward_propagation(X_batch, W, b)
        W, b = backward_propagation(X_batch, Y_batch, output, W, b, learning_rate)

        # The loss uses the probabilities computed before the update above.
        loss = cross_entropy_loss(Y_batch, output)
        epoch_loss += loss
        num_batches += 1

    # Average over the batches actually processed. Dividing by
    # `train_X.shape[0] // batch_size` undercounts when there is a trailing
    # partial batch and is zero when batch_size exceeds the sample count.
    epoch_loss /= num_batches
    print(f"Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss:.4f}")

final_accuracy = accuracy(test_X, test_Y_one_hot)
print(f"Final accuracy after training: {final_accuracy:.2f}%")

Initial accuracy: 8.37%
Epoch 1/500, Loss: 0.5363
Epoch 2/500, Loss: 0.3591
Epoch 3/500, Loss: 0.3312
Epoch 4/500, Loss: 0.3164
Epoch 5/500, Loss: 0.3070
Epoch 6/500, Loss: 0.3002
Epoch 7/500, Loss: 0.2949
Epoch 8/500, Loss: 0.2910
Epoch 9/500, Loss: 0.2874
Epoch 10/500, Loss: 0.2845
Epoch 11/500, Loss: 0.2819
Epoch 12/500, Loss: 0.2795
Epoch 13/500, Loss: 0.2777
Epoch 14/500, Loss: 0.2758
Epoch 15/500, Loss: 0.2743
Epoch 16/500, Loss: 0.2729
Epoch 17/500, Loss: 0.2717
Epoch 18/500, Loss: 0.2705
Epoch 19/500, Loss: 0.2693
Epoch 20/500, Loss: 0.2683
Epoch 21/500, Loss: 0.2675
Epoch 22/500, Loss: 0.2664
Epoch 23/500, Loss: 0.2655
Epoch 24/500, Loss: 0.2645
Epoch 25/500, Loss: 0.2641
Epoch 26/500, Loss: 0.2631
Epoch 27/500, Loss: 0.2624
Epoch 28/500, Loss: 0.2618
Epoch 29/500, Loss: 0.2612
Epoch 30/500, Loss: 0.2606
Epoch 31/500, Loss: 0.2601
Epoch 32/500, Loss: 0.2595
Epoch 33/500, Loss: 0.2591
Epoch 34/500, Loss: 0.2585
Epoch 35/500, Loss: 0.2582
Epoch 36/500, Loss: 0.2576
Epoch 37/500,