<a href="https://colab.research.google.com/github/Tensor-Reloaded/Neural-Networks-Template-2024/blob/main/Lab02/NumpyExcercises.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Assignment 2 - Căprioară Alina

### 1. Load the MNIST dataset

In [6]:
import numpy as np
from torchvision.datasets import MNIST

def download_mnist(is_train: bool):
    """Download one MNIST split and return it as NumPy arrays.

    The dataset transform converts every image to a NumPy array and
    flattens it, so each sample is a 1-D vector.

    Args:
        is_train: True selects the training split, False the test split.

    Returns:
        A ``(data, labels)`` tuple of NumPy arrays holding the flattened
        images and their labels, in dataset order.
    """
    def to_flat_array(image):
        # Turn the image yielded by the dataset into a flat vector.
        return np.array(image).flatten()

    dataset = MNIST(
        root='./data',
        transform=to_flat_array,
        download=True,
        train=is_train,
    )

    # Materialise the (image, label) pairs once, then split them by column.
    samples = list(dataset)
    flat_images = [image for image, _ in samples]
    targets = [label for _, label in samples]
    return np.array(flat_images), np.array(targets)


# Fetch the training and the test split (flattened images plus labels).
train_X, train_Y = download_mnist(is_train=True)
test_X, test_Y = download_mnist(is_train=False)

### 2. Normalize the data and convert the labels to one-hot-encoding

In [7]:
# Normalize the pixel intensities by dividing by 255.
train_X = np.asarray(train_X) / 255.0
test_X = np.asarray(test_X) / 255.0

def convert_labels(labels, classes=10):
    """One-hot encode integer class labels.

    Args:
        labels: 1-D sequence or array of class indices; values are cast
            to ``int``.
        classes: Number of classes, i.e. the width of the encoding.
            Defaults to 10.

    Returns:
        A float array of shape ``(len(labels), classes)`` that is all zeros
        except for a single 1 per row, placed in the column given by that
        row's label.

    Raises:
        IndexError: If any label lies outside ``[0, classes)``.
    """
    labels = np.array(labels).astype(int)

    # Check the range explicitly: fancy indexing would otherwise wrap
    # negative labels around and silently mark the wrong class.
    if labels.size and (labels.min() < 0 or labels.max() >= classes):
        raise IndexError(
            f"labels must lie in [0, {classes}); "
            f"got values in [{labels.min()}, {labels.max()}]"
        )

    matrix = np.zeros((labels.shape[0], classes))
    # Pair every row index with its label to set one entry per row.
    matrix[np.arange(labels.shape[0]), labels] = 1

    return matrix

# Replace the integer labels of both splits with their one-hot encoding.
train_Y, test_Y = convert_labels(train_Y), convert_labels(test_Y)

### 3. Train the perceptron for 50-500 epochs

In [None]:
# Seed NumPy's global RNG so the random weight initialisation is repeatable.
np.random.seed(50)
input_size, classes = train_X.shape[1], 10  # 784 input features

# Small random starting weights (one column per class) and zero biases.
W = 0.01 * np.random.randn(input_size, classes)
bias = np.zeros(classes)

def softmax(z):
    """Apply a row-wise softmax to a 2-D array of scores.

    Args:
        z: 2-D array-like of scores; each row is normalised on its own.

    Returns:
        An array of the same shape whose rows are non-negative and sum to 1.
    """
    # Subtract each row's maximum before exponentiating so np.exp cannot
    # overflow; the shift cancels out in the ratio below.
    row_max = np.max(z, axis=1, keepdims=True)
    exponentials = np.exp(z - row_max)
    row_totals = np.sum(exponentials, axis=1, keepdims=True)
    return exponentials / row_totals
    

def cross_entropy_loss(y, y_pred):
    """Return the cross-entropy between targets and predictions.

    The result is summed over every element; it is not averaged.

    Args:
        y: Target array (one-hot rows where this file calls it).
        y_pred: Predicted probabilities with the same shape as ``y``.

    Returns:
        The scalar ``-sum(y * log(y_pred + 1e-8))``.
    """
    stabilizer = 1e-8  # keeps the logarithm finite when a probability is 0
    log_probabilities = np.log(y_pred + stabilizer)
    return -np.sum(y * log_probabilities)


def gradient_descent(X, y, W, b, learning_rate=0.01):
    """Perform one gradient-descent update on a single batch.

    Args:
        X: Batch of inputs, one sample per row; combined with the
            parameters as ``np.dot(X, W) + b``.
        y: Targets with the same shape as the model output (one-hot rows
            where this file calls it).
        W: Weight matrix.
        b: Bias vector, broadcast across the rows of the batch.
        learning_rate: Step size applied to both updates.

    Returns:
        A ``(W, b, loss)`` tuple: the updated weights and bias as new
        arrays (the arrays passed in are left untouched) and the batch's
        cross-entropy loss, computed from the predictions made before the
        update.
    """
    # Forward propagation.
    y_pred = softmax(np.dot(X, W) + b)

    # Prediction minus target; drives both parameter updates below.
    error = y_pred - y

    # Build new arrays instead of using `-=`: an in-place update would
    # silently modify the caller's arrays and raises a casting error when
    # the parameters have an integer dtype.
    W = W - learning_rate * np.dot(X.transpose(), error)
    b = b - learning_rate * np.sum(error, axis=0)

    loss = cross_entropy_loss(y, y_pred)

    return W, b, loss


def train(train_X, train_Y, W, b, epochs=100, batch_size=100, learning_rate=0.01):
    """Train the model with mini-batch gradient descent.

    The samples are visited in their stored order, in consecutive batches,
    once per epoch. After each epoch the mean of the per-batch losses is
    printed.

    Args:
        train_X: Training inputs, one sample per row.
        train_Y: Training targets, aligned row by row with ``train_X``.
        W: Initial weight matrix.
        b: Initial bias vector.
        epochs: Number of passes over the training data.
        batch_size: Number of samples per batch; the last batch may be
            smaller.
        learning_rate: Step size forwarded to ``gradient_descent``.

    Returns:
        The ``(W, b)`` pair obtained after the last update.
    """
    sample_count = train_X.shape[0]
    num_batches = np.ceil(sample_count / batch_size).astype(int)

    for epoch in range(epochs):
        epoch_loss = 0

        for batch_index in range(num_batches):
            first = batch_index * batch_size
            # Slicing clips at the end of the data, so the final batch is
            # simply shorter when the sizes do not divide evenly.
            window = slice(first, first + batch_size)

            W, b, batch_loss = gradient_descent(
                train_X[window], train_Y[window], W, b, learning_rate
            )
            epoch_loss += batch_loss

        # Report the average loss per batch for this epoch.
        epoch_loss /= num_batches
        print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}")

    return W, b


# Fit the model on the training split and keep the learned parameters.
W, bias = train(
    train_X,
    train_Y,
    W,
    bias,
    epochs=100,
    batch_size=100,
    learning_rate=0.01,
)


def accuracy(X, y, W, b):
    """Return the fraction of samples whose predicted class is correct.

    Args:
        X: Inputs, one sample per row.
        y: Targets with one row per sample; the true class is the index of
            the row's largest entry.
        W: Weight matrix.
        b: Bias vector.

    Returns:
        The mean of the element-wise comparison between predicted and true
        class indices, a value between 0 and 1.
    """
    probabilities = softmax(np.dot(X, W) + b)
    # The predicted class is the column with the highest probability.
    hits = np.argmax(probabilities, axis=1) == np.argmax(y, axis=1)
    return np.mean(hits)


# Measure how well the trained parameters classify the test split.
test_accuracy = accuracy(X=test_X, y=test_Y, W=W, b=bias)
print(f"Testing Data Accuracy: {test_accuracy * 100:.2f}%")