<a href="https://colab.research.google.com/github/deliablendea/Neural-Networks-Ioana-Delia-Blendea-2025/blob/main/Assignment2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Assignment 2 (15 points) - Ioana-Delia Blendea

In [None]:
import pickle
import numpy as np
import pandas as pd

In [None]:
# Absolute locations of the pickled train/test splits (Kaggle input layout).
train_file = "/kaggle/input/fii-nn-2025-homework-2/extended_mnist_train.pkl"
test_file = "/kaggle/input/fii-nn-2025-homework-2/extended_mnist_test.pkl"


def _read_pickle(path):
    """Return the object deserialized from the pickle file at ``path``."""
    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file, so only point this at files from a trusted source.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# NOTE: the name `train` is rebound later in this notebook by `def train(...)`;
# re-run this cell before re-running the data-preparation cell.
train = _read_pickle(train_file)
test = _read_pickle(test_file)

In [None]:
def _unpack_samples(samples):
    """Split an iterable of ``(image, label)`` pairs into two parallel lists.

    Every image is flattened with its own ``flatten()`` method before being
    stored; labels are kept exactly as they appear in the pair.
    """
    flat_images, labels = [], []
    for image, label in samples:
        flat_images.append(image.flatten())
        labels.append(label)
    return flat_images, labels


train_data, train_labels = _unpack_samples(train)
test_data, test_labels = _unpack_samples(test)

# Stack the flattened images into 2-D arrays and divide by 255.0
# (presumably rescaling 8-bit pixel intensities to [0, 1] -- TODO confirm
# against the dataset).
train_x = np.array(train_data) / 255.0
test_x = np.array(test_data) / 255.0

In [None]:
def convert_labels(labels, classes = 10):
    """One-hot encode a sequence of integer class labels.

    Parameters
    ----------
    labels : array-like of shape (n,)
        Class indices. Values are cast to ``int`` before use.
    classes : int, default 10
        Width of the encoding, i.e. the number of distinct classes. The
        default keeps the original 10-class behaviour.

    Returns
    -------
    numpy.ndarray of shape (n, classes)
        Float matrix with a single 1 per row, in the column given by that
        row's label, and 0 everywhere else.

    Raises
    ------
    IndexError
        If a label is outside ``[-classes, classes)``.

    Notes
    -----
    Labels are used directly as NumPy column indices, so a negative label
    counts from the last column instead of raising.
    """
    labels = np.asarray(labels).astype(int)
    sample_count = labels.shape[0]

    matrix = np.zeros((sample_count, classes))
    # Pair row i with column labels[i] and set exactly that cell.
    matrix[np.arange(sample_count), labels] = 1

    return matrix

# One-hot encode the train and test label lists with the same encoder.
train_y, test_y = (
    convert_labels(label_list) for label_list in (train_labels, test_labels)
)

In [None]:
# Seed NumPy's global RNG so the weight initialisation below -- and every
# later np.random call, such as the per-epoch shuffle in training -- gives
# the same result on each fresh Restart & Run All.
SEED = 42
np.random.seed(SEED)

# Model dimensions: one weight per (input feature, output class) pair.
NUM_FEATURES = 784  # presumably 28 x 28 flattened pixels -- TODO confirm
NUM_CLASSES = 10

# Small random weights and zero biases as the starting parameters.
w = np.random.randn(NUM_FEATURES, NUM_CLASSES) * 0.01
b = np.zeros(NUM_CLASSES)

def softmax(z):
    """Apply softmax along axis 1 of a 2-D array of scores.

    Each row's maximum is subtracted before exponentiating. This does not
    change the mathematical result, but it keeps ``np.exp`` from overflowing
    on large scores.

    Returns an array of the same shape as ``z`` whose rows each sum to 1.
    """
    row_max = np.max(z, axis = 1, keepdims = True)
    exponentials = np.exp(z - row_max)
    normaliser = exponentials.sum(axis = 1, keepdims = True)
    return exponentials / normaliser

def cross_entropy_loss(y, y_pred):
    """Cross-entropy between targets ``y`` and predictions ``y_pred``.

    The summed value ``-y * log(y_pred + eps)`` over all entries is divided
    by the number of rows in ``y``, giving the average loss per sample.
    """
    eps = 1e-8  # keeps log() finite when a predicted probability is exactly 0
    sample_count = y.shape[0]

    log_probs = np.log(y_pred + eps)
    total = np.sum(y * log_probs)

    return -total / sample_count

def gradient_descent(x, y, w, b, learning_rate = 0.01):
    """Perform one gradient-descent update of the softmax classifier.

    Parameters
    ----------
    x : batch of inputs, one sample per row.
    y : targets for the batch, one row per sample.
    w, b : current weights and biases.
    learning_rate : step size applied to the batch-averaged gradients.

    Returns
    -------
    tuple ``(w, b, loss)``
        The updated parameters, and the cross-entropy loss of the
        predictions made *before* the update. The inputs ``w`` and ``b`` are
        not modified in place.
    """
    batch_rows = x.shape[0]

    # Forward pass: class probabilities under the current parameters.
    probabilities = softmax(np.dot(x, w) + b)

    # Prediction minus target; for softmax followed by cross-entropy this is
    # the gradient of the loss with respect to the pre-softmax scores.
    residual = probabilities - y

    weight_step = learning_rate * np.dot(x.T, residual)
    bias_step = learning_rate * np.sum(residual, axis = 0)

    new_w = w - weight_step / batch_rows
    new_b = b - bias_step / batch_rows

    return new_w, new_b, cross_entropy_loss(y, probabilities)

In [None]:
def train(train_x, train_y, w, b, epochs = 100, batch_size = 128, learning_rate = 0.15):
    """Fit the softmax classifier with shuffled mini-batch gradient descent.

    Parameters
    ----------
    train_x : numpy.ndarray
        Training inputs, one sample per row.
    train_y : numpy.ndarray
        Training targets, one row per sample (same row count as ``train_x``).
    w, b : numpy.ndarray
        Initial weights and biases.
    epochs : int
        Number of full passes over the training data.
    batch_size : int
        Maximum number of samples per update; the last batch of an epoch may
        be smaller.
    learning_rate : float
        Step size forwarded to ``gradient_descent``.

    Returns
    -------
    tuple ``(w, b)``
        The parameters after the final update.

    Raises
    ------
    ValueError
        If ``batch_size`` is not positive, if there are no training samples,
        or if ``train_x`` and ``train_y`` have different row counts.

    Notes
    -----
    * The loss printed per epoch is the per-sample average: every batch loss
      is weighted by its batch size, so a short final batch is not
      over-represented.
    * Shuffling uses NumPy's global RNG (``np.random.permutation``).
    * This function's name rebinds the module-level ``train`` that holds the
      loaded training pickle; reload the data before re-running any cell that
      iterates over that dataset.
    """
    num_samples = train_x.shape[0]

    # Fail loudly instead of silently training nothing or dividing by zero.
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size}")
    if num_samples == 0:
        raise ValueError("train_x contains no samples")
    if train_y.shape[0] != num_samples:
        raise ValueError(
            f"train_x has {num_samples} rows but train_y has {train_y.shape[0]}"
        )

    for epoch in range(epochs):
        weighted_loss = 0.0

        # Visit the samples in a fresh random order every epoch.
        permutation = np.random.permutation(num_samples)
        shuffled_x = train_x[permutation]
        shuffled_y = train_y[permutation]

        for start in range(0, num_samples, batch_size):
            # Slicing past the end is clipped, so the last batch may be short.
            x_batch = shuffled_x[start:start + batch_size]
            y_batch = shuffled_y[start:start + batch_size]

            w, b, batch_loss = gradient_descent(x_batch, y_batch, w, b, learning_rate)
            weighted_loss += batch_loss * x_batch.shape[0]

        epoch_loss = weighted_loss / num_samples
        print(f"Epoch {epoch + 1} / {epochs}, Loss: {epoch_loss:.4f}")

    return w, b

# Run training. `w` and `b` are rebound to the trained parameters, so
# re-running this statement continues from the previous result rather than
# from the initial weights.
w, b = train(
    train_x,
    train_y,
    w,
    b,
    epochs = 100,
    batch_size = 128,
    learning_rate = 0.15,
)

def accuracy(x, y, w, b):
    """Fraction of rows in ``x`` classified correctly by the model ``(w, b)``.

    The predicted class of a row is the position of its largest softmax
    probability; the true class is the position of the largest entry in the
    matching row of ``y``.
    """
    probabilities = softmax(np.dot(x, w) + b)
    hits = np.argmax(probabilities, axis = 1) == np.argmax(y, axis = 1)
    return np.mean(hits)

# Score the trained model against the labels stored with the test split.
# NOTE(review): if the competition ships placeholder test labels, this number
# is not meaningful -- confirm against the dataset description.
test_accuracy = accuracy(test_x, test_y, w, b)
print(f"Testing Data Accuracy: {test_accuracy * 100:.2f}%")

# Class probabilities for every test row, then the most probable class each.
test_scores = np.dot(test_x, w) + b
final_predictions_probs = softmax(test_scores)
predictions = final_predictions_probs.argmax(axis = 1)

In [None]:
# Build the competition submission: one row per prediction, with its position
# in `predictions` as "ID" and the predicted class as "target".
predictions_csv = {
    "ID": list(range(len(predictions))),
    "target": list(predictions),
}

df = pd.DataFrame(predictions_csv)
df.to_csv("submission.csv", index=False)