In [1]:
import pickle
import os
import pandas as pd
import numpy as np

In [2]:
# Locations of the pickled train / test splits inside the Kaggle input mount.
train_file = "/kaggle/input/fii-nn-2025-homework-2/extended_mnist_train.pkl"
test_file = "/kaggle/input/fii-nn-2025-homework-2/extended_mnist_test.pkl"


def _load_pickle(path):
    """Return the object deserialized from the pickle file at ``path``."""
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Load order matches the original cell: train first, then test.
train = _load_pickle(train_file)
test = _load_pickle(test_file)

In [3]:
# Split the (image, label) pairs of the training set into two parallel lists.
# Each image is flattened to a 1-D vector and divided by 255.0
# (presumably mapping 8-bit pixel values into [0, 1] -- confirm against the data).
train_data, train_labels = [], []
for pixels, target in train:
    train_labels.append(target)
    train_data.append(pixels.flatten() / 255.0)

In [4]:
# Same preprocessing as for the training images: flatten and divide by 255.0.
# The second element of each test pair is not used.
test_data = [pixels.flatten() / 255.0 for pixels, _ in test]

In [5]:
import numpy as np

# --- Model dimensions -------------------------------------------------------
num_inputs = 784   # length of one flattened image (presumably 28 x 28 -- confirm)
num_outputs = 10   # number of classes

# --- Optimisation hyperparameters -------------------------------------------
initial_lr = 0.75  # learning rate at epoch 0
decay = 0.0005     # decay coefficient of the learning-rate schedule
epochs = 2000      # number of full passes over the training set

# --- Training matrices ------------------------------------------------------
X = np.array(train_data)    # one row per training sample
y = np.array(train_labels)  # class label per sample, used as an index below
m = len(train_data)         # number of training samples

# One-hot targets: row i of Y is row y[i] of the identity matrix,
# i.e. all zeros except a single 1 in column y[i].
Y = np.eye(num_outputs)[y]

# --- Parameter initialisation -----------------------------------------------
# Seeding the global NumPy RNG makes the random weights reproducible.
np.random.seed(42)
W = 0.01 * np.random.randn(num_inputs, num_outputs)  # small random weights
b = np.zeros(num_outputs)                            # biases start at zero

def softmax(z, axis=-1):
    """Numerically stable softmax along ``axis``.

    Parameters
    ----------
    z : array_like
        Logits. May be a single 1-D vector or a batch of any rank >= 1.
    axis : int, optional
        Axis holding the class scores. Defaults to the last axis, which for
        the 2-D ``(samples, classes)`` batches used in this notebook is the
        same axis the original ``axis=1`` implementation normalised over.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``z`` whose entries along ``axis`` are
        non-negative and sum to 1.
    """
    z = np.asarray(z)
    # Subtracting the per-slice maximum leaves the result unchanged but keeps
    # every exponent <= 0, so np.exp cannot overflow for large logits.
    exp_z = np.exp(z - np.max(z, axis=axis, keepdims=True))
    return exp_z / np.sum(exp_z, axis=axis, keepdims=True)

# Full-batch gradient descent on the softmax cross-entropy objective.
for epoch in range(epochs):
    # Learning-rate schedule: the step size shrinks as the epoch count grows.
    learning_rate = initial_lr / (1 + decay * epoch)

    # Forward pass: class probabilities for every training sample.
    Z = np.dot(X, W) + b
    Y_hat = softmax(Z)

    # Mean cross-entropy over the batch; the 1e-8 offset keeps log() finite
    # when a predicted probability is exactly zero.
    loss = -np.sum(Y * np.log(Y_hat + 1e-8)) / m

    # Backward pass: gradient of the mean loss w.r.t. the logits,
    # then w.r.t. the weights and the biases.
    dZ = (Y_hat - Y) / m
    dW = np.dot(X.T, dZ)
    db = dZ.sum(axis=0)

    # In-place parameter update.
    W -= learning_rate * dW
    b -= learning_rate * db

    # Report progress only on every 50th epoch.
    if epoch % 50:
        continue
    print(f"Epoch {epoch}, Loss: {loss:.4f}, lr: {learning_rate:.4f}")

# Evaluate the fitted parameters on the data they were trained on.
Z_train = X.dot(W) + b
Y_hat_train = softmax(Z_train)
# The predicted class is the one with the highest probability in each row.
train_preds = np.argmax(Y_hat_train, axis=1)
# Fraction of training samples whose predicted class equals the true label.
train_accuracy = np.mean(train_preds == y)
# The accuracy was previously computed but never shown; report it so the
# result of the evaluation is visible when the notebook is run.
print(f"Train accuracy: {train_accuracy:.4f}")

Epoch 0, Loss: 2.3054, lr: 0.7500
Epoch 50, Loss: 0.4159, lr: 0.7317
Epoch 100, Loss: 0.3648, lr: 0.7143
Epoch 150, Loss: 0.3421, lr: 0.6977
Epoch 200, Loss: 0.3285, lr: 0.6818
Epoch 250, Loss: 0.3192, lr: 0.6667
Epoch 300, Loss: 0.3123, lr: 0.6522
Epoch 350, Loss: 0.3069, lr: 0.6383
Epoch 400, Loss: 0.3025, lr: 0.6250
Epoch 450, Loss: 0.2989, lr: 0.6122
Epoch 500, Loss: 0.2958, lr: 0.6000
Epoch 550, Loss: 0.2932, lr: 0.5882
Epoch 600, Loss: 0.2909, lr: 0.5769
Epoch 650, Loss: 0.2889, lr: 0.5660
Epoch 700, Loss: 0.2870, lr: 0.5556
Epoch 750, Loss: 0.2854, lr: 0.5455
Epoch 800, Loss: 0.2839, lr: 0.5357
Epoch 850, Loss: 0.2826, lr: 0.5263
Epoch 900, Loss: 0.2813, lr: 0.5172
Epoch 950, Loss: 0.2802, lr: 0.5085
Epoch 1000, Loss: 0.2792, lr: 0.5000
Epoch 1050, Loss: 0.2782, lr: 0.4918
Epoch 1100, Loss: 0.2773, lr: 0.4839
Epoch 1150, Loss: 0.2764, lr: 0.4762
Epoch 1200, Loss: 0.2756, lr: 0.4688
Epoch 1250, Loss: 0.2749, lr: 0.4615
Epoch 1300, Loss: 0.2741, lr: 0.4545
Epoch 1350, Loss: 0.2735

In [6]:
# Run the trained model on the test images.
X_test = np.array(test_data)       # one row per test sample
Z_test = np.dot(X_test, W) + b     # logits
Y_hat_test = softmax(Z_test)       # class probabilities
# Predicted class = index of the largest probability in each row.
predictions = Y_hat_test.argmax(axis=1)

In [7]:
# Prepare the submission for the competition: one row per test sample, with
# the sample's position in `predictions` as "ID" and the predicted class as
# "target".
predictions_csv = {
    "ID": list(range(len(predictions))),
    "target": list(predictions),
}

# Write the table without the DataFrame index so only the two columns appear.
df = pd.DataFrame(predictions_csv)
df.to_csv("submission.csv", index=False)