In [1197]:
import pickle
import os
import pandas as pd
import numpy as np

In [1198]:
# Locations of the pickled train / test splits (relative to the working directory).
train_file = "fii-nn-2025-homework-2/extended_mnist_train.pkl"
test_file = "fii-nn-2025-homework-2/extended_mnist_test.pkl"

# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only open files that come from a trusted source.
with open(train_file, "rb") as train_fp:
    train = pickle.load(train_fp)

with open(test_file, "rb") as test_fp:
    test = pickle.load(test_fp)

# Every record unpacks into an (image, label) pair; each image is flattened
# into a 1-D vector before being stored.
train_pairs = [(image.flatten(), label) for image, label in train]
train_data = [pixels for pixels, _ in train_pairs]
train_labels = [label for _, label in train_pairs]

# The second element of each test record is not used.
test_data = [image.flatten() for image, _ in test]


In [1199]:

# Shuffle images and labels with one shared permutation so that every image
# keeps its own label. A dedicated seeded generator makes the ordering
# reproducible after "Restart Kernel -> Run All".
SHUFFLE_SEED = 42
shuffle_idx = np.random.default_rng(SHUFFLE_SEED).permutation(len(train_data))
train_data = np.array(train_data)[shuffle_idx]
train_labels = np.array(train_labels)[shuffle_idx]

test_data = np.array(test_data)

print("Train samples: ", len(train_data))
print("Test samples: ", len(test_data))
# Per-sample shape taken from the stacked array, so this also works when
# fewer than two training samples are present.
print("Image shape: ", train_data.shape[1:])

Train samples:  60000
Test samples:  10000
Image shape:  (784,)


In [1200]:
def normalize(data):
    """Divide pixel intensities by 255.

    Parameters
    ----------
    data : number or array supporting division by a float
        Raw pixel values; presumably in the 0-255 range (not checked here).

    Returns
    -------
    The result of ``data / 255.0``.
    """
    max_pixel_value = 255.0
    scaled = data / max_pixel_value
    return scaled

# Rescale both splits and store them as float64 arrays.
# Caution: this overwrites train_data / test_data, so running the cell a
# second time divides by 255 again.
train_data = np.asarray(normalize(train_data), dtype=np.float64)
test_data = np.asarray(normalize(test_data), dtype=np.float64)

In [1201]:
# Initialization
n_perceptrons = 10            # number of output units (also the one-hot width of the labels)
learning_rate = 0.01
learning_rate_decay = 0.001   # subtracted from the learning rate every `step_size` epochs
step_size = 5
batch_size = 32
epochs = 30
n_inputs = train_data.shape[1]

# Gaussian initialisation with std = sqrt(1 / n_inputs) (fan-in scaling).
# Glorot/Xavier normal would instead use sqrt(2 / (n_inputs + n_perceptrons)).
# A dedicated seeded generator keeps the starting weights reproducible.
INIT_SEED = 42
init_rng = np.random.default_rng(INIT_SEED)
weights = init_rng.normal(0, np.sqrt(1 / n_inputs), (n_inputs, n_perceptrons)).astype(np.float64)
# One bias per output unit; sized from n_perceptrons so it always matches `weights`.
biases = np.zeros(n_perceptrons, dtype=np.float64)

print("Weights shape: ", weights.shape)
print("Biases shape: ", biases.shape)


Weights shape:  (784, 10)
Biases shape:  (10,)


In [1202]:
def softmax(z):
    """Numerically stable softmax along the last axis.

    Parameters
    ----------
    z : array-like
        Logits. For a 2-D ``(batch, classes)`` array each row is normalised
        independently; a 1-D vector is normalised as a whole.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``z`` whose entries along the last axis
        are non-negative and sum to 1.
    """
    z = np.asarray(z)
    # Subtracting the per-row maximum leaves the result unchanged but keeps
    # np.exp from overflowing on large logits.
    shifted = z - np.max(z, axis=-1, keepdims=True)
    exp_z = np.exp(shifted)
    return exp_z / np.sum(exp_z, axis=-1, keepdims=True)

def softmax_test(z):
    """Softmax of a single logit vector, normalised over all of its elements.

    Parameters
    ----------
    z : array-like
        Logits of one sample.

    Returns
    -------
    numpy.ndarray
        Probabilities with the same shape as ``z`` that sum to 1
        (an empty array is returned unchanged for empty input).
    """
    z = np.asarray(z)
    if z.size == 0:
        return np.exp(z)
    # Shift by the global maximum so the largest exponent is exp(0) = 1;
    # without it large logits overflow to inf and the result becomes NaN.
    shifted = z - np.max(z)
    exp_z = np.exp(shifted)
    return exp_z / np.sum(exp_z)


def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, evaluated element-wise by NumPy.

    Parameters
    ----------
    x : number or numpy.ndarray
        Input value(s).

    Returns
    -------
    Value(s) in the interval [0, 1] with the same shape as ``x``.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator


In [1203]:
def one_hot(batch, output_size):
    """One-hot encode a sequence of integer class labels.

    Parameters
    ----------
    batch : sequence of int
        Class index of every sample.
    output_size : int
        Number of columns of the encoding (number of classes).

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(batch), output_size)`` that is zero
        everywhere except for a 1 at ``[i, batch[i]]``.
    """
    labels = np.asarray(batch)
    encoded = np.zeros((len(labels), output_size))
    if len(labels):
        # Pair every row index with its label and set those cells in one go.
        row_ids = np.arange(len(labels))
        encoded[row_ids, labels] = 1
    return encoded

In [1204]:
def cross_entropy(target, y):
    """Cross-entropy between one-hot targets and predicted probabilities.

    The value is summed over every element of the inputs (it is not averaged
    over the batch).

    Parameters
    ----------
    target : numpy.ndarray
        Target distribution, same shape as ``y``.
    y : numpy.ndarray
        Predicted probabilities.

    Returns
    -------
    Scalar loss ``-sum(target * log(y + 1e-10))``.
    """
    # The small constant keeps log() finite when a probability is exactly 0.
    log_probs = np.log(y + 1e-10)
    return -np.sum(target * log_probs)


In [1205]:
def split(data, batch_size):
    """Split ``data`` along its first axis into consecutive mini-batches.

    The number of batches is ``ceil(len(data) / batch_size)``, so no batch is
    larger than ``batch_size``. When the length is not an exact multiple, the
    samples are spread almost evenly across the batches by ``np.array_split``.

    Parameters
    ----------
    data : sequence or numpy.ndarray
        Samples to split.
    batch_size : int
        Maximum number of samples per batch; must be positive.

    Returns
    -------
    list of numpy.ndarray
        The batches in order; an empty list when ``data`` is empty.

    Raises
    ------
    ValueError
        If ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")
    n_samples = len(data)
    if n_samples == 0:
        return []
    # Ceiling division with integers: an explicit integer section count avoids
    # relying on NumPy truncating a float, which produced oversized batches and
    # failed outright when there were fewer samples than batch_size.
    n_batches = int(-(-n_samples // batch_size))
    return np.array_split(data, n_batches)

In [None]:
# Batch Training
batches = split(train_data, batch_size)
label_batches = split(train_labels, batch_size)
label_batches = [one_hot(batch, n_perceptrons) for batch in label_batches]
print("Batch shape: ", batches[0].shape)
print("Batch label shape: ", label_batches[0].shape)
print("Weights shape: ", weights.shape)
print("Biases shape: ", biases.shape)
print("Starting training...")

# Decay a local copy of the learning rate so the configured `learning_rate`
# is left untouched and re-running this cell starts from the same rate.
# Note: `weights` and `biases` are still updated in place, so re-running the
# cell continues training from the current parameters.
current_lr = learning_rate

for epoch in range(epochs):

    epoch_loss = np.float64(0.0)
    # Step decay: lower the rate by a fixed amount every `step_size` epochs
    # (skipping epoch 0).
    if epoch and epoch % step_size == 0:
        current_lr = current_lr - learning_rate_decay
        print("Lower lr: ", current_lr)

    for x, target in zip(batches, label_batches):
        # Forward pass: linear scores followed by a row-wise softmax.
        z = np.dot(x, weights) + biases
        y = softmax(z)

        loss = cross_entropy(target, y)

        # For softmax + cross-entropy the gradient w.r.t. the scores is
        # (y - target); gradients are summed (not averaged) over the batch.
        error = y - target
        gradient_w = x.T @ error
        gradient_b = np.sum(error, axis=0)

        weights -= gradient_w * current_lr
        biases -= gradient_b * current_lr

        epoch_loss += loss

    # Reported value is the mean over batches of the per-batch summed loss.
    print(f"Epoch {epoch + 1} completed. Loss: {epoch_loss / len(batches)}")

Batch shape:  (32, 784)
Batch label shape:  (32, 10)
Weights shape:  (784, 10)
Biases shape:  (10,)
Starting training...
Epoch 1 completed. Loss: 11.744640682781226
Epoch 2 completed. Loss: 9.72997327060997
Epoch 3 completed. Loss: 9.364912445927366
Epoch 4 completed. Loss: 9.158546265342887
Epoch 5 completed. Loss: 9.016673100871905
Lower lr:  0.009000000000000001
Epoch 6 completed. Loss: 8.808890530951034
Epoch 7 completed. Loss: 8.72675425057386
Epoch 8 completed. Loss: 8.661367022275137
Epoch 9 completed. Loss: 8.605598379978245
Epoch 10 completed. Loss: 8.557034422337631
Lower lr:  0.008
Epoch 11 completed. Loss: 8.419747101345724
Epoch 12 completed. Loss: 8.380726610354127
Epoch 13 completed. Loss: 8.34900546643942
Epoch 14 completed. Loss: 8.3203810648433
Epoch 15 completed. Loss: 8.294150359296623
Lower lr:  0.007
Epoch 16 completed. Loss: 8.180374458221177
Epoch 17 completed. Loss: 8.15585317246548
Epoch 18 completed. Loss: 8.136531543671849
Epoch 19 completed. Loss: 8.1187722

In [1207]:
def test_network(test_data, weights, biases):
    results = []
    for sample_v in test_data:
        predictions = np.array([])
        for i in range(n_perceptrons):
            prediction = np.dot(weights[:, i], sample_v) + biases[i]
            predictions = np.append(predictions, prediction)
        softmax_preds = softmax_test(predictions)
        argmax_pred = np.argmax(softmax_preds)
        results.append(argmax_pred)
    
    return results

In [1208]:
# Classify the test set; the row position of each sample is used as its ID.
predictions = test_network(test_data, weights, biases)

predictions_csv = {
    "ID": list(range(len(predictions))),
    "target": list(predictions),
}

# Write the two-column submission file without the DataFrame index.
df = pd.DataFrame(predictions_csv)
df.to_csv("submission.csv", index=False)