In [1]:
import pickle
import os
import pandas as pd
import numpy as np

In [2]:
# Locations of the pickled train/test splits, relative to the working directory.
train_file = "fii-nn-2025-homework-2/extended_mnist_train.pkl"
test_file = "fii-nn-2025-homework-2/extended_mnist_test.pkl"

# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open(train_file, "rb") as fp:
    train = pickle.load(fp)

with open(test_file, "rb") as fp:
    test = pickle.load(fp)

# Every record is an (image, label) pair; images are flattened to 1-D vectors.
train_data = [image.flatten() for image, _ in train]
train_labels = [label for _, label in train]

# The second element of each test record is not used.
test_data = [image.flatten() for image, _ in test]


In [3]:

# Seed NumPy's global RNG so the shuffle below (and later np.random calls in a
# fresh Restart & Run All) produce the same result on every run.
SEED = 42
np.random.seed(SEED)

# Apply one shared permutation so each sample stays aligned with its label.
shuffle_idx = np.random.permutation(len(train_data))
train_data = np.array(train_data)[shuffle_idx]
train_labels = np.array(train_labels)[shuffle_idx]

test_data = np.array(test_data)

print("Train samples: ", len(train_data))
print("Test samples: ", len(test_data))
# Use the first sample so the shape check also works on a one-sample set.
print("Image shape: ", train_data[0].shape)

Train samples:  60000
Test samples:  10000
Image shape:  (784,)


In [4]:
def normalize(data):
    """Return ``data`` divided by 255.0.

    Presumably used to map 8-bit pixel intensities into the [0, 1] range.
    The division produces a new object; ``data`` itself is left untouched.
    """
    max_pixel_value = 255.0
    return data / max_pixel_value

# Scale both splits and store them as float32.
# NOTE: this rebinds train_data/test_data, so re-running only this cell
# divides the already-scaled values by 255 again.
train_data = np.asarray(normalize(train_data), dtype=np.float32)
test_data = np.asarray(normalize(test_data), dtype=np.float32)

In [5]:
# Initialization: hyperparameters and model parameters
n_perceptrons = 10  # number of output units (used as the class count in training)
learning_rate = 0.008
batch_size = 64
epochs = 30
n_inputs = train_data.shape[1]

# Zero-mean Gaussian weights with variance 1 / n_inputs (fan-in scaling).
# NOTE(review): the original comment called this Xavier initialization; the
# Glorot/Xavier formula uses 2 / (fan_in + fan_out), so this is the fan-in-only
# variant.
weights = np.random.normal(0, np.sqrt(1 / n_inputs), (n_perceptrons, n_inputs)).astype(np.float32)
# One bias per output unit; sized from n_perceptrons so it always matches weights.
biases = np.zeros(n_perceptrons, dtype=np.float32)

print("Weights shape: ", weights.shape)


Weights shape:  (10, 784)


In [6]:
def softmax(z):
    """Compute the softmax of ``z`` along axis 0 without modifying ``z``.

    Parameters
    ----------
    z : array_like
        Scores. A 1-D vector is normalized as a whole; for a 2-D array each
        column is normalized independently (sums are taken over axis 0).

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``z`` whose entries along axis 0 sum to 1.
    """
    z = np.asarray(z)
    # Subtract the max along the normalized axis for numerical stability.
    # This builds a new array, so the caller's data is never mutated, and a
    # per-column max keeps every column stable instead of only the one that
    # holds the global maximum.
    shifted = z - np.max(z, axis=0, keepdims=True)
    exp_z = np.exp(shifted)
    return exp_z / exp_z.sum(axis=0, keepdims=True)


In [7]:
def one_hot(labels, n_classes):
    """Build a one-hot matrix for ``labels``.

    Returns a float array of shape ``(len(labels), n_classes)`` filled with
    zeros, except that row ``i`` holds a 1 at column ``labels[i]``.
    """
    encoded = np.zeros((len(labels), n_classes))
    # Iterating a 2-D array yields row views, so writing to a row updates `encoded`.
    for row, class_index in zip(encoded, labels):
        row[class_index] = 1
    return encoded

In [8]:
# Parallel version (disabled): Numba is not working with the current setup, so the
# implementation below is kept commented out for reference only.

# @njit(parallel=True, fastmath=True)
# def train_parallel(batch_data, batch_labels, weights, biases):
#     delta = np.array(np.zeros(weights.shape, dtype=np.float32) * n_perceptrons)
#     beta = np.array(np.zeros(biases.shape, dtype=np.float32) * n_perceptrons)
#     error_sum = np.float32(0.0)

#     for i in prange(n_perceptrons):
#         specific_batch_label = [1 if lable == i else -1 for lable in batch_labels]

#         for sample_index in prange(batch_size):
#             sample_v = batch_data[sample_index]
#             label = specific_batch_label[sample_index]

#             prediction = np.dot(weights[i], sample_v) + biases[i]
#             error = label - prediction
            
#             delta[i] += sample_v * error * learning_rate
#             beta[i] += error * learning_rate
#             error_sum += error
        
#     error_rate = error_sum / batch_size * n_perceptrons

#     return (delta, beta, error_rate)

In [9]:

def train(batch_data, batch_labels, weights, biases, lr=None):
    """Compute the softmax-regression parameter updates for one mini-batch.

    Parameters
    ----------
    batch_data : array_like, shape (n_samples, n_inputs)
        Input vectors, one per row.
    batch_labels : array_like of int, shape (n_samples,)
        Class index of every sample.
    weights : numpy.ndarray, shape (n_classes, n_inputs)
        Current weights; not modified.
    biases : numpy.ndarray, shape (n_classes,)
        Current biases; not modified.
    lr : float, optional
        Step size. Defaults to the notebook-level ``learning_rate``.

    Returns
    -------
    delta : numpy.ndarray (float32), same shape as ``weights``
        Update to add to the weights (already scaled by ``lr`` and summed
        over the batch).
    beta : numpy.ndarray (float32), same shape as ``biases``
        Update to add to the biases.
    loss_rate : float
        Mean cross-entropy loss over the batch (0.0 for an empty batch).
    """
    if lr is None:
        lr = learning_rate  # fall back to the notebook-level hyperparameter

    batch_data = np.asarray(batch_data)
    batch_labels = np.asarray(batch_labels)
    n_samples = len(batch_data)
    if n_samples == 0:
        # Nothing to learn from; avoid dividing by zero when averaging the loss.
        return (
            np.zeros_like(weights, dtype=np.float32),
            np.zeros_like(biases, dtype=np.float32),
            0.0,
        )

    # Forward pass for the whole batch at once: one row of class scores per sample.
    logits = batch_data @ weights.T + biases
    # Row-wise softmax, shifted by each row's max for numerical stability.
    logits = logits - logits.max(axis=1, keepdims=True)
    exp_logits = np.exp(logits)
    probs = exp_logits / exp_logits.sum(axis=1, keepdims=True)

    # error = one_hot(labels) - probs, built without materializing the one-hot matrix.
    rows = np.arange(n_samples)
    error = -probs.astype(np.float64)
    error[rows, batch_labels] += 1.0

    # For each perceptron, accumulate its error weighted by the input
    # (equivalent to summing np.outer(error_i, sample_i) over the batch).
    delta = (lr * (error.T @ batch_data)).astype(np.float32)
    beta = (lr * error.sum(axis=0)).astype(np.float32)

    # Cross-entropy of the true class; the epsilon guards against log(0).
    loss_rate = float(np.mean(-np.log(probs[rows, batch_labels] + 1e-10)))
    return delta, beta, loss_rate




In [10]:
def split(data, batch_size):
    """Split ``data`` into consecutive batches of at most ``batch_size`` items.

    Every batch has exactly ``batch_size`` items except possibly the last,
    which holds the remainder. Data shorter than ``batch_size`` yields a
    single batch, and empty data yields an empty list.

    Parameters
    ----------
    data : array_like
        Samples, indexed along the first axis.
    batch_size : int
        Maximum number of items per batch; must be positive.

    Returns
    -------
    list of numpy.ndarray
        The batches, in order.

    Raises
    ------
    ValueError
        If ``batch_size`` is not positive.
    """
    batch_size = int(batch_size)
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")
    data = np.asarray(data)
    # Slicing by a fixed stride keeps batches at the requested size; passing a
    # (float) section count to np.array_split truncated it and made batches
    # larger than batch_size.
    return [data[start:start + batch_size] for start in range(0, len(data), batch_size)]

In [11]:
# Mini-batch training: split the data once, then sweep all batches every epoch.
train_batches = split(train_data, batch_size)
train_label_batches = split(train_labels, batch_size)
print("Starting training...")

# Pair each data batch with its label batch up front; the pairing is the same every epoch.
paired_batches = list(zip(train_batches, train_label_batches))

for epoch in range(epochs):
    epoch_loss = np.float32(0.0)
    for batch_data, batch_labels in paired_batches:
        # train() returns the parameter updates and the batch's mean loss.
        delta, beta, loss = train(batch_data, batch_labels, weights, biases)
        weights, biases = weights + delta, biases + beta
        epoch_loss += loss
    # Average of the per-batch mean losses.
    epoch_loss /= len(train_batches)
    print(f"Epoch {epoch + 1} completed. Loss rate: {epoch_loss}")

Starting training...
Epoch 1 completed. Loss rate: 0.3778839707374573
Epoch 2 completed. Loss rate: 0.3052937388420105
Epoch 3 completed. Loss rate: 0.293049156665802
Epoch 4 completed. Loss rate: 0.286184161901474
Epoch 5 completed. Loss rate: 0.28150686621665955
Epoch 6 completed. Loss rate: 0.27799907326698303
Epoch 7 completed. Loss rate: 0.2752143442630768
Epoch 8 completed. Loss rate: 0.27291861176490784
Epoch 9 completed. Loss rate: 0.2709742486476898
Epoch 10 completed. Loss rate: 0.2692946791648865
Epoch 11 completed. Loss rate: 0.2678205966949463
Epoch 12 completed. Loss rate: 0.26651114225387573
Epoch 13 completed. Loss rate: 0.26533547043800354
Epoch 14 completed. Loss rate: 0.2642712891101837
Epoch 15 completed. Loss rate: 0.26330113410949707
Epoch 16 completed. Loss rate: 0.262410968542099
Epoch 17 completed. Loss rate: 0.26158979535102844
Epoch 18 completed. Loss rate: 0.2608288526535034
Epoch 19 completed. Loss rate: 0.2601205110549927
Epoch 20 completed. Loss rate: 0.2

In [12]:
def test_network(test_data, weights, biases):
    results = []
    for sample_v in test_data:
        predictions = np.array([])
        for i in range(n_perceptrons):
            prediction = np.dot(weights[i], sample_v) + biases[i]
            predictions = np.append(predictions, prediction)
        softmax_preds = softmax(predictions)
        argmax_pred = np.argmax(softmax_preds)
        #print("Predicted: ", argmax_pred, "Probability: ", softmax_preds[argmax_pred], "Proabilties: ", softmax_preds)
        results.append(argmax_pred)
    
    return results

In [13]:
# Predict a class for every test sample.
predictions = test_network(test_data, weights, biases)

# Submission columns: the sample's position in the test set and its predicted class.
predictions_csv = {
    "ID": list(range(len(predictions))),
    "target": list(predictions),
}

df = pd.DataFrame(predictions_csv)
df.to_csv("submission.csv", index=False)