In [2]:
%pip install numpy
%pip install torchvision

Collecting numpy
  Downloading numpy-2.1.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
Downloading numpy-2.1.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.0 MB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.0/16.0 MB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m:01[0m01[0m
[?25hInstalling collected packages: numpy
Successfully installed numpy-2.1.2
Note: you may need to restart the kernel to use updated packages.
Collecting torchvision
  Downloading torchvision-0.20.0-cp312-cp312-manylinux1_x86_64.whl.metadata (6.1 kB)
Collecting torch==2.5.0 (from torchvision)
  Downloading torch-2.5.0-cp312-cp312-manylinux1_x86_64.whl.metadata (28 kB)
Collecting pillow!=8.3.*,>=5.3.0 (from torchvision)
  Downloading pillow-11.0.0-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (9.1 kB)
Collecting filelock (from torch==2.5.0->torchvision)
  Downloading filelock-3.16.1-py3-none-any.whl.metadata (2.9

In [1]:
import numpy as np
from torchvision.datasets import MNIST

In [2]:
def download_mnist(is_train: bool):
    dataset = MNIST(root='./data', transform=lambda x: np.array(x).flatten(),
                    download=True,
                    train=is_train)

    mnist_data = []
    mnist_labels = []
    for image, label in dataset:
        mnist_data.append(image)
        mnist_labels.append(label)

    return mnist_data, mnist_labels

In [3]:
train_X, train_Y = download_mnist(True)
test_X, test_Y = download_mnist(False)

In [4]:
# Get the train set
training_set = np.array(train_X)

# Transform the labels to one-hot-encoding
np_arr_train_y = np.array(train_Y)
dim_labels = len(np_arr_train_y)
labels = np.zeros((dim_labels, np_arr_train_y.max() + 1))
labels[np.arange(np_arr_train_y.size), np_arr_train_y] = 1

In [76]:
def train(training_set, labels, weights, beta):
    weights_counter = weights.shape[0]
    a = 0.05
    d = np.random.randn(weights_counter, 10)
    b = np.random.randn(10)
    z = np.zeros(10)
    
    for (sample, label) in zip(training_set, labels):
        # for i in range(10):
        #     z[i] = weights[:, i] @ sample + beta[i]

        z = weights.transpose() @ sample + beta

        # Normalize data to make the operations, because otherwise the results are too big
        max_z = z.max()
        tmp = np.exp(z - max_z).sum()
        y = np.exp(z - max_z) / tmp

        # for i in range(10):
            # d[:,i] = d[:,i] + (label[i] - y[i]) * sample * a
            # b[i] = b[i] + a * (label[i] - y[i])
        
        d = d + np.array([sample]).transpose() @ np.array([label - y]) * a
        b = b + a * (label - y)

    return d, b

In [77]:
weights = np.random.randn(784, 10)
beta = np.random.randn(10)

batches = np.split(training_set, len(training_set) / 100)
batches_labels = np.split(labels, len(labels) / 100)

def create_batches(training_set, labels, batch_size=100):
    batches = []
    labels_batches = []
    
    for i in range(0, len(training_set), batch_size):
        batches.append(training_set[i:i + batch_size])
        labels_batches.append(labels[i:i + batch_size])
        
    return batches, labels_batches

In [78]:
from concurrent.futures import ProcessPoolExecutor

for epoch in range(60):

    p = np.random.permutation(len(training_set))
    training_set, labels = training_set[p], labels[p]
    batches, labels_batches = create_batches(training_set, labels)
    
    with ProcessPoolExecutor() as executor:
        futures = {
            executor.submit(train, batch, labels_batches[i], weights, beta): i
            for i, batch in enumerate(batches)
        }
        
        d_list = [np.zeros_like(weights) for _ in batches]
        b_list = [np.zeros(10) for _ in batches]
        
        for future in futures:
            idx = futures[future]
            d, b = future.result()
            d_list[idx] += d
            b_list[idx] += b

    weights += sum(d_list)
    beta += sum(b_list)

In [70]:
# Test set
test_set = np.array(test_X)
# Transform the labels to one-hot-encoding
np_arr_test_y = np.array(test_Y)
dim_labels_test = len(np_arr_test_y)
labels_test = np.zeros((dim_labels_test, np_arr_test_y.max() + 1))
labels_test[np.arange(np_arr_test_y.size), np_arr_test_y] = 1


In [79]:
# Test the model

def calculate_accuracy():
    correct_results = 0
    
    for (sample, label) in zip(test_set, labels_test):
        z = weights.transpose() @ sample + beta
        max_z = z.max()
        tmp = np.exp(z - max_z).sum()
        y = np.exp(z - max_z) / tmp
    
        test_value = (y.max() == y).astype(float)
        if (label == test_value).all():
            correct_results += 1

    return (correct_results / len(test_set)) * 100

In [80]:
print(f"Accuracy is: {calculate_accuracy()}%")

Accuracy is: 90.12%
