In [31]:
import numpy as np
from torchvision.datasets import MNIST


def download_mnist(is_train: bool):
    """Load one MNIST split as flat pixel arrays with one-hot labels.

    The dataset is fetched into ``./data`` (downloaded when necessary).

    Args:
        is_train: ``True`` selects the training split, ``False`` the test split.

    Returns:
        A ``(images, labels)`` pair of equally long Python lists. Each image
        is the flattened NumPy array of one picture; each label is a list of
        ten ints holding a single ``1`` at the index of the digit class.
    """

    def to_flat_array(picture):
        # torchvision hands the raw picture to the transform; flatten it
        return np.array(picture).flatten()

    split = MNIST(
        root="./data",
        train=is_train,
        download=True,
        transform=to_flat_array,
    )

    flat_images, one_hot_labels = [], []
    for pixels, digit in split:
        flat_images.append(pixels)
        one_hot_labels.append([1 if cls == digit else 0 for cls in range(10)])

    return flat_images, one_hot_labels


# Fetch both splits: images as lists of flat arrays, labels as one-hot lists.
train_X, train_Y = download_mnist(True)
test_X, test_Y = download_mnist(False)

# normalize data: divide the raw pixel values by 255.
# np.longdouble is the portable name of the extended-precision float type;
# np.float128 is only defined on some platforms and raises AttributeError
# elsewhere.
train_X = np.array(train_X, dtype=np.longdouble) / 255
test_X = np.array(test_X, dtype=np.longdouble) / 255

# number of training samples, used later to derive the batch count
trainset_size = len(train_X)

In [40]:
# --- optimisation settings ---
learn_rate = 1e-3  # step size applied to every mini-batch update
epochs = 50        # number of passes over the training set
batch_size = 100   # target number of samples per mini-batch
running = True     # NOTE(review): not referenced in the visible cells

# --- model dimensions ---
image_size = 28 * 28  # length of one flattened input vector (784)
output_size = 10      # number of output classes

# Print NumPy arrays in full instead of summarising long ones with "...".
np.set_printoptions(threshold=np.inf)


def softmax(weighted_sum, axis=None):
    """Turn raw scores into non-negative values that sum to one.

    The maximum score is subtracted before exponentiating. This leaves the
    mathematical result unchanged but keeps ``exp`` from overflowing to
    ``inf`` (which would turn the output into ``nan``) for large scores.

    Args:
        weighted_sum: Array-like of raw scores.
        axis: Axis along which to normalise. ``None`` (the default)
            normalises over all elements, so a 1-D score vector yields a
            single distribution.

    Returns:
        An extended-precision (``np.longdouble``) array with the shape of
        ``weighted_sum``; an empty input is returned as an empty array.
    """
    # np.longdouble is the portable spelling of np.float128, which does not
    # exist on every platform.
    scores = np.asarray(weighted_sum, dtype=np.longdouble)
    if scores.size == 0:
        return scores

    shifted = scores - scores.max(axis=axis, keepdims=True)
    exps = np.exp(shifted)
    return exps / exps.sum(axis=axis, keepdims=True)


def cross_entropy(prob, target):
    """Cross-entropy, in bits (base-2 logarithm), of ``prob`` against ``target``.

    Computes ``-target . log2(prob)``.

    Args:
        prob: Array-like of predicted probabilities.
        target: Array-like of target weights (e.g. a one-hot vector).

    Returns:
        The dot product described above in extended precision. When both
        arguments are 1-D vectors of the same length, classes whose target
        is zero contribute exactly 0 even if their probability is 0; a
        zero probability on a class with non-zero target still yields
        ``inf``.
    """
    # np.longdouble is the portable spelling of np.float128.
    prob = np.asarray(prob, dtype=np.longdouble)
    target = np.asarray(target)

    # log2(0) is -inf by design here, so silence the divide warning.
    with np.errstate(divide="ignore"):
        log_prob = np.log2(prob)

    # 0 * -inf evaluates to nan; by convention a class with zero target adds
    # nothing to the loss. Only done for the plain vector/vector case, where
    # the dot product pairs elements one to one.
    if target.ndim == 1 and target.shape == prob.shape:
        log_prob = np.where(target == 0, 0, log_prob)

    return np.negative(target).dot(log_prob)


# def train(trainset, labels, weights, bias):
#     # iterative version
#     global learn_rate

#     w = np.zeros((image_size, output_size), dtype=np.float128)
#     b = np.zeros((output_size), dtype=np.float128)

#     for input, label in zip(trainset, labels):
#         classification = softmax(input.dot(weights) + bias)
#         error = label - classification

#         w += learn_rate * np.atleast_2d(input).T.dot(np.atleast_2d(error))
#         b += learn_rate * error

#     return w, b


def train(trainset, labels, weights, bias, lr=None):
    """Compute the parameter update of one mini-batch for softmax regression.

    Args:
        trainset: 2-D array of inputs, one sample per row.
        labels: 2-D array of targets, one row per sample, matching the
            number of output columns of ``weights``.
        weights: Current weight matrix (input features x outputs).
        bias: Current bias vector (one entry per output).
        lr: Step size. ``None`` (the default) uses the module-level
            ``learn_rate``.

    Returns:
        ``(dw, db)``: the increments to add to ``weights`` and ``bias``,
        already scaled by the step size.
    """
    step = learn_rate if lr is None else lr

    inputs = np.asarray(trainset)
    # Extended precision (np.longdouble is the portable name of np.float128).
    logits = np.asarray(inputs.dot(weights) + bias, dtype=np.longdouble)

    # Row-wise softmax for the whole batch at once. Subtracting each row's
    # maximum leaves the result unchanged but prevents exp() from
    # overflowing to inf (and the probabilities from becoming nan).
    logits = logits - logits.max(axis=1, keepdims=True)
    exps = np.exp(logits)
    classification = exps / exps.sum(axis=1, keepdims=True)

    error = step * (np.asarray(labels) - classification)

    # error.sum(axis=0) adds up the per-sample rows inside NumPy rather than
    # with Python's builtin sum().
    return inputs.T.dot(error), error.sum(axis=0)


# Parameters start at zero. np.longdouble is the portable name of the
# extended-precision float type (np.float128 is missing on some platforms).
w = np.zeros((image_size, output_size), dtype=np.longdouble)
b = np.zeros(output_size, dtype=np.longdouble)

# Array views of the training data. train_X / train_Y themselves are never
# rebound, so re-running this cell always starts from the same inputs.
features = np.asarray(train_X)
targets = np.asarray(train_Y)

# At least one batch, so a dataset smaller than batch_size still trains.
n_batches = max(1, trainset_size // batch_size)

# range() leaves the `epochs` setting intact for later cells and re-runs.
for _ in range(epochs):
    # shuffle data: draw a fresh random sample order every epoch and slice
    # batches by index instead of copying and re-zipping the whole dataset
    order = np.random.permutation(trainset_size)

    for batch_idx in np.array_split(order, n_batches):
        dw, db = train(features[batch_idx], targets[batch_idx], w, b)

        w += dw
        b += db

In [41]:
def softmax(weighted_sum, axis=None):
    """Turn raw scores into non-negative values that sum to one.

    The maximum score is subtracted before exponentiating. This leaves the
    mathematical result unchanged but keeps ``exp`` from overflowing to
    ``inf`` (which would turn the output into ``nan``) for large scores.

    Args:
        weighted_sum: Array-like of raw scores.
        axis: Axis along which to normalise. ``None`` (the default)
            normalises over all elements, so a 1-D score vector yields a
            single distribution.

    Returns:
        An extended-precision (``np.longdouble``) array with the shape of
        ``weighted_sum``; an empty input is returned as an empty array.
    """
    # np.longdouble is the portable spelling of np.float128, which does not
    # exist on every platform.
    scores = np.asarray(weighted_sum, dtype=np.longdouble)
    if scores.size == 0:
        return scores

    shifted = scores - scores.max(axis=axis, keepdims=True)
    exps = np.exp(shifted)
    return exps / exps.sum(axis=axis, keepdims=True)

def test(testset, labels, weights, bias):
    correct = 0

    for input, label in zip(testset, labels):
        classification = softmax(input.dot(weights) + bias)

        if label[classification.argmax()] == 1:
            correct += 1

    print(f"Got {correct} / {len(testset)} correct answers")
    return correct / len(testset)


# Evaluate the trained parameters `w` and `b` on the test split and print the
# accuracy fraction returned by `test` (which also prints its own summary).
print(test(test_X, test_Y, w, b))

Got 9247 / 10000 correct answers
0.9247
