In [5]:
# import packages
import pandas as pd
import numpy as np
from tqdm import tqdm
import os

from scipy.special import softmax as sf
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Data lives in a "data" folder next to the notebook's working directory.
PATH = os.getcwd()
DATAPATH = os.path.join(PATH, "data")

# CSV file backing each split (inputs "X_*" and labels "y_*").
filenames = dict(
    X_test="test_in - Copy.csv",
    X_train="train_in - Copy.csv",
    y_test="test_out - Copy.csv",
    y_train="train_out - Copy.csv",
)


def _read_split(key, **read_kwargs):
    """Read the header-less CSV registered under `key` in `filenames`."""
    csv_path = os.path.join(DATAPATH, filenames[key])
    return pd.read_csv(csv_path, header=None, **read_kwargs)


# Training split: inputs, labels (single "digit" column) and one-hot targets.
X_train = _read_split("X_train")
y_train = _read_split("y_train", names=["digit"])
expected_train = pd.get_dummies(y_train.digit).to_numpy()

# Test split: inputs and labels (single "digit" column).
X_test = _read_split("X_test")
y_test = _read_split("y_test", names=["digit"])

In [6]:
def softmax(x):
    """Return the softmax of `x` taken along axis 1.

    This delegates to scipy.special.softmax (imported as `sf`); scipy's
    implementation is used here due to issues with NaN's.
    """
    row_probabilities = sf(x, axis=1)
    return row_probabilities


def random_weights_gauss(shape, sigma):
    """Draw an array of `shape` from a normal distribution N(0, sigma).

    `sigma` is the standard deviation. Samples come from NumPy's global
    random state (`np.random`).
    """
    return np.random.normal(loc=0, scale=sigma, size=shape)


def append_one(X, axis=0):
    """Transpose `X` and append a strip of ones along `axis` of the transpose.

    For a 2-D `X` of shape (n, d):
      * axis=0 (default): a row of n ones is added below `X.T`,
        giving shape (d + 1, n).
      * axis=1: a column of d ones is added to the right of `X.T`,
        giving shape (d, n + 1).

    Note that the result is in the transposed orientation.
    """
    transposed = X.T
    if axis == 1:
        ones_shape = (X.shape[axis], 1)
    else:
        ones_shape = (1, X.shape[axis])
    ones_block = np.ones(shape=ones_shape)
    return np.append(transposed, ones_block, axis=axis)


def classify(weights, X=X_test, actf=softmax):
    """Return, for each row of `X`, the index of the highest activation.

    `X` is extended with a constant-one input via `append_one`, multiplied
    by `weights`, passed through `actf`, and the argmax along axis 1 of the
    activations is returned.
    """
    design = append_one(X).T
    activations = actf(np.dot(design, weights))
    return activations.argmax(axis=1)


def calc_error(weights, X=X_test, y=y_test, actf=softmax):
    """Sum of negative base-10 log activations of the labelled classes.

    For every row of `X` the activation in column `y.digit` is selected,
    its base-10 logarithm is taken, and the negated total is returned.
    Because the log is base 10, the value equals the natural-log version of
    the same sum divided by ln(10).
    """
    activations = actf(np.dot(append_one(X).T, weights))
    sample_idx = list(np.arange(activations.shape[0]))
    log_activations = np.log10(activations)
    # Pick, per sample, the log-activation of its labelled class.
    picked = log_activations[sample_idx, y.digit]
    return -picked.sum()


def calc_gradient(weights, X=X_train, expected=expected_train, actf=softmax):
    """Gradient of the loss with respect to `weights`.

    Computes `A @ (actf(A.T @ weights) - expected)`, where `A` is
    `append_one(np.array(X))`, i.e. the transposed inputs with a row of
    ones appended.

    Parameters
    ----------
    weights : array multiplied with the augmented inputs.
    X : inputs; converted with `np.array` before use.
    expected : target activations, subtracted element-wise from the output
        of `actf` (so it must match that output's shape).
    actf : activation applied to the raw scores.

    Returns
    -------
    Array with the same shape as `weights`.
    """
    # Build the augmented input matrix once; it is needed both for the
    # forward pass and for the final product.
    augmented = append_one(np.array(X))
    activations = actf(np.dot(augmented.T, weights))
    return np.dot(augmented, activations - expected)


def train_perceptron(
    n_loops=1000, eta=0.01, X=X_train, y=y_train, small_batch=False, batch_size=10
):
    """Train a single-layer softmax classifier with gradient descent.

    Parameters
    ----------
    n_loops : number of weight updates to perform.
    eta : learning rate multiplying each gradient step.
    X : training inputs (a DataFrame; `.iloc` is used in small-batch mode).
    y : training labels; must expose a `digit` column.
    small_batch : if True, each update uses a random sample of rows instead
        of the full training set.
    batch_size : number of rows drawn per update when `small_batch` is True.

    Returns
    -------
    (weights, acc_test, err) : the trained weights, the accuracy on the
    module-level test set (`X_test`, `y_test`), and `calc_error(weights)`
    evaluated with its default arguments.

    Also prints the final error and the training/testing accuracies.
    """
    expected = pd.get_dummies(y.digit).to_numpy()

    # Derive the weight shape from the data instead of hard-coding it:
    # one row per input feature plus one for the appended constant input,
    # one column per one-hot class.
    n_features = X.shape[1]
    n_classes = expected.shape[1]
    weights = random_weights_gauss(
        (n_features + 1, n_classes), np.sqrt(2 / (n_features + n_classes))
    )

    for _ in tqdm(range(n_loops)):
        if small_batch:
            # Only use a small batch of the input and expected output.
            # np.random.choice samples with replacement by default.
            idxs = np.random.choice(X.shape[0], batch_size)
            weights -= eta * calc_gradient(weights, X.iloc[idxs], expected[idxs])
        else:
            weights -= eta * calc_gradient(weights, X, expected)

    err = calc_error(weights)
    # Training accuracy is measured on the data that was actually trained on
    # (the X / y arguments), not on the module-level training set.
    acc_train = np.mean(classify(weights, X) == y.digit.to_numpy())
    acc_test = np.mean(classify(weights, X_test) == y_test.digit.to_numpy())
    print(f"Final error: {err}")
    print(
        f"Final accuraccy:\n\tOn training data: {acc_train:.3f}\n\tOn testing data: {acc_test:.3f}"
    )
    return weights, acc_test, err

# Train the perceptron `n` times from fresh random weights and report the
# mean test accuracy and mean error over all runs.
small_batch = False
n = 10
accs, errs = [], []
for i in range(n):
    weights, acc_test, err = train_perceptron(
        n_loops=1000,
        small_batch=small_batch,
        batch_size=100,
    )
    accs.append(acc_test)
    errs.append(err)

mean_acc = np.mean(accs)
mean_err = np.mean(errs)
print(f"Mean accuracy on test set after {n} runs is {mean_acc:.3f}")
print(f"Mean error on test set after {n} runs is {mean_err:.3f}")


100%|██████████| 1000/1000 [00:08<00:00, 122.56it/s]


Final error: 2379.494837290569
Final accuraccy:
	On training data: 1.000
	On testing data: 0.867


100%|██████████| 1000/1000 [00:08<00:00, 117.97it/s]


Final error: 2380.3194221282038
Final accuraccy:
	On training data: 1.000
	On testing data: 0.873


 12%|█▏        | 121/1000 [00:01<00:07, 112.15it/s]


KeyboardInterrupt: 