In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os

from tqdm import tqdm
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay


In [2]:
# The CSV files are expected in a "data" folder under the current working
# directory.
PATH = os.getcwd()
DATAPATH = os.path.join(PATH, "data")

# Logical dataset name -> CSV file name inside DATAPATH.
filenames = dict(
    X_test="test_in - Copy.csv",
    X_train="train_in - Copy.csv",
    y_test="test_out - Copy.csv",
    y_train="train_out - Copy.csv",
)

# Import all data files. They are read with header=None, so the first row is
# treated as data; the label files get their single column named "digit".
X_train = pd.read_csv(
    os.path.join(DATAPATH, filenames["X_train"]),
    header=None,
)
y_train = pd.read_csv(
    os.path.join(DATAPATH, filenames["y_train"]),
    header=None,
    names=["digit"],
)
X_test = pd.read_csv(
    os.path.join(DATAPATH, filenames["X_test"]),
    header=None,
)
y_test = pd.read_csv(
    os.path.join(DATAPATH, filenames["y_test"]),
    header=None,
    names=["digit"],
)

In [42]:
from scipy.special import softmax as sf
def softmax(x):
    """Apply SciPy's softmax along axis 1 of `x`.

    For a 2-D input this normalises every row independently.
    """
    row_wise = sf(x, axis=1)
    return row_wise


def random_weights_gauss(shape, sigma):
    """Draw an array of the given shape from a zero-mean normal distribution.

    Args:
        shape: Output shape forwarded to NumPy.
        sigma: Standard deviation of the distribution.

    Returns:
        Array of samples taken from NumPy's global random state.
    """
    return np.random.normal(loc=0, scale=sigma, size=shape)

def random_weights(shape, bounds=(-1, 1)):
    """Draw an array of the given shape from a uniform distribution.

    Args:
        shape: Output shape forwarded to NumPy.
        bounds: Sequence unpacked into the positional arguments of
            ``np.random.uniform``, normally ``(low, high)``. The default is
            an immutable tuple so it cannot be modified between calls; lists
            are still accepted.

    Returns:
        Array of samples taken from NumPy's global random state.
    """
    return np.random.uniform(*bounds, size=shape)


In [67]:
def append_one(X, axis=0):
    """Transpose `X` and append a slice of ones along `axis`.

    With the default ``axis=0`` and an ``(n, d)`` input, the result is the
    ``(d + 1, n)`` array ``X.T`` with an extra row of ones at the bottom.
    With ``axis=1`` a column of ones is added to the right of ``X.T``
    instead, giving ``(d, n + 1)``.

    Note that the returned array is transposed relative to `X`.
    """
    # The block of ones must match X.T in every dimension except `axis`.
    if axis == 1:
        ones_shape = (X.shape[axis], 1)
    else:
        ones_shape = (1, X.shape[axis])
    ones_block = np.ones(shape=ones_shape)
    return np.append(X.T, ones_block, axis=axis)

def classify(weights, X=X_test, actf=softmax):
    """Predict a class index for every row of `X`.

    Args:
        weights: Weight matrix whose first dimension equals the number of
            columns of `X` plus one (the extra row multiplies the appended
            ones).
        X: Input samples, one per row. Defaults to the `X_test` object that
            existed when this function was defined.
        actf: Activation applied to the linear scores.

    Returns:
        For each sample, the index of the largest activation.
    """
    inputs_with_ones = append_one(X)  # transposed input plus a row of ones
    scores = np.dot(inputs_with_ones.T, weights)
    activations = actf(scores)
    return activations.argmax(axis=1)

def calc_error(weights, X=X_test, y=y_test, actf=softmax):
    """Summed negative log10 activation of the labelled class.

    Args:
        weights: Weight matrix, including the row for the appended ones.
        X: Input samples, one per row. Defaults to the `X_test` object that
            existed when this function was defined.
        y: Object with a ``digit`` attribute holding one class index per
            sample; defaults to the `y_test` object from definition time.
        actf: Activation applied to the linear scores.

    Returns:
        The negated sum, over all samples, of ``log10`` of the activation in
        the column given by that sample's label.

    NOTE(review): the logarithm is base 10, so this value equals the
    natural-log cross-entropy divided by ln(10) -- confirm the scaling is
    intended.
    """
    activations = actf(np.dot(append_one(X).T, weights))
    sample_idx = list(np.arange(activations.shape[0]))
    log_activations = np.log10(activations)
    # Pair every row index with that row's label column.
    picked = log_activations[sample_idx, y.digit]
    return -picked.sum()

# One-hot encode the training labels: one column per distinct digit value.
expected = pd.get_dummies(y_train["digit"]).to_numpy()
def calc_gradient(weights, X=X_train, expected=expected, actf=softmax):
    """Gradient with respect to `weights`, summed over all samples in `X`.

    Computes ``A @ (actf(A.T @ weights) - expected)`` where ``A`` is the
    transposed input with a row of ones appended. With the default softmax
    activation and one-hot `expected`, this is the gradient of the summed
    natural-log cross-entropy.

    Args:
        weights: Weight matrix, including the row for the appended ones.
        X: Input samples, one per row. Defaults to the `X_train` object that
            existed when this function was defined.
        expected: Target activations, one row per sample; defaults to the
            module-level `expected` array from definition time.
        actf: Activation applied to the linear scores.

    Returns:
        Array with the same shape as `weights`. The contributions of the
        individual samples are summed, not averaged.
    """
    # Build the augmented input once and reuse it for both matrix products,
    # instead of converting and appending twice per call.
    augmented = append_one(np.array(X))
    activations = actf(np.dot(augmented.T, weights))
    return np.dot(augmented, activations - expected)

def train_perceptron(n_loops, eta, seed=None):
    """Train a single-layer classifier with full-batch gradient descent.

    Training uses the module-level `X_train` / `y_train`. After the last
    update, the error and accuracy obtained with the default arguments of
    `calc_error` and `classify` are printed.

    Args:
        n_loops: Number of gradient-descent updates to perform.
        eta: Learning rate multiplying each gradient.
        seed: Optional seed for NumPy's global random state, applied before
            the weights are drawn so that a run can be reproduced. When
            ``None`` (the default) the random state is left untouched.

    Returns:
        The trained weight matrix of shape ``(n_features + 1, n_classes)``.
    """
    if seed is not None:
        np.random.seed(seed)

    expected = pd.get_dummies(y_train.digit).to_numpy()

    # Take the layer sizes from the data rather than hard-coding them.
    n_features = X_train.shape[1]
    n_classes = expected.shape[1]

    # One extra weight row for the appended ones; the standard deviation is
    # sqrt(2 / (n_inputs + n_outputs)).
    weights = random_weights_gauss(
        (n_features + 1, n_classes),
        np.sqrt(2 / (n_features + n_classes)),
    )

    for _ in tqdm(range(n_loops)):
        weights -= eta * calc_gradient(weights, X_train, expected)

    acc = y_test.digit.eq(classify(weights)).sum() / len(y_test.digit)
    print(f"Final error: {calc_error(weights)}")
    print(f"Final accuraccy: {acc:.3f}")
    return weights

# Run 1000 full-batch updates with a learning rate of 0.001.
weights = train_perceptron(n_loops=1000, eta=0.001)

  0%|          | 0/1000 [00:00<?, ?it/s]

100%|██████████| 1000/1000 [00:15<00:00, 63.82it/s]

Final error: 304.41567826850525
Final accuraccy: 0.890



