In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os

from functools import lru_cache

In [3]:
PATH = os.getcwd()
DATAPATH = os.path.join(PATH, "data")
filenames = {
    "X_test": "test_in - Copy.csv",
    "X_train": "train_in - Copy.csv",
    "y_test": "test_out - Copy.csv",
    "y_train": "train_out - Copy.csv",
}


# Import all data files
X_train = pd.read_csv(os.path.join(DATAPATH, filenames["X_train"]), header=None)
y_train = pd.read_csv(
    os.path.join(DATAPATH, filenames["y_train"]), header=None, names=["digit"]
)
X_test = pd.read_csv(os.path.join(DATAPATH, filenames["X_test"]), header=None)
y_test = pd.read_csv(
    os.path.join(DATAPATH, filenames["y_test"]), header=None, names=["digit"]
)

In [11]:
def actf(x):
    return np.tanh(x)

def dactf(x):
    return np.cosh(x)**-2

def softmax(x): 
    return np.exp(x)/(np.exp(x)).sum(axis=0)

# def dsoftmax(x): return x

@lru_cache()
def sigmoid(x):
    return (1 + np.exp(-x))**-1

@lru_cache()
def dsigmoid(x):
    return sigmoid(x) * (1 - sigmoid(x))

def random_weights(shape, bounds=[-1, 1]):
    return pd.DataFrame(np.random.uniform(*bounds, size=shape))


In [26]:
expected = np.zeros((len(X_train), 10))
for i, digit in enumerate(y_train.digit):
    expected[i, digit] = 1

def append_one(X):
    return pd.concat([X, pd.DataFrame(np.ones(X.shape[0]), columns=[256])], axis=1)

def grd_desc(inputs: pd.DataFrame, expected: pd.DataFrame, weights: pd.DataFrame):
    gradient = pd.DataFrame(np.zeros(weights.shape))
    w_x = append_one(inputs) @ weights
    gradient += 2 * (w_x.applymap(sigmoid) - expected) * w_x.applymap(dsigmoid) * append_one(X_train)
    return gradient

def classify(weights, X=X_test):
    y_out = (append_one(X) @ weights).applymap(sigmoid)
    return y_out.apply(np.argmax, axis=1)

def mse(weights, X, expected):
    return (((append_one(X) @ weights).applymap(sigmoid) - expected)**2).sum(axis=1).sum()

In [30]:
def train_perceptron(eta = 0.01, n_loops=1000, 
                     X: pd.DataFrame = X_train, y: pd.DataFrame= y_train,
                     show: bool = True):
    expected = np.zeros((len(X), 10))
    for i, digit in enumerate(y.digit):
        expected[i, digit] = 1

    weights = random_weights((X.shape[1] + 1, 10))
    for epoch in range(n_loops):
        weights -= eta*grd_desc(X, expected, weights)
        if show and epoch % (n_loops//10) == 0:
            print(f"Epoch {epoch:4}, mse: {mse(weights, X, expected):.3f}")

    return weights

weights = train_perceptron(eta=0.1, n_loops=100)

y_out = classify(weights, X_train)
acc = sum(y_out.eq(y_train.digit))/len(y_out)
print(f"accuraccy on train data: {acc:.3f}")

y_out = classify(weights, X_test)
acc = sum(y_out.eq(y_test.digit))/len(y_out)
print(f"accuraccy on test data: {acc:.3f}")

Epoch    0, mse: 8639.207
Epoch   10, mse: 6685.533
Epoch   20, mse: 5567.197
Epoch   30, mse: 4675.576
Epoch   40, mse: 4383.138
Epoch   50, mse: 4153.446
Epoch   60, mse: 3912.412
Epoch   70, mse: 3707.518
Epoch   80, mse: 3516.507
Epoch   90, mse: 3308.314
accuraccy on train data: 0.063
accuraccy on test data: 0.083
