In [2]:
import time

import numpy as np
import matplotlib.pyplot as plt

In [3]:
def import_data(filename):
    return np.loadtxt(filename)

def plot_char(char):
    rep = char[1:257].reshape((16, 16))
    plt.imshow(rep, cmap='viridis', interpolation='none')
    plt.title(f'The number {int(char[0])}')
    plt.show()

In [4]:
def kernal(a, b):
    return (np.dot(a, b) ** 3)

def my_sign(x):
    return -1.0 if x <= 0.0 else 1.0

def clear_glb_cls(data):
    return np.zeros((3, len(data)))

def class_pred_k(dat, pat, cl):
    return np.sum([cl[i] * kernal(pat, dat[i, 1:257]) for i in range(len(cl))])

In [5]:
def train_gen(dat, glb_cls):
    mistakes = 0
    for i in range(len(dat)):
        val = dat[i, 0]
        preds = [class_pred_k(dat, dat[i, 1:257], glb_cls[j]) for j in range(3)]
        maxc = -1e16
        maxi = -1
        for j in range(3):
            y = 1.0 if val == j + 1 else -1.0
            if y * preds[j] <= 0:
                glb_cls[j, i] -= my_sign(preds[j])
            if preds[j] > maxc:
                maxc = preds[j]
                maxi = j + 1
        if maxi != val:
            mistakes += 1
    return mistakes

def test_classifiers(dat, test_dat, glb_cls):
    mistakes = 0
    for i in range(len(test_dat)):
        val = test_dat[i, 0]
        preds = [class_pred_k(dat, test_dat[i, 1:257], glb_cls[j]) for j in range(3)]
        maxc = -1e9
        maxi = -1
        for j in range(3):
            y = 1.0 if val == j + 1 else -1.0
            if preds[j] > maxc:
                maxc = preds[j]
                maxi = j + 1
        if maxi != val:
            mistakes += 1
    return mistakes / len(test_dat)

In [13]:
def demo(train, test):
    glb_cls = clear_glb_cls(train)
    for i in range(3):
        start_time = time.time()
        train_errors = train_gen(train, glb_cls)
        end_time = time.time()
        print(f"Training - epoch {i + 1} required {end_time - start_time:.6f} with {train_errors} mistakes out of {len(train)} items.")
        
        start_time = time.time()
        test_error = test_classifiers(train, test, glb_cls)
        end_time = time.time()
        print(f"Testing - epoch {i + 1} required {end_time - start_time:.6f} with a test error of {test_error * 100:.3f}%.")
    return glb_cls

In [14]:
dtrain123 = import_data("dtrain123.dat")
dtest123 = import_data("dtest123.dat")

glb_cls = demo(dtrain123, dtest123)

Training - epoch 1 required 2.440072 with 15 mistakes out of 329 items.
Testing - epoch 1 required 2.700847 with a test error of 3.509%.
Training - epoch 2 required 1.964080 with 3 mistakes out of 329 items.
Testing - epoch 2 required 2.759072 with a test error of 2.851%.
Training - epoch 3 required 2.049710 with 0 mistakes out of 329 items.
Testing - epoch 3 required 2.375020 with a test error of 2.632%.


In [15]:
np.sum(np.abs(glb_cls) > 1)

0