In [1]:
import gzip
import pickle
from datetime import datetime
from typing import Tuple
import re
import numpy as np
from numpy import ndarray

In [2]:
_DATASET_CACHE = None


def load_data() -> Tuple[Tuple[ndarray, ndarray], Tuple[ndarray, ndarray], Tuple[ndarray, ndarray]]:
    """
    Load the three data splits stored in ``mnist.pkl.gz`` (looked up in the working directory).

    The archive is decompressed and unpickled on the first call only; every later call
    returns the very same objects, so callers must treat them as read-only.

    :return: (train_set, valid_set, test_set) exactly as stored in the pickle; per the
    annotation each split is a pair of arrays
    """
    global _DATASET_CACHE
    if _DATASET_CACHE is None:
        # NOTE: unpickling can execute arbitrary code - only use an archive from a trusted source.
        with gzip.open("mnist.pkl.gz", "rb") as fd:
            # `encoding` tells pickle how to decode 8-bit strings pickled by Python 2
            train_set, valid_set, test_set = pickle.load(fd, encoding="latin")
        _DATASET_CACHE = (train_set, valid_set, test_set)
    return _DATASET_CACHE

load_data()

((array([[0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         ...,
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.]], dtype=float32),
  array([5, 0, 4, ..., 8, 4, 8], dtype=int64)),
 (array([[0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         ...,
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.]], dtype=float32),
  array([3, 8, 6, ..., 5, 6, 8], dtype=int64)),
 (array([[0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         ...,
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.]], dtype=float32),
  array([7, 2, 1, ..., 4, 5, 6], dtype=int64)))

In [3]:
def get_training_data() -> ndarray:
    """
    :return: the first element of the training split returned by ``load_data``
    (the training inputs, without their labels)
    """
    training_split, _validation_split, _test_split = load_data()
    return training_split[0]


get_training_data()

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [4]:
def get_one_hot_training_labels() -> ndarray:
    """
    Encode the training labels as rows filled with -1 that hold a single 1 in the
    column given by the label.

    The number of rows follows the number of labels that were loaded instead of a
    fixed 50000, so the function also works on a training split of another size.

    :return: ndarray(number of labels, 10) containing only -1 and 1
    """
    labels = load_data()[0][1]
    sample_count = labels.shape[0]
    one_hot_labels = np.full((sample_count, 10), -1)
    # one indexed assignment: row i receives a 1 in column labels[i]
    one_hot_labels[np.arange(sample_count), labels] = 1
    return one_hot_labels


get_one_hot_training_labels()

array([[-1, -1, -1, ..., -1, -1, -1],
       [ 1, -1, -1, ..., -1, -1, -1],
       [-1, -1, -1, ..., -1, -1, -1],
       ...,
       [-1, -1, -1, ..., -1,  1, -1],
       [-1, -1, -1, ..., -1, -1, -1],
       [-1, -1, -1, ..., -1,  1, -1]])

In [5]:
def blur_image(image: ndarray) -> ndarray:
    """
    Blur an image with a 3x3 weighted average.

    Every output pixel is ``(8 * pixel + sum of its neighbours) / 16``, where the
    neighbours are the up to eight adjacent pixels; positions outside the image
    contribute nothing, so border pixels come out slightly darker.

    All values are read from the untouched input. (Reading from the array that is
    being overwritten would mix already blurred pixels into their neighbours.)

    :param image: ndarray whose first two axes are rows and columns, e.g. ndarray(28, 28);
    it is not modified
    :return: blurred image with the same shape and dtype as the input
    """
    source = np.asarray(image)
    rows, cols = source.shape[0], source.shape[1]
    # Accumulate in floating point so integer images neither overflow nor lose the division;
    # the result is cast back to the input dtype at the end.
    work_dtype = source.dtype if np.issubdtype(source.dtype, np.floating) else np.float64
    # Zero border of width 1 around rows and columns only, so shifted windows never leave the array.
    pad_width = ((1, 1), (1, 1)) + ((0, 0),) * (source.ndim - 2)
    padded = np.pad(source.astype(work_dtype), pad_width, mode="constant")

    total = padded[1:rows + 1, 1:cols + 1] * 8  # centre pixel weighs as much as all neighbours
    for d_row in (-1, 0, 1):
        for d_col in (-1, 0, 1):
            if d_row == 0 and d_col == 0:
                continue
            total += padded[1 + d_row:1 + d_row + rows, 1 + d_col:1 + d_col + cols]
    return (total / 16).astype(source.dtype)

In [6]:
def augment_data(data: ndarray, labels: ndarray) -> Tuple[ndarray, ndarray]:
    """
    Double a data set by appending a blurred copy of every image.

    :param data: one flattened image per row; each row is reshaped to 28x28 for blurring
    and flattened back to 784 values afterwards
    :param labels: labels matching ``data`` row for row
    :return: (original rows followed by the blurred rows, labels followed by themselves)
    """
    blurred_rows = []
    for flat_image in data:
        blurred = blur_image(flat_image.reshape([28, 28]))
        blurred_rows.append(blurred.reshape((784, )))
    blurred_set = np.array(blurred_rows)

    augmented_data = np.concatenate((data, blurred_set), axis=0)
    augmented_labels = np.concatenate((labels, labels), axis=0)
    return augmented_data, augmented_labels

# augment_data(get_training_data(), get_one_hot_training_labels())

In [7]:
memo = None


def get_data_and_labels(augment: bool = False) -> Tuple[ndarray, ndarray]:
    """
    Return the training data with its one-hot labels, building them at most once
    per value of ``augment``.

    The results are kept in the module level ``memo`` as a dict keyed by the flag, so a
    request for augmented data is never answered with a cached plain data set (or the
    other way round). Setting ``memo`` to ``None`` elsewhere simply clears the cache.

    :param augment: if True, the result of ``augment_data`` is returned instead of the
    plain training data
    :return: (data, labels)
    """
    global memo
    if not isinstance(memo, dict):
        memo = {}
    key = bool(augment)
    if key not in memo:
        data, labels = get_training_data(), get_one_hot_training_labels()
        if key:
            data, labels = augment_data(data, labels)
        memo[key] = (data, labels)
    return memo[key]

get_data_and_labels()

(array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]], dtype=float32),
 array([[-1, -1, -1, ..., -1, -1, -1],
        [ 1, -1, -1, ..., -1, -1, -1],
        [-1, -1, -1, ..., -1, -1, -1],
        ...,
        [-1, -1, -1, ..., -1,  1, -1],
        [-1, -1, -1, ..., -1, -1, -1],
        [-1, -1, -1, ..., -1,  1, -1]]))

In [8]:
def get_weights(random: bool = False) -> ndarray:
    """
    Create the initial weight matrix of the 10 perceptrons.

    :param random: if True, weights are drawn uniformly from [-0.5, 0.5); otherwise
    all weights are zero
    :return: ndarray(10, 785) - one row per perceptron, 784 input weights plus 1 bias
    """
    shape = (10, 785)
    if not random:
        return np.zeros(shape, dtype="float64")
    return np.random.rand(*shape) - 0.5


get_weights(True).shape

(10, 785)

In [9]:
def shuffle_data_and_labels(data: ndarray, labels: ndarray) -> Tuple[ndarray, ndarray]:
    """
    Reorder data and labels with one shared random permutation.

    :param data: samples, indexed along the first axis
    :param labels: labels, one per sample
    :return: (data, labels) as new arrays in which row i of both still belongs together
    """
    sample_count = len(data)
    assert sample_count == len(labels), "Data is not consistent with labels"
    order = np.random.permutation(sample_count)
    shuffled_data = data[order]
    shuffled_labels = labels[order]
    return shuffled_data, shuffled_labels

shuffle_data_and_labels(*get_data_and_labels())

(array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]], dtype=float32),
 array([[-1, -1, -1, ..., -1, -1, -1],
        [-1, -1, -1, ..., -1,  1, -1],
        [-1, -1, -1, ..., -1, -1, -1],
        ...,
        [-1, -1, -1, ..., -1, -1, -1],
        [-1, -1, -1, ..., -1, -1, -1],
        [-1, -1, -1, ..., -1, -1,  1]]))

In [10]:
def get_validation_data_and_labels() -> Tuple[ndarray, ndarray]:
    """
    :return: the second split returned by ``load_data`` (the validation set), unchanged
    """
    _training_split, validation_split, _test_split = load_data()
    return validation_split

get_validation_data_and_labels()

(array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]], dtype=float32),
 array([3, 8, 6, ..., 5, 6, 8], dtype=int64))

In [11]:
def test_prediction_on_set(weights:ndarray, get_set: callable) -> float:
    predictions, prediction_ok = 0, 0
    for image, label in zip(*get_set()):
        predictions += 1
        image = np.insert(image, 0, 1)
        if np.argmax(np.dot(weights, image)) == label:
            prediction_ok += 1
    return prediction_ok / predictions * 100

In [12]:
def validate(weights: ndarray) -> float:
    """
    :param weights: weights to evaluate
    :return: result of ``test_prediction_on_set`` for the validation set
    """
    validation_accuracy = test_prediction_on_set(weights, get_validation_data_and_labels)
    return validation_accuracy

In [13]:
def get_test_data_and_labels() -> Tuple[ndarray, ndarray]:
    """
    :return: the third split returned by ``load_data`` (the test set), unchanged
    """
    _training_split, _validation_split, test_split = load_data()
    return test_split

get_test_data_and_labels()

(array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]], dtype=float32),
 array([7, 2, 1, ..., 4, 5, 6], dtype=int64))

In [14]:
def test(weights: ndarray) -> float:
    """
    :param weights: weights to evaluate
    :return: result of ``test_prediction_on_set`` for the test set
    """
    test_accuracy = test_prediction_on_set(weights, get_test_data_and_labels)
    return test_accuracy

In [15]:
# Drop whatever get_data_and_labels cached earlier in the notebook, so that the
# training data is rebuilt for the run started below.
memo = None
def train(iterations: int = 30, random_weights: bool = False, niu: float = 0.01, augment: bool = False,
          mini_batch_size: int = 10) -> ndarray:
    """
    Train the perceptrons with mini-batch updates and save the resulting weights.

    Before every pass over the data the current accuracy on the validation set is
    printed. After the last pass the weights are written with ``np.save`` to a file
    named after the parameters and the current time.

    :param iterations: number of passes over the training data
    :param random_weights: passed to ``get_weights``; if True, start from random weights
    :param niu: learning rate, the factor applied to every weight update
    :param augment: passed to ``get_data_and_labels``
    :param mini_batch_size: number of samples per weight update; the last batch of a
    pass is smaller when the number of samples is not a multiple of it
    :return: the trained weights
    """
    weights = get_weights(random_weights)
    for i in range(iterations):
        print(f"Iteration {i}: {round(validate(weights), 2)}% on validation")
        data, labels = shuffle_data_and_labels(*get_data_and_labels(augment))
        for start in range(0, labels.shape[0], mini_batch_size):
            batch_images = data[start:start + mini_batch_size]
            batch_labels = labels[start:start + mini_batch_size]
            # One bias input per sample actually present in this batch (the last one may be short).
            bias = np.ones((batch_images.shape[0], 1))
            batch_images = np.c_[bias, batch_images]
            # weights.T because weights should have been (785, 10), not (10, 785).
            # The last param of np.where must be the same value as the default in the
            # one-hot vectors. Using -1 gives better results.
            z = np.where(np.dot(batch_images, weights.T) >= 0, 1, -1)
            diff = batch_labels - z
            weights += np.dot(diff.T, batch_images) * niu
    np.save(f"weights-{iterations}-{random_weights}-{niu}-{re.sub(r'[ :]', '_', str(datetime.now()))}", weights)
    return weights

trained_weights:ndarray = train(iterations=15, random_weights=True, niu=0.001, augment=False)

Iteration 0: 7.02% on validation
Iteration 1: 86.11% on validation
Iteration 2: 87.68% on validation
Iteration 3: 87.96% on validation
Iteration 4: 88.83% on validation
Iteration 5: 89.05% on validation
Iteration 6: 88.11% on validation
Iteration 7: 88.5% on validation
Iteration 8: 88.29% on validation
Iteration 9: 88.96% on validation
Iteration 10: 89.24% on validation
Iteration 11: 89.56% on validation
Iteration 12: 89.22% on validation
Iteration 13: 88.85% on validation
Iteration 14: 89.28% on validation


In [16]:
print(f"Accuracy on test set: {test(trained_weights)}%")

Accuracy on test set: 88.87%
