In [None]:
import matplotlib.pyplot as plt
import numpy as np

In [None]:
def get_x(folder_name, number_images):
    """Load numbered JPEG images from a folder into one flattened, scaled matrix.

    Reads ``<folder_name>/1.jpg`` through ``<folder_name>/<number_images>.jpg``
    and flattens each image into a row of 784 values.

    Parameters
    ----------
    folder_name : str
        Directory that contains the numbered ``.jpg`` files.
    number_images : int
        How many images to read, starting at ``1.jpg``.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(number_images, 784)`` with every value
        divided by 255. The array has zero rows when ``number_images <= 0``.

    Raises
    ------
    ValueError
        If an image does not hold exactly 784 values (raised by ``reshape``).
    """
    # Allocate the result once. Growing it with np.append inside the loop
    # copies every previously loaded row on each iteration (quadratic cost).
    x_input_points = np.zeros((max(number_images, 0), 784))
    for row, image_number in enumerate(range(1, number_images + 1)):  # file names are 1-based
        img_path = '{}/{}.jpg'.format(folder_name, image_number)
        x_input_points[row] = plt.imread(img_path).reshape(784)

    return x_input_points / 255

In [None]:
def get_t(labels_path):
    """Read integer class labels from a text file, one label per line.

    Parameters
    ----------
    labels_path : str
        Path of the text file to read.

    Returns
    -------
    numpy.ndarray
        1-D array holding the labels in file order.

    Raises
    ------
    ValueError
        If a line cannot be parsed by ``int``.
    """
    with open(labels_path) as labels_file:
        label_lines = labels_file.read().splitlines()
    return np.array(list(map(int, label_lines)))

In [None]:
def get_confusion_matrix(true_labels, predictions, num_classes=10):
    """Count how often each true class was predicted as each class.

    Parameters
    ----------
    true_labels : sequence of int
        Ground-truth class index for every sample.
    predictions : sequence of int
        Predicted class index for every sample, aligned with ``true_labels``.
    num_classes : int, optional
        Number of classes, i.e. the side length of the matrix. Defaults to 10.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(num_classes, num_classes)`` where entry
        ``[t, p]`` is the number of samples with true class ``t`` that were
        predicted as class ``p``.

    Raises
    ------
    IndexError
        If ``predictions`` is longer than ``true_labels`` or a label is
        outside the matrix bounds.
    """
    conf_matrix = np.zeros((num_classes, num_classes))
    # Rows are indexed by the true class, columns by the predicted class.
    for i, predicted_class in enumerate(predictions):
        conf_matrix[true_labels[i], predicted_class] += 1

    return conf_matrix

In [None]:
def gaussian(x, mean, variance):
    """Evaluate the univariate normal density at ``x``.

    Parameters
    ----------
    x : float or numpy.ndarray
        Point(s) at which to evaluate the density.
    mean : float or numpy.ndarray
        Mean of the distribution.
    variance : float or numpy.ndarray
        Variance of the distribution; it is used as a divisor.

    Returns
    -------
    float or numpy.ndarray
        ``exp(-(x - mean)^2 / (2 * variance)) / sqrt(2 * pi * variance)``.
    """
    squared_deviation = (x - mean) ** 2
    exponent = -1 * (squared_deviation / (2 * variance))
    normalizer = 1 / (2 * np.pi * variance) ** 0.5
    return normalizer * np.exp(exponent)

In [None]:
# Read the training images in a cell of their own, so that re-running the
# cells below does not trigger another load.
x_train = get_x(folder_name='Train', number_images=2400)

In [None]:
# Fit one Gaussian per class and per feature: store its mean and variance.
means = np.zeros((10, 784))
variances = np.zeros((10, 784))

# np.split cuts x_train into 10 equal, consecutive row chunks.
# NOTE(review): this assumes the training rows are grouped by class, in class
# order, with the same number of images per class - confirm against the data.
classes_inputs = np.split(x_train, 10)

for class_index, class_samples in enumerate(classes_inputs):
    means[class_index] = class_samples.mean(axis=0)
    # Floor the variance at 0.01 so that no feature ends up with a zero
    # variance, which gaussian() would have to divide by.
    variances[class_index] = np.maximum(class_samples.var(axis=0), 0.01)

In [None]:
# Read the test set: the images first, then their true labels.
x_test = get_x(folder_name='Test', number_images=200)
test_true_labels = get_t(labels_path='Test/Test Labels.txt')

In [None]:
# Calculate the probability of each class per image. The class likelihood is
# the multiplication of the probabilities of each feature belonging to that
# class (independence assumed).
#
# Multiplying 784 densities directly can underflow to 0 (or overflow to inf)
# in float64, which makes the later argmax meaningless. The product is
# therefore accumulated as a sum of log-densities, where
#   log N(x; m, v) = -0.5 * (log(2*pi*v) + (x - m)^2 / v).
pixels = x_test[:, np.newaxis, :]  # (images, 1, features); broadcasts against (classes, features)
log_densities = -0.5 * (np.log(2 * np.pi * variances) + (pixels - means) ** 2 / variances)
log_likelihoods = log_densities.sum(axis=2)  # (images, classes)

# Turn the log-likelihoods back into per-image class probabilities.
# Subtracting each row's maximum first keeps the best class at exp(0) = 1, so
# the exponentiation cannot overflow and at least one entry per row is non-zero.
# Each row is then proportional to the plain product of densities and sums to 1.
probabilities = np.exp(log_likelihoods - log_likelihoods.max(axis=1, keepdims=True))
probabilities /= probabilities.sum(axis=1, keepdims=True)

In [None]:
# Predict, for every test image, the class with the highest probability.
predictions = np.argmax(probabilities, axis=1)
confusion_matrix = get_confusion_matrix(test_true_labels, predictions)

# get_confusion_matrix indexes rows by the true label and columns by the
# prediction; label the axes accordingly so the saved figure stands alone.
fig, ax = plt.subplots()
image = ax.imshow(confusion_matrix)
fig.colorbar(image, ax=ax, label='Number of test images')
ax.set_xlabel('Predicted class')
ax.set_ylabel('True class')
ax.set_title('Confusion matrix')
fig.savefig('Confusion-Gauss.jpg')