In [1]:
from matplotlib import pyplot as plt
import numpy as np
import math
import scipy.linalg as linalg
from pandas import crosstab

def safelog(x):
    """Return the natural log of ``x`` after shifting it by 1e-100.

    The tiny positive offset keeps ``np.log`` from being evaluated at
    exactly zero, so a zero input gives a large negative finite value
    instead of ``-inf``.
    """
    offset = 1e-100
    shifted = x + offset
    return np.log(shifted)

In [2]:
# Read the raw data: the images file is comma-separated, the labels file is
# parsed with a newline delimiter (one value per line).
# NOTE(review): "abels.csv" looks like a truncated "labels.csv" -- confirm the
# file name on disk before changing it.
data_set_images = np.genfromtxt(
    "images.csv",
    delimiter=",",
)
data_set_labels = np.genfromtxt(
    "abels.csv",
    delimiter="\n",
)

In [3]:
# The first 200 rows (and their labels, cast to int) form the training set.
x_train = data_set_images[:200, :]
y_train = data_set_labels[:200].astype(int)

# Problem sizes derived from the training split:
#   N -- number of training rows, D -- number of columns (features),
#   K -- largest label value, used downstream as the number of classes.
N, D = x_train.shape
K = np.max(y_train)

In [4]:
# Every row from index 200 onward is held out as the test set; labels are
# cast to int the same way as for the training split.
x_test = data_set_images[200:, :]
y_test = data_set_labels[200:].astype(int)

In [5]:
# Per-class feature means, arranged with one column per class: column c holds
# the mean of every feature over the training rows labelled c + 1.
# Iterating over range(K) (instead of hard-coding labels 1 and 2) keeps this
# consistent with how the priors and class scores are computed.
means = np.array([np.mean(x_train[y_train == c + 1], axis=0)
                  for c in range(K)]).T

In [6]:
# Per-class feature standard deviations (np.std default, i.e. ddof=0), with
# one column per class: column c is computed from the training rows labelled
# c + 1.
# Iterating over range(K) (instead of hard-coding labels 1 and 2) keeps this
# consistent with how the priors and class scores are computed.
deviations = np.array([np.std(x_train[y_train == c + 1], axis=0)
                       for c in range(K)]).T

In [7]:
# Class priors: the fraction of training labels equal to each label 1..K.
priors = [np.mean(y_train == label) for label in range(1, K + 1)]

In [8]:
def score_function(x):
    """Return the Gaussian naive Bayes log-score of ``x`` for each class.

    For class index ``c`` (0-based; the corresponding label is ``c + 1``)
    the score is::

        log(priors[c]) - (D / 2) * log(2 * pi)
            - sum(log(deviations[:, c]))
            - sum((x - means[:, c]) ** 2 / (2 * deviations[:, c] ** 2))

    Parameters
    ----------
    x : array-like
        One sample; it is broadcast against ``means[:, c]``, so it is
        presumably a length-``D`` feature vector.

    Returns
    -------
    list
        ``K`` scores, one per class, in class-index order.

    Notes
    -----
    Reads the module-level ``K``, ``D``, ``priors``, ``means`` and
    ``deviations``. A feature whose standard deviation is exactly zero
    would make the quadratic term divide by zero (nan or inf), so the same
    1e-100 offset used by ``safelog`` is added to the deviation before
    squaring. For ordinary deviations the offset is absorbed by
    floating-point rounding and the result is unchanged.
    """
    tiny = 1e-100  # same offset safelog uses for the log term
    gaussian_constant = (D / 2) * np.log(2 * math.pi)

    scores = []
    for c in range(K):
        mu = means[:, c]
        sigma = deviations[:, c]
        # Guarded variance: never exactly zero, identical to sigma**2 otherwise.
        variance = (sigma + tiny) ** 2
        scores.append(np.log(priors[c]) - gaussian_constant
                      - np.sum(safelog(sigma))
                      - np.sum(((x - mu) ** 2) / (2 * variance)))
    return scores

In [9]:
# Score every training row against each class, then pick the best-scoring
# class; argmax is 0-based, so add 1 to get back to the 1-based labels.
y_train_scores = list(map(score_function, x_train))
y_train_predictions = 1 + np.argmax(y_train_scores, axis=1)

In [10]:
# Training-set confusion matrix: rows are true labels, columns are predictions.
print(
    crosstab(
        y_train,
        y_train_predictions,
        rownames=["y_train"],
        colnames=["y_hat"],
    )
)

y_hat     1    2
y_train         
1        18    2
2        24  156


In [11]:
# Score every test row against each class, then pick the best-scoring class;
# argmax is 0-based, so add 1 to get back to the 1-based labels.
y_test_scores = list(map(score_function, x_test))
y_test_predictions = 1 + np.argmax(y_test_scores, axis=1)

In [12]:
# Test-set confusion matrix: rows are true labels, columns are predictions.
print(
    crosstab(
        y_test,
        y_test_predictions,
        rownames=["y_test"],
        colnames=["y_hat"],
    )
)

y_hat    1    2
y_test         
1       15    5
2       19  161
