In [0]:
import numpy as np
import math
from scipy.stats import multivariate_normal
from keras import datasets
from sklearn.utils import shuffle
from mlxtend.data import loadlocal_mnist

In [0]:
def generate_semi_definite(dim):
    '''
    generate a semi definite matrix to be sure that the matrix is inversible
    '''
    tmp = np.random.rand(dim, dim)
    return np.dot(tmp.T, tmp)

In [0]:
def compute_cov(X, mean):
    '''
    Compute the covariance of the data
    '''
    n, dim = X.shape
    mean = np.reshape(mean, (dim, 1))
    res = np.zeros((dim, dim))
    for i in range(n):
        x = X[i]
        x = np.reshape(x, (dim, 1))
        res = np.add(res, np.dot((x - mean), (x - mean).T))
    tmp = res / n
    return np.dot(tmp.T, tmp)

In [0]:
class GaussianClassifier:
    def __init__(self, nb_gaussian, dim, cov_type='identity'):
        '''
        Instantiation of the gausian classifier
        nb_gaussian: number of gaussian to create
        cov_type: type of covariance matrix, can be: ['identity', 'same', 'unique']
        dim: dimension of the data
        'identity': all the gaussian have the identity covariance
        'same': all the gaussian have the same random 
        '''
        self.nb_gaussian = nb_gaussian
        self.cov_type = cov_type
        
        ## 
        self.means = np.zeros((nb_gaussian, dim))
        self.covs = np.zeros((nb_gaussian, dim, dim))
    
    def fit(self, X, y):
        '''
        Fit the Gaussian Classifier
        Compute the mean and the covariance for each gaussian
        X: datas
        y: labels
        '''
        n, dim = X.shape
        
        if (self.cov_type == 'same'): ## same covariance for all gaussian
            self.covs = generate_semi_definite(dim)
        
        for i in range(self.nb_gaussian):
            ## compute the mean for each gaussian
            datas = X[y == i] ## get the data that have the label i
            self.means[i] = np.mean(datas, axis=0)
            if (self.cov_type == 'identity'): ## identity covariance for all gaussian
                self.covs[i] = np.identity(dim)
            elif (self.cov_type == 'unique'):
                self.covs[i] = compute_cov(datas, self.means[i])
    
    def predict(self, X):
        '''
        Predict many data points
        X: vector of many data points
        '''
        res = np.zeros(X.shape[0])
        for i in range(X.shape[0]):
            pred = np.zeros(self.nb_gaussian) ## predictions for all gaussian
            for j in range(self.nb_gaussian):
                pred[j] = multivariate_normal.logpdf(X[i], self.means[j], self.covs[j])
            print("Prediction nb {}".format(i))
            res[i] = np.argmax(pred)
        return res

In [0]:
def measure_accuracy(predictions, y):
    '''
    Measure the accuracy of a model
    predicitons: vector of predictions (label prediction)
    y: vector of the real labels of the data
    '''
    n = predictions.shape[0]
    nb_good = 0
    for i in range(n):
        nb_good += (predictions[i] == y[i])
        print("prediction: {}   expected: {}".format(int(predictions[i]), y[i]))
    return nb_good / n

In [0]:
(X_train, y_train), (X_test, y_test) = datasets.mnist.load_data()

In [0]:
X_train = np.reshape(X_train, (60000, 784))
X_test = np.reshape(X_test, (10000, 784))

In [0]:
model_id = GaussianClassifier(nb_gaussian=10, dim=784, cov_type='identity')
model_same = GaussianClassifier(nb_gaussian=10, dim=784, cov_type='same')
model_unique = GaussianClassifier(nb_gaussian=10, dim=784, cov_type='unique')

In [0]:
## fit the 3 model
model_id.fit(X_train, y_train)
model_same.fit(X_train, y_train)
# model_unique.fit(X_train, y_train) ## too long to compute

In [0]:
test_data = X_test[:50]
test_labels = y_test[:50]

In [49]:
pred_id = model_id.predict(test_data)

Prediction nb 0
Prediction nb 1
Prediction nb 2
Prediction nb 3
Prediction nb 4
Prediction nb 5
Prediction nb 6
Prediction nb 7
Prediction nb 8
Prediction nb 9
Prediction nb 10
Prediction nb 11
Prediction nb 12
Prediction nb 13
Prediction nb 14
Prediction nb 15
Prediction nb 16
Prediction nb 17
Prediction nb 18
Prediction nb 19
Prediction nb 20
Prediction nb 21
Prediction nb 22
Prediction nb 23
Prediction nb 24
Prediction nb 25
Prediction nb 26
Prediction nb 27
Prediction nb 28
Prediction nb 29
Prediction nb 30
Prediction nb 31
Prediction nb 32
Prediction nb 33
Prediction nb 34
Prediction nb 35
Prediction nb 36
Prediction nb 37
Prediction nb 38
Prediction nb 39
Prediction nb 40
Prediction nb 41
Prediction nb 42
Prediction nb 43
Prediction nb 44
Prediction nb 45
Prediction nb 46
Prediction nb 47
Prediction nb 48
Prediction nb 49


In [50]:
pred_same = model_same.predict(test_data)

Prediction nb 0
Prediction nb 1
Prediction nb 2
Prediction nb 3
Prediction nb 4
Prediction nb 5
Prediction nb 6
Prediction nb 7
Prediction nb 8
Prediction nb 9
Prediction nb 10
Prediction nb 11
Prediction nb 12
Prediction nb 13
Prediction nb 14
Prediction nb 15
Prediction nb 16
Prediction nb 17
Prediction nb 18
Prediction nb 19
Prediction nb 20
Prediction nb 21
Prediction nb 22
Prediction nb 23
Prediction nb 24
Prediction nb 25
Prediction nb 26
Prediction nb 27
Prediction nb 28
Prediction nb 29
Prediction nb 30
Prediction nb 31
Prediction nb 32
Prediction nb 33
Prediction nb 34
Prediction nb 35
Prediction nb 36
Prediction nb 37
Prediction nb 38
Prediction nb 39
Prediction nb 40
Prediction nb 41
Prediction nb 42
Prediction nb 43
Prediction nb 44
Prediction nb 45
Prediction nb 46
Prediction nb 47
Prediction nb 48
Prediction nb 49


In [51]:
acc_id = measure_accuracy(pred_id, y_test)
print("Model (Cov=identity) accuracy: {}".format(acc_id))

prediction: 7   expected: 7
prediction: 2   expected: 2
prediction: 1   expected: 1
prediction: 0   expected: 0
prediction: 4   expected: 4
prediction: 1   expected: 1
prediction: 4   expected: 4
prediction: 9   expected: 9
prediction: 2   expected: 5
prediction: 9   expected: 9
prediction: 0   expected: 0
prediction: 2   expected: 6
prediction: 9   expected: 9
prediction: 0   expected: 0
prediction: 1   expected: 1
prediction: 5   expected: 5
prediction: 9   expected: 9
prediction: 7   expected: 7
prediction: 3   expected: 3
prediction: 4   expected: 4
prediction: 7   expected: 9
prediction: 6   expected: 6
prediction: 4   expected: 6
prediction: 5   expected: 5
prediction: 4   expected: 4
prediction: 0   expected: 0
prediction: 7   expected: 7
prediction: 4   expected: 4
prediction: 0   expected: 0
prediction: 1   expected: 1
prediction: 3   expected: 3
prediction: 1   expected: 1
prediction: 3   expected: 3
prediction: 6   expected: 4
prediction: 7   expected: 7
prediction: 2   expe

In [52]:
acc_same = measure_accuracy(pred_same, y_test)
print("Model (Cov=same) accuracy: {}".format(acc))

prediction: 7   expected: 7
prediction: 3   expected: 2
prediction: 1   expected: 1
prediction: 0   expected: 0
prediction: 4   expected: 4
prediction: 1   expected: 1
prediction: 4   expected: 4
prediction: 9   expected: 9
prediction: 2   expected: 5
prediction: 9   expected: 9
prediction: 0   expected: 0
prediction: 2   expected: 6
prediction: 9   expected: 9
prediction: 0   expected: 0
prediction: 1   expected: 1
prediction: 3   expected: 5
prediction: 9   expected: 9
prediction: 7   expected: 7
prediction: 3   expected: 3
prediction: 4   expected: 4
prediction: 7   expected: 9
prediction: 6   expected: 6
prediction: 4   expected: 6
prediction: 5   expected: 5
prediction: 4   expected: 4
prediction: 0   expected: 0
prediction: 7   expected: 7
prediction: 4   expected: 4
prediction: 0   expected: 0
prediction: 1   expected: 1
prediction: 3   expected: 3
prediction: 1   expected: 1
prediction: 3   expected: 3
prediction: 6   expected: 4
prediction: 7   expected: 7
prediction: 2   expe