# LDA Classification

<font color='blue'>Step 15</font>

In [2]:
# Mount Google drive and cd to working folder
# Colab-only cell: `google.colab` is imported here, so this cell is expected
# to run inside a Google Colab runtime.
from google.colab import drive
# Mount point is /content/drive; force_remount=True is passed explicitly
# (presumably so a stale mount is refreshed - confirm against the Colab docs).
drive.mount('/content/drive', force_remount=True)
# IPython magic (not plain Python): switch the working directory so the
# relative data paths and the local `mnist` module used below resolve.
%cd /content/drive/My Drive/Learning/EIE4105/lab2/python/

In [3]:
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from scipy.stats import multivariate_normal as mvn

In [4]:
# Read the complete noisy-digit train/test split through the local `mnist`
# helper module; the paths are relative to the working folder set earlier.
from mnist import load_mnist

trainpath, testpath = (
    '../../lab1/data/noisy_train_digits.mat',
    '../../lab1/data/noisy_test_digits.mat',
)
(train_data, train_labels,
 test_data, test_labels) = load_mnist(trainpath, testpath)
print(train_data.shape)

(60000, 784)


In [21]:
# Replace the full data set with a sub-sampled one: nSamples training
# samples per class, drawn by the local `mnist` helper module.
from mnist import load_SampleMnist

nSamples = 1000
(train_data, train_labels,
 test_data, test_labels) = load_SampleMnist(trainpath, testpath, nSamples)
print(train_data.shape)

(10000, 784)


In [22]:
# Train an LDA model
# The estimator is built with its default constructor arguments and fitted on
# the training data/labels currently held in `train_data` / `train_labels`.
lda = LinearDiscriminantAnalysis()
# Kept as the cell's last expression so the notebook echoes the fitted
# estimator below the cell.
lda.fit(train_data, train_labels)

LinearDiscriminantAnalysis()

In [23]:
# Project the training split, then the test split, with the fitted LDA model
# and show the shape of each projected matrix.
X_trn, X_tst = (lda.transform(split) for split in (train_data, test_data))
print(X_trn.shape)
print(X_tst.shape)

(10000, 9)
(10000, 9)


In [24]:
class Gauss_class_diag():
    """
    Gaussian classifier with one diagonal covariance matrix per class.

    ``fit`` estimates, for every class, a mean vector, a per-feature variance
    vector and a prior. ``predict`` assigns each sample to the class whose
    Gaussian log-density plus log-prior is largest.
    """
    def fit(self, X, y, epsilon = 0.5e-1):
        """
        Estimate the per-class statistics and priors.

        Parameters
        ----------
        X : array-like, shape (N, D)
            Training samples, one row per sample.
        y : array-like with N entries
            Class labels. Flattened to 1-D and cast to int to enumerate the
            classes; the labels do not have to be 0, 1, ..., K-1.
        epsilon : float
            Constant added to every variance so that no variance is zero.
        """
        X = np.asarray(X)
        y = np.asarray(y).ravel()   # accept (N,) as well as (N, 1) label arrays
        self.stats = dict()
        self.priors = dict()
        self.labels = set(y.astype(int))    # Unique class labels

        for k in self.labels:
            X_k = X[y == k, :]      # Select data from the k-th class
            self.stats[k] = {"mean": X_k.mean(axis=0),
                             "cov": X_k.var(axis=0) + epsilon}
            self.priors[k] = len(X_k) / len(X)

    def predict(self, X):
        """
        Return the predicted class label of every row of ``X``.

        Parameters
        ----------
        X : array-like, shape (N, D)

        Returns
        -------
        numpy.ndarray of shape (N,) holding the labels seen in ``fit``.
        """
        X = np.asarray(X)
        N, _ = X.shape
        # Columns of P_hat follow the sorted labels rather than the label
        # values themselves, so arbitrary integer labels are supported.
        classes = np.array(sorted(self.stats))
        P_hat = np.zeros((N, len(classes)))
        for col, k in enumerate(classes):
            s = self.stats[k]
            # The 1-D "cov" vector is handed to scipy as a diagonal covariance.
            P_hat[:, col] = mvn.logpdf(X, s["mean"], s["cov"]) + np.log(self.priors[k])

        # Map the winning column back to its class label.
        return classes[P_hat.argmax(axis=1)]

In [25]:
class Gauss_class_full():
    """
    Gaussian classifier with one full covariance matrix per class.

    ``fit`` estimates, for every class, a mean vector, a regularised sample
    covariance matrix and a prior. ``predict`` assigns each sample to the
    class whose Gaussian log-density plus log-prior is largest.
    """
    def fit(self, X,y, epsilon=0.5e-1):
        """
        Estimate the per-class statistics and priors.

        Parameters
        ----------
        X : array-like, shape (N, D)
            Training samples, one row per sample.
        y : array-like with N entries
            Class labels. Flattened to 1-D and cast to int to enumerate the
            classes; the labels do not have to be 0, 1, ..., K-1.
        epsilon : float
            Multiple of the identity added to every covariance matrix.
        """
        X = np.asarray(X)
        y = np.asarray(y).ravel()   # accept (N,) as well as (N, 1) label arrays
        self.stats = dict()
        self.priors = dict()
        self.labels = set(y.astype(int))

        for k in self.labels:
            X_k = X[y == k, :]
            N_k, D = X_k.shape   # N_k=total number of observations of that class
            mu_k = X_k.mean(axis=0)
            centred = X_k - mu_k
            # Unbiased (N_k - 1) normalisation; the denominator is clamped to 1
            # so a class with a single sample yields epsilon*I instead of
            # raising ZeroDivisionError.
            denom = max(N_k - 1, 1)
            self.stats[k] = {"mean": mu_k,
                             "cov": (1/denom)*np.matmul(centred.T, centred) +
                                    epsilon*np.identity(D)}
            self.priors[k] = len(X_k)/len(X)


    def predict(self, X):
        """
        Return the predicted class label of every row of ``X``.

        Parameters
        ----------
        X : array-like, shape (N, D)

        Returns
        -------
        numpy.ndarray of shape (N,) holding the labels seen in ``fit``.
        """
        X = np.asarray(X)
        N, _ = X.shape
        # Columns of P_hat follow the sorted labels rather than the label
        # values themselves, so arbitrary integer labels are supported.
        classes = np.array(sorted(self.stats))
        P_hat = np.zeros((N, len(classes)))

        for col, k in enumerate(classes):
            s = self.stats[k]
            P_hat[:, col] = mvn.logpdf(X, s["mean"], s["cov"]) + np.log(self.priors[k])

        # Map the winning column back to its class label.
        return classes[P_hat.argmax(axis=1)]

In [26]:
def accuracy(y, y_hat):
    """
    Return the fraction of entries of ``y_hat`` that equal ``y``.

    Both arguments are converted to flat NumPy arrays first, so plain lists
    are compared element by element and an (N, 1) column is matched against
    an (N,) vector instead of being broadcast into an N x N comparison.

    Parameters
    ----------
    y : array-like
        Reference labels.
    y_hat : array-like
        Predicted labels; must contain as many entries as ``y``.

    Raises
    ------
    ValueError
        If the two inputs do not hold the same number of entries.
    """
    y = np.asarray(y).ravel()
    y_hat = np.asarray(y_hat).ravel()
    if y.shape != y_hat.shape:
        raise ValueError(
            "y and y_hat must have the same number of entries, got "
            f"{y.size} and {y_hat.size}"
        )
    return np.mean(y == y_hat)

In [27]:
# Evaluate the full-covariance Gaussian classifier on the LDA-projected data:
# fit on the projected training set, predict the projected test set, and
# print the fraction of test labels that were predicted correctly.
gcf = Gauss_class_full()
gcf.fit(X_trn, train_labels)    # epsilon is left at its default value
tst_pred = gcf.predict(X_tst)
acc = accuracy(test_labels, tst_pred)
print(acc)

0.8981
