In [None]:
#importing libraries
import math
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split as tts
import numpy.linalg as la

In [None]:
#loading the scikit-learn handwritten digits dataset (8x8 images from load_digits, not the full MNIST set) and splitting it into training and testing sets

# Load the digits bundle; .data holds the feature rows and .target the labels.
digits = datasets.load_digits()

# Hold out 10% of the rows for testing; random_state pins the shuffle so the
# split is the same on every run.
X_train, X_test, Y_train, Y_test = tts(
    digits.data,
    digits.target,
    test_size=0.1,
    random_state=4,
)

# Per-feature mean of the training rows, stored as a column vector
# (one row per feature) so that x_mean[i] is the mean of feature i.
x_mean = X_train.mean(axis=0).reshape(-1, 1)

LDA classification

In [None]:
#Covariance function 
def covariance(x):
    """Return the unbiased sample covariance matrix of the columns of ``x``.

    Parameters
    ----------
    x : array-like of shape (n_samples, n_features)
        One observation per row.

    Returns
    -------
    numpy.ndarray of shape (n_features, n_features)
        ``C[i, j] = sum_k (x[k, i] - m[i]) * (x[k, j] - m[j]) / (n_samples - 1)``
        where ``m`` holds the column means of ``x`` itself.

    Notes
    -----
    The means are computed from ``x`` instead of being read from the
    notebook-level ``x_mean``, so the function no longer depends on which
    cells ran before it.  For ``x = X_train`` this is the same matrix as
    before, up to floating-point rounding.

    With a single row the division by ``n_samples - 1`` produces NaN entries
    (NumPy warns, it does not raise).
    """
    data = np.asarray(x, dtype=float)
    centered = data - data.mean(axis=0)
    # One matrix product replaces the per-entry Python loops over i, j and k.
    return np.dot(centered.T, centered) / (data.shape[0] - 1)

In [None]:
#collecting the training examples of each class (needed for the prior probabilities and the class means)

def get_class_examples(k):
    """Return the training rows whose label equals ``k`` as an ``np.matrix``.

    Reads the notebook-level ``X_train`` / ``Y_train`` and pairs them up
    position by position; one matrix row is produced per matching sample.
    """
    members = [row for row, label in zip(X_train, Y_train) if label == k]
    return np.matrix(members)

# class_wise_elements[k] holds the training rows of digit k (k = 0..9).
class_wise_elements = [get_class_examples(digit) for digit in range(10)]

In [None]:
#here we compute the logarithm of the multivariate Gaussian density term by term (log is strictly increasing, so the argmax over classes is unchanged)

# Single covariance matrix shared by every class, estimated from all training rows.
# NOTE(review): this is the covariance of the whole training set, not the pooled
# within-class covariance used by textbook LDA - confirm this is intended.
cov = covariance(X_train)

# Prior of class k: fraction of the training rows that belong to class k.
prior_probability = [len(class_wise_elements[k]) / len(X_train) for k in range(10)]

# First term of the log-density: log of the class prior.
first_term = [np.log(p) for p in prior_probability]

# Second term (normalisation).  `cov` is shared, so the value is the same for
# every class: compute it once instead of once per class.
# NOTE(review): la.norm on a 2-D array defaults to the Frobenius norm, and the
# exponent 5 is hard-coded; the Gaussian density uses the determinant and
# (2*pi)**(d/2).  Being identical across classes it cannot change the argmax here.
shared_second_term = -1*(np.log(((2*math.pi)**(5))*(np.sqrt(la.norm(cov)))))
second_term = [shared_second_term] * 10

# Mean of each class, taken from the rows already collected in
# class_wise_elements (no need to filter the training set a second time).
class_wise_mean = [np.vstack(np.mean(class_wise_elements[k], axis=0)) for k in range(10)]

def third_term(x):
    """Return the quadratic term of the LDA log-density of ``x`` for each class.

    For class ``k`` the value is ``-1/2 * (x - mu_k) P (x - mu_k)^T`` where
    ``mu_k`` is ``class_wise_mean[k][0]`` and ``P`` is the pseudo-inverse of
    the shared covariance ``cov``.

    Parameters
    ----------
    x : array-like
        One sample (a single feature vector).

    Returns
    -------
    list
        Ten scalars, one per class, in class order.
    """
    # The covariance is shared by all classes, so its pseudo-inverse is
    # loop-invariant: compute it once per call instead of once per class.
    precision = la.pinv(cov)
    ans = []
    for i in range(10):
        # Flatten so the arithmetic works whether the stored mean is an
        # np.matrix row or a plain array.
        value = np.asarray(x - class_wise_mean[i][0]).ravel()
        ans.append((-1/2)*np.dot(value, np.dot(precision, value)))
    return ans

In [None]:
#making prediction
def predict(x):
    """Return the index of the class with the largest LDA score for sample ``x``.

    The score of a class is the sum of its entries in ``first_term``,
    ``second_term`` and ``third_term(x)``.
    """
    gaussian_part = np.asarray(second_term) + np.asarray(third_term(x))
    class_scores = np.asarray(first_term) + gaussian_part
    return class_scores.argmax()

In [None]:
#finding accuracy on test-set
# Number of test samples whose predicted class equals the true label.
count = sum(1 for sample, label in zip(X_test, Y_test) if predict(sample) == label)

# Accuracy = correct predictions / number of test samples.
print(count/(len(X_test)))

0.9333333333333333


Classification using QDA

In [None]:
#finding covariance matrix(class-wise) 

def covariance_1(x,klass):
    """Return the sample covariance of the rows of ``x`` about the mean of class ``klass``.

    Parameters
    ----------
    x : 2-D array-like of shape (n_samples, n_features)
        The samples of one class, one per row.
    klass : int
        Index into the notebook-level ``class_wise_mean``; row 0 of that entry
        is used as the centre.

    Returns
    -------
    numpy.ndarray of shape (n_features, n_features)
        ``C[i, j] = sum_k (x[k, i] - mu[i]) * (x[k, j] - mu[j]) / (n_samples - 1)``.
    """
    data = np.asarray(x, dtype=float)
    class_mean = np.asarray(class_wise_mean[klass], dtype=float)[0]
    centered = data - class_mean
    # One matrix product replaces the per-entry Python loops over i, j and k.
    return np.dot(centered.T, centered) / (data.shape[0] - 1)

In [None]:
def get_class_examples_1(k):
    """Return the training rows whose label equals ``k`` as a plain ``np.ndarray``.

    Same selection as ``get_class_examples`` (reads the notebook-level
    ``X_train`` / ``Y_train``), but the rows are packed with ``np.array``
    instead of ``np.matrix``.
    """
    selected = [row for row, label in zip(X_train, Y_train) if label == k]
    return np.array(selected)

# class_wise_elements_1[k] holds the training rows of digit k as a plain ndarray.
# NOTE(review): no visible cell reads this list (the covariance cell uses
# class_wise_elements) - confirm whether it is still needed.
class_wise_elements_1 = [get_class_examples_1(digit) for digit in range(10)]

In [None]:
#finding individual covariance matrix for different classes

# cov_1[k] is the covariance matrix estimated from the samples of class k only.
cov_1 = []
for klass in range(10):
    class_cov = covariance_1(class_wise_elements[klass], klass)
    cov_1.append(class_cov)
    # progress marker, one line per finished class
    print(klass,"done")


0 done
1 done
2 done
3 done
4 done
5 done
6 done
7 done
8 done
9 done


In [None]:
#finding the log of the second and third terms of the n-d Gaussian density (the prior probability term is the same for LDA and QDA)

def second_term_1(x):
    """Return the per-class normalisation term of the QDA score.

    ``x`` is accepted for symmetry with ``third_term_1`` but is not used: the
    values depend only on the class covariances in ``cov_1``.

    Returns a list of ten scalars, one per class:
    ``-log((2*pi)**5 * sqrt(norm(cov_1[k])))``.

    NOTE(review): ``la.norm`` on a 2-D array defaults to the Frobenius norm,
    whereas the Gaussian density uses the determinant (and ``(2*pi)**(d/2)``).
    Unlike in LDA this term differs between classes, so the substitution can
    influence predictions - confirm it is intentional.
    """
    two_pi_factor = (2*math.pi)**(5)
    return [
        -1*(np.log(two_pi_factor*(np.sqrt(la.norm(cov_1[k])))))
        for k in range(10)
    ]
    
# Pseudo-inverses of the per-class covariances, remembered between calls.
# "source" is the cov_1 list the cache was built from, so rebuilding cov_1
# (re-running its cell creates a new list) invalidates the cache automatically.
_cov_1_pinv_cache = {"source": None, "pinv": None}


def _class_precisions():
    """Return ``[la.pinv(c) for c in cov_1]``, recomputed only when ``cov_1`` is rebuilt."""
    if _cov_1_pinv_cache["source"] is not cov_1:
        _cov_1_pinv_cache["pinv"] = [la.pinv(c) for c in cov_1]
        _cov_1_pinv_cache["source"] = cov_1
    return _cov_1_pinv_cache["pinv"]


def third_term_1(x):
    """Return the quadratic term of the QDA log-density of ``x`` for each class.

    For class ``k`` the value is ``-1/2 * (x - mu_k) P_k (x - mu_k)^T`` where
    ``mu_k`` is ``class_wise_mean[k][0]`` and ``P_k`` is the pseudo-inverse of
    that class's covariance ``cov_1[k]``.

    Parameters
    ----------
    x : array-like
        One sample (a single feature vector).

    Returns
    -------
    list
        Ten scalars, one per class, in class order.
    """
    # The pseudo-inverses do not depend on x; previously all ten were
    # recomputed on every prediction.
    precisions = _class_precisions()
    ans = []
    for i in range(10):
        # Flatten so the arithmetic works whether the stored mean is an
        # np.matrix row or a plain array.
        value = np.asarray(x - class_wise_mean[i][0]).ravel()
        ans.append((-1/2)*np.dot(value, np.dot(precisions[i], value)))
    return ans

In [None]:
#prediction function

def predict_1(x):
    """Return the index of the class with the largest QDA score for sample ``x``.

    The score of a class is the sum of its entries in ``first_term`` (shared
    with LDA), ``second_term_1(x)`` and ``third_term_1(x)``.
    """
    gaussian_part = np.add(second_term_1(x), third_term_1(x))
    class_scores = np.add(first_term, gaussian_part)
    return np.argmax(class_scores)

In [None]:
#finding accuracy on test-set

# Number of test samples whose QDA prediction equals the true label.
count_1 = sum(1 for sample, label in zip(X_test, Y_test) if predict_1(sample) == label)

# Accuracy = correct predictions / number of test samples.
print(count_1/(len(X_test)))

0.9277777777777778


LDA on binary class (0 or 1)

In [None]:
#finding prior probabilities, the average covariance matrix and the log of the multivariate Gaussian density terms

# Number of training samples of digit 0 and of digit 1.
n0, n1 = len(class_wise_elements[0]), len(class_wise_elements[1])

# Priors restricted to the two-class problem: each class's share of n0 + n1.
priory_b = [size/(n0+n1) for size in (n0, n1)]

# Means of the two classes, reused from the 10-class computation.
class_wise_mean_b = class_wise_mean[:2]

# Shared covariance: plain average of the two per-class covariances.
# NOTE(review): the average is unweighted; a pooled estimate would weight each
# class by its sample count - confirm whether that matters here.
cov_b = 0.5 * (cov_1[0] + cov_1[1])

# Filled with the log-priors further down in this cell.
first_term_b = []


def third_term_b(x):
    """Return the quadratic term of the two-class LDA log-density of ``x``.

    For class ``k`` (0 or 1) the value is ``-1/2 * (x - mu_k) P (x - mu_k)^T``
    where ``mu_k`` is ``class_wise_mean_b[k][0]`` and ``P`` is the
    pseudo-inverse of the shared covariance ``cov_b``.

    Parameters
    ----------
    x : array-like
        One sample (a single feature vector).

    Returns
    -------
    list
        Two scalars: the term for class 0, then for class 1.
    """
    # cov_b is shared by both classes, so its pseudo-inverse is
    # loop-invariant: compute it once per call instead of once per class.
    precision = la.pinv(cov_b)
    ans = []
    for i in range(2):
        # Flatten so the arithmetic works whether the stored mean is an
        # np.matrix row or a plain array.
        value = np.asarray(x - class_wise_mean_b[i][0]).ravel()
        ans.append((-1/2)*np.dot(value, np.dot(precision, value)))
    return ans

# First term: log of the two-class priors.
first_term_b = [np.log(p) for p in priory_b]

# Second term (normalisation).  This is LDA, so both classes share cov_b: the
# term must be built from cov_b - the same matrix third_term_b uses - and not
# from the per-class cov_1[i], which would mix a QDA-style normaliser into an
# LDA score.  The value is therefore identical for both classes.
# NOTE(review): la.norm defaults to the Frobenius norm and the exponent 5 is
# hard-coded; the Gaussian density uses the determinant and (2*pi)**(d/2).
# Being identical across classes, it cannot change the argmax.
shared_second_term_b = -1*(np.log(((2*math.pi)**(5))*(np.sqrt(la.norm(cov_b)))))
second_term_b = [shared_second_term_b, shared_second_term_b]



In [None]:
#prediction function

def predict_1(x):
    """Return 0 or 1: the class with the larger two-class LDA score for ``x``.

    The score of a class is the sum of its entries in ``first_term_b``,
    ``second_term_b`` and ``third_term_b(x)``.

    NOTE(review): this reuses the name of the QDA predictor defined earlier in
    the notebook and replaces it once this cell runs; a distinct name would
    avoid the shadowing, but the accuracy cell below calls ``predict_1``.
    """
    gaussian_part = np.add(second_term_b, third_term_b(x))
    class_scores = np.add(first_term_b, gaussian_part)
    return np.argmax(class_scores)

In [None]:
#finding accuracy on test-set

# count1_b: test samples whose true label is 0 or 1
# count2_b: how many of those are classified correctly
count1_b = 0
count2_b = 0
for sample, label in zip(X_test, Y_test):
    if label not in (0, 1):
        continue  # only digits 0 and 1 take part in the binary evaluation
    count1_b += 1
    count2_b += int(predict_1(sample) == label)

# Accuracy on the 0-vs-1 subset of the test set.
print(count2_b/count1_b)

1.0
