# LDA and QDA Classifiers for MNIST data

## Preliminaries

### Imports

In [1]:
import os
import pickle 

import numpy as np
import matplotlib.pyplot as plt

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis,QuadraticDiscriminantAnalysis

%matplotlib inline
import gzip

import sys
sys.path.append("../..")
from E4525_ML import mnist
from E4525_ML import plots

### Data

In [2]:
# Directory holding the gzipped MNIST files. The trailing slash matters:
# file names are appended to this string by plain concatenation.
data_dir = "../../raw/MNIST/"

In [3]:
# Directory where fitted models are pickled (no trailing slash; the file name
# is appended with an explicit "/" separator). A plain string literal is used
# because there is nothing to interpolate.
model_dir = "../../data/models"

In [4]:
    # Paths of the gzipped files for the training and test splits.
    images_filename = f"{data_dir}train-images-idx3-ubyte.gz"
    labels_filename = f"{data_dir}train-labels-idx1-ubyte.gz"
    test_images_filename = f"{data_dir}t10k-images-idx3-ubyte.gz"
    test_labels_filename = f"{data_dir}t10k-labels-idx1-ubyte.gz"

    # Training split.
    images = mnist.read_images(images_filename)
    labels = mnist.read_labels(labels_filename)

    # Test split.
    test_images = mnist.read_images(test_images_filename)
    test_labels = mnist.read_labels(test_labels_filename)

    # Sanity check: shapes of the training images and labels.
    print(images.shape, labels.shape)

(60000, 28, 28) (60000,)


In [5]:
# Unpack the three axes of the training array: number of images, then the
# rows and columns of each image.
nimages, nrows, ncols = images.shape
print(nimages, nrows, ncols)

60000 28 28


## LDA Classifier

In [6]:
# Problem dimensions, derived from the loaded data instead of being hard-coded
# so they stay consistent with whatever data set was actually read.
K = len(np.unique(labels))  # number of distinct class labels
D = nrows * ncols           # features per image once its pixel grid is flattened
print("N", nimages)
print("K", K)
print("D", D)

N 60000
K 10
D 784


The number of free parameters to fit is:
1. $\pi$ (class priors): $K-1$
2. $\mu$ (class means): $K\times D$
3. $\Lambda$ (one symmetric $D\times D$ matrix shared by all classes): $D\times \frac{D+1}{2}$


In [7]:
# Free parameters of the LDA model: K-1 priors, K*D mean components and the
# D*(D+1)/2 independent entries of one symmetric D x D matrix.
lda_n_params = (K - 1) + K * D + D * (D + 1) // 2
print(f"params {lda_n_params:,}")

params 315,569


In [8]:
# Linear discriminant analysis estimator, constructed with default arguments.
model = LinearDiscriminantAnalysis()

In [9]:
# Flatten every training image into one feature row, fit the model on it and
# report the fraction of training labels that are predicted correctly.
X_train = images.reshape(nimages, -1)
model.fit(X_train, labels)
Y_pred = model.predict(X_train)
("Training accuracy", np.mean(Y_pred == labels))



('Training accuracy', 0.8714833333333334)

In [10]:
# Flatten the test images the same way and report the fraction of test
# labels that the fitted model predicts correctly.
X_test = test_images.reshape((len(test_images), -1))
Y_pred = model.predict(X_test)
("Test Accuracy", np.mean(Y_pred == test_labels))


('Test Accuracy', 0.873)

## QDA Classifier

The number of free parameters to fit is:
1. $\pi$ (class priors): $K-1$
2. $\mu$ (class means): $K\times D$
3. $\Lambda$ (one symmetric $D\times D$ matrix per class): $K\times D\times \frac{D+1}{2}$

In [11]:
# Free parameters of the QDA model: K-1 priors, K*D mean components and the
# D*(D+1)/2 independent entries of a symmetric D x D matrix for each of the
# K classes.
qda_n_params = (K - 1) + K * D + K * D * (D + 1) // 2
print(f"params {qda_n_params:,}")

params 3,085,049


In [12]:
# Quadratic discriminant analysis estimator, constructed with default
# arguments. This rebinds `model`: from here on the name refers to the QDA
# estimator, not the LDA one created earlier.
model = QuadraticDiscriminantAnalysis()

In [13]:
# Flatten every training image into one feature row, fit the model on it and
# report the fraction of training labels that are predicted correctly.
X_train = images.reshape(nimages, -1)
model.fit(X_train, labels)
Y_pred = model.predict(X_train)
("Training accuracy", np.mean(Y_pred == labels))



('Training accuracy', 0.57045)

In [14]:
# Flatten the test images the same way and report the fraction of test
# labels that the fitted model predicts correctly.
X_test = test_images.reshape((len(test_images), -1))
Y_pred = model.predict(X_test)
("Test Accuracy", np.mean(Y_pred == test_labels))


('Test Accuracy', 0.5395)

### Save Model

In [15]:
# Persist the current `model` object to disk with pickle.
# NOTE(review): `model` was rebound to the QuadraticDiscriminantAnalysis
# estimator earlier in the notebook, so the object written here is the QDA
# model even though the file is named MNIST_LDA.p -- confirm which is intended.
model_filename = model_dir + "/MNIST_LDA.p"

# Create the output directory on a fresh checkout instead of failing in open().
os.makedirs(model_dir, exist_ok=True)

# The context manager flushes and closes the file even if pickling raises;
# the original left the handle open for the lifetime of the kernel.
with open(model_filename, "wb") as file:
    pickle.dump(model, file)