In [16]:
import os.path
import sys
import tqdm # task bar
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import idx2numpy as inp
import scipy.sparse.linalg
import scipy.spatial.distance as dist

# Load Database

In [8]:
def _load_flat_images(path):
    """Read an IDX image file and return one flattened row vector per image."""
    return np.array([picture.flatten() for picture in inp.convert_from_file(path)])

# Images are vectorised on load: each 28 x 28 pixel image becomes a 784-element row.
TrImgs = _load_flat_images('train-images.idx3-ubyte')
TrLbls = inp.convert_from_file('train-labels.idx1-ubyte')
TsImgs = _load_flat_images('t10k-images.idx3-ubyte')
TsLbls = inp.convert_from_file('t10k-labels.idx1-ubyte')

# Mean of the TRAINING images (one value per pixel), reused by the rest of the program.
TrImgsMean = TrImgs.mean(axis=0)

## Simple Tools

In [12]:
def show(img):
    """Display a vectorised (flattened) image by reshaping it back to 28 x 28."""
    return plt.imshow(img.reshape(28,28))

# Class Means & Between-Class Scatter

In [187]:
def ClassMeans(Imgs,Lbls):
    """Compute the mean image of every class.

    Imgs -- array with one vectorised image per row
    Lbls -- array holding the class label of each row of Imgs

    Returns an array with one row per distinct label (rows follow the sorted
    order of np.unique(Lbls)), downcast to float16. Progress is written to stdout.
    """
    per_class = []
    for label in np.unique(Lbls): # distinct labels, ascending
        sys.stdout.write("\rDoing class %d" % label)
        sys.stdout.flush()
        members = Imgs[Lbls==label] # rows belonging to this class
        per_class.append(members.mean(axis=0))
    print("\n")
    stacked = np.array(per_class)
    return stacked.astype('float16')

In [182]:
# Test Area!
# Sanity check: product of a small matrix with its own transpose.
# print is called as a function so the cell runs on Python 3 as well as Python 2.
v = np.array([[1,0],[2,3]]).T
print(v)
print(np.dot(v,v.T))

[[1 2]
 [0 3]]
[[5 6]
 [6 9]]


In [202]:
def Sb(Imgs,Lbls):
    """Between-Class scatter matrix.

    Computes sum_k (n_k / N) * (m_k - m)(m_k - m)^T, where m is the mean of all
    rows of Imgs, m_k is the mean of the rows carrying the k-th distinct label
    (as returned by ClassMeans) and n_k / N is that class's share of the samples.

    Imgs -- 2-D array, one vectorised image per row
    Lbls -- 1-D array with the class label of each row of Imgs

    Returns a (D, D) float array, D being the number of columns of Imgs.
    Labels may be arbitrary values; they do not have to be 0..C-1.
    """
    Imgs = np.asarray(Imgs)
    Lbls = np.asarray(Lbls)

    n_samples = len(Lbls)
    n_features = Imgs.shape[1] # taken from the data rather than hard-coded

    overall_mean = np.mean(Imgs,axis=0)
    class_means = ClassMeans(Imgs,Lbls) # row k <-> k-th sorted distinct label

    classes = np.unique(Lbls) # same ordering ClassMeans uses for its rows
    class_nums = np.array([np.sum(Lbls==c) for c in classes]) # class sizes
    class_frac = class_nums*1./n_samples # class fractions

    scatter = np.zeros(shape=(n_features,n_features))

    # Index by position k, not by label value, so rows and fractions always line up.
    for k in range(len(classes)):
        diff = class_means[k]-overall_mean # centred class mean
        scatter += class_frac[k]*np.outer(diff,diff) # column * row = matrix
    return scatter

In [201]:
# Between-class scatter matrix of the training set.
TrSb = Sb(Imgs=TrImgs, Lbls=TrLbls)

Doing class 9



# Class Covariances & Within-Class Scatter

In [218]:
def ClassCovars(Imgs,Lbls):
    """Per-class covariance matrices.

    For every distinct label, the rows of Imgs with that label are centred on
    their own mean and the covariance is computed normalised by the number of
    images in the class (divide by n, not n-1).

    Imgs -- 2-D array, one vectorised image per row
    Lbls -- 1-D array with the class label of each row of Imgs

    Returns a (C, D, D) float array: C distinct labels in the sorted order of
    np.unique(Lbls), D columns in Imgs. Labels may be arbitrary values; they do
    not have to be 0..C-1. Progress is written to stdout once per class.
    """
    Imgs = np.asarray(Imgs)
    Lbls = np.asarray(Lbls)

    classes = np.unique(Lbls)
    n_features = Imgs.shape[1] # taken from the data rather than hard-coded

    Covs = np.zeros(shape=(len(classes),n_features,n_features))

    # Index by position k, not by label value, so any label set fits in Covs.
    for k, c in enumerate(classes):
        sys.stdout.write("\rDoing class {}".format(c)); sys.stdout.flush()
        class_imgs = Imgs[Lbls==c] # get set of class images
        centred = class_imgs-np.mean(class_imgs,axis=0)
        # One matrix product equals the sum of the per-image outer products
        # (column * row), without a Python-level loop over every image.
        Covs[k] = np.dot(centred.T,centred)/len(class_imgs)
    print("\n")
    return Covs

In [None]:
# Per-class covariance matrices of the training set.
TrCovars = ClassCovars(Imgs=TrImgs, Lbls=TrLbls)

Doing class 0 img 4673 of 5923

In [199]:
# NOTE(review): `a` is not defined in any cell visible here — presumably left over
# from an earlier scratch cell; confirm and either define it or drop this cell.
a[0]

array([[ 0.,  0.],
       [ 0.,  0.]])