# Adaptative Media Processing HW2

## __Methods__
A. Optimal Linear Associative Memory (OLAM). <br>
B. Fisher’s linear discriminator (FLD)

## __Dataset__
Fisher’s iris data from the Machine Learning Repository at UC Irvine <br>
http://archive.ics.uci.edu/ml/datasets/Iris 

## __Experiment__
1) The data of each class were divided in half.
2) Half of it were used to train each classifier and the other half to test them.
3) The accuracy of the testing was collected
4) Step 1-3 were repeated 100 times for a t-test with power = 1 (delta = 0.05)

## __Result__
method: mean (standard deviation)<br>
olam: 0.803 (0.036) <br>
fld:  0.946 (0.023)

__T-test__
H_0 : mean_fld <= mean_olam
H_1 : mean_fld >  mean_olam

p-value < 2.2e-16

## __Conclusion__
There is a significant difference in the accuracy, suggesting that Fisher's classifier is better than OLAM on this dataset.


In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

%matplotlib inline

In [2]:
def make_training_set(classes, sizes):
    train = np.array([], dtype=bool)
    for i, c in enumerate(classes):
        mask = np.array([True]*int(sizes[i]/2) + [False]*int(sizes[i]/2))
        np.random.shuffle(mask)
        train = np.append(train, mask)
        
    return train

In [3]:
class OLAM(object):
    def __init__(self):
        self._positive = 0
        self._total    = 0
        self.accuracy  = 0
        self.M = np.array([])
        
    def train(self, X, Y):
        X_ = np.linalg.pinv(np.transpose(np.matrix(X)))
        Y_ = np.transpose(np.matrix(Y))
        self.M = Y_ * X_
        
    def classify(self, X):
        Y_ = self.M * np.transpose(np.array([X])) 
        return np.argmax(Y_)
    
    def test(self, X, Y):
        self._total = len(X)
        for x, y in zip(np.array(X),np.array(Y)):
            y_ = self.classify(x)
            if(y_ == np.argmax(y)):
                self._positive += 1
                
        self.accuracy = self._positive/self._total
        return self.accuracy

In [4]:
class F_LD(object):
    def __init__(self):
        self._positive = 0
        self._total    = 0
        self.accuracy  = 0
        
        self.M = np.array([])
    
    def train(self, data, itr, its):
        self._classes = data["class"].unique()

        # calculate mean_class and mean_global
        self._mean_c = np.array([[data[data["class"] == cls].mean().iloc[:itr]] for cls in self._classes])
        self._mean_g = np.mean(self._mean_c, axis=0)
        
        # Sw: within-class covariance 
#         self._Sw = [np.sum([np.transpose((x - self._mean_c[i]))*(x - self._mean_c[i])
#                            for x in np.matrix(data[data["class"] == cls].iloc[:,:4])])
#                                 for i, cls in enumerate(self._classes)]

        R = [np.zeros((4, 4)) for i in range(len(classes))]
        for i, cls in enumerate(classes):
            for x in np.matrix(data[data["class"] == cls].iloc[:,:4]):
                tmp_ = x - self._mean_c[i]
                tmp_ = np.transpose(tmp_) * tmp_
                R[i] += tmp_
                
        self._Sw = sw = R[0] + R[1] + R[2]
        
        # Sb: between-class covariance
        self._Sb = np.sum([len(np.matrix(data[data["class"] == cls]))*(self._mean_c[i] - self._mean_g)*np.transpose((self._mean_c[i] - self._mean_g)) 
                     for i, cls in enumerate(classes)], axis=0)
        
        val, vec = np.linalg.eig(np.linalg.pinv(self._Sw) * self._Sb)
        
        # W
        self._W = np.transpose(vec[np.argmax(val)].reshape(4,1))
        
        # Projections
        self._p1 = self._W * self._mean_c[0]
        self._p2 = self._W * self._mean_c[1] 
        self._p3 = self._W * self._mean_c[2]
        
    def classify(self, X):
        p_ = self._W * X
        return np.argmin([np.linalg.norm(p_ - self._p1), np.linalg.norm(p_ - self._p2), np.linalg.norm(p_ - self._p3)])
    
    def test(self, X, Y):
        self._total = len(X)
        for x, y in zip(np.array(X),np.array(Y)):
            y_ = self.classify(x)
            if(y_ == np.argmax(y)):
                self._positive += 1
                
        self.accuracy = self._positive/self._total
        return self.accuracy

## Read the data

In [5]:
header  = ["sepal_length", "sepal_width", "petal_length", "petal_width", "class"]
data    = pd.read_csv("data/iris.data", names=header)

## Preprossing:
1. setosa -> [1,0,0]
2. versicolor -> [0,1,0]
3. virginica -> [0,0,1]

In [6]:
classes = data["class"].unique()
sizes   = [sum(data["class"] == c) for c in classes]
mask_cls = [data["class"] == c for c in classes]

#data = data_raw.copy().drop(columns = ["class"])
data["Iris-setosa"] = list(map(int, mask_cls[0]))
data["Iris-versicolor"] = list(map(int, mask_cls[1]))
data["Iris-virginica"] = list(map(int, mask_cls[2]))

## OLAM 

In [7]:
rep = 100
acc_olam = []
for i in range(0, rep):
    # make train test set
    train = make_training_set(classes, sizes)
    test  = np.array([not x for x in train])

    train_x, train_y = data[train].iloc[:,:4], data[train].iloc[:,5:] #x: features, y:encoded_class
    test_x,  test_y  = data[test].iloc[:,:4], data[test].iloc[:,5:]

    # initialize model
    model = OLAM()

    # train
    model.train(train_x, train_y)

    # test
    model.test(test_x, test_y)
    
    acc_olam += [model.accuracy]
#     print(i, model.accuracy, sep=",")

print("Mean(std)\n", np.mean(acc_olam), "(",np.std(acc_olam),")")
np.savetxt("acc_olam.txt", acc_olam, delimiter=",")

Mean(std)
 0.8029333333333333 ( 0.03620092079860217 )


## FISHER'S

In [8]:
rep = 100
acc_fld = []
for i in range(0, rep):
    # make train test set
    train = make_training_set(classes, sizes)
    test  = np.array([not x for x in train])

    train_x, train_y = data[train].iloc[:,:4], data[train].iloc[:,5:] #x: features, y:encoded_class
    test_x,  test_y  = data[test].iloc[:,:4], data[test].iloc[:,5:]

    # initialize model
    model = F_LD()

    # train
    model.train(data[train], 4, 5)

    # test
    model.test(test_x, test_y)
    
    acc_fld += [model.accuracy]
    #print(i+1, model.accuracy, sep=",")
print("Mean(std)\n", np.mean(acc_fld), "(",np.std(acc_fld),")")
np.savetxt("acc_fld.txt", acc_fld, delimiter=",")

Mean(std)
 0.946 ( 0.022578258962501468 )
