In [45]:
import numpy as np 
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

import matplotlib.pyplot as plt
%matplotlib inline

In [53]:
class GaussianDiscriminant:
    """Quadratic Gaussian discriminant classifier.

    Every class is modelled by a multivariate normal distribution with its
    own mean vector and covariance matrix, both estimated from the training
    samples of that class. A sample is assigned to the class whose
    class-conditional density times class prior is largest.
    """

    def __init__(self):
        self.data = None          # training samples, one row per sample
        self.labels = None        # training labels, one entry per sample
        self.feat_ids = None      # list of feature (column) indices
        self.label_ids = None     # sorted list of distinct class labels
        self._means = None        # cached per-class mean vectors
        self._covariances = None  # cached per-class covariance matrices
        self._prior_probs = None  # cached per-class prior probabilities

    def fit(self, data, labels):
        """Store the training set and estimate the per-class parameters.

        Parameters
        ----------
        data : array-like, 2-D
            Training samples, one row per sample.
        labels : array-like, 1-D
            Class label of every row of ``data``.

        Returns
        -------
        GaussianDiscriminant
            The fitted instance itself.
        """
        self.data = np.array(data)
        self.labels = np.array(labels)
        self.feat_ids = list(range(self.data.shape[1]))
        # np.unique gives a sorted, deterministic class order (a plain set
        # does not), so column i of every per-class array is label_ids[i].
        self.label_ids = np.unique(self.labels).tolist()

        # Drop anything cached by a previous fit, then recompute eagerly.
        self._means = None
        self._covariances = None
        self._prior_probs = None
        self.means
        self.covariances
        self.prior_probs
        return self

    @property
    def means(self):
        """Per-class mean vectors as an array of shape (n_classes, n_features)."""
        if self._means is None:
            self._means = np.array([
                self.data[self.labels == label_id].mean(axis=0)
                for label_id in self.label_ids
            ])
        return self._means

    @property
    def covariances(self):
        """Per-class covariance matrices, shape (n_classes, n_features, n_features)."""
        if self._covariances is None:
            # np.cov collapses single-feature data to a 0-d array; atleast_2d
            # keeps every entry a square matrix so pinv/slogdet accept it.
            self._covariances = np.array([
                np.atleast_2d(np.cov(self.data[self.labels == label_id], rowvar=False))
                for label_id in self.label_ids
            ])
        return self._covariances

    @property
    def prior_probs(self):
        """Fraction of training samples that belong to each class."""
        if self._prior_probs is None:
            self._prior_probs = np.array([
                np.mean(self.labels == label_id)
                for label_id in self.label_ids
            ])
        return self._prior_probs

    def _log_gauss(self, x, mean, covariance):
        """Return the log of the multivariate normal density at ``x``."""
        diff = np.asarray(x) - np.asarray(mean)
        mahalanobis = diff @ np.linalg.pinv(covariance) @ diff
        # slogdet returns (sign, log|det|); only the log-magnitude is used.
        _, log_det = np.linalg.slogdet(covariance)
        return -0.5 * (mahalanobis + len(diff) * np.log(2 * np.pi) + log_det)

    def gauss(self, x, mean, covariance):
        """Return the multivariate normal density at ``x``.

        The inverse is a pseudo-inverse, but the normalisation still uses the
        determinant, so a singular ``covariance`` yields a non-finite value.
        """
        return np.exp(self._log_gauss(x, mean, covariance))

    def cond_probs(self, x):
        """Return the class-conditional density of ``x`` for every class."""
        probs = [self.gauss(x, mean, covariance)
                 for mean, covariance in zip(self.means, self.covariances)]
        return np.array(probs)

    def _posterior(self, x):
        """Return the normalised class posterior of one sample.

        The computation stays in log space and subtracts the largest term
        before exponentiating, so a sample far from every class does not
        underflow to 0/0.
        """
        log_joint = np.array([
            self._log_gauss(x, mean, covariance)
            for mean, covariance in zip(self.means, self.covariances)
        ]) + np.log(self.prior_probs)
        weights = np.exp(log_joint - log_joint.max())
        return weights / weights.sum()

    def predict(self, x):
        """Return the most probable class label for every row of ``x``.

        The returned values are entries of ``label_ids`` (the original
        labels passed to ``fit``), not positional class indices.
        """
        probs = self.predict_probs(x)
        return np.array([self.label_ids[row.argmax()] for row in probs])

    def predict_probs(self, x):
        """Return class posteriors for every row of ``x``.

        Column ``i`` of the result corresponds to ``label_ids[i]``.
        """
        x = np.asarray(x)
        return np.array([self._posterior(row) for row in x])

In [54]:
# Fixed seed so the train/test split (and every accuracy computed from it)
# is identical on each Restart & Run All.
SEED = 42

# Keep the loaded dataset under its own name instead of rebinding `data`
# from the dataset object to the feature array.
iris = datasets.load_iris()
data, labels = iris.data, iris.target

# Hold out 20% of the samples for evaluation.
train_data, test_data, train_labels, test_labels = train_test_split(
    data, labels, test_size=0.2, random_state=SEED
)

In [60]:
# Fit the hand-written classifier and the scikit-learn reference on the same
# training split.
gd = GaussianDiscriminant()
gd.fit(train_data, train_labels)

sklearn_gd = QuadraticDiscriminantAnalysis()
sklearn_gd.fit(train_data, train_labels)

# Predict the held-out samples with both models.
prediction = gd.predict(test_data)
sklearn_prediction = sklearn_gd.predict(test_data)

# Accuracy: fraction of held-out samples whose prediction equals the label.
accuracy = np.mean(prediction == test_labels)
sklearn_accuracy = np.mean(sklearn_prediction == test_labels)

report = 'self-made GDA accuracy: {}\nsklearn quadratic GDA: {}'
print(report.format(accuracy, sklearn_accuracy))

self made GDA accuracy: 0.9666666666666667
sklearn quadratic GDA: 0.9666666666666667
