In [1]:
import numpy as np   

class GDAClassifier:
    """Gaussian discriminant analysis with one covariance matrix shared by all classes.

    Each class k is modelled as a Gaussian with its own mean ``u[k]`` and the
    pooled covariance ``E``. A sample is assigned to the class maximising
    ``log(phi_y[k]) - 0.5 * (x - u[k])^T invE (x - u[k])``.

    Attributes set by ``fit``:
        y_classes: sorted unique labels seen in ``y``.
        phi_y:     class priors (relative frequency of each label).
        u:         per-class feature means, one row per entry of ``y_classes``.
        E:         pooled within-class covariance (plus ``epsilon``).
        invE:      Moore-Penrose pseudo-inverse of ``E``.
    """

    def fit(self, X, y, epsilon = 1e-10):
        """Estimate priors, class means and the shared covariance.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: 1-D array-like of n_samples labels.
            epsilon: small constant added to every entry of the covariance
                matrix before it is pseudo-inverted.

        Returns:
            self, so calls can be chained.
        """
        # Accept plain lists as well as arrays; boolean masks below need arrays.
        X = np.asarray(X)
        y = np.asarray(y)
        self.y_classes, y_counts = np.unique(y, return_counts=True)
        self.phi_y = y_counts / float(len(y))
        self.u = np.array([X[y == k].mean(axis=0) for k in self.y_classes])
        self.E = self.compute_sigma(X, y)
        # Adds epsilon to *every* entry (epsilon * ones), not only the diagonal.
        # The perturbation is positive semi-definite; pinv below copes with any
        # remaining rank deficiency (e.g. constant features).
        self.E += np.ones_like(self.E) * epsilon
        self.invE = np.linalg.pinv(self.E)
        return self

    def compute_sigma(self,X, y):
        """Return the pooled within-class covariance (normalised by n_samples).

        Every row of ``X`` is centred on the mean of its own class (``self.u``
        must already be set) before the scatter matrix is formed.
        """
        y = np.asarray(y)
        X_u = np.array(X, dtype='float64')  # float copy; the caller's X is untouched
        for i in range(len(self.u)):
            X_u[y == self.y_classes[i]] -= self.u[i]
        return X_u.T.dot(X_u) / len(y)

    def _log_scores(self, X):
        """Unnormalised log-posterior of each class for each row of a 2-D ``X``.

        Working in log space avoids the underflow of ``exp(-distance)`` to 0
        for every class when a sample lies far from all class means.
        """
        log_prior = np.log(self.phi_y)
        scores = np.empty((X.shape[0], len(self.y_classes)))
        for k, mean in enumerate(self.u):
            diff = X - mean
            # Squared Mahalanobis distance of every row to class k.
            maha = np.sum(diff.dot(self.invE) * diff, axis=1)
            scores[:, k] = log_prior[k] - 0.5 * maha
        return scores

    def predict(self, X):
        """Return the predicted label for every row of ``X``.

        An ``X`` with zero rows yields an empty result.
        """
        best = np.argmax(self._log_scores(np.asarray(X)), axis=1)
        # Map argmax positions back to the original labels.
        return self.y_classes[best]

    def score(self, X, y):
        """Return the accuracy: the fraction of rows whose prediction equals ``y``."""
        return (self.predict(X) == np.asarray(y)).mean()

    def get_prob(self, x):
        """Return the predicted label for a single 1-D sample ``x``.

        Despite its name this returns the winning class label, not a probability.
        """
        row = np.asarray(x)[np.newaxis, :]
        return self.y_classes[np.argmax(self._log_scores(row)[0])]

In [2]:
# Iris: load the data, split it, and fit the GDA classifier on the training part.
# train_test_split comes from the project-local `utils` module, not from sklearn.
from utils import train_test_split
from sklearn.datasets import load_iris
X,y = load_iris(return_X_y=True)
# NOTE(review): test_size=0.8 presumably holds out 80% of the rows for testing,
# leaving only 20% for training -- confirm against utils.train_test_split.
# No seed is passed here, so the split may differ between runs -- verify.
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.8)
model = GDAClassifier().fit(X_train,y_train)

In [3]:
# Accuracy of the fitted model on the held-out split (relies on `model`,
# `X_test` and `y_test` left in the kernel by the previous cell).
model.score(X_test,y_test)

0.975

In [5]:
# Breast-cancer dataset: same split / fit / score procedure as above.
# This cell rebinds X, y, the split variables and `model`, replacing the earlier values.
from sklearn.datasets import load_breast_cancer
X,y = load_breast_cancer(return_X_y=True)
# NOTE(review): test_size=0.8 presumably means 80% of rows are held out -- confirm
# against utils.train_test_split (defined outside this notebook).
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.8)
model = GDAClassifier().fit(X_train,y_train)
# Last expression of the cell: accuracy on the held-out part.
model.score(X_test,y_test)

0.9296703296703297

In [6]:
# Digits dataset: split, fit and score the GDA classifier.
# This cell rebinds X, y, the split variables and `model`, replacing the earlier values.
from sklearn.datasets import load_digits
digits = load_digits()
X = digits.data
y = digits.target
# NOTE(review): test_size=0.5 presumably holds out half of the rows -- confirm
# against utils.train_test_split (defined outside this notebook).
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.5)
model = GDAClassifier().fit(X_train,y_train)
# Last expression of the cell: accuracy on the held-out part.
model.score(X_test,y_test)

0.9510022271714922