In [1]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from scipy.stats import multivariate_normal

In [2]:
# Synthetic binary classification problem with two features, exactly one of
# which is informative; the seed is fixed so every run yields the same data.
n_samples = 1000
n_outliers = 100  # not referenced in the cells visible here
X, Y = make_classification(
    n_samples=n_samples,
    n_features=2,
    n_informative=1,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=42,
)

In [3]:
# Hold out 33% of the samples for evaluation; the fixed seed keeps the split
# identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.33,
    random_state=42,
)

In [4]:
class CustomGaussianClassifier:
    """Two-class Gaussian generative classifier for labels 0 and 1.

    Each class is modelled with its own multivariate normal distribution
    (separate mean and separate covariance). A sample is assigned to the
    class whose prior-weighted density is larger.

    Attributes (all ``None`` until ``fit`` is called):
        mu0, mu1: per-class feature means.
        cov0, cov1: per-class covariances as returned by ``np.cov``.
        phi0, phi1: class priors; ``phi0`` is the fraction of labels equal
            to 0 and ``phi1`` is ``1 - phi0``.
        p0, p1: frozen ``scipy.stats.multivariate_normal`` distributions.
    """

    def __init__(self):
        self.mu0 = None
        self.mu1 = None

        self.cov0 = None
        self.cov1 = None

        self.phi0 = None
        self.phi1 = None

        self.p0 = None
        self.p1 = None

    def fit(self, X, y):
        """Estimate per-class means, covariances and priors from labelled data.

        Args:
            X: array-like of shape (n_samples, n_features).
            y: array-like of shape (n_samples,) holding the labels 0 and 1.

        Raises:
            ValueError: if either class has fewer than two samples, because
                a covariance cannot be estimated from a single point.
        """
        X = np.asarray(X)
        y = np.asarray(y)

        X0 = X[y == 0]
        X1 = X[y == 1]
        if X0.shape[0] < 2 or X1.shape[0] < 2:
            raise ValueError(
                "fit requires at least two samples of each class (0 and 1); "
                f"got {X0.shape[0]} of class 0 and {X1.shape[0]} of class 1"
            )

        self.mu0 = np.average(X0, axis=0)
        self.mu1 = np.average(X1, axis=0)

        # rowvar=False: rows are observations, columns are features.
        self.cov0 = np.cov(X0, rowvar=False)
        self.cov1 = np.cov(X1, rowvar=False)

        self.phi0 = X0.shape[0] / y.shape[0]
        self.phi1 = 1 - self.phi0

        self.p0 = multivariate_normal(self.mu0, self.cov0)
        self.p1 = multivariate_normal(self.mu1, self.cov1)

    def predict(self, X):
        """Return the predicted label (0 or 1) for every row of ``X``.

        Args:
            X: array-like of shape (n_samples, n_features).

        Returns:
            Integer ndarray of shape (n_samples,). A row is labelled 1 when
            the class-1 prior-weighted density is strictly larger than the
            class-0 one, otherwise 0.

        Raises:
            AttributeError: if called before ``fit`` (distributions are None).
        """
        X = np.asarray(X)
        # Compare in log space: raw densities underflow to 0.0 for points far
        # from both classes, which would make the comparison always False.
        # atleast_1d because SciPy returns a scalar for a single sample.
        log_joint0 = np.atleast_1d(self.p0.logpdf(X)) + np.log(self.phi0)
        log_joint1 = np.atleast_1d(self.p1.logpdf(X)) + np.log(self.phi1)
        return (log_joint0 < log_joint1).astype(int)

    def score(self, X, y):
        """Return the accuracy of ``predict(X)`` against the true labels ``y``.

        Args:
            X: array-like of shape (n_samples, n_features).
            y: array-like of shape (n_samples,) with the true labels.

        Returns:
            Fraction of samples whose predicted label equals the true label.
        """
        y = np.asarray(y)
        y_pred = self.predict(X)
        return np.count_nonzero(y_pred == y) / y.shape[0]

# Train the hand-written Gaussian classifier on the training split, then show
# its held-out predictions followed by its held-out accuracy.
cgc = CustomGaussianClassifier()
cgc.fit(X_train, y_train)

print(cgc.predict(X_test))
print(cgc.score(X_test, y_test))

[0 1 1 0 0 0 1 1 0 0 0 1 1 1 0 1 0 0 0 1 0 0 1 0 1 1 0 0 0 1 1 1 1 1 0 1 1
 1 1 0 1 0 1 1 1 1 0 0 1 1 0 0 0 1 0 1 1 0 1 0 0 0 1 0 0 0 1 1 0 1 1 1 1 0
 1 1 1 1 1 1 0 1 0 0 0 0 0 1 1 1 0 0 1 1 0 1 1 1 0 1 0 0 1 0 1 0 1 1 1 1 0
 1 0 0 1 1 0 1 0 0 0 0 1 1 1 1 1 0 1 0 0 0 0 0 0 0 1 1 0 0 1 1 0 0 0 0 0 1
 1 1 1 0 1 0 1 0 0 0 1 1 1 0 0 1 1 1 1 1 0 0 1 0 1 0 0 0 1 0 0 0 1 1 0 0 1
 0 1 0 1 0 0 0 1 0 0 0 1 1 0 1 1 0 0 0 1 0 0 1 1 0 1 1 1 0 0 1 0 0 1 1 1 0
 1 0 1 1 1 1 1 0 1 0 1 0 0 1 1 0 0 0 1 1 1 1 1 0 1 1 0 1 0 1 0 0 0 0 0 0 1
 1 1 0 0 1 1 0 0 0 1 0 0 0 1 0 0 0 0 0 1 0 0 1 1 1 1 0 0 0 0 0 0 0 0 1 1 1
 0 0 0 0 1 1 0 1 1 0 0 1 1 1 0 1 0 1 1 0 0 0 0 1 1 0 1 0 1 1 0 1 1 1]
0.9090909090909091


In [10]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Baseline for comparison: scikit-learn's LDA trained and evaluated on the same
# split. LDA uses one covariance shared by both classes, whereas
# CustomGaussianClassifier fits a separate covariance per class, so a few
# predictions may differ between the two models.
clf = LinearDiscriminantAnalysis()
clf.fit(X_train, y_train)

print(clf.predict(X_test))
print(clf.score(X_test, y_test))


[0 1 1 0 0 0 1 1 0 0 0 1 1 1 0 1 0 0 0 1 0 0 1 0 1 1 0 0 0 1 1 1 1 1 0 1 1
 1 1 0 0 0 1 1 1 1 0 0 1 1 0 0 0 1 0 1 1 0 1 0 0 0 1 0 0 0 1 1 0 1 1 1 1 0
 1 1 1 1 1 1 0 1 0 0 0 0 0 1 0 1 0 0 1 1 0 1 1 1 0 1 0 0 1 0 1 0 1 1 1 1 0
 1 0 0 0 1 0 1 0 0 0 0 1 1 1 1 1 0 1 0 0 0 0 0 0 0 1 1 0 0 1 1 0 0 0 0 0 1
 1 1 1 0 1 0 1 0 0 0 1 1 1 0 0 1 1 1 1 1 0 0 1 0 1 0 0 0 1 0 0 0 1 1 0 0 1
 0 1 0 1 0 0 0 1 0 0 0 1 1 0 1 1 0 0 0 1 0 0 1 1 0 1 1 1 0 0 1 0 0 1 1 1 0
 1 0 1 1 1 1 1 0 1 0 1 0 0 1 1 0 0 0 1 1 1 1 1 0 0 1 0 1 0 1 0 0 0 0 0 0 1
 1 1 0 0 1 1 0 0 0 1 0 0 0 1 0 0 0 0 0 1 0 0 1 0 1 1 0 0 0 0 0 0 0 0 1 1 1
 0 0 0 0 1 1 0 1 1 0 0 1 1 1 0 1 0 1 1 0 0 0 0 1 1 0 1 0 1 1 0 1 1 1]
0.9181818181818182


In [6]:
# model data