In [1]:
import numpy as np

In [80]:
class NBC:
    """Gaussian naive Bayes classifier.

    Each feature is modeled, per class, as an independent univariate Gaussian.
    Prediction picks the class with the highest log-posterior:

        log pi_c + sum_f [ -0.5 * log(2*pi*var_cf) - (x_f - mean_cf)**2 / (2*var_cf) ]

    Attributes set by ``fit``:
        N    -- per-class sample counts, shape (num_classes,)
        pi   -- per-class priors N / len(y), shape (num_classes,)
        mean -- per-class feature means, shape (num_classes, n_features)
        var  -- per-class feature variances (floored), same shape as ``mean``
    """

    # Lower bound applied to every variance so that a feature that is constant
    # within a class does not cause a division by zero at prediction time.
    VAR_FLOOR = 1e-9

    def __init__(self, feature_types, num_classes):
        """Store the configuration and allocate empty count/prior vectors.

        Args:
            feature_types: one entry per feature. Only its length is used
                (as ``num_features``); every feature is treated as Gaussian
                regardless of the entry's value.
            num_classes: number of classes; labels are expected in
                ``0 .. num_classes - 1``.
        """
        self.feature_types = feature_types
        self.num_features = len(feature_types)
        self.num_classes = num_classes
        self.N = np.zeros(num_classes)
        self.pi = np.zeros(num_classes)

    def count_pi(self, y):
        """Count samples per class and derive the class priors.

        Args:
            y: sequence of integer class labels.

        Raises:
            ValueError: if ``y`` is empty (priors would be undefined).
        """
        labels = np.asarray(y).astype(int)
        if labels.size == 0:
            raise ValueError("cannot fit NBC on an empty label vector")
        # Start from zero on every call so that refitting the same instance
        # does not accumulate counts from the previous fit.
        self.N = np.zeros(self.num_classes)
        for c in labels:
            self.N[c] += 1
        self.pi = self.N / labels.size

    def parameters(self, X, y):
        """Estimate the per-class mean and (population) variance of each feature.

        Args:
            X: array-like of shape (n_samples, n_features).
            y: sequence of integer class labels, one per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        labels = np.asarray(y).astype(int)
        n_features = X.shape[1]
        mean = np.zeros((self.num_classes, n_features))
        var = np.ones((self.num_classes, n_features))
        for c in range(self.num_classes):
            rows = X[labels == c]
            if rows.shape[0] == 0:
                # No training samples for this class: keep the placeholder
                # values. Its prior is 0, so it can never win at prediction.
                continue
            mean[c] = rows.mean(axis=0)
            # ddof=0 variance, i.e. E[x^2] - E[x]^2, computed in a numerically
            # stable way.
            var[c] = rows.var(axis=0)
        self.mean = mean
        self.var = np.maximum(var, self.VAR_FLOOR)

    def calc_gauss(self, x):
        """Return the most probable class index for a single sample.

        Args:
            x: 1-D array-like with one value per feature.

        Returns:
            int: index of the class with the highest log-posterior; ties go to
            the lowest class index.
        """
        x = np.asarray(x, dtype=float)
        # A class absent from the training data has prior 0 -> log prior -inf;
        # silence the warning, the -inf score is exactly what we want.
        with np.errstate(divide="ignore"):
            log_prior = np.log(self.pi)
        # Gaussian log-density per (class, feature). The deviation must be
        # squared; using it unsquared makes the score linear in x.
        log_density = -0.5 * (
            np.log(2 * np.pi * self.var) + (x - self.mean) ** 2 / self.var
        )
        scores = log_prior + log_density.sum(axis=1)
        return int(np.argmax(scores))

    def fit(self, X, y):
        """Fit priors, means and variances from training data.

        Args:
            X: array-like of shape (n_samples, n_features).
            y: sequence of integer class labels, one per row of ``X``.
        """
        self.count_pi(y)
        self.parameters(X, y)

    def predict(self, Xtest):
        """Predict a class label for every row of ``Xtest``.

        Args:
            Xtest: array-like of shape (n_samples, n_features).

        Returns:
            numpy.ndarray: float array of length ``len(Xtest)`` holding the
            predicted class indices.
        """
        yhat = np.zeros(len(Xtest))
        for i in range(len(Xtest)):
            yhat[i] = self.calc_gauss(Xtest[i])
        return yhat

In [61]:
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split

# Load iris and hold out 20% of the rows as a test split (fixed seed).
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=0
)

# Train the hand-written classifier: four features, three classes.
nbc = NBC(feature_types=["r"] * 4, num_classes=3)
nbc.fit(X_train, y_train)
y_pred = nbc.predict(X_test)

# Report the confusion matrix followed by the accuracy on the test split.
cm = confusion_matrix(y_test, y_pred)
ac = accuracy_score(y_test, y_pred)
print(cm)
print(ac)




[[0.12932281 0.1417883  0.02031558 0.01113741]
 [0.26387144 0.1039737  0.2300073  0.04075968]
 [0.38918905 0.10782541 0.29451963 0.06444215]]
-0.9799974377176488
0.857887770778796
-11.734354237050264
-8.899032353971576
0 -21.87942635461309
-0.16860935140494626
0.15419760314718362
-1.1055100854181754
-1.9829183294473478
1 -4.279413993261107
0.2250521423358431
0.48191576303358574
0.12152000107747774
-0.2731273795844892
2 -0.44794158200136736
-1.2580727458088332
1.654599225552865
-7.875591670671103
-2.2660942592359596
0 -10.869089546815431
-0.363281493300629
1.0845757438052255
0.04130098117445874
1.4843117736483933
1 1.070333175189627
0.06475724640631025
1.3955254361054799
1.1349775636230826
2.4843568254020623
2 4.07631496267315
-0.5628844755808725
-1.0011056236940328
1.245119849861461
1.524156080612963
0 0.08135573454711942
0.12339886143857737
-2.016684725054916
2.7519453203934128
3.465586118274531
1 3.147671744913784
0.46549448623014184
-1.6498401408008359
3.53042271145815
4.06006208539

In [4]:
# Standardize the features: the scaler is fitted on the training split only and
# the same fitted transform is then applied to the test split.
# NOTE(review): this rebinds X_train / X_test to the scaled arrays, so any cell
# executed afterwards sees scaled data and the unscaled splits are no longer
# reachable under these names.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [68]:
# Reference model: fit scikit-learn's GaussianNB on the training split.
# NOTE(review): whether X_train is scaled at this point depends on which cells
# ran before this one; the recorded execution counts in this notebook are not
# sequential -- confirm with Restart Kernel -> Run All.
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(X_train, y_train)

In [66]:
# Predict labels for the test split with the fitted GaussianNB. This rebinds
# y_pred, which the NBC evaluation cell also assigns.
y_pred  =  classifier.predict(X_test)

In [67]:
# Score whatever y_pred currently holds against y_test: print the confusion
# matrix first, then the accuracy. Rebinds cm and ac.
from sklearn.metrics import confusion_matrix,accuracy_score
cm = confusion_matrix(y_test, y_pred)
ac = accuracy_score(y_test,y_pred)
print(cm)
print(ac)

[[11  0  0]
 [ 0 13  0]
 [ 0  1  5]]
0.9666666666666667


In [8]:
from sklearn.linear_model import LogisticRegression

In [9]:
# Baseline: L2-penalized logistic regression (C=10, fixed seed), fitted on the
# training split and scored on the test split.
# Relies on confusion_matrix / accuracy_score having been imported by an
# earlier cell; this cell does not import them itself.
clf = LogisticRegression(penalty='l2', C=10, random_state=0)
clf.fit(X_train, y_train)
y_pred_lr = clf.predict(X_test) 
cm = confusion_matrix(y_test, y_pred_lr)
ac = accuracy_score(y_test,y_pred_lr)
print(cm)
print(ac)


[[11  0  0]
 [ 0 13  0]
 [ 0  0  6]]
1.0
