In [1]:
import numpy as np

In [10]:
class NBC: 

    def __init__(self, feature_types, num_classes):
        self.feature_types = feature_types
        self.num_features = len(feature_types)
        self.num_classes = num_classes
        self.N  = np.zeros(num_classes)
        self.pi = np.zeros(num_classes) 
        self.mean = np.zeros(num_classes)
        self.var = np.zeros(num_classes) 

    def count_pi(self, y): 
        for c in y: 
            self.N[c] += 1
        self.pi = self.N / len(y)
        
    def parameters(self, X, y): 
        for c in range(self.num_classes):
            self.mean[c] = np.divide (sum(x for x in X if y == c), self.N[c])
            self.var[c] = np.divide(sum(x**2 for x in X if y == c), self.N[c]) - self.mean[c] ** 2

    def calc_gauss(self, x):
        max_p = -100000
        max_c = 0
        for c in range(self.num_classes):
            total_prob = np.log(self.pi[c])
            for f in range(len(x)): 
                var = self.parameters[c][f][1]
                mean = self.parameters[c][f][0]
                diff = np.log((2*np.pi*var)**-0.5) - 0.5 * ((x[f] - mean) / (var**0.5))
                print(diff)
                total_prob += np.log((2*np.pi*var)**-0.5) - 0.5 * ((x[f] - mean) / (var**0.5))
            print(c, total_prob)
            if total_prob > max_p: 
                max_p = total_prob
                max_c = c
        return max_c 
            

    def fit(self, X, y):
        self.count_pi(y)
        self.parameters(X,y)

    def predict(self, Xtest):
        yhat = np.zeros(len(Xtest))
        for x in range(len(Xtest)): 
            yhat[x] = self.calc_gauss(Xtest[x])
        print(yhat)
        return yhat

In [11]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 0)

nbc = NBC(feature_types=["r", "r", "r", "r"], num_classes=3)
nbc.fit(X_train,y_train)
y_pred = nbc.predict(X_test)

from sklearn.metrics import confusion_matrix,accuracy_score
cm = confusion_matrix(y_test, y_pred)
ac = accuracy_score(y_test,y_pred)
print(cm)
print(ac)




-1.9626010385283732
-1.0449282757734877
-2.219871560669134
-0.9607225174895637
0 -7.312053489112958
-1.9626010385283732
-1.0449282757734877
-2.219871560669134
-0.9607225174895637
1 -7.364697222598379
-1.9626010385283732
-1.0449282757734877
-2.219871560669134
-0.9607225174895637
2 -7.1914255013243435
-2.017034143923555
-0.8428977668630456
-1.9313950330015774
-0.539562731172233
0 -6.454819771612811
-2.017034143923555
-0.8428977668630456
-1.9313950330015774
-0.539562731172233
1 -6.507463505098233
-2.017034143923555
-0.8428977668630456
-1.9313950330015774
-0.539562731172233
2 -6.334191783824196
-1.8809513804356006
-1.5163327965645195
-1.2495414221509886
-0.2988999961337584
0 -6.069655691937267
-1.8809513804356006
-1.5163327965645195
-1.2495414221509886
-0.2988999961337584
1 -6.122299425422688
-1.8809513804356006
-1.5163327965645195
-1.2495414221509886
-0.2988999961337584
2 -5.9490277041486515
-2.370849328992236
-1.0786000272585614
-2.53457322721556
-0.7802254662107077
0 -7.888178146329465


In [4]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [5]:
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(X_train, y_train)

In [6]:
y_pred  =  classifier.predict(X_test)

In [7]:
from sklearn.metrics import confusion_matrix,accuracy_score
cm = confusion_matrix(y_test, y_pred)
ac = accuracy_score(y_test,y_pred)
print(cm)
print(ac)

[[11  0  0]
 [ 0 13  0]
 [ 0  1  5]]
0.9666666666666667


In [8]:
from sklearn.linear_model import LogisticRegression

In [9]:
clf = LogisticRegression(penalty='l2', C=10, random_state=0)
clf.fit(X_train, y_train)
y_pred_lr = clf.predict(X_test) 
cm = confusion_matrix(y_test, y_pred_lr)
ac = accuracy_score(y_test,y_pred_lr)
print(cm)
print(ac)


[[11  0  0]
 [ 0 13  0]
 [ 0  0  6]]
1.0
