In [1]:
import numpy as np

In [111]:
class NBC:
    """Naive Bayes classifier for real-valued and binary features.

    Features are treated as conditionally independent given the class.
    A feature tagged ``"r"`` is scored with a Gaussian log-density and a
    feature tagged ``"b"`` with a Bernoulli log-likelihood. Features carrying
    any other tag contribute nothing to the score.

    Attributes populated by ``fit``:
        N:    per-class sample counts, shape ``(num_classes,)``.
        pi:   class priors, ``N / number_of_samples``.
        mean: per-class feature means, shape ``(num_classes, num_features)``.
        var:  per-class feature variances (floored at ``_VAR_FLOOR``).
    """

    # Lower bound on fitted variances so a constant feature does not cause a
    # division by zero (or a negative variance from rounding) in `gaussian`.
    _VAR_FLOOR = 1e-9
    # Bernoulli parameters are clipped to [eps, 1 - eps] to keep log() finite.
    _PROB_EPS = 1e-9

    def __init__(self, feature_types, num_classes):
        """Store the per-feature type tags and the number of classes.

        Args:
            feature_types: sequence of ``"r"`` / ``"b"`` tags, one per feature.
            num_classes: number of classes; labels are expected in
                ``0 .. num_classes - 1``.
        """
        self.feature_types = feature_types
        self.num_features = len(feature_types)
        self.num_classes = num_classes
        self.N = np.zeros(num_classes)
        self.pi = np.zeros(num_classes)

    def count_pi(self, y):
        """Count samples per class and derive the class priors.

        Counts are rebuilt from scratch on every call so that calling ``fit``
        more than once does not accumulate stale counts.

        Raises:
            ValueError: if ``y`` is empty.
            IndexError: if a label is >= ``num_classes``.
        """
        labels = np.asarray(y).astype(int).ravel()
        if labels.size == 0:
            raise ValueError("cannot estimate class priors from an empty label array")
        counts = np.zeros(self.num_classes)
        # Unbuffered add so repeated labels are each counted.
        np.add.at(counts, labels, 1)
        self.N = counts
        self.pi = counts / labels.size

    def parameters(self, X, y):
        """Estimate the per-class mean and variance of every feature.

        A class with no training samples keeps placeholder parameters
        (mean 0, variance 1); its prior is 0, so it can never be predicted.
        """
        X = np.asarray(X, dtype=float)
        labels = np.asarray(y).astype(int).ravel()
        mean = np.zeros((self.num_classes, X.shape[1]))
        var = np.ones((self.num_classes, X.shape[1]))
        for c in range(self.num_classes):
            rows = X[labels == c]
            if len(rows) == 0:
                continue
            mean[c] = rows.mean(axis=0)
            # Population variance (ddof=0), i.e. E[x^2] - E[x]^2 computed stably.
            var[c] = rows.var(axis=0)
        self.mean = mean
        self.var = np.maximum(var, self._VAR_FLOOR)

    def gaussian(self, mean, var, x):
        """Return log N(x | mean, var), where ``var`` is the variance."""
        return -0.5 * np.log(2 * np.pi * var) - 0.5 * (x - mean) ** 2 / var

    def bernoulli(self, m, x):
        """Return log Bernoulli(x | m) for ``x`` in {0, 1}.

        ``m`` is clipped away from 0 and 1 so the logarithms stay finite.
        """
        m = np.clip(m, self._PROB_EPS, 1 - self._PROB_EPS)
        return x * np.log(m) + (1 - x) * np.log(1 - m)

    def guess_c(self, x):
        """Return the class index with the highest log-posterior for sample ``x``.

        Ties are resolved in favour of the lowest class index.
        """
        # A class absent from the training data has prior 0 -> log prior -inf.
        with np.errstate(divide="ignore"):
            log_prior = np.log(self.pi)

        max_p = -np.inf
        max_c = 0
        for c in range(self.num_classes):
            total_prob = log_prior[c]
            for f in range(len(x)):
                kind = self.feature_types[f]
                if kind == "r":
                    total_prob += self.gaussian(self.mean[c][f], self.var[c][f], x[f])
                elif kind == "b":
                    total_prob += self.bernoulli(self.mean[c][f], x[f])
            if total_prob > max_p:
                max_p = total_prob
                max_c = c
        return max_c

    def fit(self, X, y):
        """Estimate class priors and per-class feature parameters from ``X``, ``y``."""
        self.count_pi(y)
        self.parameters(X, y)

    def predict(self, Xtest):
        """Predict a class index for every row of ``Xtest``.

        Returns:
            A float array of length ``len(Xtest)`` holding the class indices.
        """
        yhat = np.zeros(len(Xtest))
        for i in range(len(Xtest)):
            yhat[i] = self.guess_c(Xtest[i])
        return yhat

In [112]:
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split

# Load Iris and hold out 20% of the rows as a fixed-seed test split.
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Every one of the four features is tagged real-valued ("r"); three classes.
nbc = NBC(feature_types=["r"] * 4, num_classes=3)
nbc.fit(X_train, y_train)
y_pred = nbc.predict(X_test)

# Report the confusion matrix followed by the accuracy on the test split.
cm = confusion_matrix(y_test, y_pred)
ac = accuracy_score(y_test, y_pred)
print(cm, ac, sep="\n")




[[11  0  0]
 [ 0 13  0]
 [ 0  1  5]]
0.9666666666666667


In [103]:
# Inspect the training labels (prints the whole label array).
print(y_train)

[2 1 0 2 2 1 0 1 1 1 2 0 2 0 0 1 2 2 2 2 1 2 1 1 2 2 2 2 1 2 1 0 2 1 1 1 1
 2 0 0 2 1 0 0 1 0 2 1 0 1 2 1 0 2 2 2 2 0 0 2 2 0 2 0 2 2 0 0 2 0 0 0 1 2
 2 0 0 0 1 1 0 0 1 0 2 1 2 1 0 2 0 2 0 0 2 0 2 1 1 1 2 2 1 1 0 1 2 2 0 1 1
 1 1 0 0 0 2 1 2 0]


In [4]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same transform to both splits (X_train / X_test are overwritten).
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

In [68]:
from sklearn.naive_bayes import GaussianNB

# scikit-learn's Gaussian naive Bayes, fitted on the current training split.
classifier = GaussianNB().fit(X_train, y_train)
# Keep the fitted estimator as the cell's displayed value.
classifier

In [66]:
# Predict test-split labels with `classifier`; this overwrites the earlier `y_pred`.
# NOTE(review): this cell's execution counter (66) is lower than the fit cell's (68) —
# confirm the results with Restart Kernel -> Run All.
y_pred  =  classifier.predict(X_test)

In [67]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Score the current `y_pred` against the held-out labels:
# confusion matrix first, then accuracy.
cm = confusion_matrix(y_test, y_pred)
ac = accuracy_score(y_test, y_pred)
print(cm, ac, sep="\n")

[[11  0  0]
 [ 0 13  0]
 [ 0  1  5]]
0.9666666666666667


In [8]:
from sklearn.linear_model import LogisticRegression

In [9]:
# L2-penalised logistic regression (C=10, fixed seed) fitted on the training
# split and evaluated on the test split.
clf = LogisticRegression(penalty="l2", C=10, random_state=0).fit(X_train, y_train)
y_pred_lr = clf.predict(X_test)

# Confusion matrix first, then accuracy.
cm = confusion_matrix(y_test, y_pred_lr)
ac = accuracy_score(y_test, y_pred_lr)
print(cm, ac, sep="\n")


[[11  0  0]
 [ 0 13  0]
 [ 0  0  6]]
1.0
