In [1]:
import numpy as np
import pandas as pd

In [3]:
class NaiveBayesClassifier():
    def gaussian_probability(self, class_idx, x):
        mean = self.mean[class_idx]
        var = self.var[class_idx]
        numerator = np.exp((-1/2)*((x - mean)** 2) / (2*var))
        denominator = np.sqrt(2 * np.pi * var)
        prob = numerator / denominator
        
        return prob
    
    def classwise_probability(self, x):
        probabilities = []
        
        for i in range(self.count):
            prior = np.log(self.prior[i])
            conditional = np.sum(np.log(self.gaussian_probability(i, x)))
            probability = prior + conditional
            probabilities.append(probability)
            
            return self.classes[np.argmax(probabilities)]
        
    def fit(self, features, target):
        self.classes = np.unique(target)
        self.count = len(self.classes)
        self.feature_nums = features.shape[1]
        self.rows = features.shape[0]
        
        self.mean = features.groupby(target).apply(np.mean).to_numpy()
        self.var = features.groupby(target).apply(np.var).to_numpy()
        
        self.prior = (features.groupby(target).apply(lambda x: len(x))/ self.rows).to_numpy()
        
    def predict(self, features):
        return [self.classwise_probability(f) for f in features.to_numpy()]
    
    def accuracy(self, y_test, y_pred):
        return np.sum(y_test == y_pred) / len(y_test)

In [4]:
df = pd.read_csv("Iris.csv")
df = df.sample(frac = 1, random_state = 1).reset_index(drop = True)
print("(Sample, columns): ", df.shape)
X, y = df.iloc[:, :-1], df.iloc[:, -1]

X_train, X_test, y_train, y_test = X[:100], X[100:], y[:100], y[100:]

print("(Train feature samples, train feature columns): ", X_train.shape)
print("(Train target samples, train target columns): ", y_train.shape)
print("(Test feature samples, test feature columns): ", X_test.shape)
print("(Test feature samples, test feature columns): ", y_test.shape)

(Sample, columns):  (150, 6)
(Train feature samples, train feature columns):  (100, 5)
(Train target samples, train target columns):  (100,)
(Test feature samples, test feature columns):  (50, 5)
(Test feature samples, test feature columns):  (50,)


In [5]:
classifier = NaiveBayesClassifier()
classifier.fit(X_train, y_train)

  return mean(axis=axis, dtype=dtype, out=out, **kwargs)


In [6]:
print("Classification classes: ", classifier.classes)
print("Number of feautres: ", classifier.feature_nums)

Classification classes:  ['Iris-setosa' 'Iris-versicolor' 'Iris-virginica']
Number of feautres:  5


In [7]:
prediction = classifier.predict(X_test)

In [9]:
print("Accuracy: %.4f" % classifier.accuracy(y_test, prediction))

Accuracy: 0.3800
