In [31]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 

In [32]:
train = pd.read_csv("datasets/irisTraining.txt", delim_whitespace=True, header=None)
X_train = train.iloc[:, :-1].values  # Features
y_train = train.iloc[:, -1].values    # Target
# y_train[y_train == -1] = 0

test = pd.read_csv("datasets/irisTesting.txt", delim_whitespace=True, header=None)
X_test = test.iloc[:, :-1].values  # Features
y_test = test.iloc[:, -1].values    # Target
# y_test[y_test == -1] = 0

#converts -1 to 0

In [48]:
class NaiveBayes:
    def fit(self, X, y):
        self.X = X
        self.y = y
        self.classes = np.unique(y)
        self.parameters = []
        # Calculate mean and standard deviation for each class and feature
        for c in self.classes:
            X_c = X[y == c]
            self.parameters.append([(np.mean(X_c[:, i]), np.std(X_c[:, i])) for i in range(X_c.shape[1])])
        

    #NormalPDF probability
    def _calculate_probability(self, x, mean, stdev):
        exponent = np.exp(-((x - mean) ** 2 / (2 * stdev ** 2)))
        return (1 / (np.sqrt(2 * np.pi) * stdev)) * exponent

    #Looping through dataset
    def _calculate_class_probabilities(self, x):
        probabilities = {}
        for i, c in enumerate(self.classes):
            probabilities[c] = 1
            for j, param in enumerate(self.parameters[i]):
                mean, stdev = param
                probabilities[c] *= self._calculate_probability(x[j], mean, stdev)
        return probabilities

    def predict(self, X):
        predictions = []
        for x in X:
            probabilities = self._calculate_class_probabilities(x)
            best_class = None
            best_prob = -1
            for c, prob in probabilities.items():
                if best_class is None or prob > best_prob:
                    best_prob = prob
                    best_class = c
            predictions.append(best_class)
        return predictions

def accuracy(y_true, y_pred):
    correct = np.sum(y_true == y_pred)
    total = len(y_true)
    return correct / total

def true_positives(y_true, y_pred, positive_label):
    return np.sum((y_true == positive_label) & (y_pred == positive_label))

def false_positives(y_true, y_pred, positive_label):
    return np.sum((y_true != positive_label) & (y_pred == positive_label))

def true_negatives(y_true, y_pred, negative_label):
    return np.sum((y_true == negative_label) & (y_pred == negative_label))

def false_negatives(y_true, y_pred, negative_label):
    return np.sum((y_true != negative_label) & (y_pred != negative_label))

def precision(y_true, y_pred, positive_label):
    tp = true_positives(y_true, y_pred, positive_label)
    fp = false_positives(y_true, y_pred, positive_label)
    return tp / (tp + fp)

def recall(y_true, y_pred, positive_label):
    tp = true_positives(y_true, y_pred, positive_label)
    fn = false_negatives(y_true, y_pred, positive_label)
    return tp / (tp + fn)

In [49]:
nb=NaiveBayes()
nb.fit(X_train,y_train)
y_pred = nb.predict(X_test)

In [50]:
from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB()
gnb.fit(X_train, y_train)
   
# making predictions on the testing set
y_pred = gnb.predict(X_test)

In [51]:
print(f'accurancy: {accuracy(y_test, y_pred)}')
print(f'true pos: {true_positives(y_test, y_pred, positive_label=1)}')  # positive class label is 1
print(f'false pos: {false_positives(y_test, y_pred, positive_label=1)}')
print(f'true neg: {true_negatives(y_test, y_pred, negative_label=-1)}')  # negative class label is -1
print(f'false neg: {false_negatives(y_test, y_pred, negative_label=-1)}')
print(f'precision: {precision(y_test, y_pred, positive_label=1)}')
print(f'recall: {recall(y_test, y_pred, positive_label=1)}')

accurancy: 0.98
true pos: 16
false pos: 1
true neg: 33
false neg: 16
precision: 0.9411764705882353
recall: 0.32653061224489793
