GAUSSIAN NAIVE BAYES - library and no library (from scratch) implementation

In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

In [2]:
data = pd.read_csv(r'D:\studia\systemy sztucznej inteligencji\iris.csv')
data.head()

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,5.1,3.5,1.4,0.2,Setosa
1,4.9,3.0,1.4,0.2,Setosa
2,4.7,3.2,1.3,0.2,Setosa
3,4.6,3.1,1.5,0.2,Setosa
4,5.0,3.6,1.4,0.2,Setosa


In [3]:
lab_encoder = LabelEncoder()
data['variety encoded'] = lab_encoder.fit_transform(data['variety'])

X = data.drop(columns = ['variety encoded', 'variety'])
y = data['variety encoded']

std_scaler = StandardScaler()
X_scaled = std_scaler.fit_transform(X)
X_scaled = pd.DataFrame(X_scaled)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size = 0.25)

ACCURACY

In [4]:
def accuracy(y_test, y_pred):
    corrected = sum(y_test == y_pred)
    return corrected/len(y_test)

GAUSSIAN NAIVE BAYES - using library

In [5]:
nb_lib = GaussianNB()

nb_lib.fit(X_train, y_train)
y_pred = nb_lib.predict(X_test)
print(accuracy(y_test, y_pred))

0.9473684210526315


GAUSSIAN NAIVE BAYES - from scratch (without library)

In [6]:
class NB:
    def __init__(self):
        self.mean = {}
        self.var = {}
        self.class_prior = {}

    def fit(self, X_train, y_train):
        self.classes = np.unique(y_train)
        for c in self.classes:
            X_c = X_train[y_train == c]
            self.mean = np.mean(X_c, axis=0)
            self.var = np.var(X_c, axis=0)
            self.class_prior[c] = len(X_c)/len(X_train)

    def calculate_probability(self, x, mean, var):
        exponent = np.exp(-(x-mean)**2/(2*var))
        return 1/(np.sqrt(2*np.pi*var))*exponent

    def calculate_posterior(self, x):
        prob_posterior = []
        for c in self.classes:
            prob = np.log(np.sum(self.calculate_probability(x, self.mean[c], self.var[c])))
            prob_posterior.append(np.log(self.class_prior[c]+prob))

        return prob_posterior

    def predict(self, X_test):
        predictions = []

        for i in range(len(X_test)):
            prob = self.calculate_posterior(X_test.iloc[i])
            res = 0
            act_prob = prob[res]

            for j in range(len(prob)):
                if prob[j] > act_prob:
                    act_prob = prob[j]
                    res = j
            predictions.append(res)

        return predictions

In [7]:
nb_without_lib = NB()

nb_without_lib.fit(X_train, y_train)
y_pred = nb_without_lib.predict(X_test)

print(accuracy(y_test, y_pred))

0.8421052631578947


  prob_posterior.append(np.log(self.class_prior[c]+prob))
