# Naive Bayes Classifier Python Implementation

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.metrics import confusion_matrix,accuracy_score, mean_squared_error
import sklearn

## Dataset 1

### Preparing dataset

In [None]:
class NaiveBayes:
    """Minimum-risk Gaussian Bayes classifier for three classes (Case 1).

    Each class is modelled as a Gaussian over the two feature columns with its
    own mean and the fixed identity covariance set in ``__init__``. A sample is
    assigned the label with the smallest expected loss under ``self.L``.

    Typical use: ``model = NaiveBayes(0.8); model.fit(); model.predict()``.
    """

    # Labels are expected to be the integers 0, 1 and 2, matching the 3x3 loss matrix.
    N_CLASSES = 3

    def __init__(self, train_split, dataset_path="../datasets/Dataset_1_Team_29.csv"):
        """Store the configuration; nothing is read until ``fit``.

        Args:
            train_split: Fraction of the rows (taken from the top of the
                file) used for training.
            dataset_path: CSV to load; anything ``pandas.read_csv`` accepts.
        """
        self.train_split = train_split
        self.dataset_path = dataset_path
        self.cov = np.eye(2)

    def loadDataset(self):
        """Read the CSV and split it, in file order, into train and test sets.

        Columns 0 and 1 are the features and column 2 is the class label. Rows
        are not shuffled: the first ``train_split`` fraction becomes the
        training set and the remainder the test set.
        """
        dataset = pd.read_csv(self.dataset_path)
        n_rows = dataset.shape[0]
        self.train_size = int(self.train_split * n_rows)
        # int((1 - train_split) * n_rows) can truncate one row short through
        # floating-point error (0.8 of 1000 rows gives 199), so count the rows
        # that are actually left over instead.
        self.test_size = n_rows - self.train_size
        self.X = dataset.iloc[:, [0, 1]].values.astype(float)
        self.y = dataset.iloc[:, 2].values
        # training set split
        self.X_train = self.X[:self.train_size, :]
        self.y_train = self.y[:self.train_size]
        # testing set split
        self.X_test = self.X[self.train_size:, :]
        self.y_test = self.y[self.train_size:]
        # predict() charges L[c, k] for choosing label k when the class is c.
        self.L = np.array([[0, 2, 1], [2, 0, 3], [1, 3, 0]])

    def plot(self):
        """Scatter-plot the training set, one colour per class."""
        X_set, y_set = self.X_train, self.y_train
        palette = ListedColormap(('red', 'green', "blue"))
        for i, j in enumerate(np.unique(y_set)):
            members = y_set == j
            # ``color=`` rather than ``c=``: a bare RGBA tuple passed as ``c``
            # is ambiguous and is read as data when a class has four points.
            plt.scatter(X_set[members, 0], X_set[members, 1],
                        color=palette(i), label=j, marker='.')
        plt.title('Training set')
        plt.xlabel('x_1')
        plt.ylabel('x_2')
        plt.legend()
        plt.show()

    def generate_data(self):
        """Group the training samples by class label.

        Sets ``self.class_data_dic[c]`` to an ``(n_features, n_c)`` array whose
        columns are the training samples labelled ``c``, in file order.

        Raises:
            ValueError: If a class has no training samples.
        """
        grouped = {}
        for label in range(self.N_CLASSES):
            members = self.X_train[self.y_train == label]
            if members.shape[0] == 0:
                raise ValueError(
                    "no training samples for class {}".format(label))
            grouped[label] = members.T
        self.class_data_dic = grouped

    def summary(self):
        """Compute the per-class feature means and standard deviations."""
        classes = range(self.N_CLASSES)
        self.mean = [np.mean(self.class_data_dic[c], axis=1) for c in classes]
        self.std = [np.std(self.class_data_dic[c], axis=1) for c in classes]

    def fit(self):
        """Load the dataset and estimate the class statistics."""
        self.loadDataset()
        self.generate_data()
        self.summary()

    def _log_joint(self, X, X_train_class, mean, cov):
        """Return ``log(p(x | class) * P(class))`` for every row of ``X``."""
        X = np.atleast_2d(np.asarray(X, dtype=float))
        cov = np.atleast_2d(cov)
        diff = X - mean
        # Row-wise quadratic form: one Mahalanobis distance per sample instead
        # of the full N x N matrix of cross terms between samples.
        maha = np.sum(diff.dot(np.linalg.inv(cov)) * diff, axis=1)
        dim = X.shape[1]
        log_norm = -0.5 * (dim * np.log(2 * np.pi)
                           + np.log(abs(np.linalg.det(cov))))
        # Class arrays are stored as (features, samples), so the sample count
        # that defines the prior is the number of columns.
        log_prior = np.log(X_train_class.shape[1] / self.X_train.shape[0])
        return log_norm - 0.5 * maha + log_prior

    def posterior(self, X, X_train_class, mean, L):
        """Return the unnormalised posterior of one class for each sample.

        Args:
            X: ``(n_samples, n_features)`` points (a single point is accepted).
            X_train_class: ``(n_features, n_c)`` training samples of the class;
                only its sample count is used, for the prior.
            mean: Mean vector of the class.
            L: Unused; kept so existing calls keep working.

        Returns:
            A ``(n_samples,)`` array of ``p(x | class) * P(class)`` computed
            with the shared covariance ``self.cov``.
        """
        return np.exp(self._log_joint(X, X_train_class, mean, self.cov))

    def predict(self, X_test=None):
        """Return the minimum-expected-loss label for each sample.

        Args:
            X_test: ``(n_samples, n_features)`` points; defaults to the test
                split.

        Returns:
            A ``(n_samples,)`` integer array of predicted labels.
        """
        if X_test is None:
            X_test = self.X_test
        log_joint = np.column_stack([
            self._log_joint(X_test, self.class_data_dic[c], self.mean[c],
                            self.cov)
            for c in range(self.N_CLASSES)
        ])
        # Rescaling a row by a positive constant does not change its argmin,
        # so divide out the largest term to keep points far from every class
        # from underflowing to all-zero scores.
        scores = np.exp(log_joint - log_joint.max(axis=1, keepdims=True))
        return np.argmin(scores.dot(self.L), axis=1)

    def plot_decision_boundary(self):
        """Draw the predicted regions with the test samples on top."""
        X_set, y_set = self.X_test, self.y_test
        X1, X2 = np.meshgrid(
            np.arange(start=X_set[:, 0].min() - 1,
                      stop=X_set[:, 0].max() + 1, step=0.1),
            np.arange(start=X_set[:, 1].min() - 1,
                      stop=X_set[:, 1].max() + 1, step=0.1))
        grid = np.column_stack([X1.ravel(), X2.ravel()])
        plt.contourf(X1, X2, self.predict(grid).reshape(X1.shape),
                     alpha=0.75,
                     cmap=ListedColormap(('orange', 'green', "blue")))
        plt.xlim(X1.min(), X1.max())
        plt.ylim(X2.min(), X2.max())
        palette = ListedColormap(('red', 'green', "blue"))
        for i, j in enumerate(np.unique(y_set)):
            members = y_set == j
            plt.scatter(X_set[members, 0], X_set[members, 1],
                        color=palette(i), label=j, marker='.')
        plt.title('Naive Bayes Classification our implementation(Test set)')
        plt.xlabel('X_1')
        plt.ylabel('X_2')
        plt.legend()
        plt.show()

    def _test_predictions(self):
        """Return the stored test predictions, computing them on first use."""
        if getattr(self, 'y_pred', None) is None:
            self.y_pred = self.predict()
        return self.y_pred

    def performance_metric(self):
        """Print accuracy, the test confusion matrix and the label MSE."""
        y_train_pred = self.predict(self.X_train)
        # Uses ``self.y_pred`` when the caller stored it, otherwise predicts.
        y_test_pred = self._test_predictions()
        print('Train Accuracy : ', accuracy_score(self.y_train, y_train_pred), '\n',
              'Test Accuracy : ', accuracy_score(self.y_test, y_test_pred), '\n',
              'Confusion matrix : \n', confusion_matrix(self.y_test, y_test_pred))
        print('\n\n')
        print('Mean Square Error on training data : ', mean_squared_error(self.y_train, y_train_pred), '\n',
              'Mean Square Error on testing : ', mean_squared_error(self.y_test, y_test_pred))


In [None]:
# Train the Dataset 1 model on the first 80% of the rows, then keep its
# predictions for the held-out rows so performance_metric() can score them.
naiveBayes = NaiveBayes(train_split=0.8)
naiveBayes.fit()
naiveBayes.y_pred = naiveBayes.predict(naiveBayes.X_test)

In [None]:
# Print train/test accuracy, the test confusion matrix and the label MSE.
naiveBayes.performance_metric()

# Case 2: one diagonal covariance matrix shared by all classes (Dataset 2)

In [None]:
class NaiveBayes2:
    """Minimum-risk Gaussian Bayes classifier for three classes (Case 2).

    Each class is modelled as a Gaussian over the two feature columns with its
    own mean and one diagonal covariance matrix shared by all classes. A sample
    is assigned the label with the smallest expected loss under ``self.L``.

    Typical use: ``model = NaiveBayes2(0.8); model.fit(); model.predict()``.
    """

    # Labels are expected to be the integers 0, 1 and 2, matching the 3x3 loss matrix.
    N_CLASSES = 3

    def __init__(self, train_split, dataset_path="../datasets/Dataset_2_Team_29.csv"):
        """Store the configuration; nothing is read until ``fit``.

        Args:
            train_split: Fraction of the rows (taken from the top of the
                file) used for training.
            dataset_path: CSV to load; anything ``pandas.read_csv`` accepts.
        """
        self.train_split = train_split
        self.dataset_path = dataset_path
        # Placeholder until loadDataset() estimates the covariance.
        self.cov = np.eye(2)

    def loadDataset(self):
        """Read the CSV, split it in file order and estimate the covariance.

        Columns 0 and 1 are the features and column 2 is the class label. Rows
        are not shuffled: the first ``train_split`` fraction becomes the
        training set and the remainder the test set.
        """
        dataset = pd.read_csv(self.dataset_path)
        n_rows = dataset.shape[0]
        self.train_size = int(self.train_split * n_rows)
        # int((1 - train_split) * n_rows) can truncate one row short through
        # floating-point error (0.8 of 1000 rows gives 199), so count the rows
        # that are actually left over instead.
        self.test_size = n_rows - self.train_size
        self.X = dataset.iloc[:, [0, 1]].values.astype(float)
        self.y = dataset.iloc[:, 2].values
        # training set split
        self.X_train = self.X[:self.train_size, :]
        self.y_train = self.y[:self.train_size]
        # testing set split
        self.X_test = self.X[self.train_size:, :]
        self.y_test = self.y[self.train_size:]
        # Estimate the shared covariance from the training rows only, so no
        # test data leaks into the model; off-diagonal terms are zeroed.
        # NOTE(review): this is the covariance of all training rows taken
        # together, not a pooled within-class estimate - confirm intended.
        self.cov = np.diag(np.diag(np.cov(self.X_train, rowvar=False)))
        # predict() charges L[c, k] for choosing label k when the class is c.
        self.L = np.array([[0, 2, 1], [2, 0, 3], [1, 3, 0]])

    def plot(self):
        """Scatter-plot the training set, one colour per class."""
        X_set, y_set = self.X_train, self.y_train
        palette = ListedColormap(('red', 'green', "blue"))
        for i, j in enumerate(np.unique(y_set)):
            members = y_set == j
            # ``color=`` rather than ``c=``: a bare RGBA tuple passed as ``c``
            # is ambiguous and is read as data when a class has four points.
            plt.scatter(X_set[members, 0], X_set[members, 1],
                        color=palette(i), label=j, marker='.')
        plt.title('Training set')
        plt.xlabel('x_1')
        plt.ylabel('x_2')
        plt.legend()
        plt.show()

    def generate_data(self):
        """Group the training samples by class label.

        Sets ``self.class_data_dic[c]`` to an ``(n_features, n_c)`` array whose
        columns are the training samples labelled ``c``, in file order.

        Raises:
            ValueError: If a class has no training samples.
        """
        grouped = {}
        for label in range(self.N_CLASSES):
            members = self.X_train[self.y_train == label]
            if members.shape[0] == 0:
                raise ValueError(
                    "no training samples for class {}".format(label))
            grouped[label] = members.T
        self.class_data_dic = grouped

    def summary(self):
        """Compute the per-class feature means and standard deviations."""
        classes = range(self.N_CLASSES)
        self.mean = [np.mean(self.class_data_dic[c], axis=1) for c in classes]
        self.std = [np.std(self.class_data_dic[c], axis=1) for c in classes]

    def fit(self):
        """Load the dataset and estimate the class statistics."""
        self.loadDataset()
        self.generate_data()
        self.summary()

    def _log_joint(self, X, X_train_class, mean, cov):
        """Return ``log(p(x | class) * P(class))`` for every row of ``X``."""
        X = np.atleast_2d(np.asarray(X, dtype=float))
        cov = np.atleast_2d(cov)
        diff = X - mean
        # Row-wise quadratic form: one Mahalanobis distance per sample instead
        # of the full N x N matrix of cross terms between samples.
        maha = np.sum(diff.dot(np.linalg.inv(cov)) * diff, axis=1)
        dim = X.shape[1]
        log_norm = -0.5 * (dim * np.log(2 * np.pi)
                           + np.log(abs(np.linalg.det(cov))))
        # Class arrays are stored as (features, samples), so the sample count
        # that defines the prior is the number of columns.
        log_prior = np.log(X_train_class.shape[1] / self.X_train.shape[0])
        return log_norm - 0.5 * maha + log_prior

    def posterior(self, X, X_train_class, mean, L):
        """Return the unnormalised posterior of one class for each sample.

        Args:
            X: ``(n_samples, n_features)`` points (a single point is accepted).
            X_train_class: ``(n_features, n_c)`` training samples of the class;
                only its sample count is used, for the prior.
            mean: Mean vector of the class.
            L: Unused; kept so existing calls keep working.

        Returns:
            A ``(n_samples,)`` array of ``p(x | class) * P(class)`` computed
            with the shared covariance ``self.cov``.
        """
        return np.exp(self._log_joint(X, X_train_class, mean, self.cov))

    def predict(self, X_test=None):
        """Return the minimum-expected-loss label for each sample.

        Args:
            X_test: ``(n_samples, n_features)`` points; defaults to the test
                split.

        Returns:
            A ``(n_samples,)`` integer array of predicted labels.
        """
        if X_test is None:
            X_test = self.X_test
        log_joint = np.column_stack([
            self._log_joint(X_test, self.class_data_dic[c], self.mean[c],
                            self.cov)
            for c in range(self.N_CLASSES)
        ])
        # Rescaling a row by a positive constant does not change its argmin,
        # so divide out the largest term to keep points far from every class
        # from underflowing to all-zero scores.
        scores = np.exp(log_joint - log_joint.max(axis=1, keepdims=True))
        return np.argmin(scores.dot(self.L), axis=1)

    def plot_decision_boundary(self):
        """Draw the predicted regions with the test samples on top."""
        X_set, y_set = self.X_test, self.y_test
        X1, X2 = np.meshgrid(
            np.arange(start=X_set[:, 0].min() - 1,
                      stop=X_set[:, 0].max() + 1, step=0.1),
            np.arange(start=X_set[:, 1].min() - 1,
                      stop=X_set[:, 1].max() + 1, step=0.1))
        grid = np.column_stack([X1.ravel(), X2.ravel()])
        plt.contourf(X1, X2, self.predict(grid).reshape(X1.shape),
                     alpha=0.75,
                     cmap=ListedColormap(('orange', 'green', "blue")))
        plt.xlim(X1.min(), X1.max())
        plt.ylim(X2.min(), X2.max())
        palette = ListedColormap(('red', 'green', "blue"))
        for i, j in enumerate(np.unique(y_set)):
            members = y_set == j
            plt.scatter(X_set[members, 0], X_set[members, 1],
                        color=palette(i), label=j, marker='.')
        plt.title('Naive Bayes Classification our implementation(Test set)')
        plt.xlabel('X_1')
        plt.ylabel('X_2')
        plt.legend()
        plt.show()

    def _test_predictions(self):
        """Return the stored test predictions, computing them on first use."""
        if getattr(self, 'y_pred', None) is None:
            self.y_pred = self.predict()
        return self.y_pred

    def performance_metric(self):
        """Print accuracy, the test confusion matrix and the label MSE."""
        y_train_pred = self.predict(self.X_train)
        # Uses ``self.y_pred`` when the caller stored it, otherwise predicts.
        y_test_pred = self._test_predictions()
        print('Train Accuracy : ', accuracy_score(self.y_train, y_train_pred), '\n',
              'Test Accuracy : ', accuracy_score(self.y_test, y_test_pred), '\n',
              'Confusion matrix : \n', confusion_matrix(self.y_test, y_test_pred))
        print('\n\n')
        print('Mean Square Error on training data : ', mean_squared_error(self.y_train, y_train_pred), '\n',
              'Mean Square Error on testing : ', mean_squared_error(self.y_test, y_test_pred))




In [None]:
# Train the Case 2 model on the first 80% of the rows, then keep its
# predictions for the held-out rows so performance_metric() can score them.
naiveBayes2 = NaiveBayes2(train_split=0.8)
naiveBayes2.fit()
naiveBayes2.y_pred = naiveBayes2.predict(naiveBayes2.X_test)

In [None]:
# Print train/test accuracy, the test confusion matrix and the label MSE.
naiveBayes2.performance_metric()

# Case 3: a separate diagonal covariance matrix for each class (Dataset 2)

In [None]:
class NaiveBayes3:
    """Minimum-risk Gaussian Bayes classifier for three classes (Case 3).

    Each class is modelled as a Gaussian over the two feature columns with its
    own mean and its own diagonal covariance matrix. A sample is assigned the
    label with the smallest expected loss under ``self.L``.

    Typical use: ``model = NaiveBayes3(0.8); model.fit(); model.predict()``.
    """

    # Labels are expected to be the integers 0, 1 and 2, matching the 3x3 loss matrix.
    N_CLASSES = 3

    def __init__(self, train_split, dataset_path="../datasets/Dataset_2_Team_29.csv"):
        """Store the configuration; nothing is read until ``fit``.

        Args:
            train_split: Fraction of the rows (taken from the top of the
                file) used for training.
            dataset_path: CSV to load; anything ``pandas.read_csv`` accepts.
        """
        self.train_split = train_split
        self.dataset_path = dataset_path

    def loadDataset(self):
        """Read the CSV and split it, in file order, into train and test sets.

        Columns 0 and 1 are the features and column 2 is the class label. Rows
        are not shuffled: the first ``train_split`` fraction becomes the
        training set and the remainder the test set. The covariances are
        estimated later, per class, by ``summary``.
        """
        dataset = pd.read_csv(self.dataset_path)
        n_rows = dataset.shape[0]
        self.train_size = int(self.train_split * n_rows)
        # int((1 - train_split) * n_rows) can truncate one row short through
        # floating-point error (0.8 of 1000 rows gives 199), so count the rows
        # that are actually left over instead.
        self.test_size = n_rows - self.train_size
        self.X = dataset.iloc[:, [0, 1]].values.astype(float)
        self.y = dataset.iloc[:, 2].values
        # training set split
        self.X_train = self.X[:self.train_size, :]
        self.y_train = self.y[:self.train_size]
        # testing set split
        self.X_test = self.X[self.train_size:, :]
        self.y_test = self.y[self.train_size:]
        # predict() charges L[c, k] for choosing label k when the class is c.
        self.L = np.array([[0, 2, 1], [2, 0, 3], [1, 3, 0]])

    def plot(self):
        """Scatter-plot the training set, one colour per class."""
        X_set, y_set = self.X_train, self.y_train
        palette = ListedColormap(('red', 'green', "blue"))
        for i, j in enumerate(np.unique(y_set)):
            members = y_set == j
            # ``color=`` rather than ``c=``: a bare RGBA tuple passed as ``c``
            # is ambiguous and is read as data when a class has four points.
            plt.scatter(X_set[members, 0], X_set[members, 1],
                        color=palette(i), label=j, marker='.')
        plt.title('Training set')
        plt.xlabel('x_1')
        plt.ylabel('x_2')
        plt.legend()
        plt.show()

    def generate_data(self):
        """Group the training samples by class label.

        Sets ``self.class_data_dic[c]`` to an ``(n_features, n_c)`` array whose
        columns are the training samples labelled ``c``, in file order.

        Raises:
            ValueError: If a class has no training samples.
        """
        grouped = {}
        for label in range(self.N_CLASSES):
            members = self.X_train[self.y_train == label]
            if members.shape[0] == 0:
                raise ValueError(
                    "no training samples for class {}".format(label))
            grouped[label] = members.T
        self.class_data_dic = grouped

    def summary(self):
        """Compute per-class means, standard deviations and covariances.

        ``self.cov[c]`` keeps only the diagonal of the sample covariance of
        class ``c``.
        """
        classes = range(self.N_CLASSES)
        self.mean = [np.mean(self.class_data_dic[c], axis=1) for c in classes]
        self.std = [np.std(self.class_data_dic[c], axis=1) for c in classes]
        # Class arrays are (features, samples), which is np.cov's default layout.
        self.cov = [np.diag(np.diag(np.cov(self.class_data_dic[c])))
                    for c in classes]

    def fit(self):
        """Load the dataset and estimate the class statistics."""
        self.loadDataset()
        self.generate_data()
        self.summary()

    def _log_joint(self, X, X_train_class, mean, cov):
        """Return ``log(p(x | class) * P(class))`` for every row of ``X``."""
        X = np.atleast_2d(np.asarray(X, dtype=float))
        cov = np.atleast_2d(cov)
        diff = X - mean
        # Row-wise quadratic form: one Mahalanobis distance per sample instead
        # of the full N x N matrix of cross terms between samples.
        maha = np.sum(diff.dot(np.linalg.inv(cov)) * diff, axis=1)
        dim = X.shape[1]
        log_norm = -0.5 * (dim * np.log(2 * np.pi)
                           + np.log(abs(np.linalg.det(cov))))
        # Class arrays are stored as (features, samples), so the sample count
        # that defines the prior is the number of columns.
        log_prior = np.log(X_train_class.shape[1] / self.X_train.shape[0])
        return log_norm - 0.5 * maha + log_prior

    def posterior(self, X, X_train_class, mean, cov):
        """Return the unnormalised posterior of one class for each sample.

        Args:
            X: ``(n_samples, n_features)`` points (a single point is accepted).
            X_train_class: ``(n_features, n_c)`` training samples of the class;
                only its sample count is used, for the prior.
            mean: Mean vector of the class.
            cov: Covariance matrix of the class.

        Returns:
            A ``(n_samples,)`` array of ``p(x | class) * P(class)``.
        """
        return np.exp(self._log_joint(X, X_train_class, mean, cov))

    def predict(self, X_test=None):
        """Return the minimum-expected-loss label for each sample.

        Args:
            X_test: ``(n_samples, n_features)`` points; defaults to the test
                split.

        Returns:
            A ``(n_samples,)`` integer array of predicted labels.
        """
        if X_test is None:
            X_test = self.X_test
        log_joint = np.column_stack([
            self._log_joint(X_test, self.class_data_dic[c], self.mean[c],
                            self.cov[c])
            for c in range(self.N_CLASSES)
        ])
        # Rescaling a row by a positive constant does not change its argmin,
        # so divide out the largest term to keep points far from every class
        # from underflowing to all-zero scores.
        scores = np.exp(log_joint - log_joint.max(axis=1, keepdims=True))
        return np.argmin(scores.dot(self.L), axis=1)

    def plot_decision_boundary(self):
        """Draw the predicted regions with the test samples on top."""
        X_set, y_set = self.X_test, self.y_test
        X1, X2 = np.meshgrid(
            np.arange(start=X_set[:, 0].min() - 1,
                      stop=X_set[:, 0].max() + 1, step=0.1),
            np.arange(start=X_set[:, 1].min() - 1,
                      stop=X_set[:, 1].max() + 1, step=0.1))
        grid = np.column_stack([X1.ravel(), X2.ravel()])
        plt.contourf(X1, X2, self.predict(grid).reshape(X1.shape),
                     alpha=0.75,
                     cmap=ListedColormap(('orange', 'green', "blue")))
        plt.xlim(X1.min(), X1.max())
        plt.ylim(X2.min(), X2.max())
        palette = ListedColormap(('red', 'green', "blue"))
        for i, j in enumerate(np.unique(y_set)):
            members = y_set == j
            plt.scatter(X_set[members, 0], X_set[members, 1],
                        color=palette(i), label=j, marker='.')
        plt.title('Naive Bayes Classification our implementation(Test set)')
        plt.xlabel('X_1')
        plt.ylabel('X_2')
        plt.legend()
        plt.show()

    def _test_predictions(self):
        """Return the stored test predictions, computing them on first use."""
        if getattr(self, 'y_pred', None) is None:
            self.y_pred = self.predict()
        return self.y_pred

    def performance_metric(self):
        """Print accuracy, the test confusion matrix and the label MSE."""
        y_train_pred = self.predict(self.X_train)
        # Uses ``self.y_pred`` when the caller stored it, otherwise predicts.
        y_test_pred = self._test_predictions()
        print('Train Accuracy : ', accuracy_score(self.y_train, y_train_pred), '\n',
              'Test Accuracy : ', accuracy_score(self.y_test, y_test_pred), '\n',
              'Confusion matrix : \n', confusion_matrix(self.y_test, y_test_pred))
        print('\n\n')
        print('Mean Square Error on training data : ', mean_squared_error(self.y_train, y_train_pred), '\n',
              'Mean Square Error on testing : ', mean_squared_error(self.y_test, y_test_pred))

In [None]:
# Train the Case 3 model on the first 80% of the rows and keep its predictions
# for the held-out rows so performance_metric() can score them.
naiveBayes3 = NaiveBayes3(0.8)
naiveBayes3.fit()
# The predictions must come from naiveBayes3 itself: output from another model
# (such as naiveBayes, fitted on Dataset 1) would be scored against the wrong
# test labels.
naiveBayes3.y_pred = naiveBayes3.predict()

In [None]:
# Print train/test accuracy, the test confusion matrix and the label MSE.
naiveBayes3.performance_metric()

# Case 4: one full covariance matrix shared by all classes, on min-max scaled features (Dataset 2)

In [None]:
class NaiveBayes4:
    """Minimum-risk Gaussian Bayes classifier for three classes (Case 4).

    The two feature columns are min-max scaled, and each class is modelled as
    a Gaussian with its own mean and one full covariance matrix shared by all
    classes. Despite the class name, the full covariance means the features
    are not treated as independent. A sample is assigned the label with the
    smallest expected loss under ``self.L``.

    Typical use: ``model = NaiveBayes4(0.8); model.fit(); model.predict()``.
    ``predict`` expects points that are already in the scaled feature space.
    """

    # Labels are expected to be the integers 0, 1 and 2, matching the 3x3 loss matrix.
    N_CLASSES = 3

    def __init__(self, train_split, dataset_path="../datasets/Dataset_2_Team_29.csv"):
        """Store the configuration; nothing is read until ``fit``.

        Args:
            train_split: Fraction of the rows (taken from the top of the
                file) used for training.
            dataset_path: CSV to load; anything ``pandas.read_csv`` accepts.
        """
        self.train_split = train_split
        self.dataset_path = dataset_path

    def loadDataset(self):
        """Read the CSV, scale the features, split and estimate the covariance.

        Columns 0 and 1 are the features and column 2 is the class label. Rows
        are not shuffled: the first ``train_split`` fraction becomes the
        training set and the remainder the test set. The scaling range and the
        shared covariance are both taken from the training rows only, so no
        test data leaks into the model.
        """
        dataset = pd.read_csv(self.dataset_path)
        n_rows = dataset.shape[0]
        self.train_size = int(self.train_split * n_rows)
        # int((1 - train_split) * n_rows) can truncate one row short through
        # floating-point error (0.8 of 1000 rows gives 199), so count the rows
        # that are actually left over instead.
        self.test_size = n_rows - self.train_size
        raw = dataset.iloc[:, [0, 1]].values.astype(float)
        train_raw = raw[:self.train_size, :]
        self.feature_min = train_raw.min(axis=0)
        span = train_raw.max(axis=0) - self.feature_min
        # A constant feature has zero range; divide by 1 so it maps to 0
        # instead of producing NaN.
        self.feature_span = np.where(span == 0, 1.0, span)
        self.X = (raw - self.feature_min) / self.feature_span
        self.y = dataset.iloc[:, 2].values
        # training set split
        self.X_train = self.X[:self.train_size, :]
        self.y_train = self.y[:self.train_size]
        # testing set split
        self.X_test = self.X[self.train_size:, :]
        self.y_test = self.y[self.train_size:]
        # NOTE(review): this is the covariance of all training rows taken
        # together, not a pooled within-class estimate - confirm intended.
        self.cov = np.cov(self.X_train, rowvar=False)
        # predict() charges L[c, k] for choosing label k when the class is c.
        self.L = np.array([[0, 2, 1], [2, 0, 3], [1, 3, 0]])

    def plot(self):
        """Scatter-plot the training set, one colour per class."""
        X_set, y_set = self.X_train, self.y_train
        palette = ListedColormap(('red', 'green', "blue"))
        for i, j in enumerate(np.unique(y_set)):
            members = y_set == j
            # ``color=`` rather than ``c=``: a bare RGBA tuple passed as ``c``
            # is ambiguous and is read as data when a class has four points.
            plt.scatter(X_set[members, 0], X_set[members, 1],
                        color=palette(i), label=j, marker='.')
        plt.title('Training set')
        plt.xlabel('x_1')
        plt.ylabel('x_2')
        plt.legend()
        plt.show()

    def generate_data(self):
        """Group the training samples by class label.

        Sets ``self.class_data_dic[c]`` to an ``(n_features, n_c)`` array whose
        columns are the training samples labelled ``c``, in file order.

        Raises:
            ValueError: If a class has no training samples.
        """
        grouped = {}
        for label in range(self.N_CLASSES):
            members = self.X_train[self.y_train == label]
            if members.shape[0] == 0:
                raise ValueError(
                    "no training samples for class {}".format(label))
            grouped[label] = members.T
        self.class_data_dic = grouped

    def summary(self):
        """Compute the per-class feature means and standard deviations."""
        classes = range(self.N_CLASSES)
        self.mean = [np.mean(self.class_data_dic[c], axis=1) for c in classes]
        self.std = [np.std(self.class_data_dic[c], axis=1) for c in classes]

    def fit(self):
        """Load the dataset and estimate the class statistics."""
        self.loadDataset()
        self.generate_data()
        self.summary()

    def _log_joint(self, X, X_train_class, mean, cov):
        """Return ``log(p(x | class) * P(class))`` for every row of ``X``."""
        X = np.atleast_2d(np.asarray(X, dtype=float))
        cov = np.atleast_2d(cov)
        diff = X - mean
        # Row-wise quadratic form: one Mahalanobis distance per sample instead
        # of the full N x N matrix of cross terms between samples.
        maha = np.sum(diff.dot(np.linalg.inv(cov)) * diff, axis=1)
        dim = X.shape[1]
        log_norm = -0.5 * (dim * np.log(2 * np.pi)
                           + np.log(abs(np.linalg.det(cov))))
        # Class arrays are stored as (features, samples), so the sample count
        # that defines the prior is the number of columns.
        log_prior = np.log(X_train_class.shape[1] / self.X_train.shape[0])
        return log_norm - 0.5 * maha + log_prior

    def posterior(self, X, X_train_class, mean, cov):
        """Return the unnormalised posterior of one class for each sample.

        Args:
            X: ``(n_samples, n_features)`` points (a single point is accepted).
            X_train_class: ``(n_features, n_c)`` training samples of the class;
                only its sample count is used, for the prior.
            mean: Mean vector of the class.
            cov: Covariance matrix to use for the class.

        Returns:
            A ``(n_samples,)`` array of ``p(x | class) * P(class)``.
        """
        return np.exp(self._log_joint(X, X_train_class, mean, cov))

    def predict(self, X_test=None):
        """Return the minimum-expected-loss label for each sample.

        Args:
            X_test: ``(n_samples, n_features)`` points in the scaled feature
                space; defaults to the test split.

        Returns:
            A ``(n_samples,)`` integer array of predicted labels.
        """
        if X_test is None:
            X_test = self.X_test
        log_joint = np.column_stack([
            self._log_joint(X_test, self.class_data_dic[c], self.mean[c],
                            self.cov)
            for c in range(self.N_CLASSES)
        ])
        # Rescaling a row by a positive constant does not change its argmin,
        # so divide out the largest term to keep points far from every class
        # from underflowing to all-zero scores.
        scores = np.exp(log_joint - log_joint.max(axis=1, keepdims=True))
        return np.argmin(scores.dot(self.L), axis=1)

    def _test_predictions(self):
        """Return the stored test predictions, computing them on first use."""
        if getattr(self, 'y_pred', None) is None:
            self.y_pred = self.predict()
        return self.y_pred

    def get_confusion_matrix(self):
        """Store the test-set confusion matrix in ``self.confusion_matrix``."""
        self.confusion_matrix = confusion_matrix(self.y_test,
                                                 self._test_predictions())

    def plot_decision_boundary(self):
        """Draw the predicted regions with the test samples on top."""
        X_set, y_set = self.X_test, self.y_test
        # Finer grid step than the other cases because the features are scaled.
        X1, X2 = np.meshgrid(
            np.arange(start=X_set[:, 0].min() - 1,
                      stop=X_set[:, 0].max() + 1, step=0.01),
            np.arange(start=X_set[:, 1].min() - 1,
                      stop=X_set[:, 1].max() + 1, step=0.01))
        grid = np.column_stack([X1.ravel(), X2.ravel()])
        palette = ListedColormap(('red', 'green', "blue"))
        plt.contourf(X1, X2, self.predict(grid).reshape(X1.shape),
                     alpha=0.75, cmap=palette)
        plt.xlim(X1.min(), X1.max())
        plt.ylim(X2.min(), X2.max())
        for i, j in enumerate(np.unique(y_set)):
            members = y_set == j
            # ``color=`` rather than ``c=``: a bare RGBA tuple passed as ``c``
            # is ambiguous and is read as data when a class has four points.
            plt.scatter(X_set[members, 0], X_set[members, 1],
                        color=palette(i), label=j, marker='.')
        plt.title('Naive Bayes Classification our implementation(Test set)')
        plt.xlabel('X_1')
        plt.ylabel('X_2')
        plt.legend()
        plt.show()

    def performance_metric(self):
        """Print accuracy, the test confusion matrix and the label MSE."""
        y_train_pred = self.predict(self.X_train)
        # Uses ``self.y_pred`` when the caller stored it, otherwise predicts.
        y_test_pred = self._test_predictions()
        print('Train Accuracy : ', accuracy_score(self.y_train, y_train_pred), '\n',
              'Test Accuracy : ', accuracy_score(self.y_test, y_test_pred), '\n',
              'Confusion matrix : \n', confusion_matrix(self.y_test, y_test_pred))
        print('\n\n')
        print('Mean Square Error on training data : ', mean_squared_error(self.y_train, y_train_pred), '\n',
              'Mean Square Error on testing : ', mean_squared_error(self.y_test, y_test_pred))

In [None]:
# Train the Case 4 model on the first 80% of the rows, then keep its
# predictions for the held-out rows so performance_metric() can score them.
naiveBayes4 = NaiveBayes4(train_split=0.8)
naiveBayes4.fit()
naiveBayes4.y_pred = naiveBayes4.predict(naiveBayes4.X_test)

In [None]:
# Print train/test accuracy, the test confusion matrix and the label MSE.
naiveBayes4.performance_metric()

# Case 5: a separate full covariance matrix for each class (Dataset 2)

In [None]:
class NaiveBayes5:
    """Minimum-risk Gaussian Bayes classifier for three classes (Case 5).

    Each class is modelled as a Gaussian over the two feature columns with its
    own mean and its own full covariance matrix. Despite the class name, the
    full covariance means the features are not treated as independent. A
    sample is assigned the label with the smallest expected loss under
    ``self.L``.

    Typical use: ``model = NaiveBayes5(0.8); model.fit(); model.predict()``.
    """

    # Labels are expected to be the integers 0, 1 and 2, matching the 3x3 loss matrix.
    N_CLASSES = 3

    def __init__(self, train_split, dataset_path="../datasets/Dataset_2_Team_29.csv"):
        """Store the configuration; nothing is read until ``fit``.

        Args:
            train_split: Fraction of the rows (taken from the top of the
                file) used for training.
            dataset_path: CSV to load; anything ``pandas.read_csv`` accepts.
        """
        self.train_split = train_split
        self.dataset_path = dataset_path

    def loadDataset(self):
        """Read the CSV and split it, in file order, into train and test sets.

        Columns 0 and 1 are the features and column 2 is the class label. Rows
        are not shuffled: the first ``train_split`` fraction becomes the
        training set and the remainder the test set. The covariances are
        estimated later, per class, by ``summary``.
        """
        dataset = pd.read_csv(self.dataset_path)
        n_rows = dataset.shape[0]
        self.train_size = int(self.train_split * n_rows)
        # int((1 - train_split) * n_rows) can truncate one row short through
        # floating-point error (0.8 of 1000 rows gives 199), so count the rows
        # that are actually left over instead.
        self.test_size = n_rows - self.train_size
        self.X = dataset.iloc[:, [0, 1]].values.astype(float)
        self.y = dataset.iloc[:, 2].values
        # training set split
        self.X_train = self.X[:self.train_size, :]
        self.y_train = self.y[:self.train_size]
        # testing set split
        self.X_test = self.X[self.train_size:, :]
        self.y_test = self.y[self.train_size:]
        # predict() charges L[c, k] for choosing label k when the class is c.
        self.L = np.array([[0, 2, 1], [2, 0, 3], [1, 3, 0]])

    def plot(self):
        """Scatter-plot the training set, one colour per class."""
        X_set, y_set = self.X_train, self.y_train
        palette = ListedColormap(('red', 'green', "blue"))
        for i, j in enumerate(np.unique(y_set)):
            members = y_set == j
            # ``color=`` rather than ``c=``: a bare RGBA tuple passed as ``c``
            # is ambiguous and is read as data when a class has four points.
            plt.scatter(X_set[members, 0], X_set[members, 1],
                        color=palette(i), label=j, marker='.')
        plt.title('Training set')
        plt.xlabel('x_1')
        plt.ylabel('x_2')
        plt.legend()
        plt.show()

    def generate_data(self):
        """Group the training samples by class label.

        Sets ``self.class_data_dic[c]`` to an ``(n_features, n_c)`` array whose
        columns are the training samples labelled ``c``, in file order.

        Raises:
            ValueError: If a class has no training samples.
        """
        grouped = {}
        for label in range(self.N_CLASSES):
            members = self.X_train[self.y_train == label]
            if members.shape[0] == 0:
                raise ValueError(
                    "no training samples for class {}".format(label))
            grouped[label] = members.T
        self.class_data_dic = grouped

    def summary(self):
        """Compute per-class means, standard deviations and covariances.

        ``self.cov[c]`` is the full sample covariance of class ``c``.
        """
        classes = range(self.N_CLASSES)
        self.mean = [np.mean(self.class_data_dic[c], axis=1) for c in classes]
        self.std = [np.std(self.class_data_dic[c], axis=1) for c in classes]
        # Class arrays are (features, samples), which is np.cov's default layout.
        self.cov = [np.cov(self.class_data_dic[c]) for c in classes]

    def fit(self):
        """Load the dataset and estimate the class statistics."""
        self.loadDataset()
        self.generate_data()
        self.summary()

    def _log_joint(self, X, X_train_class, mean, cov):
        """Return ``log(p(x | class) * P(class))`` for every row of ``X``."""
        X = np.atleast_2d(np.asarray(X, dtype=float))
        cov = np.atleast_2d(cov)
        diff = X - mean
        # Row-wise quadratic form: one Mahalanobis distance per sample instead
        # of the full N x N matrix of cross terms between samples.
        maha = np.sum(diff.dot(np.linalg.inv(cov)) * diff, axis=1)
        dim = X.shape[1]
        log_norm = -0.5 * (dim * np.log(2 * np.pi)
                           + np.log(abs(np.linalg.det(cov))))
        # Class arrays are stored as (features, samples), so the sample count
        # that defines the prior is the number of columns.
        log_prior = np.log(X_train_class.shape[1] / self.X_train.shape[0])
        return log_norm - 0.5 * maha + log_prior

    def posterior(self, X, X_train_class, mean, cov):
        """Return the unnormalised posterior of one class for each sample.

        Args:
            X: ``(n_samples, n_features)`` points (a single point is accepted).
            X_train_class: ``(n_features, n_c)`` training samples of the class;
                only its sample count is used, for the prior.
            mean: Mean vector of the class.
            cov: Covariance matrix of the class.

        Returns:
            A ``(n_samples,)`` array of ``p(x | class) * P(class)``.
        """
        return np.exp(self._log_joint(X, X_train_class, mean, cov))

    def predict(self, X_test=None):
        """Return the minimum-expected-loss label for each sample.

        Args:
            X_test: ``(n_samples, n_features)`` points; defaults to the test
                split.

        Returns:
            A ``(n_samples,)`` integer array of predicted labels.
        """
        if X_test is None:
            X_test = self.X_test
        log_joint = np.column_stack([
            self._log_joint(X_test, self.class_data_dic[c], self.mean[c],
                            self.cov[c])
            for c in range(self.N_CLASSES)
        ])
        # Rescaling a row by a positive constant does not change its argmin,
        # so divide out the largest term to keep points far from every class
        # from underflowing to all-zero scores.
        scores = np.exp(log_joint - log_joint.max(axis=1, keepdims=True))
        return np.argmin(scores.dot(self.L), axis=1)

    def plot_decision_boundary(self):
        """Draw the predicted regions with the test samples on top."""
        X_set, y_set = self.X_test, self.y_test
        X1, X2 = np.meshgrid(
            np.arange(start=X_set[:, 0].min() - 1,
                      stop=X_set[:, 0].max() + 1, step=0.1),
            np.arange(start=X_set[:, 1].min() - 1,
                      stop=X_set[:, 1].max() + 1, step=0.1))
        grid = np.column_stack([X1.ravel(), X2.ravel()])
        plt.contourf(X1, X2, self.predict(grid).reshape(X1.shape),
                     alpha=0.75,
                     cmap=ListedColormap(('orange', 'green', "blue")))
        plt.xlim(X1.min(), X1.max())
        plt.ylim(X2.min(), X2.max())
        palette = ListedColormap(('red', 'green', "blue"))
        for i, j in enumerate(np.unique(y_set)):
            members = y_set == j
            plt.scatter(X_set[members, 0], X_set[members, 1],
                        color=palette(i), label=j, marker='.')
        plt.title('Naive Bayes Classification our implementation(Test set)')
        plt.xlabel('X_1')
        plt.ylabel('X_2')
        plt.legend()
        plt.show()

    def _test_predictions(self):
        """Return the stored test predictions, computing them on first use."""
        if getattr(self, 'y_pred', None) is None:
            self.y_pred = self.predict()
        return self.y_pred

    def performance_metric(self):
        """Print accuracy, the test confusion matrix and the label MSE."""
        y_train_pred = self.predict(self.X_train)
        # Uses ``self.y_pred`` when the caller stored it, otherwise predicts.
        y_test_pred = self._test_predictions()
        print('Train Accuracy : ', accuracy_score(self.y_train, y_train_pred), '\n',
              'Test Accuracy : ', accuracy_score(self.y_test, y_test_pred), '\n',
              'Confusion matrix : \n', confusion_matrix(self.y_test, y_test_pred))
        print('\n\n')
        print('Mean Square Error on training data : ', mean_squared_error(self.y_train, y_train_pred), '\n',
              'Mean Square Error on testing : ', mean_squared_error(self.y_test, y_test_pred))

In [None]:
# Train the Case 5 model on the first 80% of the rows, then keep its
# predictions for the held-out rows so performance_metric() can score them.
naiveBayes5 = NaiveBayes5(train_split=0.8)
naiveBayes5.fit()
naiveBayes5.y_pred = naiveBayes5.predict(naiveBayes5.X_test)

In [None]:
# Print train/test accuracy, the test confusion matrix and the label MSE.
naiveBayes5.performance_metric()