# Naive Bayes Classifier (Self Made)

### 1. Importing Libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
from sklearn.metrics import r2_score
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn import metrics
from sklearn.metrics import confusion_matrix
from collections import defaultdict

### 2. Data Preprocessing

In [2]:
pima = pd.read_csv("diabetes.csv")

pima.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [3]:
pima.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Pregnancies               768 non-null    int64  
 1   Glucose                   768 non-null    int64  
 2   BloodPressure             768 non-null    int64  
 3   SkinThickness             768 non-null    int64  
 4   Insulin                   768 non-null    int64  
 5   BMI                       768 non-null    float64
 6   DiabetesPedigreeFunction  768 non-null    float64
 7   Age                       768 non-null    int64  
 8   Outcome                   768 non-null    int64  
dtypes: float64(2), int64(7)
memory usage: 54.1 KB


In [4]:
#normalizing the dataset
scalar = preprocessing.MinMaxScaler()
pima = scalar.fit_transform(pima)

#split dataset in features and target variable

X = pima[:,:8]
y = pima[:, 8]

X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.3, random_state=42)


print(X_train.shape, X_test.shape, Y_train.shape, Y_test.shape)


(537, 8) (231, 8) (537,) (231,)


### 3. Required Functions

In [14]:

def normal_distr(x, mean, dev):
    
    #finding the value through the normal distribution formula
    return (1/(np.sqrt(2 * np.pi) * dev)) * (np.exp(- (((x - mean) / dev) ** 2) / 2))

def finding_mean(X):
    
    return np.mean(X)

def finding_std_dev(X):
    
    return np.std(X)

#def pred(X_test):
    

def train(X_train,Y_train):
    
    labels = set(Y_train)
    
    cnt_table = defaultdict(list)
    
    for row in range(X_train.shape[0]):
        
        for col in range(X_train.shape[1]):
            
            cnt_table[(col, Y_train[row])].append(X_train[row][col])
            
    
    lookup_list = defaultdict(list)
    
    for item in cnt_table.items():
        
        X_category = np.asarray(item[1])
        
        lookup_list[(item[0][0], item[0][1])].append(finding_mean(X_category))
        lookup_list[(item[0][0], item[0][1])].append(finding_std_dev(X_category))
        
    
    return lookup_list


def pred(X_test, lookup_list):
    
    Y_pred = []
    
    
    for row in range(X_test.shape[0]):
        
        prob_yes = 1
        prob_no = 1
        for col in range(X_test.shape[1]):
            
            prob_yes = prob_yes * normal_distr(X_test[row][col], lookup_list[(col, 1)][0], lookup_list[(col, 1)][1])
            prob_no = prob_no * normal_distr(X_test[row][col], lookup_list[(col, 0)][0], lookup_list[(col, 1)][1])
            
        if(prob_yes >= prob_no):
            
            Y_pred.append(1)
            
        else:
            
            Y_pred.append(0)
            
    
    return np.asarray(Y_pred)


def score(Y_pred, Y_test):
    
    correct_pred = np.sum(Y_pred == Y_test)
    
    return correct_pred / Y_pred.shape[0]


def naive_bayes(X_train,Y_train, X_test, Y_test):
    
    lookup_list = train(X_train, Y_train)
    
    Y_pred = pred(X_test, lookup_list)
    
    return score(Y_pred, Y_test)
    
    

In [15]:
score = naive_bayes(X_train, Y_train, X_test, Y_test)

<class 'numpy.ndarray'>


In [11]:
print("The accuracy of the model is : {0}".format(score))

The accuracy of the model is : -0.24912414318354914
