# Naive Bayes Notebook

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics
import statsmodels.tools.tools as stattools

In [2]:
# Read the employee CSV into a DataFrame; the relative path resolves against
# the kernel's current working directory.
employee = pd.read_csv(filepath_or_buffer="../data/employee_cleaned.csv")

## Encoding categories via pandas dummy variables

In [3]:
# One-hot encode City, EverBenched and JoiningYear (each becomes a set of
# indicator columns).
employee = pd.get_dummies(
    data=employee,
    columns=["City", "EverBenched", "JoiningYear"],
)
# Remove the five columns listed below from the frame.
employee = employee.drop(
    labels=[
        'Age',
        'Education',
        'PaymentTier',
        'Gender',
        'ExperienceInCurrentDomain',
    ],
    axis="columns",
)

## Partition the data before training the model

In [4]:
# Features: every column except the target.
X = employee.loc[:, ~employee.columns.isin(['LeaveOrNot'])]
# Target: selected with a column mask, so `y` is a DataFrame rather than a
# Series.
y = employee.loc[:, employee.columns.isin(['LeaveOrNot'])]

In [5]:
# Hold out 40% of the rows for evaluation; the fixed random_state makes the
# shuffle reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=7,
    test_size=0.40,
)

## Running Naive Bayes Algorithm

In [6]:
# Fit multinomial Naive Bayes; the training target is flattened to a 1-D array
# before being handed to the estimator.
nb = MultinomialNB().fit(X=X_train, y=y_train.to_numpy().ravel())

In [7]:
# Predict a class label for each row of the held-out features.
y_pred = nb.predict(X=X_test)

## Evaluating Naive Bayes Predictions

In [8]:
# Cross-tabulate actual vs. predicted labels (a confusion matrix).
ypred = pd.crosstab(
    index=y_test['LeaveOrNot'],
    columns=y_pred,
    rownames=['Actual'],
    colnames=['Predicted'],
)
# Append a per-row total column first, then a per-column total row; the order
# matters so that the bottom-right cell holds the grand total.
ypred['Total'] = ypred.sum(axis='columns')
ypred.loc['Total'] = ypred.sum(axis='index')
print(ypred)

Predicted    0    1  Total
Actual                    
0          643    7    650
1          340  116    456
Total      983  123   1106


In [9]:
# Summary classification metrics for the predictions on the test split.
print("Accuracy:", metrics.accuracy_score(y_true=y_test, y_pred=y_pred))
print("Precision:", metrics.precision_score(y_true=y_test, y_pred=y_pred))
print("Recall:", metrics.recall_score(y_true=y_test, y_pred=y_pred))

Accuracy: 0.6862567811934901
Precision: 0.943089430894309
Recall: 0.2543859649122807
