# Predict hire/no hire using Naive Bayes Classification

In [56]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [57]:
# Load the past-hires records into a DataFrame; the CSV path is relative to the working directory.
data = pd.read_csv("NewPastHires.csv")

In [58]:
data.head()

Unnamed: 0.1,Unnamed: 0,TotalExperience,CurrentlyEmployed,NumberOfPreviousEmployers,EducationLevel,AttendedTopTierSchool,DidInternship,WasCandidateHired
0,0,10,Y,4,BS,N,N,Y
1,1,0,N,0,BS,Y,Y,Y
2,2,7,N,6,BS,N,N,N
3,3,2,Y,1,MS,Y,N,Y
4,4,20,N,2,PhD,Y,N,N


In [59]:
from sklearn import preprocessing 

In [60]:
# A single shared LabelEncoder. Every later fit_transform call refits it on the
# column it is given, so it only ever holds the classes of the most recent column.
le = preprocessing.LabelEncoder()

In [61]:
# Label-encode each selected feature column into integer codes.
# The shared encoder `le` is refit on every column in the order listed, so
# after this cell it only remembers the classes of the last one (DidInternship).
feature_columns = [
    'TotalExperience',
    'CurrentlyEmployed',
    'NumberOfPreviousEmployers',
    'EducationLevel',
    'AttendedTopTierSchool',
    'DidInternship',
]
(
    TotalExperience_encoded,
    CurrentlyEmployed_encoded,
    NumberOfPreviousEmployers_encoded,
    EducationLevel_encoded,
    AttendedTopTierSchool_encoded,
    DidInternship_encoded,
) = [le.fit_transform(data[column]) for column in feature_columns]

In [62]:
# Encode the target column WasCandidateHired as integers.
# Comparing the first rows of data.head() with the printed array shows Y -> 1 and N -> 0.
label = le.fit_transform(data['WasCandidateHired'])
print(label)

[1 1 0 1 0 1 1 1 1 0 0 1 1]


In [63]:
# Build one tuple per candidate from the encoded columns, in this fixed order:
# (TotalExperience, CurrentlyEmployed, NumberOfPreviousEmployers,
#  EducationLevel, AttendedTopTierSchool, DidInternship).
encoded_columns = (
    TotalExperience_encoded,
    CurrentlyEmployed_encoded,
    NumberOfPreviousEmployers_encoded,
    EducationLevel_encoded,
    AttendedTopTierSchool_encoded,
    DidInternship_encoded,
)
features = list(zip(*encoded_columns))

In [64]:
features

[(7, 1, 3, 0, 0, 0),
 (0, 0, 0, 0, 1, 1),
 (6, 0, 5, 0, 0, 0),
 (2, 1, 1, 1, 1, 0),
 (9, 0, 2, 2, 1, 0),
 (0, 0, 0, 2, 1, 1),
 (5, 1, 2, 1, 0, 1),
 (3, 0, 1, 0, 0, 1),
 (8, 1, 4, 0, 0, 0),
 (0, 0, 0, 0, 0, 0),
 (1, 0, 1, 2, 1, 0),
 (4, 1, 1, 0, 0, 1),
 (0, 0, 0, 2, 1, 0)]

In [65]:
from sklearn.naive_bayes import GaussianNB
# Gaussian Naive Bayes classifier created with its default arguments.
# NOTE(review): GaussianNB models each feature as a continuous Gaussian, while most
# inputs here are integer category codes -- confirm this is intended (CategoricalNB may fit better).
model = GaussianNB()

In [66]:
# Fit the Naive Bayes model on every row of `features` against the encoded hire labels.
model.fit(features,label)

GaussianNB(priors=None, var_smoothing=1e-09)

In [67]:
# hirePredict holds the model's predictions for `features`, i.e. the same rows it was fitted on.
hirePredict = model.predict(features)

In [68]:
hirePredict

array([1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0])

# Accuracy of the model

In [69]:
from sklearn import metrics

In [70]:
# Print the fraction of rows whose prediction matches the encoded label.
# Both arguments come from the rows used for fitting, so this is training accuracy,
# not accuracy on held-out data.
print("Accuracy:",metrics.accuracy_score(label,hirePredict))

Accuracy: 0.9230769230769231


# Prediction with New Employee data

In [None]:
# One new candidate as a single-row 2-D array, columns in the same order as `features`:
# (TotalExperience, CurrentlyEmployed, NumberOfPreviousEmployers, EducationLevel,
#  AttendedTopTierSchool, DidInternship).
# `model` was fitted on label-encoded codes, so these numbers are read as codes rather
# than raw values: in the training rows EducationLevel "MS" was encoded as 1, and a raw
# NumberOfPreviousEmployers of 6 was encoded as 5.
newEmployee = np.array([[5,1,5,1,1,1]])

In [72]:
newEmployee

array([[5, 1, 5, 1, 1, 1]])

In [73]:
# Predict the hire/no-hire decision for newEmployee with the fitted model.
# The printed result [1] is the encoded label for WasCandidateHired = Y, i.e. "hired".
predicted= model.predict(newEmployee) 
print ("Predicted Value:", predicted)

Predicted Value: [1]


# Test Scenario

Check whether the accuracy changes when TotalExperience and NumberOfPreviousEmployers are used as raw numeric values instead of being label-encoded.

In [76]:
# Same row layout as `features`, except TotalExperience and
# NumberOfPreviousEmployers are taken straight from `data` (raw numbers)
# instead of their label-encoded versions.
mixed_columns = (
    data['TotalExperience'],
    CurrentlyEmployed_encoded,
    data['NumberOfPreviousEmployers'],
    EducationLevel_encoded,
    AttendedTopTierSchool_encoded,
    DidInternship_encoded,
)
features_2 = list(zip(*mixed_columns))

In [77]:
features_2

[(10, 1, 4, 0, 0, 0),
 (0, 0, 0, 0, 1, 1),
 (7, 0, 6, 0, 0, 0),
 (2, 1, 1, 1, 1, 0),
 (20, 0, 2, 2, 1, 0),
 (0, 0, 0, 2, 1, 1),
 (5, 1, 2, 1, 0, 1),
 (3, 0, 1, 0, 0, 1),
 (15, 1, 5, 0, 0, 0),
 (0, 0, 0, 0, 0, 0),
 (1, 0, 1, 2, 1, 0),
 (4, 1, 1, 0, 0, 1),
 (0, 0, 0, 2, 1, 0)]

In [78]:
# Second, independent Gaussian Naive Bayes model for the raw-numeric feature set.
model_2 = GaussianNB()

In [79]:
# Fit the second model on `features_2` using the same encoded hire labels.
model_2.fit(features_2,label)

GaussianNB(priors=None, var_smoothing=1e-09)

In [80]:
# Predictions of the second model for `features_2`, the same rows it was fitted on.
hirePredict_2 = model_2.predict(features_2)

In [81]:
hirePredict_2

array([1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0])

In [82]:
# Training accuracy of the second model (predictions and labels both come from the fitted rows).
print("Accuracy:",metrics.accuracy_score(label,hirePredict_2))

Accuracy: 0.9230769230769231


Conclusion: The accuracy doesn't change. Both models produce identical predictions and score about 0.923 on the training data.