## Naive Bayes Classification

In [1]:
# importing necessary libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle

In [2]:
# importing the dataset

dataset = pd.read_csv('Social_Network_Ads.csv')
print(dataset.shape)
print(dataset.head())

(400, 3)
   Age  EstimatedSalary  Purchased
0   19            19000          0
1   35            20000          0
2   26            43000          0
3   27            57000          0
4   19            76000          0


In [3]:
# Selecting the variables

X = dataset.iloc[:,:-1].values
y = dataset.iloc[:,-1].values

In [4]:
print(X[:5])
print(y[:5])

[[   19 19000]
 [   35 20000]
 [   26 43000]
 [   27 57000]
 [   19 76000]]
[0 0 0 0 0]


In [5]:
# spliting the dataset into trian and test sets

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X,y, random_state = 0, test_size = 0.25)

In [6]:
print(X_train[:4])
print(y_train[:4])
print(X_test[:4])
print(y_test[:4])

[[    44  39000]
 [    32 120000]
 [    38  50000]
 [    32 135000]]
[0 1 0 1]
[[   30 87000]
 [   38 50000]
 [   35 75000]
 [   30 79000]]
[0 0 0 0]


In [7]:
# feature Scaling

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)



In [8]:
# training the Naive Bayes model on the training set

from sklearn.naive_bayes import GaussianNB

classifier = GaussianNB()
classifier.fit(X_train, y_train)


GaussianNB(priors=None, var_smoothing=1e-09)

In [9]:
# saving the classifier and the naive bayes model

with open('naive_bayes_model', 'wb') as file:
    pickle.dump(classifier, file)
    
    
with open('scaler', 'wb') as file:
    pickle.dump(scaler, file)


In [10]:
# make a prediction for age=19 and salary=20000

print(classifier.predict(scaler.transform([[19,20000]])))

[0]


In [11]:
# make and comparing predictions for test set
y_pred = classifier.predict(X_test)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)), axis = 1)[:20])

[[0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [1 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]]


In [12]:
# making the confusion matrix

from sklearn.metrics import confusion_matrix, accuracy_score

cm = confusion_matrix(y_test, y_pred)
print(cm)

print(accuracy_score(y_test, y_pred))

[[65  3]
 [ 7 25]]
0.9


In [14]:
from sklearn.model_selection import cross_val_score
accuracies = cross_val_score(estimator = classifier, X = X_train, y = y_train, cv = 10)
print("Accuracy: {:.2f} %".format(accuracies.mean()*100))
print("Standard Deviation: {:.2f} %".format(accuracies.std()*100))


Accuracy: 87.67 %
Standard Deviation: 8.95 %


In [15]:
from sklearn.model_selection import GridSearchCV
parameters = [{'C': [0.25, 0.5, 0.75, 1], 'kernel': ['linear']},
              {'C': [0.25, 0.5, 0.75, 1], 'kernel': ['rbf'], 'gamma': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]}]
grid_search = GridSearchCV(estimator = classifier,
                           param_grid = parameters,
                           scoring = 'accuracy',
                           cv = 10,
                           n_jobs = -1)
grid_search.fit(X_train, y_train)
best_accuracy = grid_search.best_score_
best_parameters = grid_search.best_params_
print("Best Accuracy: {:.2f} %".format(best_accuracy*100))
print("Best Parameters:", best_parameters)

ValueError: Invalid parameter C for estimator GaussianNB(priors=None, var_smoothing=1e-09). Check the list of available parameters with `estimator.get_params().keys()`.

In [17]:
classifier.get_params().keys()

dict_keys(['priors', 'var_smoothing'])