In [2]:
#Import all required libraries to create a GNB Classifier Model
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import pickle
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix

In [3]:
# Load the data set
Data = pd.read_csv(r"C:\Users\Vinoth\Dropbox\PC\Desktop\HOPE AI\Machine Learning\Classification_Models\DataSets\Social_Network_Ads.csv")

In [4]:
Data.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


In [5]:
Data.shape

(400, 5)

## To know more about the data please refer the Exploratory Data Analysis on Social Network Ads DataSet.ipynb file

In [6]:
#Convert the Nominal categorical data of "Gender" column to Numeric values

Data = pd.get_dummies(Data,drop_first=True)

In [7]:
Data.head()

Unnamed: 0,User ID,Age,EstimatedSalary,Purchased,Gender_Male
0,15624510,19,19000,0,1
1,15810944,35,20000,0,1
2,15668575,26,43000,0,0
3,15603246,27,57000,0,0
4,15804002,19,76000,0,1


In [8]:
Data.columns

Index(['User ID', 'Age', 'EstimatedSalary', 'Purchased', 'Gender_Male'], dtype='object')

In [9]:
# Arrange the columns for better visualization and also we can drop User ID colum as it doesn't makes any sense
Data = Data.loc[:,['Gender_Male','Age', 'EstimatedSalary','Purchased']]

In [10]:
Data

Unnamed: 0,Gender_Male,Age,EstimatedSalary,Purchased
0,1,19,19000,0
1,1,35,20000,0
2,0,26,43000,0
3,0,27,57000,0
4,1,19,76000,0
...,...,...,...,...
395,0,46,41000,1
396,1,51,23000,1
397,0,50,20000,1
398,1,36,33000,0


In [11]:
# Let's Seperate dependent and independent variables

independent = Data[['Gender_Male','Age', 'EstimatedSalary']]
dependent = Data[['Purchased']]

In [12]:
dependent.head()

Unnamed: 0,Purchased
0,0
1,0
2,0
3,0
4,0


In [13]:
independent.head()

Unnamed: 0,Gender_Male,Age,EstimatedSalary
0,1,19,19000
1,1,35,20000
2,0,26,43000
3,0,27,57000
4,1,19,76000


In [14]:
# Standardize the data
Std =StandardScaler()
independent_trsm = Std.fit_transform(independent)


In [15]:
independent_trsm.shape

(400, 3)

In [16]:
independent_trsm

array([[ 1.02020406, -1.78179743, -1.49004624],
       [ 1.02020406, -0.25358736, -1.46068138],
       [-0.98019606, -1.11320552, -0.78528968],
       ...,
       [-0.98019606,  1.17910958, -1.46068138],
       [ 1.02020406, -0.15807423, -1.07893824],
       [-0.98019606,  1.08359645, -0.99084367]])

In [18]:
# Lets Split Train and Test Data

X_Train,X_Test,Y_Train,Y_Test = train_test_split(independent_trsm,dependent,random_state=0,test_size=0.30)

In [19]:
# Lets Create the Model using training data and default parameters
GNB_Classifier_Model = GaussianNB()
GNB_Classifier_Model.fit(X_Train,Y_Train)

  y = column_or_1d(y, warn=True)


In [20]:
# Lets Test the model using the Test Data
Y_Predicted = GNB_Classifier_Model.predict(X_Test)

In [21]:
# Lets check the performance of the model using confusion Matrix 
confusion_matrix(Y_Predicted,Y_Test)

array([[76,  8],
       [ 3, 33]], dtype=int64)

In [22]:
# importing the classification report function from sklearn.metrics module
from sklearn.metrics import classification_report

In [23]:
clf_report =classification_report(Y_Predicted,Y_Test)

In [24]:
print(clf_report)

              precision    recall  f1-score   support

           0       0.96      0.90      0.93        84
           1       0.80      0.92      0.86        36

    accuracy                           0.91       120
   macro avg       0.88      0.91      0.89       120
weighted avg       0.91      0.91      0.91       120



In [25]:
import sklearn
sklearn.metrics.get_scorer_names()

['accuracy',
 'adjusted_mutual_info_score',
 'adjusted_rand_score',
 'average_precision',
 'balanced_accuracy',
 'completeness_score',
 'explained_variance',
 'f1',
 'f1_macro',
 'f1_micro',
 'f1_samples',
 'f1_weighted',
 'fowlkes_mallows_score',
 'homogeneity_score',
 'jaccard',
 'jaccard_macro',
 'jaccard_micro',
 'jaccard_samples',
 'jaccard_weighted',
 'matthews_corrcoef',
 'max_error',
 'mutual_info_score',
 'neg_brier_score',
 'neg_log_loss',
 'neg_mean_absolute_error',
 'neg_mean_absolute_percentage_error',
 'neg_mean_gamma_deviance',
 'neg_mean_poisson_deviance',
 'neg_mean_squared_error',
 'neg_mean_squared_log_error',
 'neg_median_absolute_error',
 'neg_root_mean_squared_error',
 'normalized_mutual_info_score',
 'precision',
 'precision_macro',
 'precision_micro',
 'precision_samples',
 'precision_weighted',
 'r2',
 'rand_score',
 'recall',
 'recall_macro',
 'recall_micro',
 'recall_samples',
 'recall_weighted',
 'roc_auc',
 'roc_auc_ovo',
 'roc_auc_ovo_weighted',
 'roc_auc_

In [27]:
pickle.dump(GNB_Classifier_Model,open(r"C:\Users\Vinoth\Dropbox\PC\Desktop\HOPE AI\Machine Learning\Classification_Models\Final_Model\GNB_Classifier_Model_Final.sav",'wb'))