<h1 style = "color : dodgerblue"> Naive Bayes Classification </h1>

<h2 style = "color : DeepSkyBlue"> Importing Libraries </h2>

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import warnings
warnings.filterwarnings('ignore')

<h2 style = "color : DeepSkyBlue"> Importing Dataset </h2>

In [2]:
# Reading dataset
df = pd.read_excel('Dataset/Instagram_Naive_Bayes.xlsx')

In [3]:
# top 5 records in dataframe
df.head()

Unnamed: 0,Age,Mobile Phone,Purchased
0,15,6334,0
1,28,6667,0
2,20,14334,0
3,21,19000,0
4,15,25334,0


In [4]:
# bottom 5 records in dataframe
df.tail()

Unnamed: 0,Age,Mobile Phone,Purchased
194,28,26334,0
195,34,43000,1
196,42,11334,1
197,37,16667,1
198,33,26334,0


In [5]:
# shape : provides no. of columns & no. of rows
# columns : 3
# rows : 199
df.shape

(199, 3)

In [6]:
# The info() : Prints information about the DataFrame
# The information contains the number of columns, column labels, column data types, memory usage, range index, 
# and the number of cells in each column (non-null values)

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 199 entries, 0 to 198
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype
---  ------        --------------  -----
 0   Age           199 non-null    int64
 1   Mobile Phone  199 non-null    int64
 2   Purchased     199 non-null    int64
dtypes: int64(3)
memory usage: 4.8 KB


<h2 style = "color : DeepSkyBlue"> Splitting dataset into X & y </h2>

In [7]:
df.columns

Index(['Age', 'Mobile Phone', 'Purchased'], dtype='object')

In [8]:
df.Purchased

0      0
1      0
2      0
3      0
4      0
      ..
194    0
195    1
196    1
197    1
198    0
Name: Purchased, Length: 199, dtype: int64

In [9]:
#split dataset in features and target variable
feature_cols = ['Age', 'Mobile Phone']
X = df[feature_cols] # Features
y = df['Purchased'] # Target variable

In [10]:
print(X)

     Age  Mobile Phone
0     15          6334
1     28          6667
2     20         14334
3     21         19000
4     15         25334
..   ...           ...
194   28         26334
195   34         43000
196   42         11334
197   37         16667
198   33         26334

[199 rows x 2 columns]


In [11]:
print(y)

0      0
1      0
2      0
3      0
4      0
      ..
194    0
195    1
196    1
197    1
198    0
Name: Purchased, Length: 199, dtype: int64


<h2 style = "color : DeepSkyBlue"> Splitting dataset into Training & Test set </h2>

In [13]:
from sklearn.model_selection import train_test_split

In [14]:
# Split dataset into training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42) # 70% training and 30% test

<h2 style = "color : DeepSkyBlue"> Building Naive Bayes Model </h2>

In [15]:
from sklearn.naive_bayes import GaussianNB

In [16]:
# Create Naive Bayes classifer object
clf = GaussianNB()

# Train Naive Bayes Classifer
clf.fit(X_train, y_train)

In [17]:
#Predict the response for test dataset
y_pred = clf.predict(X_test)

<h2 style = "color : DeepSkyBlue"> Evaluating the Model </h2>

In [20]:
from sklearn.metrics import accuracy_score, classification_report

In [21]:
# Model Accuracy, how often is the classifier correct?
print("Accuracy : ", round(accuracy_score(y_test, y_pred) * 100 , 2))

Accuracy :  90.0


In [24]:
report = classification_report(y_test, y_pred, target_names = feature_cols)

print('Classification Report:') 

print(report)

Classification Report:
              precision    recall  f1-score   support

         Age       0.92      0.92      0.92        25
Mobile Phone       0.87      0.87      0.87        15

    accuracy                           0.90        40
   macro avg       0.89      0.89      0.89        40
weighted avg       0.90      0.90      0.90        40

