# Naive Bayes

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [2]:
# Read the input CSV into a DataFrame.
dataset = pd.read_csv('image_bins_stats_bkurto.csv')

# Features: the eight columns bkurto_bins0 .. bkurto_bins7 (X stays a DataFrame).
feature_columns = ['bkurto_bins' + str(bin_index) for bin_index in range(8)]
X = dataset[feature_columns]

# Target: the last column of the file, as a NumPy array.
y = dataset.iloc[:, -1].values

In [3]:
# Inspect the selected feature columns.
print(X)

       bkurto_bins0  bkurto_bins1  bkurto_bins2  bkurto_bins3  bkurto_bins4  \
0         47.621590      3.092351           0.0           0.0      0.000000   
1          7.154429      0.840896           0.0           0.0      0.000000   
2         43.394240      0.420448           0.0           0.0      7.701832   
3          6.601182      2.619225           0.0           0.0      0.000000   
4          6.869720      0.000000           0.0           0.0      0.000000   
...             ...           ...           ...           ...           ...   
23995      8.170191      0.000000           0.0           0.0      0.000000   
23996      7.859719      0.000000           0.0           0.0      0.000000   
23997      7.887594      0.000000           0.0           0.0      0.000000   
23998      7.406609      0.000000           0.0           0.0      0.000000   
23999      8.664782      0.000000           0.0           0.0      0.000000   

       bkurto_bins5  bkurto_bins6  bkurto_bins7  
0

In [4]:
# Inspect the target values (taken from the dataset's last column).
print(y)

[1 1 1 ... 0 0 0]


## Splitting the dataset into the Training set and Test set

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows as the test set; the fixed random_state makes the
# shuffle (and therefore the split) repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

In [6]:
# Inspect the training portion of the features returned by train_test_split.
print(X_train)

       bkurto_bins0  bkurto_bins1  bkurto_bins2  bkurto_bins3  bkurto_bins4  \
11524      4.697539      0.000000           0.0           0.0      0.000000   
15653      1.569338      0.000000           0.0           0.0      0.000000   
7256       1.539324      0.000000           0.0           0.0      6.449485   
15235      1.854283      0.000000           0.0           0.0      0.000000   
9717      33.515377      2.692971           0.0           0.0      5.072606   
...             ...           ...           ...           ...           ...   
13123      6.592747      0.000000           0.0           0.0      0.000000   
19648      1.480392      0.000000           0.0           0.0      0.000000   
9845      43.518837      6.420400           0.0           0.0      1.808437   
10799      6.627811      0.000000           0.0           0.0      0.000000   
2732      47.035442      3.984198           0.0           0.0      0.000000   

       bkurto_bins5  bkurto_bins6  bkurto_bins7  
1

In [7]:
# Inspect the training portion of the target returned by train_test_split.
print(y_train)

[1 0 1 ... 1 1 1]


In [8]:
# Inspect the held-out (test) portion of the features returned by train_test_split.
print(X_test)

       bkurto_bins0  bkurto_bins1  bkurto_bins2  bkurto_bins3  bkurto_bins4  \
5118       2.380153      0.000000           0.0           0.0      0.000000   
10284      7.340075      2.300521           0.0           0.0      0.000000   
6208       1.414617      0.000000           0.0           0.0      0.000000   
3361      46.133754      4.357952           0.0           0.0      0.000000   
7068       1.634967      0.000000           0.0           0.0      0.000000   
...             ...           ...           ...           ...           ...   
15607      2.425029      0.000000           0.0           0.0      0.000000   
9154      50.215545      1.812567           0.0           0.0      8.060602   
3464       6.637429      0.000000           0.0           0.0      0.000000   
9808       7.379634      0.420448           0.0           0.0      1.847616   
6956       1.420615      0.000000           0.0           0.0      0.000000   

       bkurto_bins5  bkurto_bins6  bkurto_bins7  
5

In [9]:
# Inspect the held-out (test) portion of the target returned by train_test_split.
print(y_test)

[1 1 1 ... 1 1 1]


## Training the Naive Bayes model on the Training set

In [10]:
from sklearn.naive_bayes import GaussianNB

# Gaussian Naive Bayes with default settings, fitted on the training split.
# The fit call is the cell's last expression, so the fitted estimator is displayed.
classifier = GaussianNB()
classifier.fit(X=X_train, y=y_train)

GaussianNB()

## Predicting the Test set results and making the Confusion Matrix

In [11]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Predict a label for every row of the held-out test features.
y_pred = classifier.predict(X_test)

# Confusion matrix of true test labels against the predictions.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

# Bare last expression: the notebook displays the test accuracy as the cell output.
accuracy_score(y_true=y_test, y_pred=y_pred)

[[2876   83]
 [ 398 2643]]


0.9198333333333333

In [12]:
from sklearn.metrics import classification_report

# Text summary of per-class precision, recall, F1 and support on the test split.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.88      0.97      0.92      2959
           1       0.97      0.87      0.92      3041

    accuracy                           0.92      6000
   macro avg       0.92      0.92      0.92      6000
weighted avg       0.92      0.92      0.92      6000



In [16]:
# NOTE(review): svm and sns are not used in this cell; the imports are kept
# because other cells may rely on them — confirm before removing.
from sklearn import svm, metrics
from sklearn.metrics import confusion_matrix
import seaborn as sns

print(confusion_matrix(y_test, y_pred))

# (format string, scoring function) pairs, printed in this order.
# "Sensitivity" and "Recall" both call recall_score, so they print the same value.
summary_rows = [
    ('Accuracy: {0:.3f}', metrics.accuracy_score),
    ('F1 Score: {0:.3f}', metrics.f1_score),
    ('Sensitivity: {0:.3f}', metrics.recall_score),
    ('Precision: {0:.3f}', metrics.precision_score),
    ('Recall: {0:.3f}', metrics.recall_score),
]
for template, scorer in summary_rows:
    print(template.format(scorer(y_test, y_pred)))

[[2876   83]
 [ 398 2643]]
Accuracy: 0.920
F1 Score: 0.917
Sensitivity: 0.869
Precision: 0.970
Recall: 0.869


In [14]:
from sklearn.model_selection import cross_val_score
accuracies = cross_val_score(estimator = classifier, X = X_train, y = y_train, cv = 10)
print("Accuracy: {:.2f} %".format(accuracies.mean()*100))
print("Standard Deviation: {:.2f} %".format(accuracies.std()*100))

Accuracy: 92.35 %
Standard Deviation: 0.41 %
