# Discrete Binary Naive Bayes Classifiers in Practice
We'll read the CSV file (`coindata.csv`) that we created earlier with a generative model, fit a Bernoulli Naive Bayes classifier to it, and see how well the classifier works.

In [1]:
import numpy as np
import numpy.random as nprand
import math as math
import pandas as pd

# Load the generated coin data; the first CSV column is used as the row index.
df = pd.read_csv(filepath_or_buffer='coindata.csv', index_col=0)

# Summary statistics of every numeric column, shown as the cell's output.
df.describe()

FileNotFoundError: File b'coindata.csv' does not exist

In [2]:
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score

# Training data: the two observed columns and the coin label to predict.
labels = df['Coin']
features = df[['Flip', 'Hat']]

# fit() returns the estimator itself, so construction and training are chained.
clf = BernoulliNB().fit(features, labels)

# class_log_prior_ stores log P(Coin = c); exponentiate to get probabilities.
theta_coin = list(map(math.exp, clf.class_log_prior_))

print('P(Coin = 0) =', theta_coin[0])
print('P(Coin = 1) =', theta_coin[1])

# feature_log_prob_ is indexed [class, feature]. Columns follow the column
# order of `features` (Flip, then Hat). The labels printed below assume the
# class rows are Coin = 0 then Coin = 1 -- TODO confirm clf.classes_ == [0, 1].
feature_parameters = np.exp(clf.feature_log_prob_)

print('P(Flip = 1 | Coin = 0)', feature_parameters[0, 0])
print('P(Flip = 1 | Coin = 1)', feature_parameters[1, 0])

print('P(Hat = 1 | Coin = 0)', feature_parameters[0, 1])
print('P(Hat = 1 | Coin = 1)', feature_parameters[1, 1])



P(Coin = 0) = 0.6100000000000003
P(Coin = 1) = 0.3900000000000001
P(Flip = 1 | Coin = 0) 0.10130718954248367
P(Flip = 1 | Coin = 1) 0.8852040816326534
P(Hat = 1 | Coin = 0) 0.2826797385620916
P(Hat = 1 | Coin = 1) 0.4183673469387758


In [3]:
# Cross-validate with cv=10: one score per fold, followed by their mean.
cvs = cross_val_score(estimator=clf, X=features, y=labels, cv=10)
print(cvs, cvs.mean(), sep='\n')

# Accuracy of the already-fitted clf on the same rows passed to predict();
# no held-out split is involved in this particular number.
predicted = clf.predict(features)
print("Accuracy", accuracy_score(labels, predicted))

[0.92 0.92 0.94 0.89 0.89 0.88 0.82 0.91 0.93 0.85]
0.8949999999999999
Accuracy 0.895
