## Test: Our Naive Bayes Model vs. Scikit-learn Naive Bayes, Using Random Data:

In [21]:
# Import libraries, packages, modules:

# Models (estimator classes):
from naive_bayes import NaiveBayesGaussian
from sklearn.naive_bayes import GaussianNB

# Other, for testing below:
import numpy as np
import pandas as pd
import random

In [22]:
# Test data: Make randomized data to test with:

test_num_features = random.randint(1, 51)
test_num_rows = test_num_features * 5
# OR: random.randint(0, 101) OR np.random.randint(0, 101)

test_feature_matrix = [[random.uniform(-10, 10) for feature in range(test_num_features)] for row in range(test_num_rows)]
# OR: np.random.uniform(low=-10, high=10, size=(test_num_rows, test_num_features))
test_target = [random.randint(0, 1) for row in range(test_num_rows)]
# OR: np.random.random_integers(low=0, high=1, size=(test_length,))

In [23]:
test_feature_matrix

[[-3.964093544941452,
  -7.855814227147424,
  9.751250675066352,
  4.729076889400378,
  9.493984996162823,
  6.812296929178423,
  7.30723038884895,
  -5.914173615352127,
  -1.1441788496248169,
  3.5870202961614552,
  -3.170952655145703,
  0.1829532911740035],
 [-0.5432829584681436,
  3.844922652458818,
  1.7556792374837116,
  8.758428724617634,
  -8.988361608453841,
  5.214523754255389,
  6.675590377622086,
  2.2985366661279,
  5.901018895462048,
  -6.496492587753773,
  6.241829723715192,
  -1.9167801783381613],
 [8.964721079873499,
  -7.57700524586302,
  7.094246219832677,
  6.607601921214304,
  0.3355078199833077,
  6.511947404602317,
  -2.3680362031395603,
  3.947758574984398,
  1.8449065991291818,
  3.424903455085893,
  6.236635533199909,
  7.706547094284353],
 [-8.039881193414404,
  -5.802605272095103,
  -9.739247453636837,
  9.824968865148804,
  -4.316830634030713,
  -5.9728433993448915,
  3.1241543130138467,
  -6.8931320740216195,
  6.804003133991671,
  -6.7944652421229845,
  -5

In [24]:
# Get shape of feature matrix:
print(f"Test feature matrix shape: {len(test_feature_matrix)} x {len(test_feature_matrix[0])}")

Test feature matrix shape: 60 x 12


In [25]:
test_target

[1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 1]

In [27]:

len(test_target)

60

### Predictions and Accuracy vs. Scikit-learn's NB Model:

Our model's predictions and accuracy score:

In [28]:
# Our model's predictions:
model_nb = NaiveBayesGaussian()
model_nb.fit(X_features=test_feature_matrix, y_target=test_target)
model_nb.predict(X_features=test_feature_matrix)

[1,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 0]

In [29]:
# Our model's accuracy score:
model_nb.score(X_features=test_feature_matrix, y_target=test_target)

0.75

Scikit-learn Naive Bayes (Gaussian) model's predictions and accuracy score:

In [31]:
# Scikit-learn model's predictions:
from sklearn.naive_bayes import GaussianNB

sklearn_model_nb = GaussianNB()
sklearn_model_nb.fit(X=test_feature_matrix, y=test_target)
sklearn_predictions = sklearn_model_nb.predict(X=test_feature_matrix)
sklearn_predictions

array([1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0,
       1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1,
       1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0])

In [32]:
# Scikit-learn model's accuracy score:
from sklearn.metrics import accuracy_score

accuracy_score(y_true=test_target, y_pred=sklearn_predictions)

0.75