# Gaussian Naive Bayes Classifier: Fit and evaluate a model

In [6]:
import joblib
import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix

In [7]:
# Training split: feature matrix and its labels, read from the working directory.
tr_features = pd.read_csv('train_features.csv')
tr_labels = pd.read_csv('train_labels.csv')

# Test split: same layout, used further down for prediction and evaluation.
te_features = pd.read_csv('test_features.csv')
te_labels = pd.read_csv('test_labels.csv')

In [8]:
def print_results(results):
    """Print the best parameters and the per-candidate scores of a search result.

    Parameters
    ----------
    results : object
        Any object exposing ``best_params_`` and a ``cv_results_`` mapping
        with the keys ``'mean_test_score'``, ``'std_test_score'`` and
        ``'params'`` (presumably a fitted ``GridSearchCV`` -- confirm
        against the caller).

    Returns
    -------
    None
        Everything is written to stdout.
    """
    print('Best PARAMS: {}\n'.format(results.best_params_))

    cv = results.cv_results_
    rows = zip(cv['mean_test_score'], cv['std_test_score'], cv['params'])
    for avg_score, score_std, candidate in rows:
        # The spread shown after "+/-" is two standard deviations.
        summary = '{} (+/-{}) for {}'.format(
            round(avg_score, 3), round(score_std * 2, 3), candidate
        )
        print(summary)

In [9]:
# Throwaway instance: evaluated as the cell's last expression so that its repr
# (the estimator's default parameters) is displayed; the object is not kept.
GaussianNB()

GaussianNB(priors=None, var_smoothing=1e-09)

In [10]:
# Estimator used for the rest of the notebook, created with default parameters.
gnb = GaussianNB()

In [11]:
# tr_labels was read as a DataFrame; .values.ravel() flattens it to a 1-D
# array before it is handed to fit() as the target.
gnb.fit(X=tr_features, y=tr_labels.values.ravel())

GaussianNB(priors=None, var_smoothing=1e-09)

In [12]:
# Predicted class label for every row of the test features.
pred_te = gnb.predict(X=te_features)
pred_te

array([1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1,
       0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0,
       0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0,
       1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0,
       0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0,
       1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1,
       1, 1], dtype=int64)

In [13]:
# Per-class probability estimates for the test features
# (one row per sample; the displayed output has one column per class).
prob_te = gnb.predict_proba(X=te_features)
prob_te


array([[3.80295089e-04, 9.99619705e-01],
       [3.44678250e-01, 6.55321750e-01],
       [9.03912850e-01, 9.60871503e-02],
       [3.59864467e-01, 6.40135533e-01],
       [3.69786125e-01, 6.30213875e-01],
       [9.03912850e-01, 9.60871503e-02],
       [9.84987107e-01, 1.50128933e-02],
       [9.71295023e-01, 2.87049773e-02],
       [9.83894128e-01, 1.61058724e-02],
       [9.04364598e-01, 9.56354021e-02],
       [9.67261039e-01, 3.27389612e-02],
       [9.50793824e-01, 4.92061755e-02],
       [9.84980287e-01, 1.50197133e-02],
       [9.54247117e-01, 4.57528831e-02],
       [9.83582606e-01, 1.64173939e-02],
       [9.83789610e-01, 1.62103905e-02],
       [4.40031680e-03, 9.95599683e-01],
       [2.71428224e-01, 7.28571776e-01],
       [8.03118314e-01, 1.96881686e-01],
       [9.82313329e-01, 1.76866713e-02],
       [9.85686009e-01, 1.43139908e-02],
       [1.71965006e-01, 8.28034994e-01],
       [9.83117802e-01, 1.68821977e-02],
       [3.08807806e-02, 9.69119219e-01],
       [9.844771

In [14]:
# Per-class precision / recall / F1 of the test predictions against the true labels.
print(classification_report(y_true=te_labels, y_pred=pred_te))
# Confusion matrix for the same predictions (rows: true class, columns: predicted class).
print(confusion_matrix(y_true=te_labels, y_pred=pred_te))

              precision    recall  f1-score   support

           0       0.83      0.81      0.82       113
           1       0.68      0.72      0.70        65

    accuracy                           0.78       178
   macro avg       0.76      0.76      0.76       178
weighted avg       0.78      0.78      0.78       178

[[91 22]
 [18 47]]


In [15]:
# Persist the fitted estimator to disk; the call returns the list of files written.
# NOTE(review): the file is pickle-based -- only load it back from a trusted source.
joblib.dump(value=gnb, filename='GNB_model.pkl')


['GNB_model.pkl']