In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import load_breast_cancer

import warnings
warnings.filterwarnings("ignore")

In [13]:
# Load scikit-learn's bundled breast cancer dataset and wrap it in a DataFrame:
# one column per feature name, plus a `target` column with the class labels.
breast_cancer = load_breast_cancer()
df = pd.DataFrame(
    breast_cancer.data,
    columns=breast_cancer.feature_names,
).assign(target=breast_cancer.target)

In [14]:
# Peek at four randomly chosen rows. No seed is passed, so the rows shown
# differ from run to run.
df.sample(n=4)

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
53,18.22,18.7,120.3,1033.0,0.1148,0.1485,0.1772,0.106,0.2092,0.0631,...,24.13,135.1,1321.0,0.128,0.2297,0.2623,0.1325,0.3021,0.07987,0
369,22.01,21.9,147.2,1482.0,0.1063,0.1954,0.2448,0.1501,0.1824,0.0614,...,25.8,195.0,2227.0,0.1294,0.3885,0.4756,0.2432,0.2741,0.08574,0
208,13.11,22.54,87.02,529.4,0.1002,0.1483,0.08705,0.05102,0.185,0.0731,...,29.16,99.48,639.3,0.1349,0.4402,0.3162,0.1126,0.4128,0.1076,1
210,20.58,22.14,134.7,1290.0,0.0909,0.1348,0.164,0.09561,0.1765,0.05024,...,27.84,158.3,1656.0,0.1178,0.292,0.3861,0.192,0.2909,0.05865,0


In [15]:
# Separate the label column (`target`) from the feature columns.
y = df['target']
X = df.drop(columns='target')

In [16]:
# Hold out 20% of the rows as a test split; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [17]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits so nothing from the test rows is used
# when fitting. X_train / X_test are rebound to their scaled versions.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [18]:
# Baseline model: Gaussian Naive Bayes with default settings, trained on the
# scaled training split.
naive_bayes = GaussianNB()
naive_bayes.fit(X=X_train, y=y_train)

In [19]:
# Baseline predictions on the held-out test split.
y_pred = naive_bayes.predict(X=X_test)

In [20]:
# Score the baseline predictions (`y_pred`) against the test labels:
# overall accuracy, confusion matrix and the per-class report.
accuracy = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

# Each heading goes on its own line, followed by the corresponding result.
print(f'Accuracy: {accuracy: .2f}')
print('Confusion Matrix: ', cm, sep='\n')
print('Classification Report: ', class_report, sep='\n')

Accuracy:  0.96
Confusion Matrix: 
[[40  3]
 [ 1 70]]
Classification Report: 
              precision    recall  f1-score   support

           0       0.98      0.93      0.95        43
           1       0.96      0.99      0.97        71

    accuracy                           0.96       114
   macro avg       0.97      0.96      0.96       114
weighted avg       0.97      0.96      0.96       114



In [21]:
# Hyperparameter Tuning
#
# Grid-search GaussianNB's `var_smoothing` with 3-fold cross-validation on the
# training split, selecting by accuracy.
var_smoothing_values = [1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3]
param_grid = dict(var_smoothing=var_smoothing_values)

grid_search = GridSearchCV(
    estimator=naive_bayes,
    param_grid=param_grid,
    scoring='accuracy',
    cv=3,
).fit(X_train, y_train)

# Report the winning setting.
best_params = grid_search.best_params_
print('Best Hyperparameters: ', best_params, sep='\n')

# Predict on the held-out test split with the best estimator found by the search.
best_naive_bayes = grid_search.best_estimator_
y_pred_best = best_naive_bayes.predict(X_test)

Best Hyperparameters: 
{'var_smoothing': 1e-10}


In [22]:
# Evaluate the tuned model from the grid search on the held-out test split.
# The predictions scored here must be `y_pred_best` (produced by
# `best_naive_bayes`); scoring `y_pred` would only repeat the baseline
# model's numbers and hide any effect of the tuning.
accuracy = accuracy_score(y_test, y_pred_best)
print(f'Accuracy: {accuracy: .2f}')

cm = confusion_matrix(y_test, y_pred_best)
print('Confusion Matrix: ')
print(cm)

class_report = classification_report(y_test, y_pred_best)
print('Classification Report: ')
print(class_report)

Accuracy:  0.96
Confusion Matrix: 
[[40  3]
 [ 1 70]]
Classification Report: 
              precision    recall  f1-score   support

           0       0.98      0.93      0.95        43
           1       0.96      0.99      0.97        71

    accuracy                           0.96       114
   macro avg       0.97      0.96      0.96       114
weighted avg       0.97      0.96      0.96       114

