In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Fetch the 'mnist_784' dataset (version 1) from OpenML and separate the
# feature matrix from the label vector.
mnist = fetch_openml('mnist_784', version=1)
X, y = mnist.data, mnist.target

# Sequential (unshuffled) split: the first 10% of the rows are used for
# training and the remaining 90% are held out for testing. The boundary is
# computed once so both halves are guaranteed to use the same index.
n_samples = len(X)
split_at = int(n_samples * 0.1)
train_data, test_data = X[:split_at], X[split_at:]
train_labels, test_labels = y[:split_at], y[split_at:]

# Base estimator handed to both searches; each search clones it per candidate.
nb = GaussianNB()

# Candidate values for GaussianNB's `var_smoothing` hyper-parameter.
param_grid = {
    'var_smoothing': [1e-7, 1e-5, 1e-3, 1e-1]
}

# Exhaustive search: every candidate is scored with 5-fold cross-validation
# on the training split.
grid_search = GridSearchCV(nb, param_grid=param_grid, cv=5)
grid_search.fit(train_data, train_labels)

# Print the best parameter combination found
print("Best parameters (Grid Search):", grid_search.best_params_)

# Randomized search over the same candidates. The search space is a finite
# set of list values, so asking for more iterations than there are distinct
# combinations only makes scikit-learn emit a warning and fall back to the
# grid size. Cap n_iter at the number of combinations to avoid that.
n_candidates = int(np.prod([len(values) for values in param_grid.values()]))
random_search = RandomizedSearchCV(
    nb,
    param_distributions=param_grid,
    cv=5,
    n_iter=min(10, n_candidates),
)
random_search.fit(train_data, train_labels)

# Print the best parameter combination found
print("Best parameters (Random Search):", random_search.best_params_)

# Build a fresh classifier using the `var_smoothing` value selected by the
# random search, then fit it on the whole training split.
best_nb = GaussianNB(var_smoothing=random_search.best_params_['var_smoothing'])
best_nb.fit(train_data, train_labels)

# Predict the labels of the held-out test data
predicted_labels = best_nb.predict(test_data)

# Evaluate the classifier. Precision, recall and F1 use macro averaging,
# i.e. the unweighted mean of the per-class scores.
accuracy = accuracy_score(test_labels, predicted_labels)
precision = precision_score(test_labels, predicted_labels, average='macro')
recall = recall_score(test_labels, predicted_labels, average='macro')
f1 = f1_score(test_labels, predicted_labels, average='macro')
# Keep the result under its own name: assigning it to `confusion_matrix`
# would overwrite the imported function and make this cell fail with
# "'numpy.ndarray' object is not callable" the next time it is run.
conf_mat = confusion_matrix(test_labels, predicted_labels)

# Print the evaluation metrics
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)
print("Confusion matrix:\n", conf_mat)



  warn(


Best parameters (Grid Search): {'var_smoothing': 0.1}




Best parameters (Random Search): {'var_smoothing': 0.1}
Accuracy: 0.8063015873015873
Precision: 0.8194845084672376
Recall: 0.8028015655521654
F1-score: 0.8048942443963527
Confusion matrix:
 [[5602    8   34    9   10  130  125    1  238   55]
 [   0 6803   18   22    4   48   40    2  145   11]
 [  63  243 4796  186   76   20  346   64  479   42]
 [  45  292  214 4935   30  113   82   63  348  303]
 [  13   88   33    0 4110   48  134   21  144 1517]
 [ 181  146   42  632  162 3732  172   33  304  299]
 [  50  162   94    2   72  133 5587    0   64    3]
 [  23  267   29   19  140    2    9 5326  133  591]
 [  48  645   78  221  101  132   55   19 4458  418]
 [  35  163   12   44  296   18    5  126  116 5448]]
