In [None]:
import pandas as pd
import numpy as np
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, cross_val_score, train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Download the sensorless-drive-diagnosis file from the UCI repository.
# It is parsed as space-delimited with no header row, so pandas assigns
# integer column labels.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00325/Sensorless_drive_diagnosis.txt"
data = pd.read_csv(url, header=None, sep=" ")

# Every column but the last is a feature; the last column is the target.
X, y = data.iloc[:, :-1], data.iloc[:, -1]

# Hold out 10% of the rows for the final evaluation. The fixed seed keeps
# the partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

# Base estimator shared by both searches and the plain cross-validation run.
model = GaussianNB()

# Candidate values for GaussianNB's `var_smoothing`: 50 points spaced evenly
# on a log scale from 1e0 down to 1e-12. The grid search and the random
# search draw from the same candidates, each from its own array.
param_grid = {
    'var_smoothing': np.logspace(start=0, stop=-12, num=50),
}
random_grid = {
    'var_smoothing': np.logspace(start=0, stop=-12, num=50),
}

# Exhaustive search over every candidate in `param_grid`, scored by accuracy
# with 10-fold cross validation on the training split (all CPU cores).
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='accuracy',
    cv=10,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Keep the winning setting; it is reused when the final model is trained.
best_params_grid = grid_search.best_params_
print('Best parameters (Grid Search):', best_params_grid)

# Randomized search with 10-fold cross validation on the training split.
# Only a random subset of the candidates in `random_grid` is evaluated
# (n_iter is left at its default), so the outcome depends on which ones are
# drawn. Seeding the sampler with the same seed used for the train/test
# split makes the reported best parameters reproducible between runs.
random_search = RandomizedSearchCV(
    model,
    random_grid,
    cv=10,
    n_jobs=-1,
    scoring='accuracy',
    random_state=42,
)
random_search.fit(X_train, y_train)
best_params_random = random_search.best_params_
print('Best parameters (Random Search):', best_params_random)

# 10-fold cross-validated accuracy of `model` as it stands (not of the tuned
# estimators found by the searches), measured on the training split.
cv_scores = cross_val_score(
    model,
    X_train,
    y_train,
    scoring='accuracy',
    cv=10,
)
print('Cross-validation scores:', cv_scores)
print('Mean cross-validation score:', cv_scores.mean())

# Fit a fresh GaussianNB with the smoothing value chosen by the grid search,
# then predict the held-out test rows.
model_best = GaussianNB(var_smoothing=best_params_grid['var_smoothing']).fit(X_train, y_train)
y_pred = model_best.predict(X_test)

# Report held-out performance: overall accuracy, the class-by-class
# confusion matrix, and per-class precision/recall/F1.
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)

conf_matrix = confusion_matrix(y_test, y_pred)
print('Confusion matrix:\n', conf_matrix)

class_report = classification_report(y_test, y_pred)
print('Classification report:\n', class_report)

Best parameters (Grid Search): {'var_smoothing': 1.5264179671752366e-09}
Best parameters (Random Search): {'var_smoothing': 2.6826957952797275e-09}
Cross-validation scores: [0.75541208 0.74952526 0.75332321 0.73775161 0.7459172  0.75560197
 0.74401823 0.75655146 0.75688509 0.7711301 ]
Mean cross-validation score: 0.7526116220128121
Accuracy: 0.7552555118783114
Confusion matrix:
 [[515   0   0   0   0   4   0   0  13   0   0]
 [  0 323   0   0   0   0   0   0  23 188   1]
 [  0   0 500  11   7   0   0   0   0   0   0]
 [  0   1  30 472   0   0   1   0   0   0   0]
 [  0   0 131 147  85   0   0 152   0   0   0]
 [247   1  27   0   0 152   0   3 108   0   0]
 [  0   1   0   0   0   0 541   0   0   0   0]
 [  0   1  18  39  10   0   0 463   0   0   0]
 [103   1   2   0   0 108   0   0 309   1   0]
 [  0  43   0   0   0   0   0   0   9 475   0]
 [  0   1   0   0   0   0   0   0   0   0 584]]
Classification report:
               precision    recall  f1-score   support

           1       0.