In [33]:
import pandas as pd
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.neighbors import KNeighborsClassifier

# --- Data ---------------------------------------------------------------
# Read the mushroom table from a CSV expected in the working directory.
df = pd.read_csv("mushrooms.csv")

# "class" is the label column; every other column is treated as a feature.
y = df["class"]

# Drop the label and one-hot encode what is left in a single step, so the
# distance-based classifier gets indicator columns rather than raw strings.
X = pd.get_dummies(df.drop(columns="class"))

# --- Hold-out split -----------------------------------------------------
# 80% of the rows for model selection, 20% kept aside; the fixed seed
# makes the partition repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# --- Search space -------------------------------------------------------
param_grid = dict(
    n_neighbors=list(range(3, 20, 2)),  # odd k values: 3, 5, ..., 19
    weights=["uniform", "distance"],
    metric=["euclidean", "manhattan"],
)

# --- Model selection ----------------------------------------------------
# Base estimator with library defaults; the grid overrides the three
# hyperparameters listed above.
knn = KNeighborsClassifier()

# Exhaustive search over param_grid, scored by accuracy with 5-fold
# cross-validation; n_jobs=-1 asks scikit-learn to use every available core.
grid_search = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    scoring="accuracy",
    cv=5,
    n_jobs=-1,
)

# Only the training portion is used for the search; the test portion is
# not touched here.
grid_search.fit(X_train, y_train)

# Report the winning hyperparameter combination.
print("Best hyperparameters:", grid_search.best_params_)

Best hyperparameters: {'metric': 'euclidean', 'n_neighbors': 3, 'weights': 'uniform'}


In [34]:
from sklearn.metrics import accuracy_score

# Label the held-out rows with the tuned model. Calling predict on the
# search object delegates to the estimator refit with the best parameters
# (the search was built with the default refit behaviour).
y_pred = grid_search.predict(X_test)

# Fraction of held-out rows whose predicted label matches the true label.
test_acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Test accuracy:", test_acc)

Test accuracy: 1.0
