In [None]:
# k_Nearest_Neighbors
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Load training data
# The CSV path is relative to the notebook's working directory. Later cells
# read a 'class' column from this frame as the label and treat the remaining
# listed columns as features.
Data_train = pd.read_csv('mushroom_train.csv')

In [None]:
# Load test data
# Held-out set read from a separate CSV (path relative to the working
# directory). Later cells read its 'class' column as the label, so it is
# expected to share the training file's column layout.
Data_test = pd.read_csv('mushroom_test.csv')

In [None]:
# Separate training data features and labels
X = Data_train.drop(columns='class')
y = Data_train['class']

# Columns routed to each preprocessing branch
numerical_features = ['cap-diameter', 'stem-height', 'stem-width']
categorical_features = ['cap-shape', 'cap-surface', 'cap-color', 'does-bruise-or-bleed',
                        'gill-attachment', 'gill-spacing', 'gill-color', 'stem-color', 'has-ring',
                        'ring-type', 'habitat', 'season']

# Define column transformer: standardize numeric columns, one-hot encode
# categorical ones.
# handle_unknown='ignore' makes a category that was absent at fit time encode
# as all zeros instead of raising when validation/test data is transformed.
column_transformer = ColumnTransformer(transformers=[
    ('num', StandardScaler(), numerical_features),
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
])

# Create the pipeline
transformer = Pipeline(steps=[('transformer', column_transformer)])

# Encode the class labels as integers (labels are not model inputs, so fitting
# the encoder on every label does not leak feature information)
le = LabelEncoder()
y_data = le.fit_transform(y)

# Split the RAW rows into training and validation parts BEFORE fitting the
# preprocessing, so the scaler statistics and the encoder vocabulary are
# learned from the training part only (no leakage from validation rows).
X_train_raw, X_val_raw, y_train, y_val = train_test_split(X, y_data, test_size=0.2, random_state=42)

# Fit on the training part, then apply the fitted transform to validation
X_train = transformer.fit_transform(X_train_raw)
X_val = transformer.transform(X_val_raw)

# Full transformed feature matrix, kept under its original name for any code
# that still refers to it; produced with the training-fitted transformer.
X_data = transformer.transform(X)

In [None]:
# Pull the label column out of the test frame and keep the rest as features
ytest = Data_test['class']
Xtest = Data_test.drop('class', axis=1)

# Reuse the preprocessing pipeline fitted earlier: transform only, never refit
# on test data
X_test = transformer.transform(Xtest)

# Map test labels to integers with the encoder fitted on the training labels
y_test = le.transform(ytest)

In [None]:
# K Nearest Neighbors: compare several neighborhood sizes
n_values = [5, 10, 15, 20, 50, 100]

for n_neighbors in n_values:
    print("k=",n_neighbors)
    # Initialize the KNeighborsClassifier with the chosen number of neighbors
    knn = KNeighborsClassifier(n_neighbors=n_neighbors)

    knn.fit(X_train, y_train)

    # Predict for validation data
    y_pred_val = knn.predict(X_val)

    # pos_label=1 is an integer code produced by the label encoder.
    # NOTE(review): confirm via le.classes_ which class it corresponds to.
    accuracy_val = accuracy_score(y_val, y_pred_val)
    f1_val = f1_score(y_val, y_pred_val, average='binary', pos_label=1)

    # Report the validation metrics so k can be compared on held-out training
    # data rather than on the test set
    print(f"Validation Accuracy: {accuracy_val:.2f}")
    print("Validation F1-score:", f1_val)

    # Predict for test data
    y_test_pred = knn.predict(X_test)

    # Calculate the accuracy and F1-score of the model on the test data
    test_accuracy = accuracy_score(y_test, y_test_pred)
    f1_test = f1_score(y_test, y_test_pred, average='binary', pos_label=1)

    # Print the accuracy and F1-score for the test data
    print(f"Test Accuracy: {test_accuracy:.2f}")
    print("Test F1-score:", f1_test)

In [None]:
# K Nearest Neighbors: fit and evaluate a single model with the chosen k
n_neighbors = 5

# Initialize the KNeighborsClassifier with the chosen number of neighbors
knn = KNeighborsClassifier(n_neighbors=n_neighbors)

knn.fit(X_train, y_train)

# Predict for validation data
y_pred_val = knn.predict(X_val)

# pos_label=1 is an integer code produced by the label encoder.
# NOTE(review): confirm via le.classes_ which class it corresponds to.
accuracy_val = accuracy_score(y_val, y_pred_val)
f1_val = f1_score(y_val, y_pred_val, average='binary', pos_label=1)

# f-string so the value is interpolated (a plain string would print the
# literal text "{accuracy_val}")
print(f"Validation Accuracy: {accuracy_val}")
print("Validation F1-score:", f1_val)

# Predict for test data
y_test_pred = knn.predict(X_test)

# Calculate the accuracy and F1-score of the model on test data
test_accuracy = accuracy_score(y_test, y_test_pred)
f1_test = f1_score(y_test, y_test_pred, average='binary', pos_label=1)

# f-string so the value is interpolated (a plain string would print the
# literal text "{test_accuracy}")
print(f"Test Accuracy: {test_accuracy}")
print("Test F1-score:", f1_test)

# Confusion matrix of true vs. predicted test labels
cm = confusion_matrix(y_test, y_test_pred)

# Tick labels shown on both axes.
# NOTE(review): the order assumes encoded class 0 is edible and class 1 is
# poisonous; confirm against le.classes_.
class_names = ['Edible', 'Poisonous']

# Draw the matrix as an annotated Seaborn heatmap on an explicit Axes
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(
    cm,
    annot=True,
    cmap="Blues",
    fmt='g',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)

ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.set_title('Test Confusion Matrix')

plt.show()