In [None]:
# Import necessary libraries
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import fetch_openml
import matplotlib.pyplot as plt
import seaborn as sns

# Load the MNIST dataset
print("Loading MNIST dataset...")
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
X, y = mnist.data, mnist.target

# Split the dataset into training and testing sets.
# This is done BEFORE scaling so that the held-out 20% plays no part in
# estimating the preprocessing statistics.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the data.
# The scaler is fitted on the training split only and the fitted mean/scale
# are then applied to the test split; fitting on the full dataset would leak
# test-set statistics into training. X itself is left unscaled.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define the distance metrics and range of K values
distance_metrics = ['euclidean', 'manhattan', 'chebyshev']
k_values = range(1, 11)

# Store results for analysis: metric name -> list of
# {'k', 'accuracy', 'confusion_matrix'} dicts, one per K value
results = {}

# Encode the training labels as integer class codes (positions in the sorted
# array of distinct labels) so neighbour votes can be counted with NumPy
# whatever the label dtype is.
class_labels, train_codes = np.unique(y_train, return_inverse=True)
class_codes = np.arange(len(class_labels))
max_k = max(k_values)

# Perform classification for each distance metric and K value
for metric in distance_metrics:
    print(f"\nUsing {metric} distance:")

    # The neighbour search depends on K only through how many neighbours are
    # kept, so it is run once per metric with the largest K instead of being
    # repeated for every K.
    knn = KNeighborsClassifier(n_neighbors=max_k, metric=metric)
    knn.fit(X_train, y_train)

    # Shape (n_test, max_k): class code of each neighbour. This relies on
    # kneighbors returning neighbours ordered nearest-first, so that the
    # first k columns are the k nearest neighbours.
    neighbor_codes = train_codes[knn.kneighbors(X_test, return_distance=False)]

    metric_results = []
    for k in k_values:
        # Majority vote among the k nearest neighbours: count, per test
        # sample, how many of them belong to each class.
        votes = (neighbor_codes[:, :k, None] == class_codes).sum(axis=1)

        # argmax returns the first maximum, so a tied vote goes to the class
        # that comes first in sorted label order.
        y_pred = class_labels[votes.argmax(axis=1)]

        # Calculate accuracy
        accuracy = accuracy_score(y_test, y_pred)

        # Generate confusion matrix
        cm = confusion_matrix(y_test, y_pred)

        # Store results
        metric_results.append({
            'k': k,
            'accuracy': accuracy,
            'confusion_matrix': cm
        })

        print(f"K={k}: Accuracy={accuracy:.4f}")
    results[metric] = metric_results

# Draw one accuracy-vs-K curve per distance metric on the same axes
for metric, metric_results in results.items():
    # Collect the x (K) and y (accuracy) series for this metric
    k_vals, accuracies = [], []
    for entry in metric_results:
        k_vals.append(entry['k'])
        accuracies.append(entry['accuracy'])

    plt.plot(k_vals, accuracies, label=f"{metric} distance")

# Annotate the figure; the legend uses the per-curve labels set above
plt.title("Accuracy vs K value for MNIST Dataset")
plt.xlabel("K Value")
plt.ylabel("Accuracy")
plt.legend()
plt.show()

# Example: show the confusion matrix of one (metric, K) combination
selected_metric = 'euclidean'
selected_k = 3

# Take the first stored result for the chosen metric whose K matches
selected_result = next(
    filter(lambda res: res['k'] == selected_k, results[selected_metric])
)

print(f"\nConfusion Matrix for {selected_metric} distance with K={selected_k}:")
sns.heatmap(
    selected_result['confusion_matrix'],
    annot=True,  # write the count inside every cell
    fmt="d",     # format the annotations as integers
    cmap="Blues",
)
plt.title(f"Confusion Matrix ({selected_metric}, K={selected_k})")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.show()


Loading MNIST dataset...

Using euclidean distance:
K=1: Accuracy=0.9455
K=2: Accuracy=0.9365
