Lab 4: K-Nearest Neighbors (KNN) Classifier
This script demonstrates the K-Nearest Neighbors (KNN) classification algorithm on the Iris dataset.

In [None]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris, make_classification
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns


In [None]:
def basic_knn_classification():
    """Train a 5-neighbour KNN classifier on Iris and report hold-out metrics.

    The data is split 70/30 with a fixed seed, standardized using statistics
    from the training portion only, and evaluated on the held-out portion.
    Accuracy and a per-class report are printed, and the confusion matrix is
    written to ``lab4_confusion_matrix.png``.
    """
    rule = "=" * 50
    print(rule)
    print("Basic KNN Classification on Iris Dataset")
    print(rule)

    dataset = load_iris()
    features, labels = dataset.data, dataset.target
    class_names = dataset.target_names

    print(f"\nDataset shape: {features.shape}")
    print(f"Classes: {class_names}")

    # Hold out 30% of the samples; the seed keeps the split reproducible.
    features_train, features_test, labels_train, labels_test = train_test_split(
        features, labels, test_size=0.3, random_state=42
    )

    # Scaling statistics come from the training portion only.
    standardizer = StandardScaler().fit(features_train)
    train_std = standardizer.transform(features_train)
    test_std = standardizer.transform(features_test)

    classifier = KNeighborsClassifier(n_neighbors=5)
    classifier.fit(train_std, labels_train)
    predictions = classifier.predict(test_std)

    print(f"\nAccuracy: {accuracy_score(labels_test, predictions):.4f}")

    print("\nClassification Report:")
    print(classification_report(labels_test, predictions, target_names=class_names))

    # Heatmap of true (rows) vs predicted (columns) class counts.
    matrix = confusion_matrix(labels_test, predictions)
    plt.figure(figsize=(8, 6))
    sns.heatmap(
        matrix,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=class_names,
        yticklabels=class_names,
    )
    plt.title('Confusion Matrix - KNN Classifier')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.tight_layout()
    plt.savefig('lab4_confusion_matrix.png')
    plt.close()
    print("\nConfusion matrix saved as 'lab4_confusion_matrix.png'")


In [None]:
def find_optimal_k():
    """Select K by cross-validation on the training split and plot accuracy vs K.

    For every K in 1..30 three accuracies are recorded:

    * training accuracy (fit and scored on the training split),
    * mean 5-fold cross-validation accuracy computed on the training split
      only, with scaling refit inside each fold,
    * test accuracy on the 30% hold-out split.

    The reported "optimal" K is the one with the highest cross-validation
    accuracy. The test split is never used for the selection, so the test
    accuracy printed for that K is an unbiased estimate rather than the
    maximum over all candidates. The curves are saved to
    ``lab4_optimal_k.png``.
    """
    # Imported locally so this function brings its own dependency into scope.
    from sklearn.pipeline import make_pipeline

    print("\n" + "=" * 50)
    print("Finding Optimal K Value")
    print("=" * 50)

    # Load dataset
    iris = load_iris()
    X = iris.data
    y = iris.target

    # Split and scale data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Test different K values
    k_values = range(1, 31)
    train_scores = []
    cv_scores = []
    test_scores = []

    for k in k_values:
        # Model selection signal: cross-validate on the training split only.
        # The pipeline refits the scaler per fold so no validation-fold
        # statistics leak into preprocessing.
        candidate = make_pipeline(
            StandardScaler(), KNeighborsClassifier(n_neighbors=k)
        )
        cv_scores.append(
            cross_val_score(candidate, X_train, y_train, cv=5).mean()
        )

        # Train/test curves, kept for the plot.
        knn = KNeighborsClassifier(n_neighbors=k)
        knn.fit(X_train_scaled, y_train)
        train_scores.append(knn.score(X_train_scaled, y_train))
        test_scores.append(knn.score(X_test_scaled, y_test))

    # Choose K from the cross-validation scores, never from the test scores.
    best_index = int(np.argmax(cv_scores))
    optimal_k = k_values[best_index]
    print(f"\nOptimal K value: {optimal_k}")
    print(f"Best cross-validation accuracy: {cv_scores[best_index]:.4f}")
    print(f"Test accuracy at optimal K: {test_scores[best_index]:.4f}")

    # Plot results
    plt.figure(figsize=(10, 6))
    plt.plot(k_values, train_scores, 'o-', label='Training Accuracy')
    plt.plot(k_values, cv_scores, '^-', label='Cross-Validation Accuracy')
    plt.plot(k_values, test_scores, 's-', label='Test Accuracy')
    plt.axvline(x=optimal_k, color='r', linestyle='--',
                label=f'Optimal K = {optimal_k}')
    plt.xlabel('K Value')
    plt.ylabel('Accuracy')
    plt.title('KNN: Accuracy vs K Value')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.savefig('lab4_optimal_k.png')
    plt.close()
    print("\nOptimal K plot saved as 'lab4_optimal_k.png'")


In [None]:
def compare_distance_metrics():
    """Compare KNN (K=5) test accuracy across three distinct distance metrics.

    Euclidean, Manhattan and Minkowski with ``p=3`` are evaluated on a 70/30
    hold-out split of standardized Iris data. Minkowski is given an explicit
    ``p=3`` because with scikit-learn's default ``p=2`` it is the Euclidean
    distance and would only duplicate the first bar. Accuracies are printed
    and a bar chart is saved to ``lab4_distance_metrics.png``.
    """
    print("\n" + "=" * 50)
    print("Comparing Distance Metrics")
    print("=" * 50)

    # Load dataset
    iris = load_iris()
    X = iris.data
    y = iris.target

    # Split and scale data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Each entry: (display label, keyword arguments selecting the metric)
    metric_configs = [
        ('euclidean', {'metric': 'euclidean'}),
        ('manhattan', {'metric': 'manhattan'}),
        ('minkowski (p=3)', {'metric': 'minkowski', 'p': 3}),
    ]
    results = {}

    for label, metric_params in metric_configs:
        knn = KNeighborsClassifier(n_neighbors=5, **metric_params)
        knn.fit(X_train_scaled, y_train)
        accuracy = knn.score(X_test_scaled, y_test)
        results[label] = accuracy
        print(f"\n{label.capitalize()} distance: {accuracy:.4f}")

    # Visualize comparison
    labels = list(results)
    accuracies = list(results.values())

    plt.figure(figsize=(10, 6))
    plt.bar(labels, accuracies, color=['blue', 'green', 'orange'])
    plt.ylabel('Accuracy')
    plt.title('KNN: Comparison of Distance Metrics')
    # Keep the 0.9 baseline when every score is high, but widen the range if
    # any score falls below it; leave headroom so a 1.0 annotation fits.
    lower = max(0.0, min(0.9, min(accuracies) - 0.05))
    plt.ylim([lower, 1.03])
    for i, acc in enumerate(accuracies):
        plt.text(i, acc + 0.005, f'{acc:.4f}', ha='center')
    plt.tight_layout()
    plt.savefig('lab4_distance_metrics.png')
    plt.close()
    print("\nDistance metrics comparison saved as 'lab4_distance_metrics.png'")


In [None]:
def knn_with_cross_validation():
    """Report 5-fold cross-validated KNN accuracy on Iris for several K values.

    Standardization is done inside a pipeline, so the scaler is refit on the
    training folds of every split. Fitting the scaler once on the full dataset
    beforehand would leak validation-fold statistics into preprocessing. For
    each K the mean accuracy and twice the standard deviation across folds are
    printed.
    """
    # Imported locally so this function brings its own dependency into scope.
    from sklearn.pipeline import make_pipeline

    print("\n" + "=" * 50)
    print("KNN with Cross-Validation")
    print("=" * 50)

    # Load dataset
    iris = load_iris()
    X = iris.data
    y = iris.target

    # Test with different K values using cross-validation
    k_values = [3, 5, 7, 9, 11]

    print("\nCross-validation results (5-fold):")
    for k in k_values:
        # Scaler + classifier are fit together on each fold's training part.
        model = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=k))
        scores = cross_val_score(model, X, y, cv=5)
        print(f"K={k}: Mean accuracy = {scores.mean():.4f} (+/- {scores.std() * 2:.4f})")


In [None]:
def decision_boundary_visualization():
    """Plot KNN (K=5) decision regions over the first two Iris features.

    The two features are standardized, a classifier is fit on all samples
    (this is a visualization, not an evaluation), and its predictions over a
    dense grid are drawn as filled contours with the samples overlaid. Axis
    labels state that the values are standardized, since the plotted
    coordinates are no longer in the original units. The figure is saved to
    ``lab4_decision_boundary.png``.
    """
    print("\n" + "=" * 50)
    print("Decision Boundary Visualization")
    print("=" * 50)

    # Load iris and use only 2 features for visualization
    iris = load_iris()
    X = iris.data[:, :2]  # Use only first two features
    y = iris.target

    # Standardize
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Train KNN
    knn = KNeighborsClassifier(n_neighbors=5)
    knn.fit(X_scaled, y)

    # Create mesh for decision boundary (grid step h, one unit of margin)
    h = 0.02
    x_min, x_max = X_scaled[:, 0].min() - 1, X_scaled[:, 0].max() + 1
    y_min, y_max = X_scaled[:, 1].min() - 1, X_scaled[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))

    # Predict on mesh
    Z = knn.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    # The axes show standardized values, so drop the original unit suffix
    # from the feature names and say so explicitly.
    x_label, y_label = (
        f"{name.replace(' (cm)', '')} (standardized)"
        for name in iris.feature_names[:2]
    )

    # Plot
    plt.figure(figsize=(10, 6))
    plt.contourf(xx, yy, Z, alpha=0.4, cmap='RdYlBu')
    plt.scatter(X_scaled[:, 0], X_scaled[:, 1], c=y, cmap='RdYlBu',
                edgecolors='black', s=50)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title('KNN Decision Boundaries (K=5)')
    plt.colorbar(label='Class')
    plt.tight_layout()
    plt.savefig('lab4_decision_boundary.png')
    plt.close()
    print("\nDecision boundary saved as 'lab4_decision_boundary.png'")


In [None]:
def main():
    """Run every Lab 4 KNN demonstration in order, framed by banners."""
    rule = "=" * 50

    print("\n" + rule)
    print("Lab 4: K-Nearest Neighbors (KNN) Classifier")
    print(rule)

    # Execution order: basic classification, K selection, metric comparison,
    # cross-validation, decision-boundary plot.
    demos = (
        basic_knn_classification,
        find_optimal_k,
        compare_distance_metrics,
        knn_with_cross_validation,
        decision_boundary_visualization,
    )
    for run_demo in demos:
        run_demo()

    print("\n" + rule)
    print("Lab 4 Complete!")
    print(rule)


In [None]:
# Run the full lab only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()
