# 5.2 Supervised Learning Part 1 Tutorial

This notebook covers fundamental supervised learning algorithms including:
- Naive Bayes
- k-Nearest Neighbors (KNN)
- Support Vector Machines (SVM)
- Decision Trees

In [None]:
# Import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.datasets import make_classification, make_moons

# Set random seed for reproducibility.
# This seeds NumPy's global random generator only; the dataset generators,
# train/test splits and decision trees below also pass random_state=42 explicitly.
np.random.seed(42)

## 1. Naive Bayes

Let's implement and analyze a Naive Bayes classifier.

In [None]:
# Build a synthetic dataset with two informative features and no redundant ones
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=42,
)

# Hold out 20% of the samples for evaluation
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit Gaussian Naive Bayes on the training portion (fit() returns the estimator itself)
nb_model = GaussianNB().fit(X_train, y_train)

# Predict labels for the held-out samples
y_pred = nb_model.predict(X_test)

# Report the per-class metrics on the held-out samples
print("Naive Bayes Performance:")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Visualize decision boundary
def plot_decision_boundary(X, y, model, title, ax=None):
    """Plot a fitted classifier's decision regions over a 2-D dataset.

    The model is evaluated on a dense grid covering the data range (padded by
    1 unit on each side); the predicted classes are drawn as filled contours
    with the samples scattered on top.

    Parameters
    ----------
    X : array indexed as X[:, 0] and X[:, 1]
        Sample coordinates; only the first two columns are used.
    y : array-like
        Values used to colour the scattered samples.
    model : object with a ``predict`` method
        Called once with an (n_grid_points, 2) array; its result is reshaped
        to the grid shape.
    title : str
        Title of the plot.
    ax : matplotlib Axes, optional
        Axes to draw on. When given, the plot is drawn there and neither a
        new figure is created nor ``plt.show()`` called, so the function can
        fill one panel of a subplot grid. When omitted, a new 10x8 figure is
        created and shown immediately.
    """
    h = 0.02  # Step size in the mesh
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

    # Predict a class for every grid point, then fold back into the grid shape
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    # Only own the figure lifecycle when the caller did not supply an axes
    standalone = ax is None
    if standalone:
        _, ax = plt.subplots(figsize=(10, 8))

    ax.contourf(xx, yy, Z, alpha=0.4)
    ax.scatter(X[:, 0], X[:, 1], c=y, alpha=0.8)
    ax.set_title(title)

    if standalone:
        plt.show()

plot_decision_boundary(X, y, nb_model, "Naive Bayes Decision Boundary")

# Plot confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix - Naive Bayes')
# confusion_matrix(y_true, y_pred) puts true labels on the rows and predicted
# labels on the columns; label the axes so the heatmap can be read on its own.
plt.xlabel('Predicted label')
plt.ylabel('True label')
plt.show()

## 2. k-Nearest Neighbors (KNN)

Let's explore the KNN algorithm and its hyperparameters.

In [None]:
# Generate non-linear dataset
X, y = make_moons(n_samples=1000, noise=0.15, random_state=42)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Try different values of k
k_values = [1, 3, 5, 15]

# Evaluation mesh for the decision regions. It depends only on the extent of
# the data, not on k, so it is built once outside the loop.
h = 0.02  # Step size in the mesh
xx, yy = np.meshgrid(
    np.arange(X[:, 0].min() - 1, X[:, 0].max() + 1, h),
    np.arange(X[:, 1].min() - 1, X[:, 1].max() + 1, h),
)
mesh_points = np.c_[xx.ravel(), yy.ravel()]

# One figure with one panel per k. Each panel is drawn directly on its own
# axes: a plotting call that opens a new figure and calls plt.show() cannot
# be placed inside a subplot grid and would leave the grid empty.
fig, axes = plt.subplots(1, len(k_values), figsize=(20, 5))

for ax, k in zip(axes, k_values):
    # Create and train model
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)

    # Plot decision boundary
    Z = knn.predict(mesh_points).reshape(xx.shape)
    ax.contourf(xx, yy, Z, alpha=0.4)
    ax.scatter(X[:, 0], X[:, 1], c=y, alpha=0.8)
    ax.set_title(f"KNN (k={k})")

    # Print accuracy
    y_pred = knn.predict(X_test)
    print(f"\nAccuracy for k={k}: {accuracy_score(y_test, y_pred):.4f}")

fig.tight_layout()
plt.show()

# Find optimal k using cross-validation (on the training split only, so the
# test split plays no part in model selection)
k_range = range(1, 31)
cv_scores = []

for k in k_range:
    knn = KNeighborsClassifier(n_neighbors=k)
    scores = cross_val_score(knn, X_train, y_train, cv=5, scoring='accuracy')
    cv_scores.append(scores.mean())

# Plot cross-validation results
plt.figure(figsize=(10, 6))
plt.plot(k_range, cv_scores, 'o-')
plt.xlabel('k (number of neighbors)')
plt.ylabel('Cross-validation accuracy')
plt.title('Finding Optimal k for KNN')
plt.grid(True)
plt.show()

# np.argmax returns the first maximum, so ties resolve to the smallest k
optimal_k = k_range[np.argmax(cv_scores)]
print(f"\nOptimal k: {optimal_k}")

## 3. Support Vector Machines (SVM)

Let's implement SVM with different kernels.

In [None]:
# Generate dataset
X, y = make_classification(n_samples=1000, n_features=2, n_redundant=0,
                          n_informative=2, random_state=42,
                          n_clusters_per_class=1)

# Split the data BEFORE scaling so the test split stays unseen by preprocessing
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scale features: fit the scaler on the training split only, then apply the
# same transformation everywhere. Fitting on the full dataset would leak
# test-set statistics into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
X_scaled = scaler.transform(X)  # all samples, used for plotting only

# Try different kernels
kernels = ['linear', 'poly', 'rbf']

# Evaluation mesh for the decision regions. It depends only on the extent of
# the scaled data, not on the kernel, so it is built once outside the loop.
h = 0.02  # Step size in the mesh
xx, yy = np.meshgrid(
    np.arange(X_scaled[:, 0].min() - 1, X_scaled[:, 0].max() + 1, h),
    np.arange(X_scaled[:, 1].min() - 1, X_scaled[:, 1].max() + 1, h),
)
mesh_points = np.c_[xx.ravel(), yy.ravel()]

# One figure with one panel per kernel. Each panel is drawn directly on its
# own axes: a plotting call that opens a new figure and calls plt.show()
# cannot be placed inside a subplot grid and would leave the grid empty.
fig, axes = plt.subplots(1, len(kernels), figsize=(15, 5))

for ax, kernel in zip(axes, kernels):
    # Create and train model
    svm = SVC(kernel=kernel)
    svm.fit(X_train, y_train)

    # Plot decision boundary
    Z = svm.predict(mesh_points).reshape(xx.shape)
    ax.contourf(xx, yy, Z, alpha=0.4)
    ax.scatter(X_scaled[:, 0], X_scaled[:, 1], c=y, alpha=0.8)
    ax.set_title(f"SVM ({kernel} kernel)")

    # Print performance metrics
    y_pred = svm.predict(X_test)
    print(f"\nPerformance metrics for {kernel} kernel:")
    print(classification_report(y_test, y_pred))

fig.tight_layout()
plt.show()

## 4. Decision Trees

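Let's explore decision trees and their visualization. This section reuses the scaled data and train/test split created in Section 3 (`X_scaled`, `X_train`, `X_test`, `y_train`, `y_test`), so run the SVM cell first.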

In [None]:
# NOTE: this cell uses X, y, X_scaled and the train/test split defined in the
# previous code cell; run that cell first.

# Create and train decision tree
dt = DecisionTreeClassifier(max_depth=3, random_state=42)
dt.fit(X_train, y_train)

# Make predictions
y_pred = dt.predict(X_test)

# Print performance metrics
print("Decision Tree Performance:")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Feature labels shared by the tree diagram and the importance chart
feature_names = [f'Feature {i+1}' for i in range(X.shape[1])]

# Visualize decision tree
plt.figure(figsize=(20,10))
plot_tree(dt, filled=True, feature_names=feature_names)
plt.title('Decision Tree Visualization')
plt.show()

# Plot feature importance
feature_importance = pd.DataFrame({
    'feature': feature_names,
    'importance': dt.feature_importances_
})
feature_importance = feature_importance.sort_values('importance', ascending=False)

plt.figure(figsize=(10, 6))
sns.barplot(x='importance', y='feature', data=feature_importance)
plt.title('Feature Importance in Decision Tree')
plt.show()

# Explore effect of tree depth
depths = [2, 3, 5, 10]

# Evaluation mesh for the decision regions. It depends only on the extent of
# the data, not on the depth, so it is built once outside the loop.
h = 0.02  # Step size in the mesh
xx, yy = np.meshgrid(
    np.arange(X_scaled[:, 0].min() - 1, X_scaled[:, 0].max() + 1, h),
    np.arange(X_scaled[:, 1].min() - 1, X_scaled[:, 1].max() + 1, h),
)
mesh_points = np.c_[xx.ravel(), yy.ravel()]

# One figure with one panel per depth. Each panel is drawn directly on its
# own axes: a plotting call that opens a new figure and calls plt.show()
# cannot be placed inside a subplot grid and would leave the grid empty.
fig, axes = plt.subplots(1, len(depths), figsize=(20, 5))

for ax, depth in zip(axes, depths):
    # Use a separate name so `dt` keeps referring to the depth-3 tree that
    # was reported and visualised above.
    dt_depth = DecisionTreeClassifier(max_depth=depth, random_state=42)
    dt_depth.fit(X_train, y_train)

    Z = dt_depth.predict(mesh_points).reshape(xx.shape)
    ax.contourf(xx, yy, Z, alpha=0.4)
    ax.scatter(X_scaled[:, 0], X_scaled[:, 1], c=y, alpha=0.8)
    ax.set_title(f"Decision Tree (depth={depth})")

fig.tight_layout()
plt.show()

## Practice Exercises

1. Compare the performance of all algorithms on a real-world dataset.

2. Experiment with different preprocessing techniques and observe their impact.

3. Implement grid search for hyperparameter tuning.

4. Create your own implementation of KNN from scratch.

5. Analyze the impact of feature scaling on SVM performance.