# 🎯 Support Vector Machines - Interactive Demo

Welcome to the BAI SVM Prototype! This notebook demonstrates the core concepts of Support Vector Machines.

## 📋 Learning Objectives:
- Understand what "support vectors" are and why they matter
- See how SVMs find the optimal decision boundary
- Experiment with different kernels
- Visualize the impact of hyperparameters

In [None]:
# Import essential libraries
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import svm, datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score

# Apply the matplotlib style sheet and seaborn colour palette that every
# figure in this notebook is drawn with
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Confirm that the setup cell ran to completion (one message per line)
print("🚀 Libraries loaded successfully!",
      "📊 Ready for SVM exploration!",
      sep="\n")

## 🎨 Part 1: Creating Sample Data

Let's create a simple 2D dataset to visualize SVM concepts.

In [None]:
# Build a reproducible two-class toy dataset
np.random.seed(42)

# Draw 2-D standard-normal points and label each one by the side of the
# line x1 + x2 = 0 it falls on
n_samples = 100
X = np.random.randn(n_samples, 2)
feature_sum = X[:, 0] + X[:, 1]
y = (feature_sum > 0).astype(int)

# Jitter the points *after* labelling: samples close to the line can end up
# on the other side of it, so the classes are no longer perfectly separable
X += np.random.normal(0, 0.1, X.shape)

# Scatter the two classes with distinct colours and markers
plt.figure(figsize=(10, 6))
for class_id, colour, marker in ((0, 'red', 'o'), (1, 'blue', 's')):
    members = y == class_id
    plt.scatter(X[members, 0], X[members, 1], c=colour, marker=marker,
                s=100, label=f'Class {class_id}', alpha=0.7)
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.title('🎯 Sample Dataset for SVM')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

# Report the dataset size and the class balance
n_class0 = (y == 0).sum()
n_class1 = (y == 1).sum()
print(f"📊 Dataset created: {n_samples} samples, 2 features")
print(f"🔴 Class 0: {n_class0} samples")
print(f"🔵 Class 1: {n_class1} samples")

## 🎯 Part 2: Linear SVM - Finding the "Street"

Now let's train a linear SVM and visualize the decision boundary and support vectors.

In [None]:
def plot_svm_decision_boundary(X, y, model, title="SVM Decision Boundary"):
    """Plot a fitted binary SVM's decision boundary, margins and support vectors.

    Opens a new 12x8 figure and draws, in order: the contour lines where the
    decision function equals -1, 0 and +1 (margins dashed, boundary solid),
    a tinted fill of the margin band, the samples of class 0 and class 1,
    and a ring around every support vector. The figure is left open so the
    caller can adjust labels before calling ``plt.show()``.

    Parameters
    ----------
    X : 2-D array
        Samples; column 0 is plotted on the x-axis and column 1 on the y-axis.
    y : 1-D array
        Labels aligned with ``X``. Only samples with ``y == 0`` and
        ``y == 1`` are drawn.
    model : fitted estimator
        Must expose ``decision_function`` (called on a two-column grid, so
        presumably it was fitted on two features) and ``support_vectors_``.
    title : str
        Figure title.

    Returns
    -------
    module
        The ``matplotlib.pyplot`` module itself (not a Figure or Axes).
    """
    plt.figure(figsize=(12, 8))

    # Dense grid covering the data plus a one-unit border on every side
    h = 0.02
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))

    # Signed decision-function score at every grid point, shaped like the grid
    Z = model.decision_function(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    # Decision boundary (score 0, solid) and the two margins (score -1/+1, dashed)
    plt.contour(xx, yy, Z, colors='black', levels=[-1, 0, 1], alpha=0.5,
                linestyles=['--', '-', '--'])

    # Tint the margin band. Two colours need two bands, hence three levels:
    # [-1, 0] is the negative-score half and [0, 1] the positive-score half.
    # With only levels=[-1, 1] there is a single band and 'lightblue' is
    # never used. For scikit-learn's SVC a negative score means the first
    # class (0 here), so each half is tinted in its own class colour.
    plt.contourf(xx, yy, Z, levels=[-1, 0, 1],
                 colors=['lightcoral', 'lightblue'], alpha=0.3)

    # Samples, one scatter call per class so each gets its own legend entry
    plt.scatter(X[y==0, 0], X[y==0, 1], c='red', marker='o', s=100,
                label='Class 0', alpha=0.8, edgecolors='black')
    plt.scatter(X[y==1, 0], X[y==1, 1], c='blue', marker='s', s=100,
                label='Class 1', alpha=0.8, edgecolors='black')

    # Ring the support vectors with large hollow markers
    support_vectors = model.support_vectors_
    plt.scatter(support_vectors[:, 0], support_vectors[:, 1],
                s=300, linewidth=3, facecolors='none', edgecolors='yellow',
                label=f'Support Vectors ({len(support_vectors)})')

    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.title(title)
    plt.legend()
    plt.grid(True, alpha=0.3)

    return plt

# Fit a linear-kernel SVM on the toy dataset (fit() returns the estimator)
svm_linear = svm.SVC(kernel='linear', C=1.0).fit(X, y)

# Draw the boundary, margins and support vectors
plot_svm_decision_boundary(
    X, y, svm_linear,
    title="🎯 Linear SVM: The 'Street' Between Classes",
)
plt.show()

# Summarise the fit; the accuracy is measured on the training samples
n_support_vectors = len(svm_linear.support_vectors_)
train_accuracy = accuracy_score(y, svm_linear.predict(X))
print("✅ Linear SVM trained successfully!")
print(f"🎯 Number of support vectors: {n_support_vectors}")
print(f"📏 Training accuracy: {train_accuracy:.3f}")

## 🎛️ Part 3: The Impact of the C Parameter

Let's see how the regularization parameter `C` (the penalty applied to points that violate the margin) affects the margin width and the decision boundary.

In [None]:
# Compare soft-margin strength: one linear SVM per C value, one panel each
C_values = [0.1, 1.0, 10.0, 100.0]

# Size the panel grid from the number of C values (two panels per row) so
# that trying a longer or shorter list does not run off the end of the grid.
# For the default four values this is the same 2x2, 15x12 figure as before.
n_cols = 2
n_rows = max(1, (len(C_values) + n_cols - 1) // n_cols)
fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 6 * n_rows), squeeze=False)
axes = axes.ravel()

# The evaluation grid depends only on X, so build it once, not once per C
h = 0.02
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))
grid_points = np.c_[xx.ravel(), yy.ravel()]

for ax, C in zip(axes, C_values):
    # Train SVM with this C
    svm_model = svm.SVC(kernel='linear', C=C)
    svm_model.fit(X, y)

    # Signed decision-function score at every grid point
    Z = svm_model.decision_function(grid_points).reshape(xx.shape)

    # Boundary (solid) and margins (dashed)
    ax.contour(xx, yy, Z, colors='black', levels=[-1, 0, 1], alpha=0.5,
               linestyles=['--', '-', '--'])
    # Two colours need two bands: [-1, 0] and [0, 1]. With levels=[-1, 1]
    # there is only one band and 'lightblue' is never used.
    ax.contourf(xx, yy, Z, levels=[-1, 0, 1],
                colors=['lightcoral', 'lightblue'], alpha=0.3)

    ax.scatter(X[y==0, 0], X[y==0, 1], c='red', marker='o', s=50, alpha=0.8)
    ax.scatter(X[y==1, 0], X[y==1, 1], c='blue', marker='s', s=50, alpha=0.8)

    # Ring the support vectors
    support_vectors = svm_model.support_vectors_
    ax.scatter(support_vectors[:, 0], support_vectors[:, 1],
               s=200, linewidth=2, facecolors='none', edgecolors='yellow')

    ax.set_title(f'C = {C}\n({len(support_vectors)} support vectors)')
    ax.set_xlabel('Feature 1')
    ax.set_ylabel('Feature 2')
    ax.grid(True, alpha=0.3)

# Hide any panel left over when the number of C values is odd
for unused_ax in axes[len(C_values):]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.show()

# NOTE(review): these takeaways are static text, not computed from the fits
# above; confirm they match the support-vector counts shown in the titles.
print("🎛️ Effect of C parameter:")
print("   📉 Low C (0.1): Wider margin, more support vectors (less overfitting)")
print("   📈 High C (100): Narrower margin, fewer support vectors (more overfitting)")

## 🌀 Part 4: Nonlinear Data - The Kernel Trick

Let's create some nonlinear data and see how different kernels handle it.

In [None]:
# Create nonlinear data: two noisy concentric rings, one per class.
# make_circles is reached through the `datasets` module imported in the
# first cell, so no extra import is needed in the middle of the notebook.
X_circles, y_circles = datasets.make_circles(
    n_samples=200, noise=0.2, factor=0.3, random_state=42
)

# Plot the circular data, one scatter call per class for the legend
plt.figure(figsize=(10, 6))
plt.scatter(X_circles[y_circles==0, 0], X_circles[y_circles==0, 1],
            c='red', marker='o', s=100, label='Class 0', alpha=0.7)
plt.scatter(X_circles[y_circles==1, 0], X_circles[y_circles==1, 1],
            c='blue', marker='s', s=100, label='Class 1', alpha=0.7)
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.title('🌀 Nonlinear Dataset (Circles)')
plt.legend()
plt.grid(True, alpha=0.3)
# Equal axis scaling so the rings are not drawn as ellipses
plt.axis('equal')
plt.show()

print("🌀 Created circular dataset - linear boundaries won't work well here!")

In [None]:
# Compare different kernels on the circles data, one panel per kernel
kernels = ['linear', 'poly', 'rbf']
titles = ['Linear Kernel', 'Polynomial Kernel (degree=3)', 'RBF (Gaussian) Kernel']

fig, axes = plt.subplots(1, 3, figsize=(18, 6))

# The evaluation grid depends only on X_circles, so build it once
h = 0.02
x_min, x_max = X_circles[:, 0].min() - 0.5, X_circles[:, 0].max() + 0.5
y_min, y_max = X_circles[:, 1].min() - 0.5, X_circles[:, 1].max() + 0.5
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))
grid_points = np.c_[xx.ravel(), yy.ravel()]

for i, (kernel, title) in enumerate(zip(kernels, titles)):
    ax = axes[i]

    # `degree` only affects the 'poly' kernel and is ignored by the others,
    # so a single constructor call covers every kernel
    svm_model = svm.SVC(kernel=kernel, degree=3, C=1.0)
    svm_model.fit(X_circles, y_circles)

    # Predicted class label (0 or 1) at every grid point
    Z = svm_model.predict(grid_points).reshape(xx.shape)

    # Plot decision regions. The levels are given explicitly so that label 0
    # falls in the first band (lightcoral) and label 1 in the second
    # (lightblue). Without `levels`, contourf picks its own levels and cycles
    # the two colours across them, so the fill need not match the classes.
    ax.contourf(xx, yy, Z, levels=[-0.5, 0.5, 1.5], alpha=0.3,
                colors=['lightcoral', 'lightblue'])

    # Plot data
    ax.scatter(X_circles[y_circles==0, 0], X_circles[y_circles==0, 1],
               c='red', marker='o', s=50, alpha=0.8, edgecolors='black')
    ax.scatter(X_circles[y_circles==1, 0], X_circles[y_circles==1, 1],
               c='blue', marker='s', s=50, alpha=0.8, edgecolors='black')

    # Ring the support vectors
    support_vectors = svm_model.support_vectors_
    ax.scatter(support_vectors[:, 0], support_vectors[:, 1],
               s=200, linewidth=2, facecolors='none', edgecolors='yellow')

    # Accuracy on the same samples the model was fitted on
    accuracy = accuracy_score(y_circles, svm_model.predict(X_circles))
    ax.set_title(f'{title}\nAccuracy: {accuracy:.3f}')
    ax.set_xlabel('Feature 1')
    ax.set_ylabel('Feature 2')
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# NOTE(review): these takeaways are static text, not derived from the
# accuracies in the panel titles; confirm they match the plotted results
# (in particular whether the degree-3 polynomial kernel really beats linear).
print("🎪 Kernel Performance on Circular Data:")
print("   📐 Linear: Struggles with circular patterns")
print("   🔢 Polynomial: Better, but can be complex")
print("   🌀 RBF: Excellent for this type of nonlinear data!")

## 🎯 Part 5: Real-World Application - Iris Dataset

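Let's apply an SVM to the classic Iris dataset. To keep the decision boundary plottable in 2D, we use only the first two features (sepal length and sepal width) and only two of the three species (Setosa and Versicolor).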

In [None]:
# Load Iris dataset
iris = datasets.load_iris()
X_iris = iris.data[:, :2]  # Use only first 2 features for visualization
y_iris = iris.target

# For simplicity, keep a binary problem: drop every sample with target 2
mask = y_iris != 2  # Remove class 2 (Virginica)
X_iris_2class = X_iris[mask]
y_iris_2class = y_iris[mask]

# Standardize the features so both contribute on a comparable scale
scaler = StandardScaler()
X_iris_scaled = scaler.fit_transform(X_iris_2class)

# Train an RBF-kernel SVM on the scaled features
svm_iris = svm.SVC(kernel='rbf', C=1.0, gamma='scale')
svm_iris.fit(X_iris_scaled, y_iris_2class)

# Plot results; the axis labels set by the helper are overridden afterwards
plot_svm_decision_boundary(X_iris_scaled, y_iris_2class, svm_iris,
                          "🌺 SVM on Iris Dataset (Setosa vs Versicolor)")
plt.xlabel('Sepal Length (scaled)')
plt.ylabel('Sepal Width (scaled)')
plt.show()

# Print results. The model is scored on the very samples it was fitted on,
# so the figure is reported as *training* accuracy (as in the linear SVM
# cell); it is not an estimate of performance on unseen data.
accuracy = accuracy_score(y_iris_2class, svm_iris.predict(X_iris_scaled))
print("🌺 Iris Dataset Results:")
print(f"   🎯 Training accuracy: {accuracy:.3f}")
print(f"   🔗 Support Vectors: {len(svm_iris.support_vectors_)}")
print("   📊 Classes: Setosa (0) vs Versicolor (1)")

## 🎓 Summary and Key Takeaways

### What we learned about Support Vector Machines:

1. **🎯 Support Vectors**: Only the points on or inside the margin (the support vectors) determine the decision boundary; every other point could be removed without changing it!
2. **📏 Large Margin**: SVMs find the "widest street" between classes
3. **🎛️ C Parameter**: Controls the trade-off between margin width and violations
4. **🎪 Kernel Trick**: Enables nonlinear classification without explicit feature transformation
5. **⚖️ Feature Scaling**: SVMs are sensitive to feature scale, so standardize features before training (as we did for the Iris data in Part 5)

### 🚀 Next Steps:
- Experiment with different datasets
- Try hyperparameter tuning
- Explore SVM regression
- Compare with other algorithms

In [None]:
# Fun experiment: suggestions for parameters to change in the cells above.
# The prompts are collected in one list and emitted with a single print;
# entries that start with "\n" produce a blank line before them.
experiment_prompts = [
    "🎮 Interactive Experiment Zone!",
    "=" * 40,
    "Try modifying these parameters in the cells above:",
    "🎛️ C values: 0.01, 0.1, 1, 10, 100, 1000",
    "🌀 Gamma values: 0.001, 0.01, 0.1, 1, 10",
    "🔢 Polynomial degrees: 2, 3, 4, 5",
    "\n💡 Questions to explore:",
    "   • What happens with very high C?",
    "   • How does gamma affect RBF kernel boundaries?",
    "   • When does polynomial kernel work best?",
    "\n🎯 Challenge: Can you find the optimal parameters for each dataset?",
]
print("\n".join(experiment_prompts))