In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import warnings
warnings.filterwarnings('ignore')

# a) Load and split data 
iris = load_iris()
X = iris.data
y = iris.target
target_names = iris.target_names

# 80:20 split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# b) Train three different SVM models 
kernels = {
    'Linear': SVC(kernel='linear', random_state=42),
    'Polynomial (degree=3)': SVC(kernel='poly', degree=3, random_state=42),
    'RBF': SVC(kernel='rbf', random_state=42)
}

results = {}

print("--- 1. SVM Kernel Comparison (Iris Dataset) ---")
print(f"Train set size: {X_train.shape[0]} samples")
print(f"Test set size: {X_test.shape[0]} samples")

for kernel_name, model in kernels.items():
    # Train the model
    model.fit(X_train, y_train)
    
    # Get predictions
    y_pred = model.predict(X_test)
    
    # c) Evaluate each model
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    
    # d) Get confusion matrix
    cm = confusion_matrix(y_test, y_pred)
    
    results[kernel_name] = {
        'Accuracy': accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1-Score': f1,
        'Confusion Matrix': cm
    }

for kernel_name, metrics in results.items():
    print("\n" + "="*30)
    print(f"Kernel: {kernel_name}")
    print("="*30)
    print(f"  Accuracy:  {metrics['Accuracy']:.4f}")
    print(f"  Precision: {metrics['Precision']:.4f}")
    print(f"  Recall:    {metrics['Recall']:.4f}")
    print(f"  F1-Score:  {metrics['F1-Score']:.4f}")
    print("\n  Confusion Matrix:")
    print("  (Rows: True, Cols: Predicted)")
    
    cm_str = "        " + "  ".join([name[:4] for name in target_names])
    print(cm_str)
    print("      " + "-"*(len(cm_str) - 8))
    for i, row in enumerate(metrics['Confusion Matrix']):
        print(f"  {target_names[i][:4]:<6} | {row}")

--- 1. SVM Kernel Comparison (Iris Dataset) ---
Train set size: 120 samples
Test set size: 30 samples

Kernel: Linear
  Accuracy:  1.0000
  Precision: 1.0000
  Recall:    1.0000
  F1-Score:  1.0000

  Confusion Matrix:
  (Rows: True, Cols: Predicted)
        seto  vers  virg
      ----------------
  seto   | [10  0  0]
  vers   | [0 9 0]
  virg   | [ 0  0 11]

Kernel: Polynomial (degree=3)
  Accuracy:  1.0000
  Precision: 1.0000
  Recall:    1.0000
  F1-Score:  1.0000

  Confusion Matrix:
  (Rows: True, Cols: Predicted)
        seto  vers  virg
      ----------------
  seto   | [10  0  0]
  vers   | [0 9 0]
  virg   | [ 0  0 11]

Kernel: RBF
  Accuracy:  1.0000
  Precision: 1.0000
  Recall:    1.0000
  F1-Score:  1.0000

  Confusion Matrix:
  (Rows: True, Cols: Predicted)
        seto  vers  virg
      ----------------
  seto   | [10  0  0]
  vers   | [0 9 0]
  virg   | [ 0  0 11]


In [3]:
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler

# A) Load the dataset 
cancer = load_breast_cancer()
X_c = cancer.data
y_c = cancer.target

X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(X_c, y_c, test_size=0.2, random_state=42)

print("\n--- 2. Impact of Feature Scaling (Breast Cancer) ---")

# B) Train WITHOUT feature scaling 
svc_unscaled = SVC(kernel='rbf', random_state=42)
svc_unscaled.fit(X_train_c, y_train_c)

train_acc_unscaled = svc_unscaled.score(X_train_c, y_train_c)
test_acc_unscaled = svc_unscaled.score(X_test_c, y_test_c)

# B) Train WITH feature scaling
# 1. Create and fit the scaler ON THE TRAINING DATA ONLY
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_c)

# 2. Transform the test data using the *fitted* scaler
X_test_scaled = scaler.transform(X_test_c)

# 3. Train the new SVM
svc_scaled = SVC(kernel='rbf', random_state=42)
svc_scaled.fit(X_train_scaled, y_train_c)


train_acc_scaled = svc_scaled.score(X_train_scaled, y_train_c)
test_acc_scaled = svc_scaled.score(X_test_c, y_test_c) # Note: use X_test_c (unscaled) for unscaled model

# C) Compare results
print("\nAccuracy Comparison")
print(f"                      | Training Accuracy | Testing Accuracy")
print(f"Model WITHOUT Scaling | {train_acc_unscaled:17.4f} | {test_acc_unscaled:16.4f}")
print(f"Model WITH Scaling    | {train_acc_scaled:17.4f} | {test_acc_scaled:16.4f}")


--- 2. Impact of Feature Scaling (Breast Cancer) ---

Accuracy Comparison
                      | Training Accuracy | Testing Accuracy
Model WITHOUT Scaling |            0.9143 |           0.9474
Model WITH Scaling    |            0.9890 |           0.3772
