<a href="https://colab.research.google.com/github/avanniiii/AvaniAgarwal_102303745_UML501/blob/main/SVM_Assignment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Q1.

In [2]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

def evaluate_model(y_true, y_pred):
    """Return accuracy and macro-averaged precision, recall and F1.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.

    Returns:
        A 4-tuple ``(accuracy, precision, recall, f1)``.
    """
    # The three per-class metrics share the same averaging options.
    macro_options = {"average": "macro", "zero_division": 0}
    return (
        accuracy_score(y_true, y_pred),
        precision_score(y_true, y_pred, **macro_options),
        recall_score(y_true, y_pred, **macro_options),
        f1_score(y_true, y_pred, **macro_options),
    )

def print_metrics(name, y_true, y_pred):
    """Print a labelled evaluation summary for one kernel.

    Prints a header containing ``name``, the four scores returned by
    ``evaluate_model``, the classification report and the confusion matrix.

    Args:
        name: Kernel name shown in the header line.
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.
    """
    accuracy, precision, recall, f1 = evaluate_model(y_true, y_pred)

    print(f"\n===== {name} Kernel =====")
    summary_rows = (
        ("Accuracy :", accuracy),
        ("Precision:", precision),
        ("Recall   :", recall),
        ("F1-score :", f1),
    )
    for label, value in summary_rows:
        print(label, value)

    print("\nClassification Report:")
    report = classification_report(y_true, y_pred, zero_division=0)
    print(report)

    print("Confusion Matrix:")
    matrix = confusion_matrix(y_true, y_pred)
    print(matrix)

# Q1: compare linear, polynomial and RBF SVM kernels on the Iris dataset.
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for testing; stratify on y and fix the seed
# so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# One SVC per kernel, keyed by the label used in the printed report.
kernels = {
    "Linear": SVC(kernel='linear'),
    "Polynomial (degree=3)": SVC(kernel='poly', degree=3),
    "RBF": SVC(kernel='rbf')
}

# Maps kernel label -> macro F1 on the test split.
results = {}

for name, model in kernels.items():
    y_pred = model.fit(X_train, y_train).predict(X_test)
    acc, prec, rec, f1 = evaluate_model(y_test, y_pred)
    print_metrics(name, y_test, y_pred)
    results[name] = f1

# On ties, max() keeps the first kernel in insertion order.
best_kernel = max(results, key=results.get)
print("\nBest Kernel by F1-score:", best_kernel)



===== Linear Kernel =====
Accuracy : 1.0
Precision: 1.0
Recall   : 1.0
F1-score : 1.0

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00        10
           2       1.00      1.00      1.00        10

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

Confusion Matrix:
[[10  0  0]
 [ 0 10  0]
 [ 0  0 10]]

===== Polynomial (degree=3) Kernel =====
Accuracy : 0.9666666666666667
Precision: 0.9696969696969697
Recall   : 0.9666666666666667
F1-score : 0.9665831244778612

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      0.90      0.95        10
           2       0.91      1.00      0.95        10

    accuracy                           0.97      

Q2.

In [3]:
from sklearn.preprocessing import StandardScaler

# Q2: RBF-kernel SVM on the breast cancer dataset, without and with
# StandardScaler, to show the effect of feature scaling.
bc = datasets.load_breast_cancer()
X, y = bc.data, bc.target

# Same split settings as Q1: 20% test, stratified, fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# --- Baseline: raw (unscaled) features ---
svm_no_scale = SVC(kernel='rbf').fit(X_train, y_train)

train_pred_ns = svm_no_scale.predict(X_train)
test_pred_ns = svm_no_scale.predict(X_test)

train_acc_ns = accuracy_score(y_train, train_pred_ns)
test_acc_ns = accuracy_score(y_test, test_pred_ns)

print("\n===== Breast Cancer (NO SCALING) =====")
print("Train Accuracy:", train_acc_ns)
print("Test Accuracy :", test_acc_ns)
print(confusion_matrix(y_test, test_pred_ns))
print(classification_report(y_test, test_pred_ns, zero_division=0))

# --- Standardized features ---
# The scaler is fitted on the training split only and then reused on the
# test split, so no test-set statistics enter the fitted scaler.
scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_test_s = scaler.transform(X_test)

svm_scaled = SVC(kernel='rbf').fit(X_train_s, y_train)

train_pred_s = svm_scaled.predict(X_train_s)
test_pred_s = svm_scaled.predict(X_test_s)

train_acc_s = accuracy_score(y_train, train_pred_s)
test_acc_s = accuracy_score(y_test, test_pred_s)

print("\n===== Breast Cancer (WITH SCALING) =====")
print("Train Accuracy:", train_acc_s)
print("Test Accuracy :", test_acc_s)
print(confusion_matrix(y_test, test_pred_s))
print(classification_report(y_test, test_pred_s, zero_division=0))

# Written discussion, emitted one sentence per line.
print(
    "\n=== Discussion ===",
    "SVM with RBF kernel is sensitive to feature scale.",
    "Without scaling → features with large values dominate distance calculation.",
    "With StandardScaler → each feature contributes equally.",
    "Scaled model generally gives higher accuracy and more stable results.",
    sep="\n",
)



===== Breast Cancer (NO SCALING) =====
Train Accuracy: 0.9186813186813186
Test Accuracy : 0.9298245614035088
[[36  6]
 [ 2 70]]
              precision    recall  f1-score   support

           0       0.95      0.86      0.90        42
           1       0.92      0.97      0.95        72

    accuracy                           0.93       114
   macro avg       0.93      0.91      0.92       114
weighted avg       0.93      0.93      0.93       114


===== Breast Cancer (WITH SCALING) =====
Train Accuracy: 0.9824175824175824
Test Accuracy : 0.9824561403508771
[[41  1]
 [ 1 71]]
              precision    recall  f1-score   support

           0       0.98      0.98      0.98        42
           1       0.99      0.99      0.99        72

    accuracy                           0.98       114
   macro avg       0.98      0.98      0.98       114
weighted avg       0.98      0.98      0.98       114


=== Discussion ===
SVM with RBF kernel is sensitive to feature scale.
Without scaling