In [1]:
#Q1
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import pandas as pd

# Q1: compare three SVC kernels on Iris using one 80/20 hold-out split.
iris = load_iris()
X, y = iris.data, iris.target

X_tr, X_ts, y_tr, y_ts = train_test_split(X, y, test_size=0.2, random_state=42)

# (label, unfitted estimator) pairs; every estimator is fitted inside the loop below.
kernels = [
    ("linear", SVC(kernel="linear")),
    ("poly", SVC(kernel="poly", degree=3)),
    ("rbf", SVC(kernel="rbf")),
]

rows = []  # one [kernel, acc, prec, recall, f1] record per kernel
cms = {}   # kernel label -> confusion matrix on the test split

for name, model in kernels:
    # fit() returns the estimator itself, so training and prediction can be chained.
    p = model.fit(X_tr, y_tr).predict(X_ts)

    # Precision, recall and F1 are macro-averaged over the classes.
    macro = [
        metric(y_ts, p, average="macro")
        for metric in (precision_score, recall_score, f1_score)
    ]

    rows.append([name, accuracy_score(y_ts, p), *macro])
    cms[name] = confusion_matrix(y_ts, p)

df_scores = pd.DataFrame(rows, columns=["kernel", "acc", "prec", "recall", "f1"])
df_scores, cms


(   kernel  acc  prec  recall   f1
 0  linear  1.0   1.0     1.0  1.0
 1    poly  1.0   1.0     1.0  1.0
 2     rbf  1.0   1.0     1.0  1.0,
 {'linear': array([[10,  0,  0],
         [ 0,  9,  0],
         [ 0,  0, 11]]),
  'poly': array([[10,  0,  0],
         [ 0,  9,  0],
         [ 0,  0, 11]]),
  'rbf': array([[10,  0,  0],
         [ 0,  9,  0],
         [ 0,  0, 11]])})

Identifying the Best Kernel

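On this train/test split all three kernels score 1.0 on every metric, so these results alone do not single out a best kernel. In general, RBF is often a strong default because it can capture nonlinear decision boundaries.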

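A linear kernel works well only when the classes are (approximately) linearly separable, which appears to hold here, since it classifies every test sample correctly.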

A polynomial kernel can overfit on a small dataset when its degree is fixed in advance (here `degree=3`) rather than tuned.

In [2]:
#Q2
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Q2: effect of feature standardisation on an RBF-kernel SVC (breast-cancer data).
data = load_breast_cancer()
X, y = data.data, data.target

Xa_tr, Xa_ts, ya_tr, ya_ts = train_test_split(X, y, test_size=0.2, random_state=42)

# Baseline: train on the features exactly as loaded.
svm_raw = SVC(kernel="rbf").fit(Xa_tr, ya_tr)
train_raw, test_raw = (
    accuracy_score(target, svm_raw.predict(features))
    for features, target in ((Xa_tr, ya_tr), (Xa_ts, ya_ts))
)

# The scaler is fitted on the training split only; the same fitted scaler
# is then applied to both splits.
sc = StandardScaler().fit(Xa_tr)
Xs_tr = sc.transform(Xa_tr)
Xs_ts = sc.transform(Xa_ts)

# Same model configuration, now trained on the standardised features.
svm_scaled = SVC(kernel="rbf").fit(Xs_tr, ya_tr)
train_scaled, test_scaled = (
    accuracy_score(target, svm_scaled.predict(features))
    for features, target in ((Xs_tr, ya_tr), (Xs_ts, ya_ts))
)

train_raw, test_raw, train_scaled, test_scaled


(0.9142857142857143, 0.9473684210526315, 0.989010989010989, 0.9824561403508771)

Short Explanation of Feature Scaling Effect

SVM with RBF depends on Euclidean distance.

Without scaling: features with larger numeric range dominate distance, causing poor boundary formation.

After scaling: all features have equal influence → margin & separation improve.

As a result, accuracy improves after applying StandardScaler: test accuracy rises from about 0.947 to 0.982, and training accuracy from about 0.914 to 0.989.