In [1]:
# Import required libraries
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Step a) Load the dataset and perform trainâ€“test split (80:20)
iris = datasets.load_iris()
X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step b) Train three SVM models using Linear, Polynomial, and RBF kernels
kernels = ['linear', 'poly', 'rbf']
results = {}

for kernel in kernels:
    print(f"\n=== Kernel: {kernel.upper()} ===")
    
    # For polynomial, specify degree=3
    if kernel == 'poly':
        model = SVC(kernel='poly', degree=3)
    else:
        model = SVC(kernel=kernel)
    
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Step c) Evaluation metrics
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average='macro')
    rec = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')
    results[kernel] = [acc, prec, rec, f1]

    print(f"Accuracy:  {acc:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall:    {rec:.4f}")
    print(f"F1 Score:  {f1:.4f}")

    # Step d) Display the Confusion Matrix
    cm = confusion_matrix(y_test, y_pred)
    print("\nConfusion Matrix:\n", cm)


# Step e) Identify the best performing kernel
df = pd.DataFrame(results, index=['Accuracy', 'Precision', 'Recall', 'F1-Score'])
print("\n=== Performance Comparison ===")
print(df)

best_kernel = df.loc['F1-Score'].idxmax()
print(f"\nBest performing kernel: {best_kernel.upper()} (based on F1-Score)")
print(f'It captures the complex, non-linear relationships among Iris species more effectively than others, leading to the highest accuracy and F1-score')


=== Kernel: LINEAR ===
Accuracy:  1.0000
Precision: 1.0000
Recall:    1.0000
F1 Score:  1.0000

Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]

=== Kernel: POLY ===
Accuracy:  1.0000
Precision: 1.0000
Recall:    1.0000
F1 Score:  1.0000

Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]

=== Kernel: RBF ===
Accuracy:  1.0000
Precision: 1.0000
Recall:    1.0000
F1 Score:  1.0000

Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]

=== Performance Comparison ===
           linear  poly  rbf
Accuracy      1.0   1.0  1.0
Precision     1.0   1.0  1.0
Recall        1.0   1.0  1.0
F1-Score      1.0   1.0  1.0

Best performing kernel: LINEAR (based on F1-Score)
It captures the complex, non-linear relationships among Iris species more effectively than others, leading to the highest accuracy and F1-score


In [2]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Step A) Load the Breast Cancer dataset
data = datasets.load_breast_cancer()
X = data.data
y = data.target

# Split the data (80:20)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model 1: Without Feature Scaling
svm_no_scale = SVC(kernel='rbf')
svm_no_scale.fit(X_train, y_train)

train_acc_no_scale = svm_no_scale.score(X_train, y_train)
test_acc_no_scale = svm_no_scale.score(X_test, y_test)

print("=== Without Feature Scaling ===")
print(f"Training Accuracy: {train_acc_no_scale:.4f}")
print(f"Testing Accuracy:  {test_acc_no_scale:.4f}")

# Model 2: With Feature Scaling (StandardScaler)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

svm_scaled = SVC(kernel='rbf')
svm_scaled.fit(X_train_scaled, y_train)

train_acc_scaled = svm_scaled.score(X_train_scaled, y_train)
test_acc_scaled = svm_scaled.score(X_test_scaled, y_test)

print("\n=== With Feature Scaling ===")
print(f"Training Accuracy: {train_acc_scaled:.4f}")
print(f"Testing Accuracy:  {test_acc_scaled:.4f}")

# Comparison Table
import pandas as pd

results = {
    'Without Scaling': [train_acc_no_scale, test_acc_no_scale],
    'With Scaling': [train_acc_scaled, test_acc_scaled]
}
df = pd.DataFrame(results, index=['Training Accuracy', 'Testing Accuracy'])
print("\n=== Performance Comparison ===")
print(df)

=== Without Feature Scaling ===
Training Accuracy: 0.9143
Testing Accuracy:  0.9474

=== With Feature Scaling ===
Training Accuracy: 0.9890
Testing Accuracy:  0.9825

=== Performance Comparison ===
                   Without Scaling  With Scaling
Training Accuracy         0.914286      0.989011
Testing Accuracy          0.947368      0.982456
