## Support Vector Classification
Sklearn Documentation
- [SVM](https://scikit-learn.org/stable/modules/svm)
- [SVC](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC)
  
Dataset
- [load_iris](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_iris)
- [make_classification](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_classification)
#### Importing Libraries

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.svm import SVC
from sklearn.datasets import make_classification
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, roc_auc_score, classification_report
)

#### make_classification data

In [2]:
# Generate sample data
#note n_informative+n_redundant+n_repeated≤n_features
X, y = make_classification(n_samples=1000, n_features=5,  n_informative=3,random_state=42)

# Train-test split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=43)
print("X_train:",X_train.shape)
print("y_train:",y_train.shape)
print("X_test:",X_test.shape)
print("y_test:",y_test.shape)

X_train: (800, 5)
y_train: (800,)
X_test: (200, 5)
y_test: (200,)


In [3]:
# Train SVM classifier
svm_clf = SVC(kernel='rbf', C=1, gamma='scale')
svm_clf.fit(X_train, y_train)

In [4]:
# Predict
y_pred = svm_clf.predict(X_test)

In [5]:
# Evaluate
print("Accuracy:", accuracy_score(y_test, y_pred))

Accuracy: 0.925


#### Iris Data

In [6]:
iris_data=load_iris(return_X_y=False,as_frame=True)
X=iris_data.data
y=iris_data.target
print("Features in the data are:",iris_data.feature_names)
print("The Target Names are:",iris_data.target_names)
# print(iris_data.DESCR)

Features in the data are: ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
The Target Names are: ['setosa' 'versicolor' 'virginica']


### Train-test split

In [7]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=43)
print("X_train:",X_train.shape)
print("y_train:",y_train.shape)
print("X_test:",X_test.shape)
print("y_test:",y_test.shape)

X_train: (120, 4)
y_train: (120,)
X_test: (30, 4)
y_test: (30,)


In [8]:
svm_clf = SVC(kernel='rbf', C=1, gamma='scale', probability=True)# probability=True for ROC-AUC
svm_clf.fit(X_train, y_train)

In [9]:
# Predictions
y_pred = svm_clf.predict(X_test)
y_prob = svm_clf.predict_proba(X_test)  # Probabilities for ROC-AUC

In [10]:
# Evaluate
print("Accuracy:", accuracy_score(y_test, y_pred))

Accuracy: 0.9333333333333333


In [11]:
# Compute classification metrics
accuracy = accuracy_score(y_test, y_pred)
precision_macro = precision_score(y_test, y_pred, average='macro')
recall_macro = recall_score(y_test, y_pred, average='macro')
f1_macro = f1_score(y_test, y_pred, average='macro')
precision_weighted = precision_score(y_test, y_pred, average='weighted')
recall_weighted = recall_score(y_test, y_pred, average='weighted')
f1_weighted = f1_score(y_test, y_pred, average='weighted')

# Compute ROC-AUC score (one-vs-rest approach)
roc_auc = roc_auc_score(y_test, y_prob, multi_class='ovr')

# Confusion Matrix
conf_matrix = confusion_matrix(y_test, y_pred)

# Print results
print(f"Accuracy: {accuracy:.4f}")
print(f"Macro Precision: {precision_macro:.4f}")
print(f"Macro Recall: {recall_macro:.4f}")
print(f"Macro F1-Score: {f1_macro:.4f}")
print(f"Weighted Precision: {precision_weighted:.4f}")
print(f"Weighted Recall: {recall_weighted:.4f}")
print(f"Weighted F1-Score: {f1_weighted:.4f}")
print(f"ROC-AUC Score: {roc_auc:.4f}")

print("\nConfusion Matrix:")
print(conf_matrix)

print("\nClassification Report:")
print(classification_report(y_test, y_pred))


Accuracy: 0.9333
Macro Precision: 0.9333
Macro Recall: 0.9259
Macro F1-Score: 0.9213
Weighted Precision: 0.9467
Weighted Recall: 0.9333
Weighted F1-Score: 0.9329
ROC-AUC Score: 1.0000

Confusion Matrix:
[[13  0  0]
 [ 0  8  0]
 [ 0  2  7]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        13
           1       0.80      1.00      0.89         8
           2       1.00      0.78      0.88         9

    accuracy                           0.93        30
   macro avg       0.93      0.93      0.92        30
weighted avg       0.95      0.93      0.93        30



### Gridsearch

In [12]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 'auto', 0.01, 0.1, 1],
    'kernel': ['linear', 'rbf', 'poly']
}

grid_search = GridSearchCV(SVC(), param_grid, cv=5)
grid_search.fit(X_train, y_train)

print(grid_search.best_params_)

{'C': 1, 'gamma': 'scale', 'kernel': 'linear'}
