In [1]:
# --- Baseline SVM on Iris Dataset ---

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load dataset
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Split into train-test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train baseline SVM
svm_clf = SVC()  # Default: kernel='rbf', C=1.0
svm_clf.fit(X_train, y_train)

# Predict and evaluate
y_pred = svm_clf.predict(X_test)

print("Baseline SVM Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


Baseline SVM Accuracy: 1.0

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]


In [3]:
# --- Optuna Optimization for SVM on Iris Dataset ---

!pip install optuna
import optuna
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline

# Define objective function
def objective(trial):
    # Hyperparameter search space
    C = trial.suggest_loguniform('C', 1e-3, 1e3)
    gamma = trial.suggest_loguniform('gamma', 1e-4, 1e1)
    kernel = trial.suggest_categorical('kernel', ['linear', 'poly', 'rbf', 'sigmoid'])

    # Create pipeline (with scaling)
    model = make_pipeline(StandardScaler(), SVC(C=C, gamma=gamma, kernel=kernel))

    # 5-fold cross validation
    scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')

    return scores.mean()

# Create study
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=30)

# Print best hyperparameters
print("\nBest trial:")
print("  Accuracy:", study.best_trial.value)
print("  Params:", study.best_trial.params)

# Train model with best params
best_params = study.best_trial.params
best_model = make_pipeline(StandardScaler(),
                           SVC(C=best_params['C'],
                               gamma=best_params['gamma'],
                               kernel=best_params['kernel']))

best_model.fit(X_train, y_train)
y_pred_optuna = best_model.predict(X_test)

print("\nOptimized SVM Accuracy:", accuracy_score(y_test, y_pred_optuna))
print("\nClassification Report:\n", classification_report(y_test, y_pred_optuna))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_optuna))

Collecting optuna
  Downloading optuna-4.6.0-py3-none-any.whl.metadata (17 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.10.1-py3-none-any.whl.metadata (11 kB)
Downloading optuna-4.6.0-py3-none-any.whl (404 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m404.7/404.7 kB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.10.1-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, optuna
Successfully installed colorlog-6.10.1 optuna-4.6.0


[I 2025-11-13 09:15:19,359] A new study created in memory with name: no-name-78645fd5-ca65-4e9a-8e13-8fa2af4fa6f4
  C = trial.suggest_loguniform('C', 1e-3, 1e3)
  gamma = trial.suggest_loguniform('gamma', 1e-4, 1e1)
[I 2025-11-13 09:15:19,398] Trial 0 finished with value: 0.8666666666666668 and parameters: {'C': 0.018810723701894717, 'gamma': 0.004874617270124778, 'kernel': 'rbf'}. Best is trial 0 with value: 0.8666666666666668.
  C = trial.suggest_loguniform('C', 1e-3, 1e3)
  gamma = trial.suggest_loguniform('gamma', 1e-4, 1e1)
[I 2025-11-13 09:15:19,431] Trial 1 finished with value: 0.8733333333333334 and parameters: {'C': 0.02092759702500717, 'gamma': 0.08925512322272083, 'kernel': 'rbf'}. Best is trial 1 with value: 0.8733333333333334.
  C = trial.suggest_loguniform('C', 1e-3, 1e3)
  gamma = trial.suggest_loguniform('gamma', 1e-4, 1e1)
[I 2025-11-13 09:15:19,460] Trial 2 finished with value: 0.9666666666666666 and parameters: {'C': 372.99083398086725, 'gamma': 1.8683907380569331, '


Best trial:
  Accuracy: 0.9666666666666668
  Params: {'C': 6.274813827475258, 'gamma': 0.0016511791984178145, 'kernel': 'linear'}

Optimized SVM Accuracy: 0.9666666666666667

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      0.89      0.94         9
           2       0.92      1.00      0.96        11

    accuracy                           0.97        30
   macro avg       0.97      0.96      0.97        30
weighted avg       0.97      0.97      0.97        30

Confusion Matrix:
 [[10  0  0]
 [ 0  8  1]
 [ 0  0 11]]
