In [35]:
from sklearn.datasets import load_iris, load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
import pandas as pd

In [47]:
# load and split the data
iris = load_iris()
X = iris.data
y = iris.target

In [11]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.75, random_state=42
)

In [12]:
# define the pipeline
model = Pipeline([
    ("preprocessor", StandardScaler()),
    ("classifier", HistGradientBoostingClassifier(random_state=42))
])

In [13]:
# define the hyperparameter space
learning_rates = [0.01, 0.1, 1, 10]
max_leaf_nodes = [3, 10, 30]

In [14]:
best_score = -1
best_params = {}

In [15]:
# Manual Search
for lr in learning_rates:
    for mln in max_leaf_nodes:
        # Update parameters
        model.set_params(classifier__learning_rate=lr, classifier__max_leaf_nodes=mln)
        
        # Train and score
        model.fit(X_train, y_train)
        score = model.score(X_test, y_test)
        
        if score > best_score:
            best_score = score
            best_params = {'learning-rate': lr, 'max leaf nodes': mln}

In [16]:
print(f"Best parameters: {best_params}")
print(f"Best score: {best_score}")

Best parameters: {'learning-rate': 0.01, 'max leaf nodes': 3}
Best score: 1.0


In [18]:
# support vector classifier
model = SVC()

In [19]:
# Get the value of the hyperparameter 'degree'
degree_value = model.get_params()['degree']

In [20]:
print(f"The value of the 'degree' hyperparameter is: {degree_value}")

The value of the 'degree' hyperparameter is: 3


In [37]:
cancer = load_breast_cancer()

In [38]:
# features
X = pd.DataFrame(cancer["data"], columns=cancer.feature_names)

In [39]:
# target label
y = pd.DataFrame(cancer["target"], columns=['Cancer'])

In [40]:
# train test split
X_train, X_test, y_train, y_test = train_test_split(
    X, np.ravel(y), test_size=0.3, random_state=42
)

In [41]:
! pip install scikit-optimize



In [42]:
from skopt import BayesSearchCV
# parameter ranges 
from skopt.space import Real, Categorical, Integer

In [43]:
# Define search space
search_space = {
    'C': Real(0.1, 1e+4, prior='uniform'),
    'gamma': Real(1e-6, 1e+1, prior='log-uniform')
}

In [44]:
# Initialize BayesSearchCV
bayes_search = BayesSearchCV(
    SVC(),
    search_space,
    n_iter=20,
    cv=5,
    random_state=42,
    verbose=3
)

In [45]:
# Fit model
bayes_search.fit(X_train, y_train)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END C=4101.098578137254, gamma=0.1241881627783703;, score=0.625 total time=   0.0s
[CV 2/5] END C=4101.098578137254, gamma=0.1241881627783703;, score=0.625 total time=   0.0s
[CV 3/5] END C=4101.098578137254, gamma=0.1241881627783703;, score=0.625 total time=   0.0s
[CV 4/5] END C=4101.098578137254, gamma=0.1241881627783703;, score=0.633 total time=   0.0s
[CV 5/5] END C=4101.098578137254, gamma=0.1241881627783703;, score=0.620 total time=   0.0s
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END C=8373.899816697289, gamma=1.5247791391944723;, score=0.625 total time=   0.0s
[CV 2/5] END C=8373.899816697289, gamma=1.5247791391944723;, score=0.625 total time=   0.0s
[CV 3/5] END C=8373.899816697289, gamma=1.5247791391944723;, score=0.625 total time=   0.0s
[CV 4/5] END C=8373.899816697289, gamma=1.5247791391944723;, score=0.633 total time=   0.0s
[CV 5/5] END C=8373.899816697289, gamma=1.524779139194

0,1,2
,estimator,SVC()
,search_spaces,"{'C': Real(low=0.1,...m='normalize'), 'gamma': Real(low=1e-0...m='normalize')}"
,optimizer_kwargs,
,n_iter,20
,scoring,
,fit_params,
,n_jobs,1
,n_points,1
,iid,'deprecated'
,refit,True

0,1,2
,"C  C: float, default=1.0 Regularization parameter. The strength of the regularization is inversely proportional to C. Must be strictly positive. The penalty is a squared l2 penalty. For an intuitive visualization of the effects of scaling the regularization parameter C, see :ref:`sphx_glr_auto_examples_svm_plot_svm_scale_c.py`.",2901.942404402699
,"kernel  kernel: {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable, default='rbf' Specifies the kernel type to be used in the algorithm. If none is given, 'rbf' will be used. If a callable is given it is used to pre-compute the kernel matrix from data matrices; that matrix should be an array of shape ``(n_samples, n_samples)``. For an intuitive visualization of different kernel types see :ref:`sphx_glr_auto_examples_svm_plot_svm_kernels.py`.",'rbf'
,"degree  degree: int, default=3 Degree of the polynomial kernel function ('poly'). Must be non-negative. Ignored by all other kernels.",3
,"gamma  gamma: {'scale', 'auto'} or float, default='scale' Kernel coefficient for 'rbf', 'poly' and 'sigmoid'. - if ``gamma='scale'`` (default) is passed then it uses  1 / (n_features * X.var()) as value of gamma, - if 'auto', uses 1 / n_features - if float, must be non-negative. .. versionchanged:: 0.22  The default value of ``gamma`` changed from 'auto' to 'scale'.",1e-06
,"coef0  coef0: float, default=0.0 Independent term in kernel function. It is only significant in 'poly' and 'sigmoid'.",0.0
,"shrinking  shrinking: bool, default=True Whether to use the shrinking heuristic. See the :ref:`User Guide `.",True
,"probability  probability: bool, default=False Whether to enable probability estimates. This must be enabled prior to calling `fit`, will slow down that method as it internally uses 5-fold cross-validation, and `predict_proba` may be inconsistent with `predict`. Read more in the :ref:`User Guide `.",False
,"tol  tol: float, default=1e-3 Tolerance for stopping criterion.",0.001
,"cache_size  cache_size: float, default=200 Specify the size of the kernel cache (in MB).",200
,"class_weight  class_weight: dict or 'balanced', default=None Set the parameter C of class i to class_weight[i]*C for SVC. If not given, all classes are supposed to have weight one. The ""balanced"" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``.",


In [46]:
# Predict and evaluate
y_pred = bayes_search.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.97      0.94      0.95        63
           1       0.96      0.98      0.97       108

    accuracy                           0.96       171
   macro avg       0.97      0.96      0.96       171
weighted avg       0.96      0.96      0.96       171



In [48]:
# load and split the data
iris = load_iris()
X = iris.data
y = iris.target

In [49]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.76, random_state=42
)

In [50]:
# Show the number of rows in the training data
print(len(X_train))

114
