# Hyper parameters tuning - Random Search

Reference: https://inria.github.io/scikit-learn-mooc/python_scripts/parameter_tuning_randomized_search.html

## Prepare data

In [1]:
# obtain the data

from sklearn import decomposition
from sklearn import datasets
from sklearn.model_selection import train_test_split

iris = datasets.load_iris()
X = iris.data
y = iris.target

# split into a training and testing set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# PCA 
nof_prin_components = 2 
pca = decomposition.PCA(n_components=nof_prin_components, svd_solver='full').fit(X_train)

# applies PCA to the train and test images to calculate the principal components
X_train_pca = pca.transform(X_train) 
X_test_pca = pca.transform(X_test)

## Support functions

In [2]:
from scipy.stats import loguniform

class loguniform_int:
    """Integer valued version of the log-uniform distribution"""
    def __init__(self, a, b):
        self._distribution = loguniform(a, b)

    def rvs(self, *args, **kwargs):
        """Random variable sample"""
        return self._distribution.rvs(*args, **kwargs).astype(int)

In [3]:
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.pipeline import Pipeline

model = Pipeline([
    
    ("classifier", HistGradientBoostingClassifier(random_state=42, max_leaf_nodes=4)),
])

## Randomized search configurations

In [4]:
from sklearn.model_selection import RandomizedSearchCV

param_distributions = {
    'classifier__l2_regularization': loguniform(1e-6, 1e3),
    'classifier__learning_rate': loguniform(0.001, 10),
    'classifier__max_leaf_nodes': loguniform_int(2, 256),
    'classifier__min_samples_leaf': loguniform_int(1, 100),
    'classifier__max_bins': loguniform_int(2, 255),
}

model_random_search = RandomizedSearchCV(
    model, param_distributions=param_distributions, n_iter=10,
    cv=5, verbose=1,
)


model_random_search.fit(X_train_pca, y_train)

Fitting 5 folds for each of 10 candidates, totalling 50 fits


In [5]:
# identify perfect/best hyperparameters
print('Best parameters found:\n', model_random_search.best_params_)

Best parameters found:
 {'classifier__l2_regularization': 1.598267720862488e-05, 'classifier__learning_rate': 0.01431181584940257, 'classifier__max_bins': 211, 'classifier__max_leaf_nodes': 9, 'classifier__min_samples_leaf': 12}


In [6]:
# prediction
y_true, y_pred = y_test , model_random_search.predict(X_test_pca)
from sklearn.metrics import classification_report
print('Results on the test set:')
print(classification_report(y_true, y_pred))

Results on the test set:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       1.00      0.89      0.94         9
           2       0.86      1.00      0.92         6

    accuracy                           0.97        30
   macro avg       0.95      0.96      0.95        30
weighted avg       0.97      0.97      0.97        30



In [8]:
# means

means = model_random_search.cv_results_['mean_test_score']
stds = model_random_search.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, model_random_search.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))

0.508 (+/-0.439) for {'classifier__l2_regularization': 1.5289299991623538e-05, 'classifier__learning_rate': 1.6805421783414851, 'classifier__max_bins': 15, 'classifier__max_leaf_nodes': 161, 'classifier__min_samples_leaf': 18}
0.908 (+/-0.062) for {'classifier__l2_regularization': 0.29899187773897573, 'classifier__learning_rate': 0.0023110566785611796, 'classifier__max_bins': 63, 'classifier__max_leaf_nodes': 5, 'classifier__min_samples_leaf': 21}
0.917 (+/-0.118) for {'classifier__l2_regularization': 3.600361042968232e-06, 'classifier__learning_rate': 0.0018252458788199106, 'classifier__max_bins': 10, 'classifier__max_leaf_nodes': 4, 'classifier__min_samples_leaf': 1}
0.625 (+/-0.053) for {'classifier__l2_regularization': 113.66178175761836, 'classifier__learning_rate': 0.006667816119914061, 'classifier__max_bins': 39, 'classifier__max_leaf_nodes': 70, 'classifier__min_samples_leaf': 7}
0.725 (+/-0.125) for {'classifier__l2_regularization': 0.007199772340616237, 'classifier__learning_