# Hyper parameters tuning - Random Search

Reference: https://inria.github.io/scikit-learn-mooc/python_scripts/parameter_tuning_randomized_search.html

## Prepare data

In [1]:
# obtain the data

from sklearn import decomposition
from sklearn import datasets
from sklearn.model_selection import train_test_split

iris = datasets.load_iris()
X = iris.data
y = iris.target

# split into a training and testing set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# PCA 
nof_prin_components = 2 
pca = decomposition.PCA(n_components=nof_prin_components, svd_solver='full').fit(X_train)

# applies PCA to the train and test images to calculate the principal components
X_train_pca = pca.transform(X_train) 
X_test_pca = pca.transform(X_test)

## Support functions

In [2]:
from scipy.stats import loguniform

class loguniform_int:
    """Integer valued version of the log-uniform distribution"""
    def __init__(self, a, b):
        self._distribution = loguniform(a, b)

    def rvs(self, *args, **kwargs):
        """Random variable sample"""
        return self._distribution.rvs(*args, **kwargs).astype(int)

In [9]:
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.pipeline import Pipeline

mlp_gs = MLPClassifier(max_iter=100)

model = Pipeline([
    
    ("classifier", HistGradientBoostingClassifier(random_state=42, max_leaf_nodes=4)),
])

## Randomized search configurations

In [10]:
from sklearn.model_selection import RandomizedSearchCV

param_distributions = {
    'classifier__l2_regularization': loguniform(1e-6, 1e3),
    'classifier__learning_rate': loguniform(0.001, 10),
    'classifier__max_leaf_nodes': loguniform_int(2, 256),
    'classifier__min_samples_leaf': loguniform_int(1, 100),
    'classifier__max_bins': loguniform_int(2, 255),
}

model_random_search = RandomizedSearchCV(
    model, param_distributions=param_distributions, n_iter=10,
    cv=5, verbose=1,
)


model_random_search.fit(X_train_pca, y_train)

Fitting 5 folds for each of 10 candidates, totalling 50 fits


## Define classifier and its parameters

In [12]:
from sklearn.neural_network import MLPClassifier

mlp_gs = MLPClassifier(max_iter=100)

parameter_space = {
    'hidden_layer_sizes': [(100,),(200,)],
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'alpha': [0.1, 0.5, 0.7],
    'learning_rate': ['constant','adaptive'],
}

from sklearn.model_selection import GridSearchCV

clf = GridSearchCV(mlp_gs, parameter_space, n_jobs=-1, cv=5)
clf.fit(X_train_pca, y_train) 



## Review results

In [13]:
# identify perfect/best hyperparameters
print('Best parameters found:\n', clf.best_params_)

Best parameters found:
 {'activation': 'tanh', 'alpha': 0.1, 'hidden_layer_sizes': (200,), 'learning_rate': 'constant', 'solver': 'adam'}


In [14]:
# prediction
y_true, y_pred = y_test , clf.predict(X_test_pca)
from sklearn.metrics import classification_report
print('Results on the test set:')
print(classification_report(y_true, y_pred))

Results on the test set:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         8
           1       1.00      0.67      0.80         9
           2       0.81      1.00      0.90        13

    accuracy                           0.90        30
   macro avg       0.94      0.89      0.90        30
weighted avg       0.92      0.90      0.90        30



In [15]:
# means

means = clf.cv_results_['mean_test_score']
stds = clf.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))

0.833 (+/-0.091) for {'activation': 'tanh', 'alpha': 0.1, 'hidden_layer_sizes': (100,), 'learning_rate': 'constant', 'solver': 'sgd'}
0.917 (+/-0.091) for {'activation': 'tanh', 'alpha': 0.1, 'hidden_layer_sizes': (100,), 'learning_rate': 'constant', 'solver': 'adam'}
0.833 (+/-0.053) for {'activation': 'tanh', 'alpha': 0.1, 'hidden_layer_sizes': (100,), 'learning_rate': 'adaptive', 'solver': 'sgd'}
0.892 (+/-0.085) for {'activation': 'tanh', 'alpha': 0.1, 'hidden_layer_sizes': (100,), 'learning_rate': 'adaptive', 'solver': 'adam'}
0.825 (+/-0.097) for {'activation': 'tanh', 'alpha': 0.1, 'hidden_layer_sizes': (200,), 'learning_rate': 'constant', 'solver': 'sgd'}
0.958 (+/-0.053) for {'activation': 'tanh', 'alpha': 0.1, 'hidden_layer_sizes': (200,), 'learning_rate': 'constant', 'solver': 'adam'}
0.825 (+/-0.062) for {'activation': 'tanh', 'alpha': 0.1, 'hidden_layer_sizes': (200,), 'learning_rate': 'adaptive', 'solver': 'sgd'}
0.958 (+/-0.053) for {'activation': 'tanh', 'alpha': 0.1, '