In [12]:
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.datasets import load_iris
import pandas as pd

In [2]:
data = load_iris()
X = data.data
y = data.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

### Random Forest

In [3]:
model = RandomForestClassifier(n_estimators = 200, max_depth=5, random_state=1)

model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print(f"Accuracy score, {accuracy_score(y_pred, y_test):.2f}")

Accuracy score, 0.97


### AdaBoostClassifier

In [4]:
model = AdaBoostClassifier(n_estimators = 200, learning_rate=0.1, algorithm='SAMME', random_state=1)

model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print(f"Accuracy score, {accuracy_score(y_pred, y_test):.2f}")

Accuracy score, 0.97


### Gradient Boosting

In [5]:
model = GradientBoostingClassifier(n_estimators = 200, learning_rate=0.1, max_depth=5, random_state=1)

model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print(f"Accuracy score, {accuracy_score(y_pred, y_test):.2f}")

Accuracy score, 0.97


## Hyperparameter Tuning with CV

### GridSearchCV

In [9]:
rf_param = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 5, 10],
    'max_features': ['sqrt', 'log2']
}

rf_search = GridSearchCV(
    RandomForestClassifier(random_state=1),
    rf_param,
    cv=5,
    n_jobs=-1,
    return_train_score=True
)

rf_search.fit(X_train, y_train)
print("Best RF Parameters", rf_search.best_params_)

y_pred = rf_search.best_estimator_.predict(X_test)
print(f"Accuracy score, {accuracy_score(y_test, y_pred):.2f}")

max_features determines how many features will be considered when splitting a node in a decision tree. Say we have 10 features, we can put max_features=10 so that the model will consider all 10 features. Using 'sqrt' meaning it will take the square root of 10, which is around 3 features, to be considered for splitting. Same goes with log2

### RandomizedSearchCV

In [15]:
ada_param = {
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.01, 0.1, 0.5, 1]
}

ada_search = RandomizedSearchCV(
    AdaBoostClassifier(algorithm='SAMME', random_state=1),
    ada_param,
    n_iter=6,
    cv=5,
    n_jobs=-1,
    return_train_score=True
)

ada_search.fit(X_train, y_train)
print("Best Ada Parameters", ada_search.best_params_)

y_pred = ada_search.best_estimator_.predict(X_test)
print(f"Accuracy score, {accuracy_score(y_test, y_pred):.2f}")

Best Ada Parameters {'n_estimators': 200, 'learning_rate': 1}
Accuracy score, 0.97


GridSearchCV tests all the combinations of the parameters determined, whereas RandomizedSearchCV only tests a number of random pairs, thus the need of n_iter parameter. This what makes GridSearchCV usually slower and not suitable when there are many parameters to test , while RandomizedSearchCV is great for that because it doesn't test all combinations. 

n_estimator is the number of trees are built for each model. In RandomForest, n_estimator is the number of decision tree made to predict the data. Putting n_estimators = 100 meaning the model will build 100 decision trees. For AdaBoost and GradientBoosting, n_estimators refers to the number of weak learners added sequentially to fix the previous model mistakes, but usually is also a decision tree.

### Caveats

Even though gridsearch and randomsearch both test the data across multiple parameters and found the best one, we can't just trust it blindly and use it for predictions. We need to check for stability and variance, to make sure that the results of other parameters are consistent or close to each other and the best estimator is not just a lucky pick out of all.

#### Grid Search

In [10]:
rf_search.cv_results_

{'mean_fit_time': array([0.24661303, 0.78195853, 1.11180358, 0.40665207, 0.9020421 ,
        1.15286016, 0.41951842, 0.81797071, 1.00614018, 0.45803514,
        0.81936498, 1.02786345, 0.44692693, 0.80941253, 1.26924195,
        0.36769905, 0.82046633, 0.83745456]),
 'std_fit_time': array([0.14689674, 0.10217438, 0.21578098, 0.10456822, 0.21581894,
        0.27580532, 0.09229842, 0.08485959, 0.03559306, 0.07057253,
        0.2112179 , 0.04789382, 0.08160903, 0.1193462 , 0.14961453,
        0.04715176, 0.11960947, 0.06461691]),
 'mean_score_time': array([0.00897017, 0.02860937, 0.04193611, 0.01810417, 0.03133473,
        0.03891258, 0.01702671, 0.03012943, 0.03548045, 0.01766224,
        0.03706388, 0.03446555, 0.01759243, 0.03014383, 0.03262053,
        0.01553464, 0.02022514, 0.01670799]),
 'std_score_time': array([0.00456273, 0.00919782, 0.00573182, 0.00590131, 0.00770804,
        0.0075255 , 0.00504208, 0.00287695, 0.00156312, 0.00277209,
        0.02028908, 0.00138176, 0.00346765, 

In [14]:
rf_result = pd.DataFrame(rf_search.cv_results_)

rf_result[['params', 'mean_train_score', 'mean_test_score', 'std_test_score']].sort_values('mean_test_score', ascending=False)

Unnamed: 0,params,mean_train_score,mean_test_score,std_test_score
0,"{'max_depth': None, 'max_features': 'sqrt', 'n...",1.0,0.941667,0.056519
1,"{'max_depth': None, 'max_features': 'sqrt', 'n...",1.0,0.941667,0.056519
16,"{'max_depth': 10, 'max_features': 'log2', 'n_e...",1.0,0.941667,0.056519
15,"{'max_depth': 10, 'max_features': 'log2', 'n_e...",1.0,0.941667,0.056519
14,"{'max_depth': 10, 'max_features': 'sqrt', 'n_e...",1.0,0.941667,0.056519
13,"{'max_depth': 10, 'max_features': 'sqrt', 'n_e...",1.0,0.941667,0.056519
12,"{'max_depth': 10, 'max_features': 'sqrt', 'n_e...",1.0,0.941667,0.056519
11,"{'max_depth': 5, 'max_features': 'log2', 'n_es...",1.0,0.941667,0.056519
10,"{'max_depth': 5, 'max_features': 'log2', 'n_es...",1.0,0.941667,0.056519
9,"{'max_depth': 5, 'max_features': 'log2', 'n_es...",1.0,0.941667,0.056519


#### Random Search

In [16]:
ada_result = pd.DataFrame(ada_search.cv_results_)

ada_result[['params', 'mean_train_score', 'mean_test_score', 'std_test_score']].sort_values('mean_test_score', ascending=False)

Unnamed: 0,params,mean_train_score,mean_test_score,std_test_score
2,"{'n_estimators': 200, 'learning_rate': 1}",1.0,0.95,0.040825
5,"{'n_estimators': 100, 'learning_rate': 0.1}",0.979167,0.95,0.040825
0,"{'n_estimators': 300, 'learning_rate': 0.5}",1.0,0.941667,0.056519
1,"{'n_estimators': 300, 'learning_rate': 0.1}",0.995833,0.941667,0.056519
3,"{'n_estimators': 100, 'learning_rate': 1}",1.0,0.941667,0.056519
4,"{'n_estimators': 200, 'learning_rate': 0.1}",0.983333,0.941667,0.056519
