In [32]:
import numpy as np
import pandas as pd

from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, mean_squared_error as MSE
from sklearn.ensemble import RandomForestRegressor

from sklearn.datasets import load_breast_cancer, load_boston


In [8]:
data = load_breast_cancer()
X, y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, stratify=y)

In [9]:
dt = DecisionTreeClassifier()
dt.get_params() 

{'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': None,
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'random_state': None,
 'splitter': 'best'}

In [18]:
params = {
    'max_depth': np.arange(3, 7),
    'min_samples_leaf': np.arange(0.04, .09, .02),
    'max_features': np.arange(0.2, 0.9, .2)
}

gridsearch = GridSearchCV(estimator=dt, param_grid=params, scoring='accuracy', cv=10, n_jobs=-1)
gridsearch.fit(X_train, y_train)

GridSearchCV(cv=10, estimator=DecisionTreeClassifier(), n_jobs=-1,
             param_grid={'max_depth': array([3, 4, 5, 6]),
                         'max_features': array([0.2, 0.4, 0.6, 0.8]),
                         'min_samples_leaf': array([0.04, 0.06, 0.08])},
             scoring='accuracy')

In [19]:
best_hyperparams = gridsearch.best_params_
print(f'Best params: {best_hyperparams}')

Best params: {'max_depth': 5, 'max_features': 0.4, 'min_samples_leaf': 0.04}


In [22]:
dt = DecisionTreeClassifier(max_depth=5, max_features=.4, min_samples_leaf=.04).fit(X_train, y_train)
print(f'DecisionTree with hyperparameter tuning score: {accuracy_score(y_test, dt.predict(X_test))}')

DecisionTree with hyperparameter tuning score: 0.9122807017543859


In [23]:
dt = DecisionTreeClassifier().fit(X_train, y_train)
print(f'DecisionTree without hyperparameter tuning score: {accuracy_score(y_test, dt.predict(X_test))}')

DecisionTree without hyperparameter tuning score: 0.9210526315789473


In [28]:
# Import roc_auc_score from sklearn.metrics
from sklearn.metrics import roc_auc_score

# Define params_dt
params_dt = {
    'max_depth': [2, 3, 4],
    'min_samples_leaf': [.12, .14, .16, .18]
}

# Instantiate grid_dt
grid_dt = GridSearchCV(estimator=dt,
                       param_grid=params_dt,
                       scoring='roc_auc',
                       cv=5,
                       n_jobs=-1)
grid_dt.fit(X_train, y_train)

# Extract the best estimator
best_model = grid_dt.best_estimator_

# Predict the test set probabilities of the positive class
y_pred_proba = grid_dt.predict_proba(X_test)[:, 1]

# Compute test_roc_auc
test_roc_auc = roc_auc_score(y_test, y_pred_proba)

# Print test_roc_auc
print('Test set ROC AUC score: {:.3f}'.format(test_roc_auc))

Test set ROC AUC score: 0.956


# RandomForest Hyperparameters


In [38]:
data = load_boston()
X, y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2)

params_rf = {
    'n_estimators': [300, 400, 500],
    'max_depth': [4, 6, 8],
    'min_samples_leaf': [.1, .2],
    'max_features': ['log2', 'sqrt']
}

rf = RandomForestRegressor()

grid_rf = GridSearchCV(estimator=rf, param_grid=params_rf, cv=3, scoring='neg_mean_squared_error', verbose=1, n_jobs=-1, refit=True).fit(X_train, y_train)

Fitting 3 folds for each of 36 candidates, totalling 108 fits


In [39]:
best_params = grid_rf.best_params_
print(best_params_)
best_model = grid_rf.best_estimator
y_pred = best_model.predict(X_test)
rmse = np.sqrt(MSE(y_test, y_pred))
print(f'RMSE: {rmse}')

NameError: name 'best_params_' is not defined