In [3]:
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.model_selection import GridSearchCV
import warnings
warnings.filterwarnings("ignore")

In [4]:
boston = load_boston()
X, y = boston['data'], boston['target']

In [5]:
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2)

In [7]:
from sklearn.preprocessing import StandardScaler

In [8]:
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_valid = sc.transform(X_valid)

**Decision Tree**

In [10]:
from sklearn.tree import DecisionTreeRegressor

In [11]:
k_range= list(range(1,10))

In [22]:
dt_params = {
    'criterion': ['mse', 'mae'],
    'max_depth': k_range,
    'min_samples_leaf': [1,2,4,6,8,10],
    'max_features': ['auto', 'sqrt', 'log2']
}

In [13]:
dt = DecisionTreeRegressor()
grid_dt = GridSearchCV(dt, dt_params, scoring='r2', cv = 10)
grid_dt.fit(X_train, y_train)

GridSearchCV(cv=10, error_score='raise-deprecating',
       estimator=DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,
           max_leaf_nodes=None, min_impurity_decrease=0.0,
           min_impurity_split=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           presort=False, random_state=None, splitter='best'),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'criterion': ['mse', 'mae'], 'max_depth': [1, 2, 3, 4, 5, 6, 7, 8, 9], 'min_samples_leaf': [1, 2, 4, 6, 8, 10], 'max_features': ['auto', 'sqrt', 'log2']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='r2', verbose=0)

In [14]:
print(grid_dt.best_params_)
print(grid_dt.best_score_)
print(grid_dt.best_estimator_)

{'criterion': 'mae', 'max_depth': 7, 'max_features': 'auto', 'min_samples_leaf': 2}
0.7966355752577774
DecisionTreeRegressor(criterion='mae', max_depth=7, max_features='auto',
           max_leaf_nodes=None, min_impurity_decrease=0.0,
           min_impurity_split=None, min_samples_leaf=2,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           presort=False, random_state=None, splitter='best')


**SVR**

In [15]:
from sklearn.svm import SVR

In [17]:
svr_params = {
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'C': [0.001, 0.01, 0.1, 0.25, 0.5],
    'epsilon': [0.1, 0.2, 0.3, 0.4, 0.5]
}

In [18]:
svr = SVR()
grid_svr = GridSearchCV(svr, svr_params, scoring='r2', cv = 10)
grid_svr.fit(X_train, y_train)

GridSearchCV(cv=10, error_score='raise-deprecating',
       estimator=SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
  gamma='auto_deprecated', kernel='rbf', max_iter=-1, shrinking=True,
  tol=0.001, verbose=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'kernel': ['linear', 'poly', 'rbf', 'sigmoid'], 'C': [0.001, 0.01, 0.1, 0.25, 0.5], 'epsilon': [0.1, 0.2, 0.3, 0.4, 0.5]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='r2', verbose=0)

In [19]:
print(grid_svr.best_params_)
print(grid_svr.best_score_)
print(grid_svr.best_estimator_)

{'C': 0.5, 'epsilon': 0.5, 'kernel': 'linear'}
0.683954282955986
SVR(C=0.5, cache_size=200, coef0=0.0, degree=3, epsilon=0.5,
  gamma='auto_deprecated', kernel='linear', max_iter=-1, shrinking=True,
  tol=0.001, verbose=False)


**K-neighbors**

In [24]:
from sklearn.neighbors import KNeighborsRegressor

In [26]:
knr_params = {
    'n_neighbors': k_range,
    'p': [1,2],
    'weights': ['uniform', 'distance']
}

In [27]:
knr = KNeighborsRegressor()
grid_knr = GridSearchCV(knr, knr_params, scoring='r2', cv = 10)
grid_knr.fit(X_train, y_train)

GridSearchCV(cv=10, error_score='raise-deprecating',
       estimator=KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
          metric_params=None, n_jobs=None, n_neighbors=5, p=2,
          weights='uniform'),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9], 'p': [1, 2], 'weights': ['uniform', 'distance']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='r2', verbose=0)

In [28]:
print(grid_knr.best_params_)
print(grid_knr.best_score_)
print(grid_knr.best_estimator_)

{'n_neighbors': 3, 'p': 1, 'weights': 'distance'}
0.81187572527863
KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
          metric_params=None, n_jobs=None, n_neighbors=3, p=1,
          weights='distance')


**Gradient boosting**

In [29]:
from sklearn.ensemble import GradientBoostingRegressor

In [31]:
gbr_params = {
    'n_estimators': [10, 50 ,100, 150, 200],
    'max_depth' : k_range,
    'max_features': k_range
}

In [32]:
gbr = GradientBoostingRegressor()
grid_gbr = GridSearchCV(gbr, gbr_params, scoring='r2', cv = 10)
grid_gbr.fit(X_train, y_train)

GridSearchCV(cv=10, error_score='raise-deprecating',
       estimator=GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
             learning_rate=0.1, loss='ls', max_depth=3, max_features=None,
             max_leaf_nodes=None, min_impurity_decrease=0.0,
             min_impurity_split=None, min_samples_leaf=1,
             min_sampl...=None, subsample=1.0, tol=0.0001,
             validation_fraction=0.1, verbose=0, warm_start=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'n_estimators': [10, 50, 100, 150, 200], 'max_depth': [1, 2, 3, 4, 5, 6, 7, 8, 9], 'max_features': [1, 2, 3, 4, 5, 6, 7, 8, 9]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='r2', verbose=0)

In [33]:
print(grid_gbr.best_params_)
print(grid_gbr.best_score_)
print(grid_gbr.best_estimator_)

{'max_depth': 3, 'max_features': 9, 'n_estimators': 100}
0.8845192177323521
GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
             learning_rate=0.1, loss='ls', max_depth=3, max_features=9,
             max_leaf_nodes=None, min_impurity_decrease=0.0,
             min_impurity_split=None, min_samples_leaf=1,
             min_samples_split=2, min_weight_fraction_leaf=0.0,
             n_estimators=100, n_iter_no_change=None, presort='auto',
             random_state=None, subsample=1.0, tol=0.0001,
             validation_fraction=0.1, verbose=0, warm_start=False)


**Summary**

In [34]:
estimators = {
    'tree': grid_dt,
    'svm': grid_svr,
    'knn': grid_knr,
    'gboost': grid_gbr
}

In [35]:
for k in estimators:
    v = estimators[k]
    print(k,"CV R2 score:", v.best_score_, "Validation R2 score", v.best_estimator_.score(X_valid, y_valid))

tree CV R2 score: 0.7966355752577774 Validation R2 score 0.6363502612440535
svm CV R2 score: 0.683954282955986 Validation R2 score 0.6281159069175763
knn CV R2 score: 0.81187572527863 Validation R2 score 0.6641839675761876
gboost CV R2 score: 0.8845192177323521 Validation R2 score 0.8456946930853236


In [36]:
grid_gbr.cv_results_ 

{'mean_fit_time': array([0.03610315, 0.00781028, 0.01097183, 0.01857002, 0.02187235,
        0.00156202, 0.00841317, 0.01065266, 0.01774137, 0.02567577,
        0.00279629, 0.01038642, 0.01358163, 0.01996665, 0.02661481,
        0.00265911, 0.00838602, 0.02277989, 0.02316613, 0.02800927,
        0.00159791, 0.00560794, 0.01381874, 0.0211509 , 0.02899487,
        0.0007987 , 0.00838912, 0.01477692, 0.02316198, 0.0289525 ,
        0.00080185, 0.00916562, 0.01593173, 0.02030797, 0.03370991,
        0.00199292, 0.0087853 , 0.01518598, 0.02394009, 0.03284535,
        0.00312421, 0.00821152, 0.01891727, 0.02503238, 0.02792003,
        0.00156221, 0.00781426, 0.01610548, 0.02153292, 0.02499399,
        0.00312445, 0.00781059, 0.01577246, 0.02486215, 0.03052425,
        0.00312333, 0.01004884, 0.01613441, 0.0249975 , 0.03378737,
        0.        , 0.00781045, 0.01735582, 0.03005519, 0.03329456,
        0.00312433, 0.00937319, 0.01653669, 0.02579925, 0.03897552,
        0.00156226, 0.00970542,