In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
from sklearn.datasets import load_boston

In [3]:
boston=load_boston()

In [4]:
boston_df=pd.DataFrame(boston.data,columns=boston.feature_names)

In [5]:
boston_df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33


In [6]:
X=boston_df
y=boston.target

In [7]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.33,random_state=42)

In [8]:
from sklearn.ensemble import RandomForestRegressor
regressor=RandomForestRegressor()

In [9]:
parameter={'n_estimators':[int(x) for x in np.linspace(10, 100, num = 10)],
           'max_depth':[int(x) for x in np.linspace(1, 5, num = 5)],
           'max_features': ['sqrt', 'log2'],
          }

In [10]:
## first way
# hyperparameter tuning 

from sklearn.model_selection import GridSearchCV

ghregressor=GridSearchCV(regressor,param_grid=parameter,scoring='neg_mean_squared_error',cv=10)

ghregressor.fit(X_train,y_train)



GridSearchCV(cv=10, error_score='raise-deprecating',
             estimator=RandomForestRegressor(bootstrap=True, criterion='mse',
                                             max_depth=None,
                                             max_features='auto',
                                             max_leaf_nodes=None,
                                             min_impurity_decrease=0.0,
                                             min_impurity_split=None,
                                             min_samples_leaf=1,
                                             min_samples_split=2,
                                             min_weight_fraction_leaf=0.0,
                                             n_estimators='warn', n_jobs=None,
                                             oob_score=False, random_state=None,
                                             verbose=0, warm_start=False),
             iid='warn', n_jobs=None,
             param_grid={'max_depth': [1, 2, 3, 4, 5],


In [11]:
print(ghregressor.best_params_)

{'max_depth': 5, 'max_features': 'sqrt', 'n_estimators': 80}


In [12]:
y_pred=ghregressor.predict(X_test)

In [13]:
from sklearn.metrics import r2_score
round(r2_score(y_pred,y_test)*100,2)

66.32

In [14]:
## second way
# hyperparameter tuning 

from sklearn.model_selection import RandomizedSearchCV
rsregressor=RandomizedSearchCV(regressor,param_distributions=parameter,scoring='neg_mean_squared_error',cv=10)
rsregressor.fit(X_train,y_train)



RandomizedSearchCV(cv=10, error_score='raise-deprecating',
                   estimator=RandomForestRegressor(bootstrap=True,
                                                   criterion='mse',
                                                   max_depth=None,
                                                   max_features='auto',
                                                   max_leaf_nodes=None,
                                                   min_impurity_decrease=0.0,
                                                   min_impurity_split=None,
                                                   min_samples_leaf=1,
                                                   min_samples_split=2,
                                                   min_weight_fraction_leaf=0.0,
                                                   n_estimators='warn',
                                                   n_jobs=None, oob_score=False,
                                                   random_state=

In [15]:
print(rsregressor.best_params_)

{'n_estimators': 70, 'max_features': 'sqrt', 'max_depth': 5}


In [16]:
y_pred=rsregressor.predict(X_test)

In [17]:
round(r2_score(y_pred,y_test)*100,2)

67.48