In [157]:
import pandas  as pd
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler,MinMaxScaler,LabelEncoder,OneHotEncoder
from sklearn.linear_model import LinearRegression
import warnings
warnings.filterwarnings('ignore')
from sklearn.metrics import r2_score,mean_squared_error
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV

In [104]:
hp_df = pd.read_csv('house_price_data.csv') # dataframe(equivalent to table)

y = hp_df['SalePrice']

hp_df.drop(columns = ["SalePrice",'Id'],inplace = True)

In [105]:
cat_cols = [col for col in hp_df.columns if hp_df[col].dtype == 'object']
con_cols = [col for col in hp_df.columns if hp_df[col].dtype != 'object']

In [106]:
X_train,X_test,y_train,y_test = train_test_split(hp_df,y,random_state = 42, test_size = 0.2)

In [67]:
"""
Fill the NAs
Scaling the continuous features
Encoding the categorical features

"""

'\nFill the NAs\nScaling the continuous features\nEncoding the categorical features\n\n'

In [107]:
for col in X_train.columns:
    if hp_df[col].dtype == 'object':
        X_train[col].fillna(X_train[col].mode()[0],inplace = True)
        X_test[col].fillna(X_train[col].mode()[0],inplace = True) #filling with train mode
        
    if hp_df[col].dtype != 'object':
        X_train[col].fillna(X_train[col].mean(),inplace = True)
        X_test[col].fillna(X_train[col].mean(),inplace = True) #filling with train mode
        
        

In [131]:
# Standard Scaler

scaler = StandardScaler()
min_max_scaler = MinMaxScaler()

for col in con_cols:
    X_train[col] = min_max_scaler.fit_transform(np.array(X_train[col]).reshape(-1,1))
    X_test[col] = min_max_scaler.transform(np.array(X_test[col]).reshape(-1,1))

In [132]:
X_train[con_cols].index

Int64Index([ 254, 1066,  638,  799,  380,  303,   86, 1385,  265,  793,
            ...
             330, 1238,  466,  121, 1044, 1095, 1130, 1294,  860, 1126],
           dtype='int64', length=1168)

In [133]:
# One-Hot Encoding

oe = OneHotEncoder(handle_unknown='ignore')

oe_train = pd.DataFrame(oe.fit_transform(X_train[cat_cols]).toarray(),index = X_train[con_cols].index)
oe_test = pd.DataFrame(oe.transform(X_test[cat_cols]).toarray(),index = X_test[con_cols].index)

In [134]:
train_final = pd.concat([X_train[con_cols],oe_train],axis = 1)
test_final = pd.concat([X_test[con_cols],oe_test],axis = 1)

In [135]:
linreg = LinearRegression()

linreg.fit(train_final,y_train)

LinearRegression()

In [136]:
coefficients = linreg.coef_

In [147]:
y_pred = linreg.predict(test_final)

In [138]:
r2_score(y_test,y_pred)

-1.5075566284102294e+19

In [143]:
dtree = DecisionTreeRegressor(random_state=42)

dtree.fit(train_final,y_train)

dtree_pred = dtree.predict(test_final)

In [144]:
r2_score(y_test,dtree_pred)

0.7751652658329957

In [152]:
rf_regressor = RandomForestRegressor()

In [154]:
# GridSearchCV
# RandomisedSearchCV
rf_param_dict = {
                'n_estimators': [40,60,70,80],
                'max_depth' : [10,12,14],
                'min_samples_split': [5,6,7,8],
                'max_leaf_nodes': [6,7,8,9]
                }

gcv = GridSearchCV(rf_regressor,param_grid=rf_param_dict,cv = 5,verbose =32)

gcv.fit(train_final,y_train)


Fitting 5 folds for each of 192 candidates, totalling 960 fits
[CV 1/5; 1/192] START max_depth=10, max_leaf_nodes=6, min_samples_split=5, n_estimators=40
[CV 1/5; 1/192] END max_depth=10, max_leaf_nodes=6, min_samples_split=5, n_estimators=40; total time=   0.3s
[CV 2/5; 1/192] START max_depth=10, max_leaf_nodes=6, min_samples_split=5, n_estimators=40
[CV 2/5; 1/192] END max_depth=10, max_leaf_nodes=6, min_samples_split=5, n_estimators=40; total time=   0.2s
[CV 3/5; 1/192] START max_depth=10, max_leaf_nodes=6, min_samples_split=5, n_estimators=40
[CV 3/5; 1/192] END max_depth=10, max_leaf_nodes=6, min_samples_split=5, n_estimators=40; total time=   0.3s
[CV 4/5; 1/192] START max_depth=10, max_leaf_nodes=6, min_samples_split=5, n_estimators=40
[CV 4/5; 1/192] END max_depth=10, max_leaf_nodes=6, min_samples_split=5, n_estimators=40; total time=   0.3s
[CV 5/5; 1/192] START max_depth=10, max_leaf_nodes=6, min_samples_split=5, n_estimators=40
[CV 5/5; 1/192] END max_depth=10, max_leaf_nod

[CV 2/5; 9/192] END max_depth=10, max_leaf_nodes=6, min_samples_split=7, n_estimators=40; total time=   0.4s
[CV 3/5; 9/192] START max_depth=10, max_leaf_nodes=6, min_samples_split=7, n_estimators=40
[CV 3/5; 9/192] END max_depth=10, max_leaf_nodes=6, min_samples_split=7, n_estimators=40; total time=   0.4s
[CV 4/5; 9/192] START max_depth=10, max_leaf_nodes=6, min_samples_split=7, n_estimators=40
[CV 4/5; 9/192] END max_depth=10, max_leaf_nodes=6, min_samples_split=7, n_estimators=40; total time=   0.4s
[CV 5/5; 9/192] START max_depth=10, max_leaf_nodes=6, min_samples_split=7, n_estimators=40
[CV 5/5; 9/192] END max_depth=10, max_leaf_nodes=6, min_samples_split=7, n_estimators=40; total time=   0.4s
[CV 1/5; 10/192] START max_depth=10, max_leaf_nodes=6, min_samples_split=7, n_estimators=60
[CV 1/5; 10/192] END max_depth=10, max_leaf_nodes=6, min_samples_split=7, n_estimators=60; total time=   0.6s
[CV 2/5; 10/192] START max_depth=10, max_leaf_nodes=6, min_samples_split=7, n_estimators=

[CV 3/5; 17/192] END max_depth=10, max_leaf_nodes=7, min_samples_split=5, n_estimators=40; total time=   0.5s
[CV 4/5; 17/192] START max_depth=10, max_leaf_nodes=7, min_samples_split=5, n_estimators=40
[CV 4/5; 17/192] END max_depth=10, max_leaf_nodes=7, min_samples_split=5, n_estimators=40; total time=   0.5s
[CV 5/5; 17/192] START max_depth=10, max_leaf_nodes=7, min_samples_split=5, n_estimators=40
[CV 5/5; 17/192] END max_depth=10, max_leaf_nodes=7, min_samples_split=5, n_estimators=40; total time=   0.6s
[CV 1/5; 18/192] START max_depth=10, max_leaf_nodes=7, min_samples_split=5, n_estimators=60
[CV 1/5; 18/192] END max_depth=10, max_leaf_nodes=7, min_samples_split=5, n_estimators=60; total time=   0.7s
[CV 2/5; 18/192] START max_depth=10, max_leaf_nodes=7, min_samples_split=5, n_estimators=60
[CV 2/5; 18/192] END max_depth=10, max_leaf_nodes=7, min_samples_split=5, n_estimators=60; total time=   0.7s
[CV 3/5; 18/192] START max_depth=10, max_leaf_nodes=7, min_samples_split=5, n_esti

[CV 4/5; 25/192] END max_depth=10, max_leaf_nodes=7, min_samples_split=7, n_estimators=40; total time=   0.5s
[CV 5/5; 25/192] START max_depth=10, max_leaf_nodes=7, min_samples_split=7, n_estimators=40
[CV 5/5; 25/192] END max_depth=10, max_leaf_nodes=7, min_samples_split=7, n_estimators=40; total time=   0.6s
[CV 1/5; 26/192] START max_depth=10, max_leaf_nodes=7, min_samples_split=7, n_estimators=60
[CV 1/5; 26/192] END max_depth=10, max_leaf_nodes=7, min_samples_split=7, n_estimators=60; total time=   0.6s
[CV 2/5; 26/192] START max_depth=10, max_leaf_nodes=7, min_samples_split=7, n_estimators=60
[CV 2/5; 26/192] END max_depth=10, max_leaf_nodes=7, min_samples_split=7, n_estimators=60; total time=   0.6s
[CV 3/5; 26/192] START max_depth=10, max_leaf_nodes=7, min_samples_split=7, n_estimators=60
[CV 3/5; 26/192] END max_depth=10, max_leaf_nodes=7, min_samples_split=7, n_estimators=60; total time=   0.8s
[CV 4/5; 26/192] START max_depth=10, max_leaf_nodes=7, min_samples_split=7, n_esti

[CV 5/5; 33/192] END max_depth=10, max_leaf_nodes=8, min_samples_split=5, n_estimators=40; total time=   0.4s
[CV 1/5; 34/192] START max_depth=10, max_leaf_nodes=8, min_samples_split=5, n_estimators=60
[CV 1/5; 34/192] END max_depth=10, max_leaf_nodes=8, min_samples_split=5, n_estimators=60; total time=   0.6s
[CV 2/5; 34/192] START max_depth=10, max_leaf_nodes=8, min_samples_split=5, n_estimators=60
[CV 2/5; 34/192] END max_depth=10, max_leaf_nodes=8, min_samples_split=5, n_estimators=60; total time=   0.6s
[CV 3/5; 34/192] START max_depth=10, max_leaf_nodes=8, min_samples_split=5, n_estimators=60
[CV 3/5; 34/192] END max_depth=10, max_leaf_nodes=8, min_samples_split=5, n_estimators=60; total time=   0.6s
[CV 4/5; 34/192] START max_depth=10, max_leaf_nodes=8, min_samples_split=5, n_estimators=60
[CV 4/5; 34/192] END max_depth=10, max_leaf_nodes=8, min_samples_split=5, n_estimators=60; total time=   0.6s
[CV 5/5; 34/192] START max_depth=10, max_leaf_nodes=8, min_samples_split=5, n_esti

[CV 1/5; 42/192] END max_depth=10, max_leaf_nodes=8, min_samples_split=7, n_estimators=60; total time=   0.7s
[CV 2/5; 42/192] START max_depth=10, max_leaf_nodes=8, min_samples_split=7, n_estimators=60
[CV 2/5; 42/192] END max_depth=10, max_leaf_nodes=8, min_samples_split=7, n_estimators=60; total time=   0.6s
[CV 3/5; 42/192] START max_depth=10, max_leaf_nodes=8, min_samples_split=7, n_estimators=60
[CV 3/5; 42/192] END max_depth=10, max_leaf_nodes=8, min_samples_split=7, n_estimators=60; total time=   0.7s
[CV 4/5; 42/192] START max_depth=10, max_leaf_nodes=8, min_samples_split=7, n_estimators=60
[CV 4/5; 42/192] END max_depth=10, max_leaf_nodes=8, min_samples_split=7, n_estimators=60; total time=   0.7s
[CV 5/5; 42/192] START max_depth=10, max_leaf_nodes=8, min_samples_split=7, n_estimators=60
[CV 5/5; 42/192] END max_depth=10, max_leaf_nodes=8, min_samples_split=7, n_estimators=60; total time=   0.6s
[CV 1/5; 43/192] START max_depth=10, max_leaf_nodes=8, min_samples_split=7, n_esti

[CV 2/5; 50/192] END max_depth=10, max_leaf_nodes=9, min_samples_split=5, n_estimators=60; total time=   0.6s
[CV 3/5; 50/192] START max_depth=10, max_leaf_nodes=9, min_samples_split=5, n_estimators=60
[CV 3/5; 50/192] END max_depth=10, max_leaf_nodes=9, min_samples_split=5, n_estimators=60; total time=   0.7s
[CV 4/5; 50/192] START max_depth=10, max_leaf_nodes=9, min_samples_split=5, n_estimators=60
[CV 4/5; 50/192] END max_depth=10, max_leaf_nodes=9, min_samples_split=5, n_estimators=60; total time=   0.7s
[CV 5/5; 50/192] START max_depth=10, max_leaf_nodes=9, min_samples_split=5, n_estimators=60
[CV 5/5; 50/192] END max_depth=10, max_leaf_nodes=9, min_samples_split=5, n_estimators=60; total time=   0.8s
[CV 1/5; 51/192] START max_depth=10, max_leaf_nodes=9, min_samples_split=5, n_estimators=70
[CV 1/5; 51/192] END max_depth=10, max_leaf_nodes=9, min_samples_split=5, n_estimators=70; total time=   0.9s
[CV 2/5; 51/192] START max_depth=10, max_leaf_nodes=9, min_samples_split=5, n_esti

[CV 3/5; 58/192] END max_depth=10, max_leaf_nodes=9, min_samples_split=7, n_estimators=60; total time=   0.6s
[CV 4/5; 58/192] START max_depth=10, max_leaf_nodes=9, min_samples_split=7, n_estimators=60
[CV 4/5; 58/192] END max_depth=10, max_leaf_nodes=9, min_samples_split=7, n_estimators=60; total time=   0.6s
[CV 5/5; 58/192] START max_depth=10, max_leaf_nodes=9, min_samples_split=7, n_estimators=60
[CV 5/5; 58/192] END max_depth=10, max_leaf_nodes=9, min_samples_split=7, n_estimators=60; total time=   0.6s
[CV 1/5; 59/192] START max_depth=10, max_leaf_nodes=9, min_samples_split=7, n_estimators=70
[CV 1/5; 59/192] END max_depth=10, max_leaf_nodes=9, min_samples_split=7, n_estimators=70; total time=   0.7s
[CV 2/5; 59/192] START max_depth=10, max_leaf_nodes=9, min_samples_split=7, n_estimators=70
[CV 2/5; 59/192] END max_depth=10, max_leaf_nodes=9, min_samples_split=7, n_estimators=70; total time=   0.7s
[CV 3/5; 59/192] START max_depth=10, max_leaf_nodes=9, min_samples_split=7, n_esti

[CV 4/5; 66/192] END max_depth=12, max_leaf_nodes=6, min_samples_split=5, n_estimators=60; total time=   0.6s
[CV 5/5; 66/192] START max_depth=12, max_leaf_nodes=6, min_samples_split=5, n_estimators=60
[CV 5/5; 66/192] END max_depth=12, max_leaf_nodes=6, min_samples_split=5, n_estimators=60; total time=   0.9s
[CV 1/5; 67/192] START max_depth=12, max_leaf_nodes=6, min_samples_split=5, n_estimators=70
[CV 1/5; 67/192] END max_depth=12, max_leaf_nodes=6, min_samples_split=5, n_estimators=70; total time=   0.6s
[CV 2/5; 67/192] START max_depth=12, max_leaf_nodes=6, min_samples_split=5, n_estimators=70
[CV 2/5; 67/192] END max_depth=12, max_leaf_nodes=6, min_samples_split=5, n_estimators=70; total time=   0.7s
[CV 3/5; 67/192] START max_depth=12, max_leaf_nodes=6, min_samples_split=5, n_estimators=70
[CV 3/5; 67/192] END max_depth=12, max_leaf_nodes=6, min_samples_split=5, n_estimators=70; total time=   0.6s
[CV 4/5; 67/192] START max_depth=12, max_leaf_nodes=6, min_samples_split=5, n_esti

[CV 5/5; 74/192] END max_depth=12, max_leaf_nodes=6, min_samples_split=7, n_estimators=60; total time=   0.7s
[CV 1/5; 75/192] START max_depth=12, max_leaf_nodes=6, min_samples_split=7, n_estimators=70
[CV 1/5; 75/192] END max_depth=12, max_leaf_nodes=6, min_samples_split=7, n_estimators=70; total time=   1.3s
[CV 2/5; 75/192] START max_depth=12, max_leaf_nodes=6, min_samples_split=7, n_estimators=70
[CV 2/5; 75/192] END max_depth=12, max_leaf_nodes=6, min_samples_split=7, n_estimators=70; total time=   0.9s
[CV 3/5; 75/192] START max_depth=12, max_leaf_nodes=6, min_samples_split=7, n_estimators=70
[CV 3/5; 75/192] END max_depth=12, max_leaf_nodes=6, min_samples_split=7, n_estimators=70; total time=   0.9s
[CV 4/5; 75/192] START max_depth=12, max_leaf_nodes=6, min_samples_split=7, n_estimators=70
[CV 4/5; 75/192] END max_depth=12, max_leaf_nodes=6, min_samples_split=7, n_estimators=70; total time=   0.8s
[CV 5/5; 75/192] START max_depth=12, max_leaf_nodes=6, min_samples_split=7, n_esti

[CV 1/5; 83/192] END max_depth=12, max_leaf_nodes=7, min_samples_split=5, n_estimators=70; total time=   0.6s
[CV 2/5; 83/192] START max_depth=12, max_leaf_nodes=7, min_samples_split=5, n_estimators=70
[CV 2/5; 83/192] END max_depth=12, max_leaf_nodes=7, min_samples_split=5, n_estimators=70; total time=   0.8s
[CV 3/5; 83/192] START max_depth=12, max_leaf_nodes=7, min_samples_split=5, n_estimators=70
[CV 3/5; 83/192] END max_depth=12, max_leaf_nodes=7, min_samples_split=5, n_estimators=70; total time=   0.8s
[CV 4/5; 83/192] START max_depth=12, max_leaf_nodes=7, min_samples_split=5, n_estimators=70
[CV 4/5; 83/192] END max_depth=12, max_leaf_nodes=7, min_samples_split=5, n_estimators=70; total time=   0.8s
[CV 5/5; 83/192] START max_depth=12, max_leaf_nodes=7, min_samples_split=5, n_estimators=70
[CV 5/5; 83/192] END max_depth=12, max_leaf_nodes=7, min_samples_split=5, n_estimators=70; total time=   0.7s
[CV 1/5; 84/192] START max_depth=12, max_leaf_nodes=7, min_samples_split=5, n_esti

[CV 2/5; 91/192] END max_depth=12, max_leaf_nodes=7, min_samples_split=7, n_estimators=70; total time=   0.6s
[CV 3/5; 91/192] START max_depth=12, max_leaf_nodes=7, min_samples_split=7, n_estimators=70
[CV 3/5; 91/192] END max_depth=12, max_leaf_nodes=7, min_samples_split=7, n_estimators=70; total time=   0.6s
[CV 4/5; 91/192] START max_depth=12, max_leaf_nodes=7, min_samples_split=7, n_estimators=70
[CV 4/5; 91/192] END max_depth=12, max_leaf_nodes=7, min_samples_split=7, n_estimators=70; total time=   0.6s
[CV 5/5; 91/192] START max_depth=12, max_leaf_nodes=7, min_samples_split=7, n_estimators=70
[CV 5/5; 91/192] END max_depth=12, max_leaf_nodes=7, min_samples_split=7, n_estimators=70; total time=   0.6s
[CV 1/5; 92/192] START max_depth=12, max_leaf_nodes=7, min_samples_split=7, n_estimators=80
[CV 1/5; 92/192] END max_depth=12, max_leaf_nodes=7, min_samples_split=7, n_estimators=80; total time=   0.7s
[CV 2/5; 92/192] START max_depth=12, max_leaf_nodes=7, min_samples_split=7, n_esti

[CV 3/5; 99/192] END max_depth=12, max_leaf_nodes=8, min_samples_split=5, n_estimators=70; total time=   0.6s
[CV 4/5; 99/192] START max_depth=12, max_leaf_nodes=8, min_samples_split=5, n_estimators=70
[CV 4/5; 99/192] END max_depth=12, max_leaf_nodes=8, min_samples_split=5, n_estimators=70; total time=   0.7s
[CV 5/5; 99/192] START max_depth=12, max_leaf_nodes=8, min_samples_split=5, n_estimators=70
[CV 5/5; 99/192] END max_depth=12, max_leaf_nodes=8, min_samples_split=5, n_estimators=70; total time=   0.7s
[CV 1/5; 100/192] START max_depth=12, max_leaf_nodes=8, min_samples_split=5, n_estimators=80
[CV 1/5; 100/192] END max_depth=12, max_leaf_nodes=8, min_samples_split=5, n_estimators=80; total time=   0.7s
[CV 2/5; 100/192] START max_depth=12, max_leaf_nodes=8, min_samples_split=5, n_estimators=80
[CV 2/5; 100/192] END max_depth=12, max_leaf_nodes=8, min_samples_split=5, n_estimators=80; total time=   0.7s
[CV 3/5; 100/192] START max_depth=12, max_leaf_nodes=8, min_samples_split=5, n

[CV 4/5; 107/192] END max_depth=12, max_leaf_nodes=8, min_samples_split=7, n_estimators=70; total time=   0.6s
[CV 5/5; 107/192] START max_depth=12, max_leaf_nodes=8, min_samples_split=7, n_estimators=70
[CV 5/5; 107/192] END max_depth=12, max_leaf_nodes=8, min_samples_split=7, n_estimators=70; total time=   0.6s
[CV 1/5; 108/192] START max_depth=12, max_leaf_nodes=8, min_samples_split=7, n_estimators=80
[CV 1/5; 108/192] END max_depth=12, max_leaf_nodes=8, min_samples_split=7, n_estimators=80; total time=   0.7s
[CV 2/5; 108/192] START max_depth=12, max_leaf_nodes=8, min_samples_split=7, n_estimators=80
[CV 2/5; 108/192] END max_depth=12, max_leaf_nodes=8, min_samples_split=7, n_estimators=80; total time=   0.7s
[CV 3/5; 108/192] START max_depth=12, max_leaf_nodes=8, min_samples_split=7, n_estimators=80
[CV 3/5; 108/192] END max_depth=12, max_leaf_nodes=8, min_samples_split=7, n_estimators=80; total time=   0.7s
[CV 4/5; 108/192] START max_depth=12, max_leaf_nodes=8, min_samples_split

[CV 5/5; 115/192] END max_depth=12, max_leaf_nodes=9, min_samples_split=5, n_estimators=70; total time=   0.6s
[CV 1/5; 116/192] START max_depth=12, max_leaf_nodes=9, min_samples_split=5, n_estimators=80
[CV 1/5; 116/192] END max_depth=12, max_leaf_nodes=9, min_samples_split=5, n_estimators=80; total time=   0.7s
[CV 2/5; 116/192] START max_depth=12, max_leaf_nodes=9, min_samples_split=5, n_estimators=80
[CV 2/5; 116/192] END max_depth=12, max_leaf_nodes=9, min_samples_split=5, n_estimators=80; total time=   0.8s
[CV 3/5; 116/192] START max_depth=12, max_leaf_nodes=9, min_samples_split=5, n_estimators=80
[CV 3/5; 116/192] END max_depth=12, max_leaf_nodes=9, min_samples_split=5, n_estimators=80; total time=   0.8s
[CV 4/5; 116/192] START max_depth=12, max_leaf_nodes=9, min_samples_split=5, n_estimators=80
[CV 4/5; 116/192] END max_depth=12, max_leaf_nodes=9, min_samples_split=5, n_estimators=80; total time=   0.7s
[CV 5/5; 116/192] START max_depth=12, max_leaf_nodes=9, min_samples_split

[CV 1/5; 124/192] END max_depth=12, max_leaf_nodes=9, min_samples_split=7, n_estimators=80; total time=   0.7s
[CV 2/5; 124/192] START max_depth=12, max_leaf_nodes=9, min_samples_split=7, n_estimators=80
[CV 2/5; 124/192] END max_depth=12, max_leaf_nodes=9, min_samples_split=7, n_estimators=80; total time=   0.7s
[CV 3/5; 124/192] START max_depth=12, max_leaf_nodes=9, min_samples_split=7, n_estimators=80
[CV 3/5; 124/192] END max_depth=12, max_leaf_nodes=9, min_samples_split=7, n_estimators=80; total time=   0.8s
[CV 4/5; 124/192] START max_depth=12, max_leaf_nodes=9, min_samples_split=7, n_estimators=80
[CV 4/5; 124/192] END max_depth=12, max_leaf_nodes=9, min_samples_split=7, n_estimators=80; total time=   0.7s
[CV 5/5; 124/192] START max_depth=12, max_leaf_nodes=9, min_samples_split=7, n_estimators=80
[CV 5/5; 124/192] END max_depth=12, max_leaf_nodes=9, min_samples_split=7, n_estimators=80; total time=   0.7s
[CV 1/5; 125/192] START max_depth=12, max_leaf_nodes=9, min_samples_split

[CV 2/5; 132/192] END max_depth=14, max_leaf_nodes=6, min_samples_split=5, n_estimators=80; total time=   0.6s
[CV 3/5; 132/192] START max_depth=14, max_leaf_nodes=6, min_samples_split=5, n_estimators=80
[CV 3/5; 132/192] END max_depth=14, max_leaf_nodes=6, min_samples_split=5, n_estimators=80; total time=   0.7s
[CV 4/5; 132/192] START max_depth=14, max_leaf_nodes=6, min_samples_split=5, n_estimators=80
[CV 4/5; 132/192] END max_depth=14, max_leaf_nodes=6, min_samples_split=5, n_estimators=80; total time=   0.6s
[CV 5/5; 132/192] START max_depth=14, max_leaf_nodes=6, min_samples_split=5, n_estimators=80
[CV 5/5; 132/192] END max_depth=14, max_leaf_nodes=6, min_samples_split=5, n_estimators=80; total time=   0.6s
[CV 1/5; 133/192] START max_depth=14, max_leaf_nodes=6, min_samples_split=6, n_estimators=40
[CV 1/5; 133/192] END max_depth=14, max_leaf_nodes=6, min_samples_split=6, n_estimators=40; total time=   0.3s
[CV 2/5; 133/192] START max_depth=14, max_leaf_nodes=6, min_samples_split

[CV 3/5; 140/192] END max_depth=14, max_leaf_nodes=6, min_samples_split=7, n_estimators=80; total time=   0.6s
[CV 4/5; 140/192] START max_depth=14, max_leaf_nodes=6, min_samples_split=7, n_estimators=80
[CV 4/5; 140/192] END max_depth=14, max_leaf_nodes=6, min_samples_split=7, n_estimators=80; total time=   0.6s
[CV 5/5; 140/192] START max_depth=14, max_leaf_nodes=6, min_samples_split=7, n_estimators=80
[CV 5/5; 140/192] END max_depth=14, max_leaf_nodes=6, min_samples_split=7, n_estimators=80; total time=   0.6s
[CV 1/5; 141/192] START max_depth=14, max_leaf_nodes=6, min_samples_split=8, n_estimators=40
[CV 1/5; 141/192] END max_depth=14, max_leaf_nodes=6, min_samples_split=8, n_estimators=40; total time=   0.3s
[CV 2/5; 141/192] START max_depth=14, max_leaf_nodes=6, min_samples_split=8, n_estimators=40
[CV 2/5; 141/192] END max_depth=14, max_leaf_nodes=6, min_samples_split=8, n_estimators=40; total time=   0.3s
[CV 3/5; 141/192] START max_depth=14, max_leaf_nodes=6, min_samples_split

[CV 4/5; 148/192] END max_depth=14, max_leaf_nodes=7, min_samples_split=5, n_estimators=80; total time=   0.7s
[CV 5/5; 148/192] START max_depth=14, max_leaf_nodes=7, min_samples_split=5, n_estimators=80
[CV 5/5; 148/192] END max_depth=14, max_leaf_nodes=7, min_samples_split=5, n_estimators=80; total time=   0.7s
[CV 1/5; 149/192] START max_depth=14, max_leaf_nodes=7, min_samples_split=6, n_estimators=40
[CV 1/5; 149/192] END max_depth=14, max_leaf_nodes=7, min_samples_split=6, n_estimators=40; total time=   0.3s
[CV 2/5; 149/192] START max_depth=14, max_leaf_nodes=7, min_samples_split=6, n_estimators=40
[CV 2/5; 149/192] END max_depth=14, max_leaf_nodes=7, min_samples_split=6, n_estimators=40; total time=   0.3s
[CV 3/5; 149/192] START max_depth=14, max_leaf_nodes=7, min_samples_split=6, n_estimators=40
[CV 3/5; 149/192] END max_depth=14, max_leaf_nodes=7, min_samples_split=6, n_estimators=40; total time=   0.4s
[CV 4/5; 149/192] START max_depth=14, max_leaf_nodes=7, min_samples_split

[CV 5/5; 156/192] END max_depth=14, max_leaf_nodes=7, min_samples_split=7, n_estimators=80; total time=   0.7s
[CV 1/5; 157/192] START max_depth=14, max_leaf_nodes=7, min_samples_split=8, n_estimators=40
[CV 1/5; 157/192] END max_depth=14, max_leaf_nodes=7, min_samples_split=8, n_estimators=40; total time=   0.3s
[CV 2/5; 157/192] START max_depth=14, max_leaf_nodes=7, min_samples_split=8, n_estimators=40
[CV 2/5; 157/192] END max_depth=14, max_leaf_nodes=7, min_samples_split=8, n_estimators=40; total time=   0.3s
[CV 3/5; 157/192] START max_depth=14, max_leaf_nodes=7, min_samples_split=8, n_estimators=40
[CV 3/5; 157/192] END max_depth=14, max_leaf_nodes=7, min_samples_split=8, n_estimators=40; total time=   0.3s
[CV 4/5; 157/192] START max_depth=14, max_leaf_nodes=7, min_samples_split=8, n_estimators=40
[CV 4/5; 157/192] END max_depth=14, max_leaf_nodes=7, min_samples_split=8, n_estimators=40; total time=   0.3s
[CV 5/5; 157/192] START max_depth=14, max_leaf_nodes=7, min_samples_split

[CV 1/5; 165/192] END max_depth=14, max_leaf_nodes=8, min_samples_split=6, n_estimators=40; total time=   0.4s
[CV 2/5; 165/192] START max_depth=14, max_leaf_nodes=8, min_samples_split=6, n_estimators=40
[CV 2/5; 165/192] END max_depth=14, max_leaf_nodes=8, min_samples_split=6, n_estimators=40; total time=   0.3s
[CV 3/5; 165/192] START max_depth=14, max_leaf_nodes=8, min_samples_split=6, n_estimators=40
[CV 3/5; 165/192] END max_depth=14, max_leaf_nodes=8, min_samples_split=6, n_estimators=40; total time=   0.4s
[CV 4/5; 165/192] START max_depth=14, max_leaf_nodes=8, min_samples_split=6, n_estimators=40
[CV 4/5; 165/192] END max_depth=14, max_leaf_nodes=8, min_samples_split=6, n_estimators=40; total time=   0.3s
[CV 5/5; 165/192] START max_depth=14, max_leaf_nodes=8, min_samples_split=6, n_estimators=40
[CV 5/5; 165/192] END max_depth=14, max_leaf_nodes=8, min_samples_split=6, n_estimators=40; total time=   0.3s
[CV 1/5; 166/192] START max_depth=14, max_leaf_nodes=8, min_samples_split

[CV 2/5; 173/192] END max_depth=14, max_leaf_nodes=8, min_samples_split=8, n_estimators=40; total time=   0.4s
[CV 3/5; 173/192] START max_depth=14, max_leaf_nodes=8, min_samples_split=8, n_estimators=40
[CV 3/5; 173/192] END max_depth=14, max_leaf_nodes=8, min_samples_split=8, n_estimators=40; total time=   0.3s
[CV 4/5; 173/192] START max_depth=14, max_leaf_nodes=8, min_samples_split=8, n_estimators=40
[CV 4/5; 173/192] END max_depth=14, max_leaf_nodes=8, min_samples_split=8, n_estimators=40; total time=   0.4s
[CV 5/5; 173/192] START max_depth=14, max_leaf_nodes=8, min_samples_split=8, n_estimators=40
[CV 5/5; 173/192] END max_depth=14, max_leaf_nodes=8, min_samples_split=8, n_estimators=40; total time=   0.4s
[CV 1/5; 174/192] START max_depth=14, max_leaf_nodes=8, min_samples_split=8, n_estimators=60
[CV 1/5; 174/192] END max_depth=14, max_leaf_nodes=8, min_samples_split=8, n_estimators=60; total time=   0.5s
[CV 2/5; 174/192] START max_depth=14, max_leaf_nodes=8, min_samples_split

[CV 3/5; 181/192] END max_depth=14, max_leaf_nodes=9, min_samples_split=6, n_estimators=40; total time=   0.4s
[CV 4/5; 181/192] START max_depth=14, max_leaf_nodes=9, min_samples_split=6, n_estimators=40
[CV 4/5; 181/192] END max_depth=14, max_leaf_nodes=9, min_samples_split=6, n_estimators=40; total time=   0.4s
[CV 5/5; 181/192] START max_depth=14, max_leaf_nodes=9, min_samples_split=6, n_estimators=40
[CV 5/5; 181/192] END max_depth=14, max_leaf_nodes=9, min_samples_split=6, n_estimators=40; total time=   0.3s
[CV 1/5; 182/192] START max_depth=14, max_leaf_nodes=9, min_samples_split=6, n_estimators=60
[CV 1/5; 182/192] END max_depth=14, max_leaf_nodes=9, min_samples_split=6, n_estimators=60; total time=   0.5s
[CV 2/5; 182/192] START max_depth=14, max_leaf_nodes=9, min_samples_split=6, n_estimators=60
[CV 2/5; 182/192] END max_depth=14, max_leaf_nodes=9, min_samples_split=6, n_estimators=60; total time=   0.5s
[CV 3/5; 182/192] START max_depth=14, max_leaf_nodes=9, min_samples_split

[CV 4/5; 189/192] END max_depth=14, max_leaf_nodes=9, min_samples_split=8, n_estimators=40; total time=   0.4s
[CV 5/5; 189/192] START max_depth=14, max_leaf_nodes=9, min_samples_split=8, n_estimators=40
[CV 5/5; 189/192] END max_depth=14, max_leaf_nodes=9, min_samples_split=8, n_estimators=40; total time=   0.4s
[CV 1/5; 190/192] START max_depth=14, max_leaf_nodes=9, min_samples_split=8, n_estimators=60
[CV 1/5; 190/192] END max_depth=14, max_leaf_nodes=9, min_samples_split=8, n_estimators=60; total time=   0.5s
[CV 2/5; 190/192] START max_depth=14, max_leaf_nodes=9, min_samples_split=8, n_estimators=60
[CV 2/5; 190/192] END max_depth=14, max_leaf_nodes=9, min_samples_split=8, n_estimators=60; total time=   0.5s
[CV 3/5; 190/192] START max_depth=14, max_leaf_nodes=9, min_samples_split=8, n_estimators=60
[CV 3/5; 190/192] END max_depth=14, max_leaf_nodes=9, min_samples_split=8, n_estimators=60; total time=   0.5s
[CV 4/5; 190/192] START max_depth=14, max_leaf_nodes=9, min_samples_split

GridSearchCV(cv=5, estimator=RandomForestRegressor(),
             param_grid={'max_depth': [10, 12, 14],
                         'max_leaf_nodes': [6, 7, 8, 9],
                         'min_samples_split': [5, 6, 7, 8],
                         'n_estimators': [40, 60, 70, 80]},
             verbose=32)

In [156]:
gcv.best_params_

gcv.best_score_

0.7563957103772969

In [None]:
# GridSearchCV
# RandomisedSearchCV
rf_param_dict = {
                'n_estimators': [40,60,70,80],
                'max_depth' : [10,12,14],
                'min_samples_split': [5,6,7,8],
                'max_leaf_nodes': [6,7,8,9]
                }

rcv = RandomizedSearchCV(rf_regressor,param_grid=rf_param_dict,cv = 5,verbose =32,n_iter = 100)

rcv.fit(train_final,y_train)