In [37]:
import pandas as pd
import numpy as np
from sklearn.ensemble import BaggingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split,KFold,GridSearchCV
from sklearn.metrics import r2_score

In [16]:
# Read the dataset from a CSV file located next to the notebook.
concrete = pd.read_csv('Concrete_Data.csv')

In [17]:
# Target is the 'Strength' column; every other column is used as a feature.
y = concrete['Strength']
X = concrete.drop(columns='Strength')

In [18]:
# Seeded hold-out split so the scores below are repeatable.
# NOTE(review): test_size=0.7 places 70% of the rows in the test set and leaves
# only 30% for training -- confirm this is intended (a 70/30 train/test split
# is the more common choice).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.7,
    random_state=23,
)

In [19]:
# Baseline model: linear regression with default settings.
lr = LinearRegression()

In [20]:
# Train the baseline on the training split (the fitted estimator is displayed).
lr.fit(X=X_train, y=y_train)

In [21]:
# Predictions of the linear baseline for the held-out rows.
y_pred = lr.predict(X=X_test)

In [22]:
# R^2 of the linear baseline on the test split.
r2_score(y_true=y_test, y_pred=y_pred)

0.6007247084208873

In [23]:
# Bagging ensemble of 15 linear regressors, seeded for repeatability.
bgr = BaggingRegressor(
    lr,
    n_estimators=15,
    random_state=23,
)

In [24]:
# Train the bagged linear ensemble on the training split.
bgr.fit(X=X_train, y=y_train)

In [25]:
# Overwrites y_pred with the bagged linear ensemble's test-set predictions.
y_pred = bgr.predict(X=X_test)

In [26]:
# R^2 of the bagged linear ensemble on the test split.
r2_score(y_true=y_test, y_pred=y_pred)

0.6026136411294161

In [28]:
from sklearn.tree import DecisionTreeRegressor

In [29]:
# Single regression tree with default hyper-parameters and a fixed seed.
dtr = DecisionTreeRegressor(random_state=23)

In [30]:
# Train the single tree on the training split.
dtr.fit(X=X_train, y=y_train)

In [31]:
# Overwrites y_pred with the single tree's test-set predictions.
y_pred = dtr.predict(X=X_test)

In [32]:
# R^2 of the single decision tree on the test split.
r2_score(y_true=y_test, y_pred=y_pred)

0.7275115568212169

In [33]:
# Rebind bgr: bagging ensemble of 15 decision trees, seeded for repeatability.
bgr = BaggingRegressor(
    dtr,
    n_estimators=15,
    random_state=23,
)

In [34]:
# Train the bagged tree ensemble on the training split.
bgr.fit(X=X_train, y=y_train)

In [35]:
# Overwrites y_pred with the bagged tree ensemble's test-set predictions.
y_pred = bgr.predict(X=X_test)

In [36]:
# R^2 of the bagged tree ensemble on the test split.
r2_score(y_true=y_test, y_pred=y_pred)

0.8288776668798505

# Grid search

In [38]:
# Shuffled 5-fold splitter with a fixed seed, used as the CV scheme for the search.
kfold = KFold(
    n_splits=5,
    shuffle=True,
    random_state=23,
)

In [39]:
# Fresh, unfitted tree to serve as the base estimator for the tuned ensemble.
dtr = DecisionTreeRegressor(random_state=23)

In [40]:
# Bagging wrapper around the tree; n_estimators is left at its default here
# because the grid search sets it.
bgr = BaggingRegressor(dtr, random_state=23)

In [41]:
# Show every tunable parameter name; parameters of the wrapped tree appear
# with the `estimator__` prefix, which is the form the parameter grid uses.
bgr.get_params()

{'base_estimator': 'deprecated',
 'bootstrap': True,
 'bootstrap_features': False,
 'estimator__ccp_alpha': 0.0,
 'estimator__criterion': 'squared_error',
 'estimator__max_depth': None,
 'estimator__max_features': None,
 'estimator__max_leaf_nodes': None,
 'estimator__min_impurity_decrease': 0.0,
 'estimator__min_samples_leaf': 1,
 'estimator__min_samples_split': 2,
 'estimator__min_weight_fraction_leaf': 0.0,
 'estimator__random_state': 23,
 'estimator__splitter': 'best',
 'estimator': DecisionTreeRegressor(random_state=23),
 'max_features': 1.0,
 'max_samples': 1.0,
 'n_estimators': 10,
 'n_jobs': None,
 'oob_score': False,
 'random_state': 23,
 'verbose': 0,
 'warm_start': False}

In [42]:
# Search space: 3 x 3 x 3 x 3 = 81 combinations.
# `estimator__*` keys tune the wrapped decision tree; `n_estimators` tunes the ensemble size.
params = {
    'estimator__max_depth': [None, 3, 5],
    'estimator__min_samples_leaf': [1, 5, 10],
    'estimator__min_samples_split': [2, 5, 10],
    'n_estimators': [10, 30, 50],
}

In [43]:
# Exhaustive search over `params`, scored by R^2 on the folds from `kfold`.
# verbose=3 logs one line per individual fold fit.
gcv = GridSearchCV(
    estimator=bgr,
    param_grid=params,
    scoring='r2',
    cv=kfold,
    verbose=3,
)

In [44]:
# Run the search on the full dataset (X, y) rather than the earlier train split;
# the held-out folds come from `kfold`.
gcv.fit(X=X, y=y)

Fitting 5 folds for each of 81 candidates, totalling 405 fits
[CV 1/5] END estimator__max_depth=None, estimator__min_samples_leaf=1, estimator__min_samples_split=2, n_estimators=10;, score=0.899 total time=   0.1s
[CV 2/5] END estimator__max_depth=None, estimator__min_samples_leaf=1, estimator__min_samples_split=2, n_estimators=10;, score=0.894 total time=   0.1s
[CV 3/5] END estimator__max_depth=None, estimator__min_samples_leaf=1, estimator__min_samples_split=2, n_estimators=10;, score=0.916 total time=   0.1s
[CV 4/5] END estimator__max_depth=None, estimator__min_samples_leaf=1, estimator__min_samples_split=2, n_estimators=10;, score=0.909 total time=   0.1s
[CV 5/5] END estimator__max_depth=None, estimator__min_samples_leaf=1, estimator__min_samples_split=2, n_estimators=10;, score=0.879 total time=   0.1s
[CV 1/5] END estimator__max_depth=None, estimator__min_samples_leaf=1, estimator__min_samples_split=2, n_estimators=30;, score=0.897 total time=   0.2s
[CV 2/5] END estimator__ma

[CV 1/5] END estimator__max_depth=None, estimator__min_samples_leaf=5, estimator__min_samples_split=2, n_estimators=50;, score=0.866 total time=   0.2s
[CV 2/5] END estimator__max_depth=None, estimator__min_samples_leaf=5, estimator__min_samples_split=2, n_estimators=50;, score=0.865 total time=   0.2s
[CV 3/5] END estimator__max_depth=None, estimator__min_samples_leaf=5, estimator__min_samples_split=2, n_estimators=50;, score=0.906 total time=   0.2s
[CV 4/5] END estimator__max_depth=None, estimator__min_samples_leaf=5, estimator__min_samples_split=2, n_estimators=50;, score=0.887 total time=   0.2s
[CV 5/5] END estimator__max_depth=None, estimator__min_samples_leaf=5, estimator__min_samples_split=2, n_estimators=50;, score=0.869 total time=   0.2s
[CV 1/5] END estimator__max_depth=None, estimator__min_samples_leaf=5, estimator__min_samples_split=5, n_estimators=10;, score=0.866 total time=   0.0s
[CV 2/5] END estimator__max_depth=None, estimator__min_samples_leaf=5, estimator__min_sa

[CV 1/5] END estimator__max_depth=None, estimator__min_samples_leaf=10, estimator__min_samples_split=5, n_estimators=30;, score=0.833 total time=   0.1s
[CV 2/5] END estimator__max_depth=None, estimator__min_samples_leaf=10, estimator__min_samples_split=5, n_estimators=30;, score=0.838 total time=   0.1s
[CV 3/5] END estimator__max_depth=None, estimator__min_samples_leaf=10, estimator__min_samples_split=5, n_estimators=30;, score=0.874 total time=   0.1s
[CV 4/5] END estimator__max_depth=None, estimator__min_samples_leaf=10, estimator__min_samples_split=5, n_estimators=30;, score=0.860 total time=   0.1s
[CV 5/5] END estimator__max_depth=None, estimator__min_samples_leaf=10, estimator__min_samples_split=5, n_estimators=30;, score=0.847 total time=   0.1s
[CV 1/5] END estimator__max_depth=None, estimator__min_samples_leaf=10, estimator__min_samples_split=5, n_estimators=50;, score=0.836 total time=   0.1s
[CV 2/5] END estimator__max_depth=None, estimator__min_samples_leaf=10, estimator_

[CV 2/5] END estimator__max_depth=3, estimator__min_samples_leaf=1, estimator__min_samples_split=10, n_estimators=30;, score=0.687 total time=   0.1s
[CV 3/5] END estimator__max_depth=3, estimator__min_samples_leaf=1, estimator__min_samples_split=10, n_estimators=30;, score=0.743 total time=   0.1s
[CV 4/5] END estimator__max_depth=3, estimator__min_samples_leaf=1, estimator__min_samples_split=10, n_estimators=30;, score=0.682 total time=   0.1s
[CV 5/5] END estimator__max_depth=3, estimator__min_samples_leaf=1, estimator__min_samples_split=10, n_estimators=30;, score=0.671 total time=   0.1s
[CV 1/5] END estimator__max_depth=3, estimator__min_samples_leaf=1, estimator__min_samples_split=10, n_estimators=50;, score=0.676 total time=   0.1s
[CV 2/5] END estimator__max_depth=3, estimator__min_samples_leaf=1, estimator__min_samples_split=10, n_estimators=50;, score=0.682 total time=   0.1s
[CV 3/5] END estimator__max_depth=3, estimator__min_samples_leaf=1, estimator__min_samples_split=10,

[CV 4/5] END estimator__max_depth=3, estimator__min_samples_leaf=10, estimator__min_samples_split=2, n_estimators=10;, score=0.683 total time=   0.0s
[CV 5/5] END estimator__max_depth=3, estimator__min_samples_leaf=10, estimator__min_samples_split=2, n_estimators=10;, score=0.672 total time=   0.0s
[CV 1/5] END estimator__max_depth=3, estimator__min_samples_leaf=10, estimator__min_samples_split=2, n_estimators=30;, score=0.674 total time=   0.1s
[CV 2/5] END estimator__max_depth=3, estimator__min_samples_leaf=10, estimator__min_samples_split=2, n_estimators=30;, score=0.684 total time=   0.1s
[CV 3/5] END estimator__max_depth=3, estimator__min_samples_leaf=10, estimator__min_samples_split=2, n_estimators=30;, score=0.740 total time=   0.1s
[CV 4/5] END estimator__max_depth=3, estimator__min_samples_leaf=10, estimator__min_samples_split=2, n_estimators=30;, score=0.683 total time=   0.1s
[CV 5/5] END estimator__max_depth=3, estimator__min_samples_leaf=10, estimator__min_samples_split=2,

[CV 5/5] END estimator__max_depth=5, estimator__min_samples_leaf=1, estimator__min_samples_split=2, n_estimators=50;, score=0.830 total time=   0.1s
[CV 1/5] END estimator__max_depth=5, estimator__min_samples_leaf=1, estimator__min_samples_split=5, n_estimators=10;, score=0.817 total time=   0.0s
[CV 2/5] END estimator__max_depth=5, estimator__min_samples_leaf=1, estimator__min_samples_split=5, n_estimators=10;, score=0.821 total time=   0.0s
[CV 3/5] END estimator__max_depth=5, estimator__min_samples_leaf=1, estimator__min_samples_split=5, n_estimators=10;, score=0.864 total time=   0.0s
[CV 4/5] END estimator__max_depth=5, estimator__min_samples_leaf=1, estimator__min_samples_split=5, n_estimators=10;, score=0.825 total time=   0.0s
[CV 5/5] END estimator__max_depth=5, estimator__min_samples_leaf=1, estimator__min_samples_split=5, n_estimators=10;, score=0.836 total time=   0.0s
[CV 1/5] END estimator__max_depth=5, estimator__min_samples_leaf=1, estimator__min_samples_split=5, n_esti

[CV 1/5] END estimator__max_depth=5, estimator__min_samples_leaf=5, estimator__min_samples_split=5, n_estimators=50;, score=0.815 total time=   0.1s
[CV 2/5] END estimator__max_depth=5, estimator__min_samples_leaf=5, estimator__min_samples_split=5, n_estimators=50;, score=0.819 total time=   0.1s
[CV 3/5] END estimator__max_depth=5, estimator__min_samples_leaf=5, estimator__min_samples_split=5, n_estimators=50;, score=0.868 total time=   0.1s
[CV 4/5] END estimator__max_depth=5, estimator__min_samples_leaf=5, estimator__min_samples_split=5, n_estimators=50;, score=0.827 total time=   0.1s
[CV 5/5] END estimator__max_depth=5, estimator__min_samples_leaf=5, estimator__min_samples_split=5, n_estimators=50;, score=0.826 total time=   0.1s
[CV 1/5] END estimator__max_depth=5, estimator__min_samples_leaf=5, estimator__min_samples_split=10, n_estimators=10;, score=0.813 total time=   0.0s
[CV 2/5] END estimator__max_depth=5, estimator__min_samples_leaf=5, estimator__min_samples_split=10, n_es

[CV 1/5] END estimator__max_depth=5, estimator__min_samples_leaf=10, estimator__min_samples_split=10, n_estimators=30;, score=0.794 total time=   0.1s
[CV 2/5] END estimator__max_depth=5, estimator__min_samples_leaf=10, estimator__min_samples_split=10, n_estimators=30;, score=0.805 total time=   0.1s
[CV 3/5] END estimator__max_depth=5, estimator__min_samples_leaf=10, estimator__min_samples_split=10, n_estimators=30;, score=0.854 total time=   0.1s
[CV 4/5] END estimator__max_depth=5, estimator__min_samples_leaf=10, estimator__min_samples_split=10, n_estimators=30;, score=0.820 total time=   0.1s
[CV 5/5] END estimator__max_depth=5, estimator__min_samples_leaf=10, estimator__min_samples_split=10, n_estimators=30;, score=0.818 total time=   0.1s
[CV 1/5] END estimator__max_depth=5, estimator__min_samples_leaf=10, estimator__min_samples_split=10, n_estimators=50;, score=0.802 total time=   0.1s
[CV 2/5] END estimator__max_depth=5, estimator__min_samples_leaf=10, estimator__min_samples_sp

In [45]:
# Parameter combination with the highest mean cross-validated R^2.
gcv.best_params_

{'estimator__max_depth': None,
 'estimator__min_samples_leaf': 1,
 'estimator__min_samples_split': 2,
 'n_estimators': 50}

In [46]:
# Mean cross-validated R^2 achieved by the best parameter combination.
gcv.best_score_

0.9092434907551853