In [1]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import BaggingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.preprocessing import  OneHotEncoder, LabelEncoder
from sklearn.compose import make_column_transformer, make_column_selector
from tqdm import tqdm

In [2]:
# Load the concrete dataset and split it into train and test partitions.
# The CSV location can be overridden with the CONCRETE_DATA_CSV environment
# variable so the notebook is not tied to one machine's directory layout; when
# the variable is unset, the original hard-coded location is used.
DATA_PATH = os.environ.get(
    "CONCRETE_DATA_CSV",
    "C:/Python/Cases/Concrete_Strength/Concrete_Data.csv",
)
concrete = pd.read_csv(DATA_PATH)

# 'Strength' is the regression target; every remaining column is a feature.
X = concrete.drop('Strength', axis=1)
y = concrete['Strength']

# 30% of the rows are held out for testing; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=25)

In [8]:
# Base learners that will each be wrapped in a bagging ensemble below.
# Only the decision tree takes a seed here; the other two are built with
# their default arguments.
dtc, knn, lr = (
    DecisionTreeRegressor(random_state=25),
    KNeighborsRegressor(),
    LinearRegression(),
)

In [10]:
# Fit a bagging ensemble for every (base estimator, ensemble size) pair and
# record the R^2 obtained on the held-out test set.
est_list = [dtc, knn, lr]
n_est = [10, 15, 25, 50]
scores = []
for base_model in tqdm(est_list):
    for n_bags in n_est:
        # fit() returns the fitted ensemble itself, so it can be chained.
        bagg = BaggingRegressor(
            estimator=base_model,
            n_estimators=n_bags,
            random_state=25,
        ).fit(X_train, y_train)
        y_pred = bagg.predict(X_test)
        test_r2 = r2_score(y_test, y_pred)
        scores.append([base_model, n_bags, test_r2])

# One row per combination, shown best score first.
score_columns = ['Estimator', 'B-Samples', 'score']
df_scores = pd.DataFrame(scores, columns=score_columns)
df_scores.sort_values(by='score', ascending=False)

100%|██████████| 3/3 [00:00<00:00,  3.08it/s]


Unnamed: 0,Estimator,B-Samples,score
2,DecisionTreeRegressor(random_state=25),25,0.881644
3,DecisionTreeRegressor(random_state=25),50,0.880861
1,DecisionTreeRegressor(random_state=25),15,0.877123
0,DecisionTreeRegressor(random_state=25),10,0.876503
7,KNeighborsRegressor(),50,0.71214
6,KNeighborsRegressor(),25,0.708045
5,KNeighborsRegressor(),15,0.702541
4,KNeighborsRegressor(),10,0.696449
8,LinearRegression(),10,0.634298
9,LinearRegression(),15,0.634216


For the same base estimator (a decision tree), try different values of its `max_depth` parameter.

In [13]:
# Compare tree depths for a bagged decision tree (25 estimators per ensemble)
# and record the R^2 obtained on the held-out test set for each depth.
depths = [None, 3, 5, 7]
scores = []
for d in tqdm( depths ):
    # Use a loop-local name instead of rebinding the notebook-level `dtc`.
    # Rebinding it would leave `dtc` pointing at the last (max_depth=7) tree,
    # so re-running the earlier cell that builds `est_list = [dtc, knn, lr]`
    # would silently evaluate a different estimator.
    tree = DecisionTreeRegressor(random_state=25, max_depth=d)
    bagg = BaggingRegressor(random_state=25, n_estimators=25, estimator=tree )
    bagg.fit(X_train, y_train)
    y_pred = bagg.predict(X_test)
    scores.append([d, r2_score(y_test, y_pred)])

# One row per depth, shown best score first.
df_scores = pd.DataFrame( scores, columns=['depth', 'score'] )
df_scores.sort_values('score', ascending=False)

100%|██████████| 4/4 [00:00<00:00, 10.40it/s]


Unnamed: 0,depth,score
0,,0.881644
3,7.0,0.858886
2,5.0,0.808134
1,3.0,0.659029


In [15]:
# Fit a bagged, depth-unrestricted decision tree with out-of-bag scoring
# enabled and report the resulting OOB score.
dtc = DecisionTreeRegressor(max_depth=None, random_state=25)
bagg = BaggingRegressor(
    estimator=dtc,
    n_estimators=25,
    oob_score=True,
    random_state=25,
).fit(X_train, y_train)  # fit() returns the fitted ensemble itself
print("OOB score:", bagg.oob_score_)

OOB score: 0.894564632485158
