In [16]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score
from sklearn.ensemble import BaggingRegressor

In [3]:
# Read the Boston Housing CSV into a DataFrame and preview its first rows.
# NOTE(review): the path is absolute ('/content/...'), so the file must exist
# at exactly that location for this cell to run.
df = pd.read_csv(filepath_or_buffer='/content/BostonHousing.csv')
df.head(n=5)

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,b,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [5]:
# Separate the regression target ('medv') from the remaining columns,
# which are used as the feature matrix.
y = df['medv']
X = df.drop(columns=['medv'])

In [6]:
# Preview the first five rows of the feature matrix.
X.head(n=5)

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,b,lstat
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33


In [7]:
# Preview the first five values of the target series.
y.head(n=5)

Unnamed: 0,medv
0,24.0
1,21.6
2,34.7
3,33.4
4,36.2


In [11]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# identical from one run to the next.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.8,
    test_size=0.2,
    random_state=123,
)

# Report the dimensions of each piece of the split.
print('Shape of X_train : ', X_train.shape)
print('Shape of X_test : ', X_test.shape)
print('Shape of y_train : ', y_train.shape)
print('Shape of y_test : ', y_test.shape)

Shape of X_train :  (404, 13)
Shape of X_test :  (102, 13)
Shape of y_train :  (404,)
Shape of y_test :  (102,)


In [13]:
# Fit three baseline regressors on the training split so that their
# test-set R2 scores can be compared.
linear_regression = LinearRegression()

# DecisionTreeRegressor randomly permutes the candidate features at every
# split, so an unseeded tree can differ between runs. Fixing the seed makes
# the fitted tree (and the score derived from it) reproducible.
decision_tree = DecisionTreeRegressor(random_state=1)

# NOTE(review): KNN is distance-based and the features are passed in
# unscaled here -- consider standardising them; confirm whether this
# baseline is meant to be unscaled.
knn = KNeighborsRegressor()

linear_regression.fit(X_train, y_train)
decision_tree.fit(X_train, y_train)
knn.fit(X_train, y_train)

In [14]:
# Predict the held-out targets with each fitted baseline model, in the order
# linear regression, decision tree, KNN.
(
    y_pred_linear_regression,
    y_pred_decision_tree,
    y_pred_knn,
) = [
    model.predict(X_test)
    for model in (linear_regression, decision_tree, knn)
]

In [15]:
# Report the test-set coefficient of determination (R2) of each baseline.
print(
    "R2 Score for Linear Regression: ",
    r2_score(y_true=y_test, y_pred=y_pred_linear_regression),
)
print(
    "R2 Score for Decision Tree: ",
    r2_score(y_true=y_test, y_pred=y_pred_decision_tree),
)
print(
    "R2 Score for KNN: ",
    r2_score(y_true=y_test, y_pred=y_pred_knn),
)

R2 Score for Linear Regression:  0.6592466510354096
R2 Score for Decision Tree:  0.42996644449679977
R2 Score for KNN:  0.5475962186976784


## Bagging Regressor

In [17]:
# Bagging ensemble with library-default settings; the seed fixes the
# resampling so that the fit is repeatable.
bag_regressor = BaggingRegressor(random_state=1)
bag_regressor.fit(X=X_train, y=y_train)

In [18]:
# Predict on the held-out split once and keep the predictions.
y_pred_bagging_regressor = bag_regressor.predict(X_test)

# R2 on the training split (via the estimator) and on the test split (from
# the stored predictions; a regressor's score() is the R2 of its predictions).
print('Training Coefficient of R2 : %.3f' % bag_regressor.score(X_train, y_train))
print('Test Coefficient of R2 : %.3f' % r2_score(y_test, y_pred_bagging_regressor))

Training Coefficient of R2 : 0.980
Test Coefficient of R2 : 0.818


In [20]:
%%time

n_samples = X_train.shape[0]
n_features = X_train.shape[1]

params = {
    'estimator': [None, LinearRegression(), KNeighborsRegressor()],
    'n_estimators': [20,50,100],
    'max_samples': [0.5,1.0],
    'max_features': [0.5,1.0],
    'bootstrap': [True, False],
    'bootstrap_features': [True, False]
}

bagging_regressor_grid = GridSearchCV(BaggingRegressor(random_state=1, n_jobs=-1), param_grid =params, cv=3, n_jobs=-1, verbose=1)
bagging_regressor_grid.fit(X_train, y_train)

print('Train R^2 Score : %.3f'%bagging_regressor_grid.best_estimator_.score(X_train, y_train))
print('Test R^2 Score : %.3f'%bagging_regressor_grid.best_estimator_.score(X_test, y_test))
print('Best R^2 Score Through Grid Search : %.3f'%bagging_regressor_grid.best_score_)
print('Best Parameters : ',bagging_regressor_grid.best_params_)

Fitting 3 folds for each of 144 candidates, totalling 432 fits
Train R^2 Score : 0.983
Test R^2 Score : 0.805
Best R^2 Score Through Grid Search : 0.871
Best Parameters :  {'bootstrap': True, 'bootstrap_features': False, 'estimator': None, 'max_features': 1.0, 'max_samples': 1.0, 'n_estimators': 50}
CPU times: user 1.8 s, sys: 168 ms, total: 1.97 s
Wall time: 1min 17s
