In [2]:
from sklearn.datasets import fetch_california_housing

# Pull the California housing data; the returned object exposes the feature
# matrix, the target vector and the column names as attributes.
housing = fetch_california_housing()

# Bind the feature matrix and the regression target to their own names.
X = housing.data
y = housing.target

# Summarise what was loaded: column names first, then the array shapes.
print('Dataset feature names:', housing.feature_names)
print('Dataset features shape:', X.shape)
print('Dataset target shape:', y.shape)

Dataset feature names: ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']
Dataset features shape: (20640, 8)
Dataset target shape: (20640,)


In [3]:
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score

In [4]:
from sklearn.model_selection import train_test_split

# Keep 80% of the rows for training; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.80,
    random_state=123,
)
# Print the shape of every piece of the split in one line.
print(
    'Train/Test Sets Sizes : ',
    *(part.shape for part in (X_train, X_test, y_train, y_test)),
)

Train/Test Sets Sizes :  (16512, 8) (4128, 8) (16512,) (4128,)


In [5]:
# Baseline regressors, left at library defaults apart from the tree's seed.
lr = LinearRegression()
# Seed the tree so that repeated runs build the same model; 123 matches the
# seed already used for the train/test split.
dt = DecisionTreeRegressor(random_state=123)
# NOTE(review): KNN is distance-based and the features are passed in unscaled;
# consider standardising the inputs before fitting — confirm with the author.
knn = KNeighborsRegressor()

In [6]:
# Train the three baselines on the training split. The KNN fit stays as the
# final expression so the cell still echoes that fitted estimator.
for baseline in (lr, dt):
    baseline.fit(X_train, y_train)
knn.fit(X_train, y_train)

In [7]:
# Bare estimator expression: the result is not bound to any name, so it only
# provides the cell's displayed value.
# NOTE(review): this looks like the echoed repr of the fitted `knn` — confirm.
KNeighborsRegressor(
    n_neighbors=5,
    weights='uniform',
    algorithm='auto',
    leaf_size=30,
    p=2,
    metric='minkowski',
    metric_params=None,
    n_jobs=None,
)

In [8]:
# Predict on the held-out features with each baseline, in the order
# linear regression, decision tree, k-nearest neighbours.
y_pred1, y_pred2, y_pred3 = (
    fitted.predict(X_test) for fitted in (lr, dt, knn)
)

In [11]:
# Report the test-set R^2 of each baseline's predictions, one line per model.
for caption, predicted in (
    ("R^2 score for LR", y_pred1),
    ("R^2 score for DT", y_pred2),
    ("R^2 score for KNN", y_pred3),
):
    print(caption, r2_score(y_test, predicted))

R^2 score for LR 0.6104546894797869
R^2 score for DT 0.5961847645184306
R^2 score for KNN 0.16261917827057237


In [13]:
from sklearn.ensemble import BaggingRegressor

# Bagging ensemble with every option left at its default; only the seed is pinned.
bag_regressor = BaggingRegressor(random_state=1)
# Kept as the final expression so the cell echoes the fitted ensemble.
bag_regressor.fit(X=X_train, y=y_train)

In [14]:
# Bare estimator expression: the result is not bound to any name, so it only
# provides the cell's displayed value.
# NOTE(review): this looks like the echoed repr of `bag_regressor` — confirm.
BaggingRegressor(
    estimator=None,
    n_estimators=10,
    max_samples=1.0,
    max_features=1.0,
    bootstrap=True,
    bootstrap_features=False,
    oob_score=False,
    warm_start=False,
    n_jobs=None,
    random_state=1,
    verbose=0,
)

In [16]:
# Test-set predictions from the bagged ensemble.
y_preds = bag_regressor.predict(X_test)

# Score the ensemble on both splits so the train/test gap is visible.
train_r2 = bag_regressor.score(X_train, y_train)
test_r2 = bag_regressor.score(X_test, y_test)
print('Training Coefficient of R^2 : %.3f' % train_r2)
print('Test Coefficient of R^2 : %.3f' % test_r2)

Training Coefficient of R^2 : 0.963
Test Coefficient of R^2 : 0.792


In [21]:
from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import BaggingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
import warnings
warnings.filterwarnings("ignore")

# Load California housing dataset
housing = fetch_california_housing()
X, y = housing.data, housing.target

# Split dataset (80% train / 20% test).
# NOTE(review): this rebinds X_train/X_test/y_train/y_test with random_state=1,
# while the earlier split in this notebook used random_state=123, so the scores
# printed below are measured on a different test set than the earlier ones.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

# Define hyperparameter grid (3 * 3 * 2 * 2 * 2 * 2 = 144 combinations).
# 'estimator': None leaves BaggingRegressor on its default base estimator.
params = {
    'estimator': [None, LinearRegression(), KNeighborsRegressor()],
    'n_estimators': [20, 50, 100],
    'max_samples': [0.5, 1.0],
    'max_features': [0.5, 1.0],
    'bootstrap': [True, False],
    'bootstrap_features': [True, False]
}

# GridSearchCV with 3-fold CV.
# Parallelism is requested at the grid-search level only: asking for all cores
# both here and inside every BaggingRegressor can oversubscribe the CPU, and the
# 432 independent fits already give the outer level enough work to distribute.
bagging_regressor_grid = GridSearchCV(
    BaggingRegressor(random_state=1),
    param_grid=params,
    cv=3,
    n_jobs=-1,
    verbose=1
)

# Fit the model (runs every candidate on every fold)
bagging_regressor_grid.fit(X_train, y_train)

# Print results: scores of the selected estimator on both splits, the best
# score reported by the search itself, and the winning parameter combination.
print('Train R^2 Score : %.3f' % bagging_regressor_grid.best_estimator_.score(X_train, y_train))
print('Test R^2 Score : %.3f' % bagging_regressor_grid.best_estimator_.score(X_test, y_test))
print('Best R^2 Score Through Grid Search : %.3f' % bagging_regressor_grid.best_score_)
print('Best Parameters :', bagging_regressor_grid.best_params_)


Fitting 3 folds for each of 144 candidates, totalling 432 fits
Train R^2 Score : 1.000
Test R^2 Score : 0.809
Best R^2 Score Through Grid Search : 0.801
Best Parameters : {'bootstrap': False, 'bootstrap_features': True, 'estimator': None, 'max_features': 1.0, 'max_samples': 1.0, 'n_estimators': 100}


In [22]:
# Display the estimator selected by the grid search; as a bare final
# expression, its value is what the cell echoes.
bagging_regressor_grid.best_estimator_