In [1]:
import numpy as np
from sklearn import ensemble
from sklearn.datasets import _california_housing
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV

# Load the California housing frame through the public sklearn.datasets entry
# point; the underscore-prefixed module is a private implementation detail.
data, target = fetch_california_housing(as_frame=True, return_X_y=True)
# Remove three predictors so every model below is fit on the remaining columns.
data = data.drop(columns=["Population", "AveOccup", "MedInc"])

In [2]:
from sklearn.model_selection import train_test_split

# Hold out a quarter of the rows for testing; the fixed seed pins the shuffle.
data_train, data_test, target_train, target_test = train_test_split(
    data,
    target,
    test_size=0.25,
    random_state=42,
)

## Random Forest

In [4]:
# Base regressor for the search; the seed makes each fitted forest repeatable.
model = ensemble.RandomForestRegressor(
    random_state=42,
)

# Candidate hyper-parameter values the randomized search samples from.
parameters = {
    'min_samples_leaf': [1, 2, 5, 10],
    'min_samples_split': [2, 5, 10, 15, 100],
    'max_features': [1.0, 'sqrt'],
    'n_estimators': [200, 400, 600],
}

# Seed the search itself as well: without random_state, RandomizedSearchCV
# samples a different set of candidates on every run, so best_params_ and the
# reported score would not be reproducible after Restart & Run All.
clf = RandomizedSearchCV(
    estimator=model,
    param_distributions=parameters,
    n_jobs=-1,
    random_state=42,
)

_ = clf.fit(data_train, target_train)
print(clf.best_params_)
# Held-out score of the refit best estimator, scaled to a percentage.
print(clf.score(data_test, target_test)*100)



{'n_estimators': 600}
83.84761760258371


In [6]:
# Small forest (10 trees, n_jobs=-1) that the AdaBoost cells below use as
# their base learner.
rfr = ensemble.RandomForestRegressor(n_estimators=10, n_jobs=-1)

## AdaBoost

In [7]:
# Boost the small random forest `rfr`. The keyword is `estimator`: the older
# `base_estimator` spelling was deprecated in scikit-learn 1.2 and removed in
# 1.4, where passing it raises a TypeError.
model = ensemble.AdaBoostRegressor(
    random_state=42,
    estimator=rfr,
)

# Exhaustive grid: 3 ensemble sizes x 3 learning rates.
parameters = {
    'n_estimators': [10, 50, 100],
    'learning_rate': [0.01, 0.1, 1],
}

clf = GridSearchCV(estimator=model, param_grid=parameters, n_jobs=-1)

_ = clf.fit(data_train, target_train)
print(clf.best_params_)
# Held-out score of the refit best estimator, scaled to a percentage.
print(clf.score(data_test, target_test)*100)

{'learning_rate': 1, 'n_estimators': 100}
84.30546681633243


In [63]:
# Refit AdaBoost with fixed hyper-parameters (100 rounds, learning rate 1).
# `estimator` replaces `base_estimator`, which was deprecated in
# scikit-learn 1.2 and removed in 1.4.
model = ensemble.AdaBoostRegressor(
    random_state=42,
    estimator=rfr,
    n_estimators=100,
    learning_rate=1,
)

_ = model.fit(data_train, target_train)

In [65]:
from matplotlib import pyplot as plt
from sklearn import tree

# Report the fitted model's held-out score as a percentage.
print(f"R2 : {model.score(data_test, target_test)*100}")

R2 : 82.28197691079582
