In [651]:
import numpy as np
from sklearn.datasets import fetch_california_housing

# Load the California housing dataset as one DataFrame; the target column
# ("MedHouseVal") is read from this same frame below.
california_housing = fetch_california_housing(as_frame=True).frame

target_column = "MedHouseVal"
# Feature columns deliberately excluded from the model inputs; whatever
# columns remain after the drop are the features used for training.
dropped_columns = ["Population", "AveOccup", "AveBedrms", "HouseAge", "AveRooms"]

target = california_housing[target_column]
# `drop` returns a new frame, so `california_housing` itself is left intact.
data = california_housing.drop(columns=[target_column, *dropped_columns])

In [543]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
data_train, data_test, target_train, target_test = train_test_split(
    data,
    target,
    test_size=0.25,
    random_state=42,
)

In [661]:
from sklearn import ensemble

# Extremely-randomized-trees regressor, trained on the training split only.
model = ensemble.ExtraTreesRegressor(
    # Tree-growing hyperparameters.
    max_features="sqrt",
    min_samples_split=5,
    # Execution settings: n_jobs=-1 asks for all available cores, and the
    # fixed seed makes the fitted forest reproducible.
    n_jobs=-1,
    random_state=42,
)
# Bind the return value of `fit` to `_` so the estimator repr is not echoed
# as the cell output.
_ = model.fit(data_train, target_train)

In [662]:
import pandas as pd

# One row per training feature, ranked from most to least important
# according to the fitted model's `feature_importances_`.
pd.DataFrame(
    {"importance": model.feature_importances_},
    index=data_train.columns,
).sort_values(by="importance", ascending=False)

Unnamed: 0,importance
MedInc,0.481876
Longitude,0.281393
Latitude,0.236731


In [663]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import max_error

# Predict once and reuse the result for every metric instead of re-running
# inference on the test set inside each print statement.
predictions = model.predict(data_test)

# `squared=False` was deprecated in scikit-learn 1.4 and removed in 1.6, so
# take the square root of the MSE explicitly; this works on every version.
# (`np` is the numpy import from the first cell.)
rmse = np.sqrt(mean_squared_error(target_test, predictions))

print("RMSE : "+str(rmse))
print("MAE : "+str(mean_absolute_error(target_test, predictions)))
# Largest absolute residual on the test set (printed without a label).
print(max_error(target_test, predictions))

# `score` is scaled by 100 here, so the printed "R2" value is a percentage.
print("R2 : "+str(model.score(data_test, target_test)*100))

RMSE : 0.4845599984529276
MAE : 0.32064062386627923
3.5955091666666665
R2 : 82.25550336494842
