In [5]:
import pandas as pd
import numpy as np

from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import r2_score
from sklearn.datasets import fetch_california_housing  

# ðŸ”¹ load the dataset
housing = fetch_california_housing()

# ðŸ”¹ create DataFrame
df = pd.DataFrame(housing.data, columns=housing.feature_names)
df["Target"] = housing.target


In [6]:
df.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,Target
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422


In [10]:
X = df.drop("Target", axis=1)
y = df["Target"]

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [14]:
rt = DecisionTreeRegressor(criterion="squared_error", random_state=42)

In [15]:
rt.fit(X_train, y_train)

0,1,2
,criterion,'squared_error'
,splitter,'best'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,42
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [17]:
y_pred = rt.predict(X_test)

In [18]:
y_pred

array([0.414  , 1.203  , 5.00001, ..., 5.00001, 0.66   , 2.172  ],
      shape=(4128,))

In [19]:
r2_score(y_test, y_pred)

0.622075845135081

In [20]:
param_grid = {
    "criterion": ["squared_error", "friedman_mse"],
    "max_depth": [None, 5, 10, 20, 30],
    "min_samples_split": [2, 5, 10],
    "min_samples_leaf": [1, 2, 4]
}


In [21]:
reg = GridSearchCV(DecisionTreeRegressor(), param_grid=param_grid)

In [22]:
reg.fit(X_train,y_train)

0,1,2
,estimator,DecisionTreeRegressor()
,param_grid,"{'criterion': ['squared_error', 'friedman_mse'], 'max_depth': [None, 5, ...], 'min_samples_leaf': [1, 2, ...], 'min_samples_split': [2, 5, ...]}"
,scoring,
,n_jobs,
,refit,True
,cv,
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,criterion,'squared_error'
,splitter,'best'
,max_depth,10
,min_samples_split,2
,min_samples_leaf,4
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [None]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV

dt = DecisionTreeRegressor(random_state=42)

param_grid = {
    "criterion": ["squared_error", "absolute_error"],
    "max_depth": [2, 4, 8, 10, None],
    "max_features": [0.25, 0.5, 1.0],
    "min_samples_split": [2, 5, 10, 0.1, 0.2],
    "min_samples_leaf": [1, 2, 4]
}

grid = GridSearchCV(
    estimator=dt,
    param_grid=param_grid,
    cv=5,
    scoring="r2",
    n_jobs=-1
)

grid.fit(X_train, y_train)

In [None]:
reg.best_score_

In [None]:
reg.best_params_

In [None]:
for importance, name in sorted(
    zip(rt.feature_importances_, X_train.columns),
    reverse=True
):
    print(name, importance)