In [16]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split, GridSearchCV

import warnings
warnings.filterwarnings("ignore")

In [2]:
X, y = make_regression(n_features=10, n_samples=1000, n_targets=1, noise=4)

In [3]:
X

array([[-2.20870201e-02, -1.50312583e+00, -8.16356476e-01, ...,
         1.09967903e+00, -3.06387364e-01,  5.62482151e-01],
       [ 1.82289041e+00, -2.04202807e+00,  2.83927394e-01, ...,
         2.97219499e-01,  1.43591124e+00, -1.01997836e+00],
       [ 1.20506984e-01, -1.27631054e+00,  1.53797617e+00, ...,
        -1.26738871e+00,  2.63189639e-01, -2.16519132e+00],
       ...,
       [-3.06823963e-01, -2.75843842e-01, -6.30388973e-01, ...,
        -1.14950572e+00, -2.08852289e+00,  1.11432008e+00],
       [ 1.36242423e-01, -1.54932331e+00, -5.33923791e-01, ...,
         4.69817782e-03, -4.32776499e-01,  2.06942027e+00],
       [ 2.10387749e+00, -1.63232386e-02, -1.10771771e+00, ...,
         8.65639307e-02, -6.70293071e-04, -3.59047162e-01]])

In [6]:
y

array([ 1.49376418e+02,  7.13015115e+01, -1.30161057e+02,  2.29992092e+02,
        1.14674359e+02,  1.86932283e+01,  1.15320729e+02, -1.16816420e+02,
       -4.00235118e+02,  3.84419238e+02, -2.94262916e+00, -2.58673373e+01,
       -2.24129288e+01, -9.21120580e+01,  1.08151571e+02, -2.24397247e+02,
       -3.44540077e+02, -1.79533722e+02, -5.74375369e+01, -1.86537797e+02,
       -1.97525670e+02,  1.34959563e+02,  1.84662318e+02, -1.67025387e+02,
       -9.24815677e+01, -1.48642242e+02,  1.50543693e+02,  2.33512592e+02,
       -1.30095060e+02, -9.45762617e+01, -3.10223184e+02,  8.65193438e+01,
        9.94959476e+01,  2.13295672e+02, -1.96628178e+02, -7.69963627e+01,
        1.63911730e+02, -1.52351490e+02,  2.42051883e+02, -2.80272416e+02,
       -1.23327748e+02, -1.75035814e+02, -2.51670823e+02,  3.24255262e+02,
        2.29079743e+02,  1.25347330e+02,  2.02111847e+01, -1.16014123e+02,
        1.80986053e+02, -4.97558295e+01,  4.28592146e+01, -6.94191042e+01,
       -2.67662454e+02,  

In [7]:
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=234, train_size=0.75)

In [8]:
X_tr.shape

(750, 10)

In [9]:
y_tr.shape

(750,)

In [10]:
model = DecisionTreeRegressor()

In [11]:
model.fit(X_tr, y_tr)

In [12]:
y_pr = model.predict(X_te)

In [13]:
rmse = np.sqrt(mean_squared_error(y_te, y_pr))
rmse

145.1332708423711

In [15]:
r2_score(y_te, y_pr)

0.40032718680285584

In [18]:
param_list = {
    "max_depth":[1, 2, 3, 4, 5, 6],
    "min_samples_split": [2, 3, 4, 1] ,
    "max_features":[3, 4, 5, 6, 7]
}

In [19]:
gscv = GridSearchCV(estimator=DecisionTreeRegressor(), param_grid=param_list, n_jobs=-1, verbose=True)

In [20]:
gscv.fit(X_tr, y_tr)

Fitting 5 folds for each of 120 candidates, totalling 600 fits


In [21]:
gscv.best_params_

{'max_depth': 6, 'max_features': 7, 'min_samples_split': 2}

In [22]:
m = DecisionTreeRegressor(max_depth=6, max_features=7, min_samples_split=2)
m.fit(X_tr, y_tr)
pr = m.predict(X_te)
r2_score(y_te, pr)

0.32569449297596154