# Decision Tree Regression

In [None]:
from sklearn import tree

In [None]:
# Toy training set: two 2-feature samples and one regression target per sample.
X = [
    [0, 0],
    [3, 3],
]
y = [0.75, 3]

In [None]:
# Create an (unfitted) regression tree with its random_state pinned to 42.
tree_reg = tree.DecisionTreeRegressor(random_state=42)

In [None]:
# Train on the toy data; the value returned by fit() is bound back to tree_reg.
tree_reg = tree_reg.fit(X, y)

In [None]:
# Query the fitted tree at (1.5, 1.5), the midpoint of the two training samples.
# As the last expression of the cell, the prediction is displayed by the notebook.
tree_reg.predict([[1.5, 1.5]])

In [None]:
# Dependencies for the 1-D regression demo
import numpy as np
from sklearn.tree import DecisionTreeRegressor
import matplotlib.pyplot as plt

# Synthetic data: 80 sorted inputs in [0, 5) with a sine target.
# Every 5th target (16 of them) gets uniform noise in (-1.5, 1.5] added.
rng = np.random.RandomState(1)
X = np.sort(5 * rng.rand(80, 1), axis=0)
y = np.sin(X).ravel()
y[::5] += 3 * (0.5 - rng.rand(16))

# Train a shallow tree and a deeper tree on the same data
# (fit() returns the estimator itself, so construction and fitting are chained)
regr_1 = DecisionTreeRegressor(max_depth=2).fit(X, y)
regr_2 = DecisionTreeRegressor(max_depth=5).fit(X, y)

# Evaluate both trees on a dense grid of inputs, shaped as a single column
X_test = np.arange(0.0, 5.0, 0.01).reshape(-1, 1)
y_1 = regr_1.predict(X_test)
y_2 = regr_2.predict(X_test)

# Draw the training points and the two piecewise-constant predictions
plt.figure(figsize=(10, 8))
plt.scatter(X, y, s=20, edgecolor="black", c="darkorange", label="data")
plt.plot(X_test, y_1, color="cornflowerblue", label="max_depth=2", linewidth=2)
plt.plot(X_test, y_2, color="yellowgreen", label="max_depth=5", linewidth=2)
plt.xlabel("data")
plt.ylabel("target")
plt.title("Decision Tree Regression")
plt.legend()
plt.show()

In [None]:
# The graphviz Python package supplies Source, which renders DOT text.
# It is imported here because this is the first cell that uses it.
import graphviz

# Export the depth-2 regressor as DOT text: out_file=None makes export_graphviz
# return the text as a string, and filled=True colours the nodes.
dot_data = tree.export_graphviz(regr_1, out_file=None, filled=True)
graph = graphviz.Source(dot_data)
# Last expression of the cell, so the notebook displays the rendered tree.
graph

In [None]:
# Same rendering as for regr_1, now for the deeper (max_depth=5) regressor.
dot_data = tree.export_graphviz(regr_2, filled=True, out_file=None)
graph = graphviz.Source(dot_data)
# Last expression of the cell, so the notebook displays the rendered tree.
graph

## Regularization

In [None]:
# Import the necessary modules and libraries
import numpy as np
from sklearn.tree import DecisionTreeRegressor
import matplotlib.pyplot as plt

# Create a random dataset: 80 sorted inputs in [0, 5) with a sine target,
# where every 5th target (16 of them) is perturbed by uniform noise.
rng = np.random.RandomState(1)
X = np.sort(5 * rng.rand(80, 1), axis=0)
y = np.sin(X).ravel()
y[::5] += 3 * (0.5 - rng.rand(16))

# Fit regression models. regr_2 is the regularized one: besides the depth
# limit it requires at least 10 training samples in every leaf.
regr_1 = DecisionTreeRegressor(max_depth=2)
regr_2 = DecisionTreeRegressor(max_depth=5, min_samples_leaf=10)
regr_1.fit(X, y)
regr_2.fit(X, y)

# Predict on a dense grid of inputs, shaped as a single column
X_test = np.arange(0.0, 5.0, 0.01)[:, np.newaxis]
y_1 = regr_1.predict(X_test)
y_2 = regr_2.predict(X_test)

# Plot the results. The legend entry for regr_2 names both of its
# hyperparameters so the regularized curve is identifiable from the plot.
plt.figure(figsize=(10, 8))
plt.scatter(X, y, s=20, edgecolor="black",
            c="darkorange", label="data")
plt.plot(X_test, y_1, color="cornflowerblue",
         label="max_depth=2", linewidth=2)
plt.plot(X_test, y_2, color="yellowgreen",
         label="max_depth=5, min_samples_leaf=10", linewidth=2)
plt.xlabel("data")
plt.ylabel("target")
plt.title("Decision Tree Regression")
plt.legend()
plt.show()

In [None]:
# Render the regularized regressor (max_depth=5, min_samples_leaf=10) fitted above.
dot_data = tree.export_graphviz(regr_2, filled=True, out_file=None)
graph = graphviz.Source(dot_data)
# Last expression of the cell, so the notebook displays the rendered tree.
graph

# Overfitting

In [None]:
from sklearn.datasets import load_iris
from sklearn import tree
# Load the iris dataset; later cells read its data, target, feature_names
# and target_names attributes.
iris = load_iris()

In [None]:
# Inputs are the first two feature columns only; targets are the class labels.
X = iris.data[:, :2]
y = iris.target
# No depth or leaf-size limit is set on the classifier.
# fit() returns the estimator, so construction and training are chained.
clf = tree.DecisionTreeClassifier(random_state=42).fit(X, y)

In [None]:
# Export the fitted iris classifier as DOT text (out_file=None returns a string).
# clf was trained on iris.data[:, 0:2], so the node labels must use the names
# of those same two columns, i.e. the first two entries of feature_names.
dot_data = tree.export_graphviz(
    clf,
    out_file=None,
    feature_names=iris.feature_names[:2],
    class_names=iris.target_names,
    rounded=True,
    filled=True,
)

In [None]:
# Wrap the DOT text exported for the iris classifier in a graphviz Source object.
graph = graphviz.Source(dot_data)
# Last expression of the cell, so the notebook displays the rendered tree.
graph

# Modelling End-to-End with a Decision Tree

In [None]:
from sklearn.datasets import make_moons

In [None]:
# Generate 1000 samples of the noisy two-moons dataset with a fixed seed.
X_data, y_data = make_moons(
    n_samples=1000,
    noise=0.5,
    random_state=42,
)

In [None]:
# Two candidate classifiers with the same seed:
#   cl1 - default settings
#   cl2 - at least 10 training samples required in every leaf
cl1 = tree.DecisionTreeClassifier(random_state=42)
cl2 = tree.DecisionTreeClassifier(random_state=42, min_samples_leaf=10)

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 10% of the moons data for testing; the split is seeded.
# NOTE: this rebinds X_test, which earlier cells used for the regression grid.
X_train, X_test, y_train, y_test = train_test_split(
    X_data,
    y_data,
    test_size=0.1,
    random_state=42,
)

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# A wider grid, kept for reference (disabled):
# params = {'max_leaf_nodes': list(range(2, 50)),
#           'min_samples_split': [2, 3, 4],
#           'min_samples_leaf': list(range(5, 20))}

# Active grid: only the minimum leaf size is searched, over the values 5..19.
params = {"min_samples_leaf": [*range(5, 20)]}

In [None]:
# Search the params grid over seeded decision trees.
# n_jobs=-1 and verbose=1 are passed through to GridSearchCV unchanged.
grid_search_cv = GridSearchCV(
    estimator=tree.DecisionTreeClassifier(random_state=42),
    param_grid=params,
    n_jobs=-1,
    verbose=1,
)

# Run the search on the training split only.
grid_search_cv.fit(X_train, y_train)

In [None]:
# Display the estimator that the grid search selected.
grid_search_cv.best_estimator_

In [None]:
from sklearn.metrics import accuracy_score

In [None]:
# Accuracy of the grid-search model on the held-out test split.
y_pred = grid_search_cv.predict(X_test)
accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Baseline: train the default-settings tree and score it on the test split.
cl1.fit(X_train, y_train)
y_pred = cl1.predict(X_test)
accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Train the min_samples_leaf=10 tree and score it on the test split.
# cl2 itself must be fitted here: it is the model whose predictions are scored.
cl2.fit(X_train, y_train)
y_pred = cl2.predict(X_test)
accuracy_score(y_test, y_pred)

In [None]:
# Display the hyperparameters of cl1.
cl1.get_params()