# Trees

In [1]:
import sys

sys.path.insert(1, "..")

from classic.model.tree import DecisionTreeRegressor, DecisionTreeClassifier
from classic.model.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.datasets import load_diabetes, load_digits
from sklearn import tree, ensemble
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.model_selection import train_test_split

In [2]:
dataset = load_diabetes()
X, y = dataset['data'], dataset['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
X_train[:2]

array([[ 0.01991321, -0.04464164,  0.00457217,  0.04597234, -0.01808039,
        -0.05454912,  0.06336665, -0.03949338,  0.02865846,  0.06105391],
       [-0.09996055, -0.04464164, -0.06764124, -0.10895595, -0.07449446,
        -0.07271173,  0.01550536, -0.03949338, -0.04987245, -0.00936191]])

## 1. DecisionTreeRegressor

In [3]:
dtr = DecisionTreeRegressor(max_depth=3, min_samples_split=5)
dtr.fit(X_train, y_train)
pred = dtr.predict(X_test)
print(f"Decision Tree Regressor mse = {mean_squared_error(y_test, pred)}")

Decision Tree Regressor mse = 4218.546797954536


In [4]:
dtr_sklearn = tree.DecisionTreeRegressor(max_depth=3, min_samples_split=5)
dtr_sklearn.fit(X_train, y_train)
pred = dtr_sklearn.predict(X_test)
print(f"Decision Tree Regressor (Sklearn) mse = {mean_squared_error(y_test, pred)}")

Decision Tree Regressor (Sklearn) mse = 4192.416768542552


# 2. DecisionTreeClassifier

In [5]:
dataset = load_digits()
X, y = dataset['data'], dataset['target']
X = X.reshape(-1, 64)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
X_train[:2]

array([[ 0.,  0.,  5., 13., 15.,  6.,  0.,  0.,  0.,  2., 16.,  9., 16.,
        13.,  0.,  0.,  0.,  4., 14.,  0., 10., 16.,  2.,  0.,  0.,  4.,
        15., 11., 15., 16.,  1.,  0.,  0.,  0.,  7., 10.,  3., 13.,  8.,
         0.,  0.,  0.,  3.,  0.,  0., 12.,  5.,  0.,  0.,  0., 13., 11.,
         4., 16.,  4.,  0.,  0.,  0.,  7., 14., 16., 11.,  1.,  0.],
       [ 0.,  1.,  8., 11., 13., 12.,  0.,  0.,  0.,  5.,  9.,  0.,  4.,
        16.,  1.,  0.,  0.,  7.,  5.,  0.,  5., 16.,  5.,  0.,  0.,  1.,
        13., 11., 13.,  6.,  8.,  0.,  0.,  0.,  3.,  4.,  1.,  4.,  8.,
         0.,  0.,  0.,  0.,  0.,  0.,  7.,  5.,  0.,  0.,  2.,  8.,  1.,
         2., 14.,  0.,  0.,  0.,  0.,  8., 13., 11.,  3.,  0.,  0.]])

In [6]:
dtc = DecisionTreeClassifier(num_classes=10, max_depth=10, min_samples_split=5)
dtc.fit(X_train, y_train)
pred = dtc.predict(X_test)
pred_ = pred.argmax(axis=1)
print(f"Accuracy = {accuracy_score(y_test, pred_)}")

Accuracy = 0.8444444444444444


In [7]:
dtc = tree.DecisionTreeClassifier(max_depth=10, min_samples_split=5)
dtc.fit(X_train, y_train)
pred = dtc.predict(X_test)
print(f"Accuracy (Sklearn) = {accuracy_score(y_test, pred)}")

Accuracy (Sklearn) = 0.837037037037037


## 3. RandomForestClassifier

In [8]:
rfc = RandomForestClassifier(
    num_classes=10, max_depth=10, min_samples_split=5, colsample_bynode=0.5
)
rfc.fit(X_train, y_train)
pred = rfc.predict(X_test)
pred_ = pred.argmax(axis=1)
print(f"Accuracy = {accuracy_score(y_test, pred_)}")

Accuracy = 0.9611111111111111


In [9]:
rfc = ensemble.RandomForestClassifier(
    max_depth=10, min_samples_split=5, max_features=0.5
)
rfc.fit(X_train, y_train)
pred = rfc.predict(X_test)
print(f"Accuracy (Sklearn) = {accuracy_score(y_test, pred)}")

Accuracy (Sklearn) = 0.9592592592592593


## 4. RandomForestRegressor

In [10]:
dataset = load_diabetes()
X, y = dataset['data'], dataset['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
X_train[:2]

array([[-0.09269548, -0.04464164, -0.0816528 , -0.05731319, -0.06073493,
        -0.0680145 ,  0.0486401 , -0.0763945 , -0.0664902 , -0.02178823],
       [ 0.01628068,  0.05068012,  0.01427248,  0.00121528,  0.00118295,
        -0.02135538, -0.03235593,  0.03430886,  0.07496573,  0.04034337]])

In [11]:
rfr = RandomForestRegressor(max_depth=3, min_samples_split=5, colsample_bynode=0.5)
rfr.fit(X_train, y_train)
pred = rfr.predict(X_test)
print(f"Random Forest Regressor mse = {mean_squared_error(y_test, pred)}")

Random Forest Regressor mse = 3226.7848207503025


In [12]:
rfr = ensemble.RandomForestRegressor(max_depth=3, min_samples_split=5, max_features=0.5)
rfr.fit(X_train, y_train)
pred = rfr.predict(X_test)
print(f"Random Forest Regressor (Sklearn) mse = {mean_squared_error(y_test, pred)}")

Random Forest Regressor (Sklearn) mse = 3199.318284368496
