# Trees

In [1]:
from classic.model.tree import DecisionTreeRegressor, DecisionTreeClassifier
from classic.model.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.datasets import load_diabetes, load_digits
from sklearn import tree, ensemble
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.model_selection import train_test_split

In [2]:
dataset = load_diabetes()
X, y = dataset['data'], dataset['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
X_train[:2]

array([[ 0.07803383,  0.05068012, -0.02452876, -0.04240564,  0.00668676,
         0.05286081, -0.06917231,  0.08080427, -0.03712884,  0.0569118 ],
       [ 0.0090156 , -0.04464164, -0.01267283,  0.02875809, -0.01808039,
        -0.00507166, -0.04708248,  0.03430886,  0.02337142, -0.0052198 ]])

## 1. DecisionTreeRegressor

In [3]:
dtr = DecisionTreeRegressor(max_depth=3, min_samples_split=5)
dtr.fit(X_train, y_train)
pred = dtr.predict(X_test)
print(f"Decision Tree Regressor mse = {mean_squared_error(y_test, pred)}")

Decision Tree Regressor mse = 4438.447053432027


In [4]:
dtr_sklearn = tree.DecisionTreeRegressor(max_depth=3, min_samples_split=5)
dtr_sklearn.fit(X_train, y_train)
pred = dtr_sklearn.predict(X_test)
print(f"Decision Tree Regressor (Sklearn) mse = {mean_squared_error(y_test, pred)}")

Decision Tree Regressor (Sklearn) mse = 4378.882662412793


# 2. DecisionTreeClassifier

In [5]:
dataset = load_digits()
X, y = dataset['data'], dataset['target']
X = X.reshape(-1, 64)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
X_train[:2]

array([[ 0.,  4., 13., 16., 16., 12.,  3.,  0.,  0.,  3.,  7.,  4., 13.,
        16.,  6.,  0.,  0.,  0.,  0.,  8., 15.,  5.,  0.,  0.,  0.,  0.,
         0., 12.,  8.,  0.,  0.,  0.,  0.,  0.,  0.,  7., 12.,  0.,  0.,
         0.,  0.,  0.,  0.,  4., 12.,  0.,  0.,  0.,  0.,  1.,  7., 12.,
        11.,  0.,  0.,  0.,  0.,  3., 15., 12.,  2.,  0.,  0.,  0.],
       [ 0.,  6., 16., 16., 16., 15., 10.,  0.,  0.,  9., 16., 13.,  8.,
         6.,  5.,  0.,  0., 12., 16.,  1.,  0.,  0.,  0.,  0.,  0., 10.,
        16.,  7.,  0.,  0.,  0.,  0.,  0.,  3., 15., 15.,  3.,  0.,  0.,
         0.,  0.,  0.,  3., 16., 13.,  0.,  0.,  0.,  0.,  1.,  5., 16.,
         9.,  0.,  0.,  0.,  0.,  9., 16., 11.,  0.,  0.,  0.,  0.]])

In [6]:
dtc = DecisionTreeClassifier(num_classes=10, max_depth=10, min_samples_split=5)
dtc.fit(X_train, y_train)
pred = dtc.predict(X_test)
pred_ = pred.argmax(axis=1)
print(f"Accuracy = {accuracy_score(y_test, pred_)}")

Accuracy = 0.8518518518518519


In [7]:
dtc = tree.DecisionTreeClassifier(max_depth=10, min_samples_split=5)
dtc.fit(X_train, y_train)
pred = dtc.predict(X_test)
print(f"Accuracy = {accuracy_score(y_test, pred)}")

Accuracy = 0.8444444444444444


## 3. RandomForestClassifier

In [8]:
rfc = RandomForestClassifier(
    num_classes=10, max_depth=10, min_samples_split=5, colsample_bynode=0.5
)
rfc.fit(X_train, y_train)
pred = rfc.predict(X_test)
pred_ = pred.argmax(axis=1)
print(f"Accuracy = {accuracy_score(y_test, pred_)}")

Accuracy = 0.9666666666666667


In [9]:
rfc = ensemble.RandomForestClassifier(
    max_depth=10, min_samples_split=5, max_features=0.5
)
rfc.fit(X_train, y_train)
pred = rfc.predict(X_test)
print(f"Accuracy = {accuracy_score(y_test, pred)}")

Accuracy = 0.9629629629629629


## 4. RandomForestRegressor

In [10]:
dataset = load_diabetes()
X, y = dataset['data'], dataset['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
X_train[:2]

array([[ 0.0090156 , -0.04464164, -0.01267283,  0.02875809, -0.01808039,
        -0.00507166, -0.04708248,  0.03430886,  0.02337142, -0.0052198 ],
       [-0.07453279, -0.04464164,  0.0433734 , -0.03321323,  0.01219057,
         0.00025186,  0.06336665, -0.03949338, -0.02712902, -0.04664087]])

In [11]:
rfr = RandomForestRegressor(max_depth=3, min_samples_split=5, colsample_bynode=0.5)
rfr.fit(X_train, y_train)
pred = rfr.predict(X_test)
print(f"Random Forest Regressor mse = {mean_squared_error(y_test, pred)}")

Random Forest Regressor mse = 3649.1340157616573


In [12]:
rfr = ensemble.RandomForestRegressor(max_depth=3, min_samples_split=5, max_features=0.5)
rfr.fit(X_train, y_train)
pred = rfr.predict(X_test)
print(f"Sklearn Random Forest Regressor mse = {mean_squared_error(y_test, pred)}")

Sklearn Random Forest Regressor mse = 3593.775873269387
