In [83]:
import numpy as np
from sklearn.metrics import mean_squared_error
from ml_from_scratch.tree import DecisionTreeRegressor
import pandas as pd

In [57]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

# Load the digits dataset and pull out the feature matrix and the target vector
digits = load_digits()
X, y = digits.data, digits.target

array([0, 1, 2, ..., 8, 9, 8])

In [58]:
# Hold out 30% of the samples as a test split. The split is stratified on y
# and uses a fixed random_state so it is repeatable across runs.
split_options = dict(test_size=0.3, stratify=y, random_state=3)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [59]:
# REGRESSION - A Very Fit Tree
# Build two decision tree regressors that differ only in the split criterion;
# max depth is left at the estimator's default.

# Tree grown with the squared-error (MSE) criterion
clf_mse = DecisionTreeRegressor(criteria='squared_error')
clf_mse.fit(X_train, y_train)

# Tree grown with the absolute-error (MAE) criterion
clf_mae = DecisionTreeRegressor(criteria='absolute_error')
clf_mae.fit(X_train, y_train)

In [96]:
# Predict on the train and test splits with each fitted tree, then report
# the mean squared error (rounded to 5 decimals) for every combination.
y_pred_train_mse, y_pred_test_mse = clf_mse.predict(X_train), clf_mse.predict(X_test)
y_pred_train_mae, y_pred_test_mae = clf_mae.predict(X_train), clf_mae.predict(X_test)

# Tree fitted with the squared-error criterion
mse_train_sq = round(mean_squared_error(y_train, y_pred_train_mse), 5)
mse_test_sq = round(mean_squared_error(y_test, y_pred_test_mse), 5)
print(f"MSE train using MSE Criteria : {mse_train_sq}")
print(f"MSE test using MSE Criteria : {mse_test_sq}")

# Two blank lines separate the two reports
print("")
print("")

# Tree fitted with the absolute-error criterion (still scored with MSE)
mse_train_abs = round(mean_squared_error(y_train, y_pred_train_mae), 5)
mse_test_abs = round(mean_squared_error(y_test, y_pred_test_mae), 5)
print(f"MSE train using MAE Criteria : {mse_train_abs}")
print(f"MSE test using MAE Criteria : {mse_test_abs}")

MSE train using MSE Criteria : 0.00955
MSE test using MSE Criteria : 3.62315


MSE train using MAE Criteria : 0.00955
MSE test using MAE Criteria : 3.62315


In [95]:
# Hyperparameter grid: every split criterion is tried with every depth limit
p_criterion = ['squared_error', 'absolute_error']
p_max_depth = [1, 3, 5, 7, 9, 12]

# Collects one record per (criterion, max_depth) combination
results = []

for criterion in p_criterion:
    for max_depth in p_max_depth:
        # Fit a fresh tree for this combination
        model = DecisionTreeRegressor(criteria=criterion, max_depth=max_depth)
        model.fit(X_train, y_train)

        # Score the tree on the training split and on the test split
        trains = model.predict(X_train)
        preds = model.predict(X_test)
        mse_train = round(mean_squared_error(y_train, trains), 5)
        mse_test = round(mean_squared_error(y_test, preds), 5)

        record = {
            'MSE train': mse_train,
            'MSE test': mse_test,
            'P_Criterion': criterion,
            'P_MaxDepth': max_depth,
        }
        results.append(record)

# Tabulate the records, lowest test MSE first.
# NOTE(review): ranking by test-set MSE uses the test split for model
# selection; consider a separate validation split or cross-validation.
results = pd.DataFrame(results).sort_values(by='MSE test', ascending=True)
results

Unnamed: 0,MSE train,MSE test,P_Criterion,P_MaxDepth
3,0.84296,3.04851,squared_error,7
4,0.16479,3.33867,squared_error,9
2,2.46908,3.4005,squared_error,5
5,0.02413,3.5557,squared_error,12
10,0.55902,3.97402,absolute_error,9
9,1.43537,3.99471,absolute_error,7
11,0.19532,4.10023,absolute_error,12
8,3.00876,4.4168,absolute_error,5
1,5.12117,5.60433,squared_error,3
7,5.21733,5.68392,absolute_error,3
