# Decision Tree Regressor

### Import Libraries

In [2]:
import pandas as pd
import numpy  as np

from sklearn import model_selection as ms
from sklearn import tree            as tr
from sklearn import metrics         as mt

### Load Dataset

In [3]:
# Read the feature matrix and the target for each split, pairing X with y so
# the three splits are visible at a glance. Read order is unchanged.
X_train, y_train = (
    pd.read_csv('Training/X_training.csv'),
    pd.read_csv('Training/y_training.csv'),
)
X_val, y_val = (
    pd.read_csv('Validation/X_val.csv'),
    pd.read_csv('Validation/y_val.csv'),
)
X_test, y_test = (
    pd.read_csv('Test/X_test.csv'),
    pd.read_csv('Test/y_test.csv'),
)

### Training

In [12]:
# Sweep max_depth over 1..40: fit one tree per depth on the training split and
# keep the best value of every metric as measured on that same split.
# NOTE(review): every score in this cell is in-sample, and later cells pick
# max_depth from mse_list. In-sample MSE typically keeps improving with depth,
# so consider ranking depths on the validation split instead.
m = np.arange(1, 41, 1)
mse_list = []  # rounded training MSE per depth, in the same order as m

# R² can be negative, so the running maximum must not start at 0.
max_r2 = float('-inf')
min_mse = float('inf')
min_rmse = float('inf')
min_mae = float('inf')
min_mape = float('inf')

for i in m:

    # Define (fixed seed: ties between equally good splits are otherwise
    # broken at random, which makes reruns differ)
    model = tr.DecisionTreeRegressor(max_depth=i, random_state=0)

    # Fit
    model.fit(X_train, y_train)

    # Predict
    yhat_train = model.predict(X_train)

    # R²
    r2_train = np.round(mt.r2_score(y_train, yhat_train), 3)
    max_r2 = max(max_r2, r2_train)

    # MSE (the unrounded value is kept so RMSE is not derived from a
    # rounded number)
    mse_raw = mt.mean_squared_error(y_train, yhat_train)
    mse_train = np.round(mse_raw, 3)
    mse_list.append(mse_train)
    min_mse = min(min_mse, mse_train)

    # RMSE
    rmse_train = np.round(np.sqrt(mse_raw), 3)
    min_rmse = min(min_rmse, rmse_train)

    # MAE
    mae_train = np.round(mt.mean_absolute_error(y_train, yhat_train), 3)
    min_mae = min(min_mae, mae_train)

    # MAPE
    mape_train = np.round(mt.mean_absolute_percentage_error(y_train, yhat_train), 3)
    min_mape = min(min_mape, mape_train)


print(f'Max R2: {max_r2}')
print(f'Min MSE: {min_mse}')
print(f'Min RMSE: {min_rmse}')
print(f'Min MAE: {min_mae}')
print(f'Min MAPE: {min_mape}')

Max R2: 0.992
Min MSE: 3.94
Min RMSE: 1.985
Min MAE: 0.214
Min MAPE: 0.083


### Validation

In [11]:
# Refit on the training split only, using the depth whose entry in mse_list is
# smallest, then score that model on the validation split.
best_m = mse_list.index(min(mse_list))

# Define (fixed seed so the refit is repeatable from run to run)
model = tr.DecisionTreeRegressor(max_depth=m[best_m], random_state=0)

# Fit
model.fit(X_train, y_train)

# Predict
yhat_val = model.predict(X_val)

# R²
r2_val = np.round(mt.r2_score(y_val, yhat_val), 3)

# MSE (the unrounded value is kept so RMSE is not derived from a rounded number)
mse_val_raw = mt.mean_squared_error(y_val, yhat_val)
mse_val = np.round(mse_val_raw, 3)

# RMSE
rmse_val = np.round(np.sqrt(mse_val_raw), 3)

# MAE
mae_val = np.round(mt.mean_absolute_error(y_val, yhat_val), 3)

# MAPE
mape_val = np.round(mt.mean_absolute_percentage_error(y_val, yhat_val), 3)


print(f'R2: {r2_val}')
print(f'MSE: {mse_val}')
print(f'RMSE: {rmse_val}')
print(f'MAE: {mae_val}')
print(f'MAPE: {mape_val}')

R2: -0.29
MSE: 615.935
RMSE: 24.818
MAE: 17.163
MAPE: 6.942


### Test

In [10]:
# Retrain on training + validation data with the selected depth, then score
# the model once on the held-out test split.
best_m = mse_list.index(min(mse_list))

# Define (fixed seed so the refit is repeatable from run to run)
model = tr.DecisionTreeRegressor(max_depth=m[best_m], random_state=0)

# Fit
# pd.concat keeps the column names, so the model is fitted and queried with
# the same feature names; np.concatenate would hand the estimator a bare array
# and predicting on the DataFrame X_test would then raise a feature-name
# warning. Assumes both splits share the same columns — TODO confirm.
X_train_val = pd.concat([X_train, X_val], ignore_index=True)
y_train_val = pd.concat([y_train, y_val], ignore_index=True)
model.fit(X_train_val, y_train_val)

# Predict
y_pred = model.predict(X_test)

# R²
r2_test = np.round(mt.r2_score(y_test, y_pred), 3)

# MSE (the unrounded value is kept so RMSE is not derived from a rounded number)
mse_test_raw = mt.mean_squared_error(y_test, y_pred)
mse_test = np.round(mse_test_raw, 3)

# RMSE
rmse_test = np.round(np.sqrt(mse_test_raw), 3)

# MAE
mae_test = np.round(mt.mean_absolute_error(y_test, y_pred), 3)

# MAPE
mape_test = np.round(mt.mean_absolute_percentage_error(y_test, y_pred), 3)


print(f'R2: {r2_test}')
print(f'MSE: {mse_test}')
print(f'RMSE: {rmse_test}')
print(f'MAE: {mae_test}')
print(f'MAPE: {mape_test}')

R2: -0.163
MSE: 566.18
RMSE: 23.795
MAE: 15.77
MAPE: 6.02




In [18]:
# One-row summary of R² on the three splits. The single-element 'index'
# column gives the otherwise all-scalar dict a length for the constructor.
index = [0]
r2_columns = ('index', 'R2 train', 'R2 val', 'R2 test')
r2_values = (index, max_r2, r2_val, r2_test)
r2_results = dict(zip(r2_columns, r2_values))
r2_dataframe = pd.DataFrame(data=r2_results)
r2_dataframe

Unnamed: 0,index,R2 train,R2 val,R2 test
0,0,0.992,-0.29,-0.163


In [19]:
# One-row summary of MSE on the three splits. The single-element 'index'
# column gives the otherwise all-scalar dict a length for the constructor.
index = [0]
mse_columns = ('index', 'MSE train', 'MSE val', 'MSE test')
mse_values = (index, min_mse, mse_val, mse_test)
mse_results = dict(zip(mse_columns, mse_values))
mse_dataframe = pd.DataFrame(data=mse_results)
mse_dataframe

Unnamed: 0,index,MSE train,MSE val,MSE test
0,0,3.94,615.935,566.18


In [20]:
# One-row summary of RMSE on the three splits. The single-element 'index'
# column gives the otherwise all-scalar dict a length for the constructor.
index = [0]
rmse_columns = ('index', 'RMSE train', 'RMSE val', 'RMSE test')
rmse_values = (index, min_rmse, rmse_val, rmse_test)
rmse_results = dict(zip(rmse_columns, rmse_values))
rmse_dataframe = pd.DataFrame(data=rmse_results)
rmse_dataframe

Unnamed: 0,index,RMSE train,RMSE val,RMSE test
0,0,1.985,24.818,23.795


In [21]:
# One-row summary of MAE on the three splits. The single-element 'index'
# column gives the otherwise all-scalar dict a length for the constructor.
index = [0]
mae_columns = ('index', 'MAE train', 'MAE val', 'MAE test')
mae_values = (index, min_mae, mae_val, mae_test)
mae_results = dict(zip(mae_columns, mae_values))
mae_dataframe = pd.DataFrame(data=mae_results)
mae_dataframe

Unnamed: 0,index,MAE train,MAE val,MAE test
0,0,0.214,17.163,15.77


In [22]:
# One-row summary of MAPE on the three splits. The single-element 'index'
# column gives the otherwise all-scalar dict a length for the constructor.
index = [0]
mape_columns = ('index', 'MAPE train', 'MAPE val', 'MAPE test')
mape_values = (index, min_mape, mape_val, mape_test)
mape_results = dict(zip(mape_columns, mape_values))
mape_dataframe = pd.DataFrame(data=mape_results)
mape_dataframe

Unnamed: 0,index,MAPE train,MAPE val,MAPE test
0,0,0.083,6.942,6.02
