# DECISION TREE REGRESSOR

### Importar Bibliotecas

In [1]:
import pandas as pd
import numpy as np
from sklearn import metrics as mt
from sklearn import tree as tr
import warnings

# Install a global "ignore" filter so that no Python warnings are shown
# for the remainder of the session (no category or module is specified,
# so the filter matches every warning).
warnings.filterwarnings("ignore")

### Importar bases de dados

In [2]:
# Load the feature/target CSVs for the training, validation and test
# splits. Files are read in the order listed and unpacked pairwise
# (features first, then targets) for each split.
x_train, y_train, x_valid, y_valid, x_test, y_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../datasets/regressao/X_training.csv',
        '../datasets/regressao/y_training.csv',
        '../datasets/regressao/X_validation.csv',
        '../datasets/regressao/y_val.csv',
        '../datasets/regressao/X_test.csv',
        '../datasets/regressao/y_test.csv',
    )
)

### TREINAMENTO

In [3]:
# Training: fit a decision tree with default hyperparameters on the
# training split.
tree_reg = tr.DecisionTreeRegressor().fit(x_train, y_train)

# Prediction on the same rows the model was fitted on, so every score
# below is an in-sample (training) metric.
y_pred = tree_reg.predict(x_train)

# Compute the metrics; RMSE is derived as the square root of the MSE.
r2_t = mt.r2_score(y_train, y_pred)
mse_t = mt.mean_squared_error(y_train, y_pred)
rmse_t = np.sqrt(mse_t)
mae_t = mt.mean_absolute_error(y_train, y_pred)
mape_t = mt.mean_absolute_percentage_error(y_train, y_pred)

# Report each metric with three decimal places.
print(f'R2: {r2_t:.3f}')
print(f'MSE: {mse_t:.3f}')
print(f'RMSE: {rmse_t:.3f}')
print(f'MAE: {mae_t:.3f}')
print(f'MAPE: {mape_t:.3f}')

R2: 0.992
MSE: 3.940
RMSE: 1.985
MAE: 0.214
MAPE: 0.083


### VALIDAÇÃO

In [4]:
# Training: build a fresh default decision tree and fit it on the
# training split only.
tree_reg = tr.DecisionTreeRegressor().fit(x_train, y_train)

# Prediction on the validation features, which were not used for fitting.
y_pred_valid = tree_reg.predict(x_valid)

# Compute the validation metrics; RMSE is the square root of the MSE.
r2_v = mt.r2_score(y_valid, y_pred_valid)
mse_v = mt.mean_squared_error(y_valid, y_pred_valid)
rmse_v = np.sqrt(mse_v)
mae_v = mt.mean_absolute_error(y_valid, y_pred_valid)
mape_v = mt.mean_absolute_percentage_error(y_valid, y_pred_valid)

# Report each metric with three decimal places.
print(f'R2: {r2_v:.3f}')
print(f'MSE: {mse_v:.3f}')
print(f'RMSE: {rmse_v:.3f}')
print(f'MAE: {mae_v:.3f}')
print(f'MAPE: {mape_v:.3f}')

R2: -0.293
MSE: 617.528
RMSE: 24.850
MAE: 17.141
MAPE: 6.880


### TESTE

In [5]:
# Training: fit a tree limited to depth 50 on the training and
# validation rows stacked together (row-wise concatenation).
tree_reg = tr.DecisionTreeRegressor(max_depth=50).fit(
    pd.concat([x_train, x_valid], axis=0),
    pd.concat([y_train, y_valid], axis=0),
)

# Prediction on the held-out test features.
y_pred_test = tree_reg.predict(x_test)

# Compute the test metrics; RMSE is the square root of the MSE.
r2_test = mt.r2_score(y_test, y_pred_test)
mse_test = mt.mean_squared_error(y_test, y_pred_test)
rmse_test = np.sqrt(mse_test)
mae_test = mt.mean_absolute_error(y_test, y_pred_test)
mape_test = mt.mean_absolute_percentage_error(y_test, y_pred_test)

# Report each metric with three decimal places.
print(f'R2: {r2_test:.3f}')
print(f'MSE: {mse_test:.3f}')
print(f'RMSE: {rmse_test:.3f}')
print(f'MAE: {mae_test:.3f}')
print(f'MAPE: {mape_test:.3f}')



R2: -0.150
MSE: 560.107
RMSE: 23.667
MAE: 15.695
MAPE: 6.091


# SALVANDO RESULTADOS

In [6]:
# One record per split: the algorithm label followed by the five scores
# rounded to three decimals. Key order is fixed because it determines
# the column order of the CSV rows written below.
train_metrics = {
    "Algoritmo": "Decision Tree Regressor",
    **{
        label: np.round(score, 3)
        for label, score in zip(
            ("R-Squared", "MSE", "RMSE", "MAE", "MAPE"),
            (r2_t, mse_t, rmse_t, mae_t, mape_t),
        )
    },
}
validation_metrics = {
    "Algoritmo": "Decision Tree Regressor",
    **{
        label: np.round(score, 3)
        for label, score in zip(
            ("R-Squared", "MSE", "RMSE", "MAE", "MAPE"),
            (r2_v, mse_v, rmse_v, mae_v, mape_v),
        )
    },
}
test_metrics = {
    "Algoritmo": "Decision Tree Regressor",
    **{
        label: np.round(score, 3)
        for label, score in zip(
            ("R-Squared", "MSE", "RMSE", "MAE", "MAPE"),
            (r2_test, mse_test, rmse_test, mae_test, mape_test),
        )
    },
}

# Append each record as a single row (row label 0, no header line) to
# the CSV for its split in the current directory.
pd.DataFrame(train_metrics, index=[0]).to_csv(
    "./reg_train_metrics.csv",
    mode="a",
    header=False,
)
pd.DataFrame(validation_metrics, index=[0]).to_csv(
    "./reg_validation_metrics.csv",
    mode="a",
    header=False,
)
pd.DataFrame(test_metrics, index=[0]).to_csv(
    "./reg_test_metrics.csv",
    mode="a",
    header=False,
)