# Ensaio de Machine Learning #

## 0.0 Import library ##

In [13]:
import pandas                       as pd
import numpy as np
from sklearn import preprocessing   as pp
from sklearn import linear_model    as lm
from sklearn import metrics         as mt


## 1.0 Load dataset ##

In [16]:
x_train = pd.read_csv( '../../dataset/regression/x_training.csv')
y_train = pd.read_csv( '../../dataset/regression/y_training.csv')
x_val = pd.read_csv( '../../dataset/regression/x_validation.csv')
y_val = pd.read_csv( '../../dataset/regression/y_val.csv')
x_test = pd.read_csv( '../../dataset/regression/x_test.csv')
y_test = pd.read_csv( '../../dataset/regression/y_test.csv')

## 2.0 Training model ##

## Dados de treino ##

In [20]:
# Grau do polinômio
d = np.arange( 1, 4)
a = np.arange( 1, 20)
rmse_list = []
max_r2 = 0
min_mse = float('inf')
min_rmse = float('inf')
min_mae = float('inf')
min_mape = float('inf')

for i in d:
    for alpha in a:
        # Define Polynomial Ridge
        poly = pp.PolynomialFeatures( degree=i )
        X_poly_train = poly.fit_transform( x_train )

        # training
        model = lm.Ridge(alpha=alpha, max_iter=1000)
        model.fit( X_poly_train , y_train )

        # performance
        yhat_train = model.predict( X_poly_train )

        # R squared
        r_squared = np.round( mt.r2_score( y_train, yhat_train ), 3 )
        if r_squared > max_r2:
            max_r2 = r_squared
        # MSE
        mse = np.round(mt.mean_squared_error( y_train, yhat_train), 3)
        if mse < min_mse:
            min_mse = mse
        # RMSE
        rmse = np.round(np.sqrt( mse ), 3)
        rmse_list.append( rmse )
        if rmse < min_rmse:
            min_rmse = rmse

        # MAE
        mae = np.round(mt.mean_absolute_error( y_train, yhat_train), 3)
        if mae < min_mae:
            min_mae = mae
        # MAPE
        mape = np.round(mt.mean_absolute_percentage_error( y_train, yhat_train), 3) 
        if mape < min_mape:
            min_mape = mape
    
print('degree {} | alhpa {} | MAX R2: {} | MIN MSE: {} | MIN RMSE: {} | MIN MAE: {} | MIN MAPE: {} |'.format(i, alpha, max_r2, min_mse, min_rmse, min_mae, min_mape ) )

degree 3 | alhpa 19 | MAX R2: 0.135 | MIN MSE: 413.4 | MIN RMSE: 20.332 | MIN MAE: 16.065 | MIN MAPE: 8.037 |


## Dados de validação ##

In [23]:
# Encontrando melhore parâmetros sobre os dados de validação

# Grau do polinômio
d = np.arange( 1, 4)
a = np.arange( 1, 20)
rmse_list = []
max_r2 = 0
min_mse = float('inf')
min_rmse = float('inf')
min_mae = float('inf')
min_mape = float('inf')

for i in d:
    for alpha in a:
        # Define Polynomial Ridge
        poly = pp.PolynomialFeatures( degree=i )
        X_poly_train = poly.fit_transform( x_train )
        X_poly_val = poly.transform( x_val )

        # training
        model = lm.Ridge(alpha=alpha, max_iter=1000)
        model.fit( X_poly_train , y_train )

        # performance
        yhat_val = model.predict( X_poly_val )

        # R squared
        r_squared = np.round( mt.r2_score( y_val, yhat_val ), 3 )
        if r_squared > max_r2:
            max_r2 = r_squared
        # MSE
        mse = np.round(mt.mean_squared_error( y_val, yhat_val), 3)
        if mse < min_mse:
            min_mse = mse
        # RMSE
        rmse = np.round(np.sqrt( mse ), 3)
        rmse_list.append( rmse )
        if rmse < min_rmse:
            min_rmse = rmse

        # MAE
        mae = np.round(mt.mean_absolute_error( y_val, yhat_val), 3)
        if mae < min_mae:
            min_mae = mae
        # MAPE
        mape = np.round(mt.mean_absolute_percentage_error( y_val, yhat_val), 3) 
        if mape < min_mape:
            min_mape = mape
    
print('degree {} | alhpa {} | MAX R2: {} | MIN MSE: {} | MIN RMSE: {} | MIN MAE: {} | MIN MAPE: {} |'.format(i, alpha, max_r2, min_mse, min_rmse, min_mae, min_mape ) )

degree 3 | alhpa 19 | MAX R2: 0.068 | MIN MSE: 445.184 | MIN RMSE: 21.099 | MIN MAE: 16.739 | MIN MAPE: 8.53 |


## Dados de teste ##

In [26]:
# Encontrando melhore parâmetros sobre os dados de teste

# Grau do polinômio
d = np.arange( 1, 4)
a = np.arange( 1, 20)
rmse_list = []
max_r2 = 0
min_mse = float('inf')
min_rmse = float('inf')
min_mae = float('inf')
min_mape = float('inf')

for i in d:
    for alpha in a:
        # Define Polynomial Ridge
        poly = pp.PolynomialFeatures( degree=i )
        X_poly_train = poly.fit_transform( x_train )
        X_poly_val = poly.transform( x_val )
        X_poly_test = poly.transform( x_test )
        
        # Concatenando os dados de treino e validação
        X_train_val = np.concatenate((X_poly_train, X_poly_val))
        y_train_val = np.concatenate((y_train, y_val))

        # training
        model = lm.Ridge(alpha=alpha, max_iter=1000)
        model.fit( X_train_val, y_train_val )

        # performance
        y_pred_test = model.predict( X_poly_test )

        # R squared
        r_squared = np.round( mt.r2_score( y_test, y_pred_test ), 3 )
        if r_squared > max_r2:
            max_r2 = r_squared
        # MSE
        mse = np.round(mt.mean_squared_error( y_test, y_pred_test), 3)
        if mse < min_mse:
            min_mse = mse
        # RMSE
        rmse = np.round(np.sqrt( mse ), 3)
        rmse_list.append( rmse )
        if rmse < min_rmse:
            min_rmse = rmse

        # MAE
        mae = np.round(mt.mean_absolute_error( y_test, y_pred_test), 3)
        if mae < min_mae:
            min_mae = mae
        # MAPE
        mape = np.round(mt.mean_absolute_percentage_error( y_test, y_pred_test), 3) 
        if mape < min_mape:
            min_mape = mape
    
print('degree {} | alhpa {} | MAX R2: {} | MIN MSE: {} | MIN RMSE: {} | MIN MAE: {} | MIN MAPE: {} |'.format(i, alpha, max_r2, min_mse, min_rmse, min_mae, min_mape ) )

degree 3 | alhpa 19 | MAX R2: 0.09 | MIN MSE: 442.967 | MIN RMSE: 21.047 | MIN MAE: 16.706 | MIN MAPE: 8.147 |
