# Ensaio de Machine Learning - Regressão

## 0. Bibliotecas e _Helper Functions_

In [25]:
import warnings
import numpy  as np
import pandas as pd
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet, RANSACRegressor
from sklearn.tree         import DecisionTreeRegressor
from sklearn.ensemble     import RandomForestRegressor
from sklearn.preprocessing import PolynomialFeatures, MinMaxScaler
from sklearn.metrics       import r2_score, mean_absolute_error, mean_absolute_percentage_error, mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline      import Pipeline

In [26]:
def personal_settings():
    pd.options.display.max_columns = None
    pd.options.display.max_rows = None
    pd.set_option('display.float_format', lambda x:'%.2f' % x)
    warnings.filterwarnings('ignore')


def model_avaliation(model_name, y_true, y_pred):
    r2 = r2_score( y_true, y_pred )
    mse = mean_squared_error( y_true, y_pred )
    rmse = mean_squared_error( y_true, y_pred, squared=False )
    mae = mean_absolute_error( y_true, y_pred )
    mape = mean_absolute_percentage_error( y_true, y_pred )
    
    return pd.DataFrame( {'Model': model_name,
                         'R²': r2,
                         'MSE': mse,
                         'RMSE': rmse,
                         'MAE': mae,
                         'MAPE': mape}, index=[0] )

personal_settings()

## 1. Carregando Dados

### Treinamento

In [27]:
X_train = pd.read_csv( 'data/X_training.csv', low_memory=False )
print( X_train.shape )
display( X_train.sample( 5 ) )

(10547, 13)


Unnamed: 0,song_duration_ms,acousticness,danceability,energy,instrumentalness,key,liveness,loudness,audio_mode,speechiness,tempo,time_signature,audio_valence
6036,-1.27,0.8,0.54,0.48,0.03,0.73,0.31,-0.43,1,0.06,1.62,0.8,0.86
1593,0.01,0.44,0.66,0.56,0.0,0.0,0.08,-0.0,1,0.26,-0.18,0.8,0.32
5477,0.46,0.53,0.63,0.43,0.0,0.36,0.07,-0.12,1,0.06,-1.57,0.8,0.25
6161,-0.65,0.83,0.47,0.23,0.01,0.27,0.11,-1.32,1,0.03,-2.14,0.8,0.36
1498,-0.44,0.15,0.62,0.76,0.01,0.73,0.21,0.64,0,0.13,-1.13,0.8,0.72


In [28]:
y_train = pd.read_csv( 'data/y_training.csv', low_memory=False )
y_train = y_train.values.ravel()
print( y_train.shape )

(10547,)


### Validação

In [29]:
X_val = pd.read_csv( 'data/X_validation.csv', low_memory=False )
print( X_val.shape )

(4521, 13)


In [30]:
y_val = pd.read_csv( 'data/y_val.csv', low_memory=False )
y_val = y_val.values.ravel()
print( y_val.shape )

(4521,)


### Teste

In [31]:
X_test = pd.read_csv( 'data/X_test.csv', low_memory=False )
print( X_test.shape )

(3767, 13)


In [32]:
y_test = pd.read_csv( 'data/y_test.csv', low_memory=False )
y_test = y_test.values.ravel()
print( y_test.shape )

(3767,)


## 2. Treinamento dos Algoritmos

### 2.1 Sem Alteração nos Parâmetros e Treino + Teste

#### Linear Regression

In [33]:
lr_model = LinearRegression()
lr_model_pipeline = Pipeline([('MinMaxScaler', MinMaxScaler()), ('Linear Regression', lr_model)])

lr_model_pipeline.fit( X_train, y_train )

y_pred_lr = lr_model_pipeline.predict( X_test )

#### Decision Tree

In [34]:
tree_model = DecisionTreeRegressor( random_state=42 )
tree_model_pipeline = Pipeline([('MinMaxScaler', MinMaxScaler()), ('Decision Tree', tree_model)])

tree_model_pipeline.fit( X_train, y_train )

y_pred_tree = tree_model_pipeline.predict( X_test )

#### Random Forest

In [35]:
rf_model = RandomForestRegressor( random_state=42 )
rf_model_pipeline = Pipeline([('MinMaxScaler', MinMaxScaler()), ('Random Forest', rf_model)])

rf_model_pipeline.fit( X_train, y_train )

y_pred_rf = rf_model_pipeline.predict( X_test )

#### Polynomial

In [36]:
poly = PolynomialFeatures()
X_test_poly  = poly.fit_transform( X_test )
X_train_poly = poly.fit_transform( X_train )

In [37]:
poly_model = LinearRegression()
poly_model.fit( X_train_poly, y_train )

y_pred_poly = poly_model.predict( X_test_poly )

#### RANSAC

In [38]:
ransac_model = RANSACRegressor( random_state=42 )
ransac_model_pipeline = Pipeline([('MinMaxScaler', MinMaxScaler()), ('RANSAC', ransac_model)])

ransac_model_pipeline.fit( X_train, y_train )

y_pred_ransac = ransac_model_pipeline.predict( X_test )

#### Linear Regression - Lasso

In [39]:
lr_lasso_model = Lasso()
lr_lasso_model_pipeline = Pipeline([('MinMaxScaler', MinMaxScaler()), ('LR Lasso', lr_lasso_model)])

lr_lasso_model_pipeline.fit( X_train, y_train )

y_pred_lr_lasso = lr_lasso_model_pipeline.predict( X_test )

#### Linear Regression - Ridge

In [40]:
lr_ridge_model = Ridge()
lr_ridge_model_pipeline = Pipeline([('MinMaxScaler', MinMaxScaler()), ('LR Ridge', lr_ridge_model)])

lr_ridge_model_pipeline.fit( X_train, y_train )

y_pred_lr_ridge = lr_ridge_model_pipeline.predict( X_test )

#### Linear Regression - Elastic Net

In [41]:
lr_elastic_model = ElasticNet()
lr_elastic_model_pipeline = Pipeline([('MinMaxScaler', MinMaxScaler()), ('LR Elastic', lr_elastic_model)])

lr_elastic_model_pipeline.fit( X_train, y_train )

y_pred_lr_elastic = lr_elastic_model_pipeline.predict( X_test )

#### Polynomial Regression - Lasso

In [42]:
poly_lasso_model = Lasso()
poly_lasso_model.fit( X_train_poly, y_train )

y_pred_poly_lasso = poly_lasso_model.predict( X_test_poly )

#### Polynomial Regression - Ridge

In [43]:
poly_ridge_model = Ridge()
poly_ridge_model.fit( X_train_poly, y_train )

y_pred_poly_ridge = poly_ridge_model.predict( X_test_poly )

#### Polynomial Regression - Elastic Net

In [44]:
poly_elastic_model = ElasticNet()
poly_elastic_model.fit( X_train_poly, y_train )

y_pred_poly_elastic = poly_elastic_model.predict( X_test_poly )

#### Performance

In [45]:
lr_metrics = model_avaliation( 'Linear Regression', y_test, y_pred_lr )
tree_metrics = model_avaliation( 'Decision Tree', y_test, y_pred_tree )
rf_metrics = model_avaliation( 'Random Forest', y_test, y_pred_rf )
poly_metrics = model_avaliation( 'Polynomial Regression', y_test, y_pred_poly )
ransac_metrics = model_avaliation( 'RANSAC', y_test, y_pred_ransac )
lr_lasso_metrics = model_avaliation( 'LR - Lasso', y_test, y_pred_lr_lasso )
lr_ridge_metrics = model_avaliation( 'LR - Ridge', y_test, y_pred_lr_ridge )
lr_elastic_metrics = model_avaliation( 'LR - Elastic', y_test, y_pred_lr_elastic )
poly_lasso_metrics = model_avaliation( 'Polynomial - Lasso', y_test, y_pred_poly_lasso )
poly_ridge_metrics = model_avaliation( 'Polynomial - Ridge', y_test, y_pred_poly_ridge )
poly_elastic_metrics = model_avaliation( 'Polynomial - Elastic', y_test, y_pred_poly_elastic)


metrics = pd.concat([lr_metrics, tree_metrics, rf_metrics, poly_metrics, ransac_metrics,
                     lr_lasso_metrics, lr_ridge_metrics, lr_elastic_metrics,
                     poly_lasso_metrics, poly_ridge_metrics, poly_elastic_metrics])

display( metrics.sort_values( by='R²', ascending=False ) )

Unnamed: 0,Model,R²,MSE,RMSE,MAE,MAPE
0,Random Forest,0.35,315.85,17.77,13.05,6.51
0,Polynomial Regression,0.09,443.04,21.05,16.72,8.24
0,Polynomial - Ridge,0.09,443.49,21.06,16.73,8.29
0,Linear Regression,0.05,461.43,21.48,17.13,8.52
0,LR - Ridge,0.05,461.45,21.48,17.13,8.52
0,Polynomial - Elastic,0.01,481.7,21.95,17.43,8.75
0,Polynomial - Lasso,0.01,482.82,21.97,17.46,8.76
0,LR - Elastic,0.0,486.61,22.06,17.54,8.71
0,LR - Lasso,-0.0,486.96,22.07,17.55,8.71
0,Decision Tree,-0.24,603.81,24.57,16.87,6.33


### 2.2 Buscando os Melhores Parâmetros e Treino + Validação + Teste

#### Decision Tree

In [46]:
tree_model = DecisionTreeRegressor( max_depth=5, min_samples_leaf=50, random_state=42 )

params = {"max_depth": [2, 5, 10, 20, 50], 
          "min_samples_leaf": [25, 50, 100, 200, 500, 800]}

tree_grid = GridSearchCV(tree_model, params, cv=3, verbose=0, scoring='r2')

tree_grid.fit(X_train, y_train)

In [47]:
y_pred_tree_grid = tree_grid.predict( X_val )

tree_grid_metrics = model_avaliation( 'Decision Tree - Grid Search', y_val, y_pred_tree_grid )

print( f"Decision Tree - Best Parameters: {tree_grid.best_params_}" )
display( tree_grid_metrics )

Decision Tree - Best Parameters: {'max_depth': 10, 'min_samples_leaf': 100}


Unnamed: 0,Model,R²,MSE,RMSE,MAE,MAPE
0,Decision Tree - Grid Search,0.08,441.14,21.0,16.75,8.35


#### Random Forest

In [48]:
rf_model = RandomForestRegressor( n_estimators=300, max_depth=5, min_samples_leaf=50, random_state=42 )

params = {"n_estimators": [50, 100, 200, 300, 500], 
          "min_samples_leaf": [5, 10, 20, 50, 100]}

rf_grid = GridSearchCV(rf_model, params, cv=3, verbose=0, scoring='r2')

rf_grid.fit(X_train, y_train)

In [49]:
y_pred_rf_grid = rf_grid.predict( X_val )

rf_grid_metrics = model_avaliation( 'Random Forest - Grid Search', y_val, y_pred_rf_grid )

print( f"Random Forest - Best Parameters: {rf_grid.best_params_}" )
display( rf_grid_metrics )

Random Forest - Best Parameters: {'min_samples_leaf': 5, 'n_estimators': 50}


Unnamed: 0,Model,R²,MSE,RMSE,MAE,MAPE
0,Random Forest - Grid Search,0.1,430.8,20.76,16.51,8.41


#### Polynomial Regression

In [50]:
degrees = np.arange(1, 4, 1)
poly_tunning_metrics = pd.DataFrame()

for d in degrees:
    poly = PolynomialFeatures( degree = d )
    X_val_poly   = poly.fit_transform( X_val )
    X_train_poly = poly.fit_transform( X_train )

    model = LinearRegression()
    model.fit( X_train_poly, y_train )

    y_pred = model.predict( X_val_poly ) 

    text = f"Polynomial - Degree {d}"

    loop_metrics = model_avaliation( text, y_val, y_pred )

    poly_tunning_metrics = pd.concat( [poly_tunning_metrics, loop_metrics] )

In [51]:
poly_tunning_metrics = poly_tunning_metrics.sort_values( by='R²', ascending=False )
display( poly_tunning_metrics )
poly_grid_metrics = poly_tunning_metrics.head( 1 )

Unnamed: 0,Model,R²,MSE,RMSE,MAE,MAPE
0,Polynomial - Degree 2,0.07,445.77,21.11,16.75,8.55
0,Polynomial - Degree 1,0.04,458.45,21.41,17.04,8.68
0,Polynomial - Degree 3,-0.05,500.33,22.37,17.09,8.68


#### RANSAC

In [52]:
ransac_model = RANSACRegressor( max_trials=100, min_samples=50, residual_threshold=5.0, random_state=42 )

params = {"max_trials": [50, 100, 200], 
          "min_samples": [50, 100, 200],
          "residual_threshold": [2.5, 5.0]}

ransac_grid = GridSearchCV(ransac_model, params, cv=3, verbose=0, scoring='r2')

ransac_grid.fit(X_train, y_train)

In [53]:
y_pred_ransac_grid = ransac_grid.predict( X_val )

ransac_grid_metrics = model_avaliation( 'RANSAC - Grid Search', y_val, y_pred_ransac_grid )

print( f"Random Forest - Best Parameters: {ransac_grid.best_params_}" )
display( ransac_grid_metrics )

Random Forest - Best Parameters: {'max_trials': 100, 'min_samples': 200, 'residual_threshold': 2.5}


Unnamed: 0,Model,R²,MSE,RMSE,MAE,MAPE
0,RANSAC - Grid Search,0.0,477.12,21.84,17.19,8.92


#### Linear Regression - Lasso

In [54]:
lr_lasso_model = Lasso( alpha=1, max_iter=1000 )

params = {"alpha": [0.5, 1, 5, 10, 20, 50], 
          "max_iter": [500, 1000, 2000, 5000]}

lr_lasso_grid = GridSearchCV(lr_lasso_model, params, cv=3, verbose=0, scoring='r2')

lr_lasso_grid.fit(X_train, y_train)

In [55]:
y_pred_lr_lasso_grid = lr_lasso_grid.predict( X_val )

lr_lasso_grid_metrics = model_avaliation( 'LR Lasso - Grid Search', y_val, y_pred_lr_lasso_grid )

print( f"LR Lasso - Best Parameters: {lr_lasso_grid.best_params_}" )
display( lr_lasso_grid_metrics )

LR Lasso - Best Parameters: {'alpha': 0.5, 'max_iter': 500}


Unnamed: 0,Model,R²,MSE,RMSE,MAE,MAPE
0,LR Lasso - Grid Search,0.01,472.56,21.74,17.24,8.7


#### Linear Regression - Ridge

In [56]:
lr_ridge_model = Ridge( alpha=1, max_iter=1000 )

params = {"alpha": [0.5, 1, 5, 10, 20, 50], 
          "max_iter": [500, 1000, 2000, 5000]}

lr_ridge_grid = GridSearchCV(lr_ridge_model, params, cv=3, verbose=0, scoring='r2')

lr_ridge_grid.fit(X_train, y_train)

In [57]:
y_pred_lr_ridge_grid = lr_ridge_grid.predict( X_val )

lr_ridge_grid_metrics = model_avaliation( 'LR Ridge - Grid Search', y_val, y_pred_lr_ridge_grid )

print( f"LR Ridge - Best Parameters: {lr_ridge_grid.best_params_}" )
display( lr_ridge_grid_metrics )

LR Ridge - Best Parameters: {'alpha': 10, 'max_iter': 500}


Unnamed: 0,Model,R²,MSE,RMSE,MAE,MAPE
0,LR Ridge - Grid Search,0.04,458.44,21.41,17.04,8.68


#### Linear Regression - Elastic Net

In [58]:
lr_elastic_model = ElasticNet( alpha=1, max_iter=1000, l1_ratio=0.5 )

params = {"alpha": [0.5, 1, 5, 10, 20, 50], 
          "max_iter": [500, 1000, 2000, 5000],
          "l1_ratio": [0.25, 0.5, 0.75]}

lr_elastic_grid = GridSearchCV(lr_elastic_model, params, cv=3, verbose=0, scoring='r2')

lr_elastic_grid.fit(X_train, y_train)

In [59]:
y_pred_lr_elastic_grid = lr_elastic_grid.predict( X_val )

lr_elastic_grid_metrics = model_avaliation( 'LR Elastic - Grid Search', y_val, y_pred_lr_elastic_grid )

print( f"LR Elastic - Best Parameters: {lr_elastic_grid.best_params_}" )
display( lr_elastic_grid_metrics )

LR Elastic - Best Parameters: {'alpha': 0.5, 'l1_ratio': 0.25, 'max_iter': 500}


Unnamed: 0,Model,R²,MSE,RMSE,MAE,MAPE
0,LR Elastic - Grid Search,0.01,471.08,21.7,17.22,8.69


#### Polynomial Regression - Lasso

In [60]:
degrees = np.arange(1, 4, 1)
poly_lasso_tunning_metrics = pd.DataFrame()

for d in degrees:
    poly = PolynomialFeatures( degree = d )
    X_val_poly   = poly.fit_transform( X_val )
    X_train_poly = poly.fit_transform( X_train )

    model = Lasso( alpha=1, max_iter=1000 )

    params = {"alpha": [0.5, 1, 5, 10, 20, 50], 
          "max_iter": [500, 1000, 2000, 5000]}
    
    model_grid = GridSearchCV(model, params, cv=3, verbose=0, scoring='r2')

    model_grid.fit(X_train_poly, y_train)

    y_pred = model_grid.predict( X_val_poly ) 

    text = f"Polynomial Lasso - Degree {d}"

    loop_metrics = model_avaliation( text, y_val, y_pred )

    print( f"Degree {d} - Best Parameters: {model_grid.best_params_}" )

    poly_lasso_tunning_metrics = pd.concat( [poly_lasso_tunning_metrics, loop_metrics] )

Degree 1 - Best Parameters: {'alpha': 0.5, 'max_iter': 500}
Degree 2 - Best Parameters: {'alpha': 0.5, 'max_iter': 500}
Degree 3 - Best Parameters: {'alpha': 1, 'max_iter': 500}


In [61]:
poly_lasso_tunning_metrics = poly_lasso_tunning_metrics.sort_values( by='R²', ascending=False )
display( poly_lasso_tunning_metrics )
poly_lasso_grid_metrics = poly_lasso_tunning_metrics.head( 1 )

Unnamed: 0,Model,R²,MSE,RMSE,MAE,MAPE
0,Polynomial Lasso - Degree 2,0.02,469.11,21.66,17.15,8.68
0,Polynomial Lasso - Degree 3,0.01,470.76,21.7,17.18,8.66
0,Polynomial Lasso - Degree 1,0.01,472.56,21.74,17.24,8.7


#### Polynomial Regression - Ridge

In [62]:
degrees = np.arange(1, 4, 1)
poly_ridge_tunning_metrics = pd.DataFrame()

for d in degrees:
    poly = PolynomialFeatures( degree = d )
    X_val_poly   = poly.fit_transform( X_val )
    X_train_poly = poly.fit_transform( X_train )

    model = Ridge( alpha=1, max_iter=1000 )

    params = {"alpha": [0.5, 1, 5, 10, 20, 50], 
          "max_iter": [500, 1000, 2000, 5000]}
    
    model_grid = GridSearchCV(model, params, cv=3, verbose=0, scoring='r2')

    model_grid.fit(X_train_poly, y_train)

    y_pred = model_grid.predict( X_val_poly ) 

    text = f"Polynomial Ridge - Degree {d}"

    loop_metrics = model_avaliation( text, y_val, y_pred )

    print( f"Degree {d} - Best Parameters: {model_grid.best_params_}" )

    poly_ridge_tunning_metrics = pd.concat( [poly_ridge_tunning_metrics, loop_metrics] )

Degree 1 - Best Parameters: {'alpha': 10, 'max_iter': 500}
Degree 2 - Best Parameters: {'alpha': 5, 'max_iter': 500}
Degree 3 - Best Parameters: {'alpha': 50, 'max_iter': 500}


In [63]:
poly_ridge_tunning_metrics = poly_ridge_tunning_metrics.sort_values( by='R²', ascending=False )
display( poly_ridge_tunning_metrics )
poly_ridge_grid_metrics = poly_ridge_tunning_metrics.head( 1 )

Unnamed: 0,Model,R²,MSE,RMSE,MAE,MAPE
0,Polynomial Ridge - Degree 2,0.07,445.24,21.1,16.74,8.59
0,Polynomial Ridge - Degree 1,0.04,458.44,21.41,17.04,8.68
0,Polynomial Ridge - Degree 3,-0.01,482.72,21.97,16.78,8.55


#### Polynomial Regression - Elastic Net

In [64]:
degrees = np.arange(1, 4, 1)
poly_elastic_tunning_metrics = pd.DataFrame()

for d in degrees:
    poly = PolynomialFeatures( degree = d )
    X_val_poly   = poly.fit_transform( X_val )
    X_train_poly = poly.fit_transform( X_train )

    model = ElasticNet( alpha=1, max_iter=1000, l1_ratio=0.5 )

    params = {"alpha": [0.5, 1, 5, 10, 20, 50], 
          "max_iter": [500, 1000, 2000, 5000],
          "l1_ratio": [0.25, 0.5, 0.75]}
    
    model_grid = GridSearchCV(model, params, cv=3, verbose=0, scoring='r2')

    model_grid.fit(X_train_poly, y_train)

    y_pred = model_grid.predict( X_val_poly ) 

    text = f"Polynomial Elastic - Degree {d}"

    loop_metrics = model_avaliation( text, y_val, y_pred )

    print( f"Degree {d} - Best Parameters: {model_grid.best_params_}" )

    poly_elastic_tunning_metrics = pd.concat( [poly_elastic_tunning_metrics, loop_metrics] )

Degree 1 - Best Parameters: {'alpha': 0.5, 'l1_ratio': 0.25, 'max_iter': 500}
Degree 2 - Best Parameters: {'alpha': 0.5, 'l1_ratio': 0.25, 'max_iter': 500}
Degree 3 - Best Parameters: {'alpha': 1, 'l1_ratio': 0.75, 'max_iter': 500}


In [65]:
poly_elastic_tunning_metrics = poly_elastic_tunning_metrics.sort_values( by='R²', ascending=False )
display( poly_elastic_tunning_metrics )
poly_elastic_grid_metrics = poly_elastic_tunning_metrics.head( 1 )

Unnamed: 0,Model,R²,MSE,RMSE,MAE,MAPE
0,Polynomial Elastic - Degree 2,0.03,464.81,21.56,17.07,8.66
0,Polynomial Elastic - Degree 3,0.01,470.74,21.7,17.18,8.66
0,Polynomial Elastic - Degree 1,0.01,471.08,21.7,17.22,8.69


#### Performance

In [66]:
grid_metrics = pd.concat([tree_grid_metrics, rf_grid_metrics, poly_grid_metrics, ransac_grid_metrics,
                          lr_lasso_grid_metrics, lr_ridge_grid_metrics, lr_elastic_grid_metrics,
                          poly_lasso_grid_metrics, poly_ridge_grid_metrics, poly_elastic_grid_metrics])

display( grid_metrics.sort_values( by='R²', ascending=False ) )

Unnamed: 0,Model,R²,MSE,RMSE,MAE,MAPE
0,Random Forest - Grid Search,0.1,430.8,20.76,16.51,8.41
0,Decision Tree - Grid Search,0.08,441.14,21.0,16.75,8.35
0,Polynomial Ridge - Degree 2,0.07,445.24,21.1,16.74,8.59
0,Polynomial - Degree 2,0.07,445.77,21.11,16.75,8.55
0,LR Ridge - Grid Search,0.04,458.44,21.41,17.04,8.68
0,Polynomial Elastic - Degree 2,0.03,464.81,21.56,17.07,8.66
0,Polynomial Lasso - Degree 2,0.02,469.11,21.66,17.15,8.68
0,LR Elastic - Grid Search,0.01,471.08,21.7,17.22,8.69
0,LR Lasso - Grid Search,0.01,472.56,21.74,17.24,8.7
0,RANSAC - Grid Search,0.0,477.12,21.84,17.19,8.92


In [67]:
join_metrics = pd.concat( [metrics, grid_metrics] )

display( join_metrics.sort_values( by='R²', ascending=False ) )

Unnamed: 0,Model,R²,MSE,RMSE,MAE,MAPE
0,Random Forest,0.35,315.85,17.77,13.05,6.51
0,Random Forest - Grid Search,0.1,430.8,20.76,16.51,8.41
0,Polynomial Regression,0.09,443.04,21.05,16.72,8.24
0,Polynomial - Ridge,0.09,443.49,21.06,16.73,8.29
0,Decision Tree - Grid Search,0.08,441.14,21.0,16.75,8.35
0,Polynomial Ridge - Degree 2,0.07,445.24,21.1,16.74,8.59
0,Polynomial - Degree 2,0.07,445.77,21.11,16.75,8.55
0,Linear Regression,0.05,461.43,21.48,17.13,8.52
0,LR - Ridge,0.05,461.45,21.48,17.13,8.52
0,LR Ridge - Grid Search,0.04,458.44,21.41,17.04,8.68


### 2.3 Performance com os Dados de Teste + Melhores Parâmetros

In [93]:
# Juntando os Dados de Treino + Validação
X_final = np.concatenate( ( X_train, X_val ))
y_final = np.concatenate( ( y_train, y_val ))

#### Linear Regression

In [94]:
final_lr = LinearRegression()
final_lr_pipeline = Pipeline([('MinMaxScaler', MinMaxScaler()), ('Linear Regression', final_lr)])

final_lr_pipeline.fit( X_final, y_final )

y_pred_lr_final = final_lr_pipeline.predict( X_test )

#### Decision Tree

In [95]:
final_tree = DecisionTreeRegressor( max_depth=10, min_samples_leaf=100, random_state=42 )
final_tree_pipeline = Pipeline([('MinMaxScaler', MinMaxScaler()), ('Decision Tree', final_tree)])

final_tree_pipeline.fit( X_final, y_final )

y_pred_tree_final = final_tree_pipeline.predict( X_test )

#### Random Forest

In [96]:
final_rf = RandomForestRegressor( min_samples_leaf=5, n_estimators=50, random_state=42 )
final_rf_pipeline = Pipeline([('MinMaxScaler', MinMaxScaler()), ('Random Forest', final_rf)])

final_rf_pipeline.fit( X_final, y_final )

y_pred_rf_final = final_rf_pipeline.predict( X_test )

#### Polynomial

In [97]:
poly = PolynomialFeatures( degree=2 )
X_test_poly  = poly.fit_transform( X_test )
X_final_poly = poly.fit_transform( X_final )

In [98]:
final_poly = LinearRegression()
final_poly.fit( X_final_poly, y_final )

y_pred_poly_final = final_poly.predict( X_test_poly )

#### RANSAC

In [99]:
final_ransac = RANSACRegressor( max_trials=100, min_samples=200, residual_threshold=2.5, random_state=42 )
final_ransac_pipeline = Pipeline([('MinMaxScaler', MinMaxScaler()), ('RANSAC', final_ransac)])

final_ransac_pipeline.fit( X_final, y_final )

y_pred_ransac_final = final_ransac_pipeline.predict( X_test )

#### Linear Regression - Lasso

In [100]:
final_lr_lasso = Lasso( alpha=0.5, max_iter=500 )
final_lr_lasso_pipeline = Pipeline([('MinMaxScaler', MinMaxScaler()), ('LR Lasso', final_lr_lasso)])

final_lr_lasso_pipeline.fit( X_final, y_final )

y_pred_lr_lasso_final = final_lr_lasso_pipeline.predict( X_test )

#### Linear Regression - Ridge

In [101]:
final_lr_ridge = Ridge( alpha=10, max_iter=500 )
final_lr_ridge_pipeline = Pipeline([('MinMaxScaler', MinMaxScaler()), ('LR Ridge', final_lr_ridge)])

final_lr_ridge_pipeline.fit( X_final, y_final )

y_pred_lr_ridge_final = final_lr_ridge_pipeline.predict( X_test )

#### Linear Regression - Elastic Net

In [102]:
final_lr_elastic = ElasticNet( alpha=0.5, l1_ratio=0.25, max_iter=500 )
final_lr_elastic_pipeline = Pipeline([('MinMaxScaler', MinMaxScaler()), ('LR Elastic', final_lr_elastic)])

final_lr_elastic_pipeline.fit( X_final, y_final )

y_pred_lr_elastic_final = final_lr_elastic_pipeline.predict( X_test )

#### Polynomial Regression - Lasso

In [103]:
poly = PolynomialFeatures( degree=2 )
X_test_poly  = poly.fit_transform( X_test )
X_final_poly = poly.fit_transform( X_final )

In [104]:
final_poly_lasso = Lasso( alpha=0.5, max_iter=500 )
final_poly_lasso.fit( X_final_poly, y_final )

y_pred_poly_lasso_final = final_poly_lasso.predict( X_test_poly )

#### Polynomial Regression - Ridge

In [105]:
poly = PolynomialFeatures( degree=2 )
X_test_poly  = poly.fit_transform( X_test )
X_final_poly = poly.fit_transform( X_final )

In [106]:
final_poly_ridge = Ridge( alpha=5, max_iter=500 )
final_poly_ridge.fit( X_final_poly, y_final )

y_pred_poly_ridge_final = final_poly_ridge.predict( X_test_poly )

#### Polynomial Regression - Elastic Net

In [107]:
poly = PolynomialFeatures( degree=2 )
X_test_poly  = poly.fit_transform( X_test )
X_final_poly = poly.fit_transform( X_final )

In [108]:
final_poly_elastic = ElasticNet( alpha=0.5, l1_ratio=0.25, max_iter=500 )
final_poly_elastic.fit( X_final_poly, y_final )

y_pred_poly_elastic_final = final_poly_elastic.predict( X_test_poly )

#### Performance

In [109]:
final_lr_metrics = model_avaliation( 'Linear Regression - Final', y_test, y_pred_lr_final )
final_tree_metrics = model_avaliation( 'Decision Tree - Final', y_test, y_pred_tree_final )
final_rf_metrics = model_avaliation( 'Random Forest - Final', y_test, y_pred_rf_final )
final_poly_metrics = model_avaliation( 'Polynomial Regression', y_test, y_pred_poly_final )
final_ransac_metrics = model_avaliation( 'RANSAC - Final', y_test, y_pred_ransac_final )
final_lr_lasso_metrics = model_avaliation(' Linear Regression Lasso - Final', y_test, y_pred_lr_lasso_final )
final_lr_ridge_metrics = model_avaliation( 'Linear Regression Ridge - Final', y_test, y_pred_lr_ridge_final )
final_lr_elastic_metrics = model_avaliation( 'Linear Regression Elastic', y_test, y_pred_lr_elastic_final )
final_poly_lasso_metrics = model_avaliation( 'Polynomial Lasso', y_test, y_pred_poly_lasso_final )
final_poly_ridge_metrics = model_avaliation( 'Polynomial Ridge - Final', y_test, y_pred_poly_ridge_final )
final_poly_elastic_metrics = model_avaliation( 'Polynomial Elastic - Final', y_test, y_pred_poly_elastic_final )

In [110]:
final_metrics = pd.concat( [final_lr_metrics, final_tree_metrics, final_rf_metrics, final_poly_metrics,
                           final_ransac_metrics, final_lr_lasso_metrics, final_lr_ridge_metrics, 
                           final_lr_elastic_metrics, final_poly_lasso_metrics, final_poly_ridge_metrics,
                           final_poly_elastic_metrics] )
display( final_metrics.sort_values( by='R²', ascending=False) )

Unnamed: 0,Model,R²,MSE,RMSE,MAE,MAPE
0,Random Forest - Final,0.36,313.02,17.69,13.47,6.86
0,Polynomial Regression,0.09,442.64,21.04,16.74,8.28
0,Decision Tree - Final,0.09,442.84,21.04,16.79,8.13
0,Polynomial Ridge - Final,0.09,443.96,21.07,16.76,8.34
0,Linear Regression - Final,0.05,461.99,21.49,17.14,8.53
0,Linear Regression Ridge - Final,0.05,462.15,21.5,17.15,8.53
0,Polynomial Elastic - Final,0.03,474.63,21.79,17.29,8.7
0,Polynomial Lasso,0.02,479.06,21.89,17.39,8.75
0,Linear Regression Lasso - Final,0.01,483.88,22.0,17.48,8.68
0,Linear Regression Elastic,0.01,484.09,22.0,17.49,8.7
