## 0.0 Impots

In [1]:
import json
import os
import random

import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import (
    ElasticNet,
    Lasso,
    LinearRegression,
    Ridge,
)
from sklearn.metrics import (
    mean_absolute_error,
    mean_absolute_percentage_error,
    mean_squared_error,
    r2_score,
    root_mean_squared_error,
)
from sklearn.preprocessing import PolynomialFeatures
from sklearn.tree import DecisionTreeRegressor

## 0.1 Loading Dataset

In [2]:
path_home = os.path.dirname(os.getcwd())
# Dados de Treino
path_X_traning = os.path.join(path_home, 'data', 'X_training.csv')
path_y_traning = os.path.join(path_home, 'data', 'y_training.csv')

x_training  = pd.read_csv(path_X_traning)
y_training = pd.read_csv(path_y_traning)

# Dados de Validação
path_X_validation = os.path.join(path_home, 'data', 'X_validation.csv')
path_y_validation = os.path.join(path_home, 'data', 'y_validation.csv')

x_validation  = pd.read_csv(path_X_validation)
y_validation = pd.read_csv(path_y_validation)

# Dados de Teste
path_X_test = os.path.join(path_home, 'data', 'X_test.csv')
path_y_test = os.path.join(path_home, 'data', 'y_test.csv')

x_test  = pd.read_csv(path_X_test)
y_test = pd.read_csv(path_y_test)

## 0.2 Helpe Function

In [3]:
def get_metrics(y_data, yhat_model):
    metrics = {
        'R2': r2_score(y_data, yhat_model),
        'MSE': mean_squared_error(y_data, yhat_model),
        'RMSE': root_mean_squared_error(y_data, yhat_model),
        'MAE': mean_absolute_error(y_data, yhat_model),
        'MAPE': mean_absolute_percentage_error(y_data, yhat_model),

    }
    return metrics
    
def classifier_evaluation(model_classifier, param, data):
    data_mapping = {
        'training': (x_training, y_training),
        'validation': (x_training, y_training),
        'test': (
                    pd.concat([x_training, x_validation]).reset_index(drop=True),
                    pd.concat([y_training, y_validation]).reset_index(drop=True),
                )
    }

    x_data, y_data = data_mapping.get(data, (None, None))

    if x_data is None or y_data is None:
        raise ValueError("Invalid data type provided. Choose from 'training', 'validation', or 'test'.")

    model = model_classifier(**param)
    model.fit(x_data, y_data.values.ravel())

    predict_mapping = {
        'training': x_data,
        'validation': x_validation,
        'test': x_test
    }

    yhat_model = model.predict(predict_mapping[data])

    metrics_mapping = {
        'training': y_training,
        'validation': y_validation,
        'test': y_test
    }

    metrics = get_metrics(metrics_mapping[data], yhat_model)

    result = create_result(model, param, metrics)

    return result

def classifier_evaluation_poli(model_classifier, param, dataset, data):
    x_data, y_data = dataset['x'], dataset['y']

    model = model_classifier(**param)
    model.fit(x_data, y_data.values.ravel()) 

    predict_mapping = {
        'training': x_data,
        'validation': dataset.get('x_validation', None),
        'test': dataset.get('x_test',None)
    }

    yhat_model = model.predict(predict_mapping[data])
    
    metrics_mapping = {
        'training': y_training,
        'validation': dataset.get('y_validation', None),
        'test': dataset.get('y_test',None)
    }
    
    metrics = get_metrics(metrics_mapping[data], yhat_model)
    result = create_result(model, param, metrics)
    
    return result

def create_result(model, param, metrics):
    metric = pd.DataFrame({
        'name': model.__class__.__name__,
        **metrics,
        'param': json.dumps(param)
    }, index=[0])

    return metric

# 1.0 Training

In [4]:
data = 'training'

## 1.1 Linear Regresson

In [5]:
resul_lr = pd.DataFrame()
model = LinearRegression()
param = {
        'fit_intercept': True
    }

result = classifier_evaluation(model, param, data)

resul_lr = pd.concat([resul_lr,result]).reset_index(drop=True)

In [6]:
resul_lr.sort_values('RMSE')

Unnamed: 0,name,R2,MSE,RMSE,MAE,MAPE,param
0,LinearRegression,0.046058,455.996112,21.354065,16.998249,8.653186,"{""fit_intercept"": true}"


## 1.2 Linear Regresson Lasso

In [7]:
resul_ls = pd.DataFrame()
model = Lasso
para = {
        'alpha': [1, 2, 3],
        'max_iter': [1000, 1500, 2000]
    }

for i in range(2, 20, 1):
    param = {
        'alpha': random.choice(para['alpha']),
        'max_iter': random.choice(para['max_iter'])
    }

    result = classifier_evaluation(model, param, data)

    resul_ls = pd.concat([resul_ls,result]).reset_index(drop=True)

In [8]:
resul_ls.sort_values('RMSE')

Unnamed: 0,name,R2,MSE,RMSE,MAE,MAPE,param
3,Lasso,0.007401,474.474834,21.782443,17.305484,8.736697,"{""alpha"": 1, ""max_iter"": 1500}"
5,Lasso,0.007401,474.474834,21.782443,17.305484,8.736697,"{""alpha"": 1, ""max_iter"": 1000}"
6,Lasso,0.007401,474.474834,21.782443,17.305484,8.736697,"{""alpha"": 1, ""max_iter"": 2000}"
9,Lasso,0.007401,474.474834,21.782443,17.305484,8.736697,"{""alpha"": 1, ""max_iter"": 1000}"
13,Lasso,0.007401,474.474834,21.782443,17.305484,8.736697,"{""alpha"": 1, ""max_iter"": 1000}"
12,Lasso,0.007401,474.474834,21.782443,17.305484,8.736697,"{""alpha"": 1, ""max_iter"": 2000}"
8,Lasso,0.001125,477.474834,21.851198,17.355395,8.741522,"{""alpha"": 2, ""max_iter"": 1000}"
1,Lasso,0.001125,477.474834,21.851198,17.355395,8.741522,"{""alpha"": 2, ""max_iter"": 1500}"
15,Lasso,0.001125,477.474834,21.851198,17.355395,8.741522,"{""alpha"": 2, ""max_iter"": 1000}"
4,Lasso,0.001125,477.474834,21.851198,17.355395,8.741522,"{""alpha"": 2, ""max_iter"": 1500}"


## 1.3 Linear Regresson Ridge

In [9]:
resul_lrr = pd.DataFrame()
model = Ridge
para = {
        'alpha': [1, 2, 3],
        'max_iter': [1000, 1500, 2000]
    }

for i in range(2, 20, 1):
    param = {
        'alpha': random.choice(para['alpha']),
        'max_iter': random.choice(para['max_iter'])
    }

    result = classifier_evaluation(model, param, data)

    resul_lrr = pd.concat([resul_lrr,result]).reset_index(drop=True)

In [10]:
resul_lrr.sort_values('RMSE')

Unnamed: 0,name,R2,MSE,RMSE,MAE,MAPE,param
0,Ridge,0.046058,455.996401,21.354072,16.998308,8.653415,"{""alpha"": 1, ""max_iter"": 1000}"
1,Ridge,0.046058,455.996401,21.354072,16.998308,8.653415,"{""alpha"": 1, ""max_iter"": 1500}"
15,Ridge,0.046058,455.996401,21.354072,16.998308,8.653415,"{""alpha"": 1, ""max_iter"": 1500}"
13,Ridge,0.046058,455.996401,21.354072,16.998308,8.653415,"{""alpha"": 1, ""max_iter"": 1000}"
10,Ridge,0.046058,455.996401,21.354072,16.998308,8.653415,"{""alpha"": 1, ""max_iter"": 1000}"
6,Ridge,0.046058,455.996401,21.354072,16.998308,8.653415,"{""alpha"": 1, ""max_iter"": 1500}"
7,Ridge,0.046058,455.996401,21.354072,16.998308,8.653415,"{""alpha"": 1, ""max_iter"": 1000}"
9,Ridge,0.046058,455.996401,21.354072,16.998308,8.653415,"{""alpha"": 1, ""max_iter"": 1000}"
12,Ridge,0.046056,455.997238,21.354092,16.998366,8.653638,"{""alpha"": 2, ""max_iter"": 2000}"
8,Ridge,0.046056,455.997238,21.354092,16.998366,8.653638,"{""alpha"": 2, ""max_iter"": 2000}"


## 1.4 Linear Regresson ElasticNet

In [11]:
resul_lre = pd.DataFrame()
model = ElasticNet
para = {
        'alpha': [1, 2, 3],
        'max_iter': [1000, 1500, 2000],
        'l1_ratio': [0.3, 0.5, 0.7]
    }

for i in range(2, 20, 1):
    param = {
        'alpha': random.choice(para['alpha']),
        'max_iter': random.choice(para['max_iter']),
        'l1_ratio': random.choice(para['l1_ratio'])
    }

    result = classifier_evaluation(model, param, data)

    resul_lre = pd.concat([resul_lre,result]).reset_index(drop=True)

In [12]:
resul_lre.sort_values('RMSE')

Unnamed: 0,name,R2,MSE,RMSE,MAE,MAPE,param
3,ElasticNet,0.008744,473.833027,21.767706,17.29095,8.727685,"{""alpha"": 1, ""max_iter"": 1500, ""l1_ratio"": 0.3}"
7,ElasticNet,0.008744,473.833027,21.767706,17.29095,8.727685,"{""alpha"": 1, ""max_iter"": 1500, ""l1_ratio"": 0.3}"
10,ElasticNet,0.008744,473.833027,21.767706,17.29095,8.727685,"{""alpha"": 1, ""max_iter"": 1000, ""l1_ratio"": 0.3}"
15,ElasticNet,0.007832,474.268889,21.777715,17.299507,8.7323,"{""alpha"": 1, ""max_iter"": 2000, ""l1_ratio"": 0.5}"
2,ElasticNet,0.007832,474.268889,21.777715,17.299507,8.7323,"{""alpha"": 1, ""max_iter"": 1000, ""l1_ratio"": 0.5}"
14,ElasticNet,0.007832,474.268889,21.777715,17.299507,8.7323,"{""alpha"": 1, ""max_iter"": 1000, ""l1_ratio"": 0.5}"
6,ElasticNet,0.007832,474.268889,21.777715,17.299507,8.7323,"{""alpha"": 1, ""max_iter"": 1000, ""l1_ratio"": 0.5}"
8,ElasticNet,0.007388,474.480839,21.782581,17.304805,8.735937,"{""alpha"": 1, ""max_iter"": 1000, ""l1_ratio"": 0.7}"
12,ElasticNet,0.00494,475.651113,21.809427,17.321482,8.738012,"{""alpha"": 2, ""max_iter"": 1500, ""l1_ratio"": 0.3}"
17,ElasticNet,0.004368,475.924363,21.815691,17.32682,8.739289,"{""alpha"": 2, ""max_iter"": 2000, ""l1_ratio"": 0.5}"


## 1.5 Decision Tree

In [13]:
resul_dt = pd.DataFrame()
model = DecisionTreeRegressor
for i in range(2, 20, 1):
    param = {
            'max_depth': i
        }

    result = classifier_evaluation(model, param, data)

    resul_dt = pd.concat([resul_dt,result]).reset_index(drop=True)

In [14]:
resul_dt.sort_values('RMSE')

Unnamed: 0,name,R2,MSE,RMSE,MAE,MAPE,param
17,DecisionTreeRegressor,0.904914,45.452203,6.741825,2.758908,0.548129,"{""max_depth"": 19}"
16,DecisionTreeRegressor,0.87278,60.812589,7.798243,3.525231,0.819045,"{""max_depth"": 18}"
15,DecisionTreeRegressor,0.83622,78.288988,8.848106,4.402138,1.131357,"{""max_depth"": 17}"
14,DecisionTreeRegressor,0.789535,100.605062,10.030207,5.440952,1.559575,"{""max_depth"": 16}"
13,DecisionTreeRegressor,0.738234,125.127579,11.186044,6.606331,2.01241,"{""max_depth"": 15}"
12,DecisionTreeRegressor,0.678037,153.902155,12.405731,7.863702,2.434356,"{""max_depth"": 14}"
11,DecisionTreeRegressor,0.608236,187.268192,13.684597,9.170869,2.928375,"{""max_depth"": 13}"
10,DecisionTreeRegressor,0.537269,221.191422,14.872506,10.471226,3.442608,"{""max_depth"": 12}"
9,DecisionTreeRegressor,0.459861,258.193172,16.06839,11.749243,4.169772,"{""max_depth"": 11}"
8,DecisionTreeRegressor,0.384624,294.157341,17.151016,12.925051,4.871411,"{""max_depth"": 10}"


## 1.6 Random Forest

In [15]:
resul_dt = pd.DataFrame()
model = RandomForestRegressor
para = {
        'n_estimators': [100, 200, 300],
    }
for i in range(2, 20, 1):
    param = {
            'max_depth': i,
            'n_estimators': random.choice(para['n_estimators'])
        }

    result = classifier_evaluation(model, param, data)

    resul_dt = pd.concat([resul_dt,result]).reset_index(drop=True)

In [16]:
resul_dt.sort_values('RMSE')

Unnamed: 0,name,R2,MSE,RMSE,MAE,MAPE,param
17,RandomForestRegressor,0.875451,59.536012,7.715958,5.773215,2.816742,"{""max_depth"": 19, ""n_estimators"": 200}"
16,RandomForestRegressor,0.860795,66.541594,8.157303,6.179378,2.912545,"{""max_depth"": 18, ""n_estimators"": 300}"
15,RandomForestRegressor,0.844132,74.506945,8.631741,6.589878,3.114612,"{""max_depth"": 17, ""n_estimators"": 200}"
14,RandomForestRegressor,0.816056,87.927529,9.376968,7.224204,3.306687,"{""max_depth"": 16, ""n_estimators"": 300}"
13,RandomForestRegressor,0.782215,104.103737,10.203124,7.954441,3.509047,"{""max_depth"": 15, ""n_estimators"": 300}"
12,RandomForestRegressor,0.738703,124.903448,11.176021,8.761765,3.84766,"{""max_depth"": 14, ""n_estimators"": 300}"
11,RandomForestRegressor,0.682186,151.919275,12.325554,9.705756,4.257693,"{""max_depth"": 13, ""n_estimators"": 100}"
10,RandomForestRegressor,0.622428,180.484034,13.434435,10.636008,4.712799,"{""max_depth"": 12, ""n_estimators"": 300}"
9,RandomForestRegressor,0.546718,216.67432,14.719861,11.691909,5.19114,"{""max_depth"": 11, ""n_estimators"": 300}"
8,RandomForestRegressor,0.468267,254.175167,15.942872,12.671513,5.780141,"{""max_depth"": 10, ""n_estimators"": 200}"


## 1.7 Polynomial Features

In [17]:
resul_pf = pd.DataFrame()
model = LinearRegression

for i in range(2, 6, 1):
    poly = PolynomialFeatures(degree=i)
    X_poly = poly.fit_transform(x_training)
    data_poly = {'x': X_poly, 'y': y_training}

    param = {
        'fit_intercept': True,
    }

    result = classifier_evaluation_poli(model, param, data_poly, data)

    resul_pf = pd.concat([resul_pf,result]).reset_index(drop=True)

In [18]:
resul_pf.sort_values('RMSE')

Unnamed: 0,name,R2,MSE,RMSE,MAE,MAPE,param
3,LinearRegression,0.7253,131.310015,11.459058,7.266166,2.215335,"{""fit_intercept"": true}"
2,LinearRegression,0.333957,318.377086,17.843124,13.614247,5.913391,"{""fit_intercept"": true}"
1,LinearRegression,0.154418,404.19895,20.1047,15.883592,7.800181,"{""fit_intercept"": true}"
0,LinearRegression,0.094195,432.98621,20.808321,16.458032,8.35054,"{""fit_intercept"": true}"


## 1.8 Polynomial Features Lasso

In [19]:
resul_pfl = pd.DataFrame()
model = Lasso
para = {
        'max_iter': [1000, 1500, 2000],
        'alpha': [1, 2, 3]
    }

for i in range(2, 6, 1):
    poly = PolynomialFeatures(degree=i)
    X_poly = poly.fit_transform(x_training)
    data_poly = {'x': X_poly, 'y': y_training}

    param = {
        'alpha': i,
        'max_iter': random.choice(para['max_iter'])
    }

    result = classifier_evaluation_poli(model, param, data_poly, data)

    resul_pfl = pd.concat([resul_pfl, result]).reset_index(drop=True)

  model = cd_fast.enet_coordinate_descent(


In [20]:
resul_pfl.sort_values('RMSE')

Unnamed: 0,name,R2,MSE,RMSE,MAE,MAPE,param
3,Lasso,0.002244,476.939911,21.838954,17.337843,8.643126,"{""alpha"": 5, ""max_iter"": 1500}"
0,Lasso,0.002072,477.021941,21.840832,17.345632,8.715425,"{""alpha"": 2, ""max_iter"": 1500}"
2,Lasso,0.001632,477.232587,21.845654,17.350025,8.659397,"{""alpha"": 4, ""max_iter"": 2000}"
1,Lasso,0.001039,477.515926,21.852138,17.355498,8.724224,"{""alpha"": 3, ""max_iter"": 2000}"


## 1.9 Polynomial Features Ridge

In [21]:
resul_pfr = pd.DataFrame()
model = Ridge
para = {
        'max_iter': [1000, 1500, 2000],
        'alpha': [1, 2, 3]
    }

for i in range(2, 6, 1):
    poly = PolynomialFeatures(degree=i)
    X_poly = poly.fit_transform(x_training)
    data_poly = {'x': X_poly, 'y': y_training}

    param = {
        'alpha': i,
        'max_iter': random.choice(para['max_iter'])
    }

    result = classifier_evaluation_poli(model, param, data_poly, data)

    resul_pfr = pd.concat([resul_pfr, result]).reset_index(drop=True)

In [22]:
resul_pfr.sort_values('RMSE')

Unnamed: 0,name,R2,MSE,RMSE,MAE,MAPE,param
3,Ridge,0.262806,352.388127,18.772004,14.617676,6.916855,"{""alpha"": 5, ""max_iter"": 1000}"
2,Ridge,0.185298,389.43757,19.734173,15.556016,7.630291,"{""alpha"": 4, ""max_iter"": 1500}"
1,Ridge,0.128082,416.787952,20.415385,16.12957,8.090728,"{""alpha"": 3, ""max_iter"": 1000}"
0,Ridge,0.092544,433.775338,20.827274,16.479345,8.382553,"{""alpha"": 2, ""max_iter"": 1000}"


## 1.10 Polynomial Features Elastic Net

In [23]:
resul_pfe = pd.DataFrame()
model = ElasticNet
para = {
        'max_iter': [1000, 1500, 2000],
        'alpha': [1, 2, 3]
    }

for i in range(2, 6, 1):
    poly = PolynomialFeatures(degree=i)
    X_poly = poly.fit_transform(x_training)
    data_poly = {'x': X_poly, 'y': y_training}

    param = {
        'alpha': i,
        'max_iter': random.choice(para['max_iter'])
    }

    result = classifier_evaluation_poli(model, param, data_poly, data)

    resul_pfe = pd.concat([resul_pfe, result]).reset_index(drop=True)

  model = cd_fast.enet_coordinate_descent(


In [24]:
resul_pfe.sort_values('RMSE')

Unnamed: 0,name,R2,MSE,RMSE,MAE,MAPE,param
0,ElasticNet,0.006652,474.832593,21.790654,17.303883,8.703584,"{""alpha"": 2, ""max_iter"": 1000}"
1,ElasticNet,0.005507,475.380158,21.803214,17.313891,8.679992,"{""alpha"": 3, ""max_iter"": 1500}"
2,ElasticNet,0.00436,475.928549,21.815787,17.323616,8.630591,"{""alpha"": 4, ""max_iter"": 1500}"
3,ElasticNet,0.004257,475.9775,21.816909,17.318502,8.632547,"{""alpha"": 5, ""max_iter"": 2000}"


# 2.0 Validation

In [25]:
data = 'validation'

## 2.1 Linear Regresson

In [26]:
resul_lr = pd.DataFrame()
model = LinearRegression
param = {
        'fit_intercept': True
    }

result = classifier_evaluation(model, param, data)

resul_lr = pd.concat([resul_lr,result]).reset_index(drop=True)

In [27]:
resul_lr.sort_values('RMSE')

Unnamed: 0,name,R2,MSE,RMSE,MAE,MAPE,param
0,LinearRegression,0.039925,458.447042,21.411376,17.039754,8.682542,"{""fit_intercept"": true}"


## 2.2 Linear Regresson Lasso

In [28]:
resul_ls = pd.DataFrame()
model = Lasso
para = {
        'alpha': [1, 2, 3],
        'max_iter': [1000, 1500, 2000]
    }

for i in range(2, 20, 1):
    param = {
        'alpha': random.choice(para['alpha']),
        'max_iter': random.choice(para['max_iter'])
    }

    result = classifier_evaluation(model, param, data)

    resul_ls = pd.concat([resul_ls,result]).reset_index(drop=True)

In [29]:
resul_ls.sort_values('RMSE')

Unnamed: 0,name,R2,MSE,RMSE,MAE,MAPE,param
0,Lasso,0.007883643,473.747081,21.765732,17.264922,8.695808,"{""alpha"": 1, ""max_iter"": 1500}"
10,Lasso,0.007883643,473.747081,21.765732,17.264922,8.695808,"{""alpha"": 1, ""max_iter"": 1500}"
3,Lasso,0.007883643,473.747081,21.765732,17.264922,8.695808,"{""alpha"": 1, ""max_iter"": 1000}"
4,Lasso,0.007883643,473.747081,21.765732,17.264922,8.695808,"{""alpha"": 1, ""max_iter"": 2000}"
5,Lasso,0.007883643,473.747081,21.765732,17.264922,8.695808,"{""alpha"": 1, ""max_iter"": 2000}"
9,Lasso,0.007883643,473.747081,21.765732,17.264922,8.695808,"{""alpha"": 1, ""max_iter"": 1500}"
14,Lasso,0.001172285,476.951832,21.839227,17.339643,8.680632,"{""alpha"": 2, ""max_iter"": 1500}"
16,Lasso,0.001172285,476.951832,21.839227,17.339643,8.680632,"{""alpha"": 2, ""max_iter"": 2000}"
8,Lasso,0.001172285,476.951832,21.839227,17.339643,8.680632,"{""alpha"": 2, ""max_iter"": 1500}"
6,Lasso,0.001172285,476.951832,21.839227,17.339643,8.680632,"{""alpha"": 2, ""max_iter"": 1000}"


## 2.3 Linear Regresson Ridge

In [30]:
resul_lrr = pd.DataFrame()
model = Ridge
para = {
        'alpha': [1, 2, 3],
        'max_iter': [1000, 1500, 2000]
    }

for i in range(2, 20, 1):
    param = {
        'alpha': random.choice(para['alpha']),
        'max_iter': random.choice(para['max_iter'])
    }

    result = classifier_evaluation(model, param, data)

    resul_lrr = pd.concat([resul_lrr,result]).reset_index(drop=True)

In [31]:
resul_lrr.sort_values('RMSE')

Unnamed: 0,name,R2,MSE,RMSE,MAE,MAPE,param
8,Ridge,0.039933,458.443057,21.411283,17.038968,8.682161,"{""alpha"": 3, ""max_iter"": 1500}"
15,Ridge,0.039933,458.443057,21.411283,17.038968,8.682161,"{""alpha"": 3, ""max_iter"": 1000}"
14,Ridge,0.039933,458.443057,21.411283,17.038968,8.682161,"{""alpha"": 3, ""max_iter"": 2000}"
12,Ridge,0.039933,458.443057,21.411283,17.038968,8.682161,"{""alpha"": 3, ""max_iter"": 2000}"
7,Ridge,0.039933,458.443057,21.411283,17.038968,8.682161,"{""alpha"": 3, ""max_iter"": 1000}"
6,Ridge,0.039933,458.443057,21.411283,17.038968,8.682161,"{""alpha"": 3, ""max_iter"": 1000}"
17,Ridge,0.039933,458.443057,21.411283,17.038968,8.682161,"{""alpha"": 3, ""max_iter"": 2000}"
3,Ridge,0.039933,458.443057,21.411283,17.038968,8.682161,"{""alpha"": 3, ""max_iter"": 1500}"
4,Ridge,0.039931,458.444152,21.411309,17.039201,8.682285,"{""alpha"": 2, ""max_iter"": 2000}"
16,Ridge,0.039931,458.444152,21.411309,17.039201,8.682285,"{""alpha"": 2, ""max_iter"": 2000}"


## 2.4 Linear Regresson ElasticNet

In [32]:
resul_lre = pd.DataFrame()
model = ElasticNet
para = {
        'alpha': [1, 2, 3],
        'max_iter': [1000, 1500, 2000],
        'l1_ratio': [0.3, 0.5, 0.7]
    }

for i in range(2, 20, 1):
    param = {
        'alpha': random.choice(para['alpha']),
        'max_iter': random.choice(para['max_iter']),
        'l1_ratio': random.choice(para['l1_ratio'])
    }

    result = classifier_evaluation(model, param, data)

    resul_lre = pd.concat([resul_lre,result]).reset_index(drop=True)

In [33]:
resul_lre.sort_values('RMSE')

Unnamed: 0,name,R2,MSE,RMSE,MAE,MAPE,param
0,ElasticNet,0.008887,473.268144,21.754727,17.256527,8.690803,"{""alpha"": 1, ""max_iter"": 2000, ""l1_ratio"": 0.3}"
14,ElasticNet,0.008887,473.268144,21.754727,17.256527,8.690803,"{""alpha"": 1, ""max_iter"": 2000, ""l1_ratio"": 0.3}"
5,ElasticNet,0.008887,473.268144,21.754727,17.256527,8.690803,"{""alpha"": 1, ""max_iter"": 2000, ""l1_ratio"": 0.3}"
8,ElasticNet,0.008887,473.268144,21.754727,17.256527,8.690803,"{""alpha"": 1, ""max_iter"": 1500, ""l1_ratio"": 0.3}"
6,ElasticNet,0.008117,473.635616,21.763171,17.262903,8.694035,"{""alpha"": 1, ""max_iter"": 2000, ""l1_ratio"": 0.5}"
11,ElasticNet,0.008117,473.635616,21.763171,17.262903,8.694035,"{""alpha"": 1, ""max_iter"": 1000, ""l1_ratio"": 0.5}"
10,ElasticNet,0.007823,473.776145,21.766399,17.265637,8.695211,"{""alpha"": 1, ""max_iter"": 1500, ""l1_ratio"": 0.7}"
15,ElasticNet,0.007823,473.776145,21.766399,17.265637,8.695211,"{""alpha"": 1, ""max_iter"": 2000, ""l1_ratio"": 0.7}"
12,ElasticNet,0.005157,475.049055,21.79562,17.29442,8.688047,"{""alpha"": 2, ""max_iter"": 2000, ""l1_ratio"": 0.3}"
3,ElasticNet,0.005157,475.049055,21.79562,17.29442,8.688047,"{""alpha"": 2, ""max_iter"": 2000, ""l1_ratio"": 0.3}"


## 2.5 Decision Tree

In [34]:
resul_dt = pd.DataFrame()
model = DecisionTreeRegressor
for i in range(2, 20, 1):
    param = {
            'max_depth': i
        }

    result = classifier_evaluation(model, param, data)

    resul_dt = pd.concat([resul_dt,result]).reset_index(drop=True)

In [35]:
resul_dt.sort_values('RMSE')

Unnamed: 0,name,R2,MSE,RMSE,MAE,MAPE,param
3,DecisionTreeRegressor,0.063559,447.161319,21.146189,16.843452,8.395778,"{""max_depth"": 5}"
4,DecisionTreeRegressor,0.063296,447.286802,21.149156,16.748081,8.324423,"{""max_depth"": 6}"
2,DecisionTreeRegressor,0.062168,447.825853,21.161896,16.849288,8.536141,"{""max_depth"": 4}"
5,DecisionTreeRegressor,0.058373,449.637594,21.20466,16.724101,7.997308,"{""max_depth"": 7}"
1,DecisionTreeRegressor,0.054812,451.338427,21.244727,16.89655,8.469922,"{""max_depth"": 3}"
0,DecisionTreeRegressor,0.037609,459.552983,21.437187,16.985102,8.480125,"{""max_depth"": 2}"
6,DecisionTreeRegressor,0.034198,461.181654,21.47514,16.894859,7.951209,"{""max_depth"": 8}"
7,DecisionTreeRegressor,0.019782,468.065533,21.634822,16.88722,7.860791,"{""max_depth"": 9}"
8,DecisionTreeRegressor,-0.004866,479.835316,21.905144,16.874534,7.879172,"{""max_depth"": 10}"
9,DecisionTreeRegressor,-0.034846,494.151037,22.229508,16.956245,7.959271,"{""max_depth"": 11}"


## 2.6 Random Forest

In [36]:
resul_dt = pd.DataFrame()
model = RandomForestRegressor
para = {
        'n_estimators': [100, 200, 300],
    }
for i in range(2, 20, 1):
    param = {
            'max_depth': i,
            'n_estimators': random.choice(para['n_estimators'])
        }

    result = classifier_evaluation(model, param, data)

    resul_dt = pd.concat([resul_dt,result]).reset_index(drop=True)

In [37]:
resul_dt.sort_values('RMSE')

Unnamed: 0,name,R2,MSE,RMSE,MAE,MAPE,param
17,RandomForestRegressor,0.330822,319.5401,17.875685,13.24647,7.155156,"{""max_depth"": 19, ""n_estimators"": 200}"
16,RandomForestRegressor,0.325259,322.196429,17.949831,13.351268,7.101032,"{""max_depth"": 18, ""n_estimators"": 100}"
15,RandomForestRegressor,0.324214,322.695461,17.963726,13.457605,7.213398,"{""max_depth"": 17, ""n_estimators"": 300}"
14,RandomForestRegressor,0.317293,326.000357,18.05548,13.640863,7.279099,"{""max_depth"": 16, ""n_estimators"": 300}"
13,RandomForestRegressor,0.304214,332.24598,18.227616,13.84497,7.322427,"{""max_depth"": 15, ""n_estimators"": 100}"
12,RandomForestRegressor,0.293757,337.239245,18.364075,14.113843,7.390629,"{""max_depth"": 14, ""n_estimators"": 100}"
11,RandomForestRegressor,0.277667,344.922366,18.572086,14.391832,7.518465,"{""max_depth"": 13, ""n_estimators"": 100}"
10,RandomForestRegressor,0.257985,354.320654,18.823407,14.695133,7.599754,"{""max_depth"": 12, ""n_estimators"": 100}"
9,RandomForestRegressor,0.233079,366.213738,19.136712,15.039048,7.769171,"{""max_depth"": 11, ""n_estimators"": 100}"
8,RandomForestRegressor,0.214129,375.262332,19.371689,15.276295,7.858329,"{""max_depth"": 10, ""n_estimators"": 300}"


## 2.7 Polynomial Features

In [38]:
resul_pf = pd.DataFrame()
model = LinearRegression

for i in range(2, 6, 1):
    poly = PolynomialFeatures(degree=i)
    X_poly = poly.fit_transform(x_training)
    X_poly_validation = poly.transform(x_validation)

    data_poly = {'x': X_poly, 'y': y_training, 
                 'x_validation': X_poly_validation, 'y_validation': y_validation
                }

    param = {
        'fit_intercept': True,
    }

    result = classifier_evaluation_poli(model, param, data_poly, data)

    resul_pf = pd.concat([resul_pf,result]).reset_index(drop=True)

  model = cd_fast.enet_coordinate_descent(


In [39]:
resul_pf.sort_values('RMSE')

Unnamed: 0,name,R2,MSE,RMSE,MAE,MAPE,param
1,Lasso,0.014148,470.755769,21.696907,17.180595,8.655828,"{""fit_intercept"": true}"
0,Lasso,0.009631,472.912694,21.746556,17.238379,8.681847,"{""fit_intercept"": true}"
2,Lasso,-0.00989,482.234352,21.959835,17.244445,8.681069,"{""fit_intercept"": true}"
3,Lasso,-1.471362,1180.104209,34.352645,17.623298,8.6799,"{""fit_intercept"": true}"


## 2.8 Polynomial Features Lasso

In [40]:
resul_pfl = pd.DataFrame()
model = Lasso
para = {
        'max_iter': [1000, 1500, 2000],
        'alpha': [1, 2, 3]
    }

for i in range(2, 6, 1):
    poly = PolynomialFeatures(degree=i)
    X_poly = poly.fit_transform(x_training)
    X_poly_validation = poly.transform(x_validation)

    data_poly = {'x': X_poly, 'y': y_training, 
                 'x_validation': X_poly_validation, 'y_validation': y_validation
                }

    param = {
        'alpha': i,
        'max_iter': random.choice(para['max_iter'])
    }

    result = classifier_evaluation_poli(model, param, data_poly, data)

    resul_pfl = pd.concat([resul_pfl, result]).reset_index(drop=True)

  model = cd_fast.enet_coordinate_descent(


In [41]:
resul_pfl.sort_values('RMSE')

Unnamed: 0,name,R2,MSE,RMSE,MAE,MAPE,param
0,Lasso,0.002343,476.392774,21.826424,17.32599,8.671844,"{""alpha"": 2, ""max_iter"": 1500}"
1,Lasso,0.001254,476.912717,21.838331,17.338645,8.67633,"{""alpha"": 3, ""max_iter"": 2000}"
3,Lasso,-0.006163,480.454666,21.919276,17.366076,8.676206,"{""alpha"": 5, ""max_iter"": 2000}"
2,Lasso,-0.010344,482.451078,21.964769,17.37792,8.68508,"{""alpha"": 4, ""max_iter"": 2000}"


## 2.9 Polynomial Features Ridge

In [42]:
resul_pfr = pd.DataFrame()
model = Ridge
para = {
        'max_iter': [1000, 1500, 2000],
        'alpha': [1, 2, 3]
    }

for i in range(2, 6, 1):
    poly = PolynomialFeatures(degree=i)
    X_poly = poly.fit_transform(x_training)
    X_poly_validation = poly.transform(x_validation)
    
    data_poly = {'x': X_poly, 'y': y_training, 
                 'x_validation': X_poly_validation, 'y_validation': y_validation
                }

    param = {
        'alpha': i,
        'max_iter': random.choice(para['max_iter'])
    }

    result = classifier_evaluation_poli(model, param, data_poly, data)

    resul_pfr = pd.concat([resul_pfr, result]).reset_index(drop=True)

In [43]:
resul_pfr.sort_values('RMSE')

Unnamed: 0,name,R2,MSE,RMSE,MAE,MAPE,param
0,Ridge,0.067695,445.186573,21.099445,16.739734,8.575067,"{""alpha"": 2, ""max_iter"": 1000}"
1,Ridge,-0.064732,508.421988,22.548215,16.818626,8.531903,"{""alpha"": 3, ""max_iter"": 2000}"
2,Ridge,-25.431738,12621.462001,112.34528,18.630133,8.505195,"{""alpha"": 4, ""max_iter"": 1000}"
3,Ridge,-97.994347,47270.95025,217.418836,22.733237,8.349673,"{""alpha"": 5, ""max_iter"": 1500}"


## 2.10 Polynomial Features Elastic Net

In [44]:
resul_pfe = pd.DataFrame()
model = ElasticNet
para = {
        'max_iter': [1000, 1500, 2000],
        'alpha': [1, 2, 3]
    }

for i in range(2, 6, 1):
    poly = PolynomialFeatures(degree=i)
    X_poly = poly.fit_transform(x_training)
    X_poly_validation = poly.transform(x_validation)
    
    data_poly = {'x': X_poly, 'y': y_training, 
                 'x_validation': X_poly_validation, 'y_validation': y_validation
                }

    param = {
        'alpha': i,
        'max_iter': random.choice(para['max_iter'])
    }

    result = classifier_evaluation_poli(model, param, data_poly, data)

    resul_pfe = pd.concat([resul_pfe, result]).reset_index(drop=True)

  model = cd_fast.enet_coordinate_descent(


In [45]:
resul_pfe.sort_values('RMSE')

Unnamed: 0,name,R2,MSE,RMSE,MAE,MAPE,param
0,ElasticNet,0.006948,474.193795,21.775991,17.269331,8.677584,"{""alpha"": 2, ""max_iter"": 2000}"
1,ElasticNet,0.004127,475.540815,21.806898,17.299139,8.665203,"{""alpha"": 3, ""max_iter"": 1500}"
2,ElasticNet,0.003959,475.621199,21.808741,17.306251,8.67989,"{""alpha"": 4, ""max_iter"": 2000}"
3,ElasticNet,-0.040411,496.80836,22.289198,17.405567,8.683365,"{""alpha"": 5, ""max_iter"": 2000}"


# 3.0 Test

In [46]:
data = 'test'

## 3.1 Linear Regresson

In [47]:
resul_lr = pd.DataFrame()
model = LinearRegression
param = {
        'fit_intercept': True
    }

result = classifier_evaluation(model, param, data)

resul_lr = pd.concat([resul_lr,result]).reset_index(drop=True)

In [48]:
resul_lr.sort_values('RMSE')

Unnamed: 0,name,R2,MSE,RMSE,MAE,MAPE,param
0,LinearRegression,0.051166,461.988435,21.493916,17.144197,8.531355,"{""fit_intercept"": true}"


## 3.2 Linear Regresson Lasso

In [49]:
resul_ls = pd.DataFrame()
model = Lasso
para = {
        'alpha': [1, 2, 3],
        'max_iter': [1000, 1500, 2000]
    }

for i in range(2, 20, 1):
    param = {
        'alpha': random.choice(para['alpha']),
        'max_iter': random.choice(para['max_iter'])
    }

    result = classifier_evaluation(model, param, data)

    resul_ls = pd.concat([resul_ls,result]).reset_index(drop=True)

In [50]:
resul_ls.sort_values('RMSE')

Unnamed: 0,name,R2,MSE,RMSE,MAE,MAPE,param
17,Lasso,0.007814,483.096411,21.979454,17.47241,8.752995,"{""alpha"": 1, ""max_iter"": 2000}"
1,Lasso,0.007814,483.096411,21.979454,17.47241,8.752995,"{""alpha"": 1, ""max_iter"": 2000}"
15,Lasso,0.001278,486.278712,22.051728,17.537475,8.718866,"{""alpha"": 2, ""max_iter"": 1000}"
13,Lasso,0.001278,486.278712,22.051728,17.537475,8.718866,"{""alpha"": 2, ""max_iter"": 2000}"
11,Lasso,0.001278,486.278712,22.051728,17.537475,8.718866,"{""alpha"": 2, ""max_iter"": 2000}"
10,Lasso,0.001278,486.278712,22.051728,17.537475,8.718866,"{""alpha"": 2, ""max_iter"": 2000}"
0,Lasso,0.001278,486.278712,22.051728,17.537475,8.718866,"{""alpha"": 2, ""max_iter"": 2000}"
4,Lasso,0.001278,486.278712,22.051728,17.537475,8.718866,"{""alpha"": 2, ""max_iter"": 1500}"
6,Lasso,0.001278,486.278712,22.051728,17.537475,8.718866,"{""alpha"": 2, ""max_iter"": 1500}"
7,Lasso,-0.000119,486.958763,22.067142,17.551842,8.713642,"{""alpha"": 3, ""max_iter"": 1500}"


## 3.3 Linear Regresson Ridge

In [51]:
resul_lrr = pd.DataFrame()
model = Ridge
para = {
        'alpha': [1, 2, 3],
        'max_iter': [1000, 1500, 2000]
    }

for i in range(2, 20, 1):
    param = {
        'alpha': random.choice(para['alpha']),
        'max_iter': random.choice(para['max_iter'])
    }

    result = classifier_evaluation(model, param, data)

    resul_lrr = pd.concat([resul_lrr,result]).reset_index(drop=True)

In [52]:
resul_lrr.sort_values('RMSE')

Unnamed: 0,name,R2,MSE,RMSE,MAE,MAPE,param
2,Ridge,0.051167,461.987749,21.4939,17.143729,8.532726,"{""alpha"": 2, ""max_iter"": 1000}"
10,Ridge,0.051167,461.987749,21.4939,17.143729,8.532726,"{""alpha"": 2, ""max_iter"": 1500}"
12,Ridge,0.051167,461.987749,21.4939,17.143729,8.532726,"{""alpha"": 2, ""max_iter"": 1000}"
3,Ridge,0.051167,461.987772,21.493901,17.143954,8.532046,"{""alpha"": 1, ""max_iter"": 1000}"
4,Ridge,0.051167,461.987772,21.493901,17.143954,8.532046,"{""alpha"": 1, ""max_iter"": 1500}"
5,Ridge,0.051167,461.987772,21.493901,17.143954,8.532046,"{""alpha"": 1, ""max_iter"": 1500}"
6,Ridge,0.051167,461.987772,21.493901,17.143954,8.532046,"{""alpha"": 1, ""max_iter"": 1000}"
9,Ridge,0.051167,461.987772,21.493901,17.143954,8.532046,"{""alpha"": 1, ""max_iter"": 2000}"
0,Ridge,0.051166,461.988326,21.493914,17.143516,8.533395,"{""alpha"": 3, ""max_iter"": 1500}"
15,Ridge,0.051166,461.988326,21.493914,17.143516,8.533395,"{""alpha"": 3, ""max_iter"": 1000}"


## 3.4 Linear Regresson ElasticNet

In [53]:
resul_lre = pd.DataFrame()
model = ElasticNet
para = {
        'alpha': [1, 2, 3],
        'max_iter': [1000, 1500, 2000],
        'l1_ratio': [0.3, 0.5, 0.7]
    }

for i in range(2, 20, 1):
    param = {
        'alpha': random.choice(para['alpha']),
        'max_iter': random.choice(para['max_iter']),
        'l1_ratio': random.choice(para['l1_ratio'])
    }

    result = classifier_evaluation(model, param, data)

    resul_lre = pd.concat([resul_lre,result]).reset_index(drop=True)

In [54]:
resul_lre.sort_values('RMSE')

Unnamed: 0,name,R2,MSE,RMSE,MAE,MAPE,param
8,ElasticNet,0.008836,482.59871,21.968129,17.462886,8.740648,"{""alpha"": 1, ""max_iter"": 1000, ""l1_ratio"": 0.3}"
15,ElasticNet,0.008836,482.59871,21.968129,17.462886,8.740648,"{""alpha"": 1, ""max_iter"": 1500, ""l1_ratio"": 0.3}"
3,ElasticNet,0.008836,482.59871,21.968129,17.462886,8.740648,"{""alpha"": 1, ""max_iter"": 1500, ""l1_ratio"": 0.3}"
12,ElasticNet,0.007897,483.055928,21.978533,17.47163,8.746263,"{""alpha"": 1, ""max_iter"": 1000, ""l1_ratio"": 0.5}"
9,ElasticNet,0.007897,483.055928,21.978533,17.47163,8.746263,"{""alpha"": 1, ""max_iter"": 2000, ""l1_ratio"": 0.5}"
6,ElasticNet,0.007897,483.055928,21.978533,17.47163,8.746263,"{""alpha"": 1, ""max_iter"": 1000, ""l1_ratio"": 0.5}"
16,ElasticNet,0.007642,483.179934,21.981354,17.474017,8.751676,"{""alpha"": 1, ""max_iter"": 1000, ""l1_ratio"": 0.7}"
10,ElasticNet,0.005012,484.460597,22.010466,17.49961,8.735342,"{""alpha"": 2, ""max_iter"": 1500, ""l1_ratio"": 0.3}"
17,ElasticNet,0.004537,484.691673,22.015714,17.504456,8.733079,"{""alpha"": 2, ""max_iter"": 1500, ""l1_ratio"": 0.5}"
7,ElasticNet,0.004537,484.691673,22.015714,17.504456,8.733079,"{""alpha"": 2, ""max_iter"": 1500, ""l1_ratio"": 0.5}"


## 3.5 Decision Tree

In [55]:
resul_dt = pd.DataFrame()
model = DecisionTreeRegressor
for i in range(2, 20, 1):
    param = {
            'max_depth': i
        }

    result = classifier_evaluation(model, param, data)

    resul_dt = pd.concat([resul_dt,result]).reset_index(drop=True)

In [56]:
resul_dt.sort_values('RMSE')

Unnamed: 0,name,R2,MSE,RMSE,MAE,MAPE,param
4,DecisionTreeRegressor,0.099494,438.457181,20.939369,16.699697,7.736272,"{""max_depth"": 6}"
7,DecisionTreeRegressor,0.096973,439.684869,20.968664,16.525373,7.1482,"{""max_depth"": 9}"
5,DecisionTreeRegressor,0.095354,440.473032,20.987449,16.694581,7.608475,"{""max_depth"": 7}"
6,DecisionTreeRegressor,0.093329,441.459158,21.010929,16.612163,7.271458,"{""max_depth"": 8}"
8,DecisionTreeRegressor,0.091927,442.141615,21.027164,16.418615,6.893443,"{""max_depth"": 10}"
3,DecisionTreeRegressor,0.090475,442.848381,21.043963,16.829781,7.883226,"{""max_depth"": 5}"
9,DecisionTreeRegressor,0.076436,449.683989,21.205754,16.311587,6.825977,"{""max_depth"": 11}"
2,DecisionTreeRegressor,0.065048,455.228893,21.336094,17.033429,8.129478,"{""max_depth"": 4}"
1,DecisionTreeRegressor,0.055447,459.903903,21.44537,17.111421,8.172098,"{""max_depth"": 3}"
10,DecisionTreeRegressor,0.053211,460.992262,21.47073,16.279172,6.674297,"{""max_depth"": 12}"


## 3.6 Random Forest

In [57]:
resul_dt = pd.DataFrame()
model = RandomForestRegressor
para = {
        'n_estimators': [100, 200, 300],
    }
for i in range(2, 20, 1):
    param = {
            'max_depth': i,
            'n_estimators': random.choice(para['n_estimators'])
        }

    result = classifier_evaluation(model, param, data)

    resul_dt = pd.concat([resul_dt,result]).reset_index(drop=True)

In [58]:
resul_dt.sort_values('RMSE')

Unnamed: 0,name,R2,MSE,RMSE,MAE,MAPE,param
17,RandomForestRegressor,0.3948,294.672399,17.166025,12.655269,6.392848,"{""max_depth"": 19, ""n_estimators"": 300}"
16,RandomForestRegressor,0.387723,298.118164,17.266099,12.854273,6.395905,"{""max_depth"": 18, ""n_estimators"": 100}"
15,RandomForestRegressor,0.382744,300.542322,17.336157,13.005456,6.515489,"{""max_depth"": 17, ""n_estimators"": 300}"
14,RandomForestRegressor,0.371952,305.797292,17.487061,13.231518,6.54434,"{""max_depth"": 16, ""n_estimators"": 100}"
13,RandomForestRegressor,0.362112,310.588127,17.623511,13.523839,6.582296,"{""max_depth"": 15, ""n_estimators"": 300}"
12,RandomForestRegressor,0.34465,319.090602,17.863107,13.864788,6.584671,"{""max_depth"": 14, ""n_estimators"": 100}"
11,RandomForestRegressor,0.325034,328.641556,18.128474,14.172234,6.776065,"{""max_depth"": 13, ""n_estimators"": 200}"
10,RandomForestRegressor,0.300739,340.471105,18.451859,14.524489,6.844082,"{""max_depth"": 12, ""n_estimators"": 100}"
9,RandomForestRegressor,0.274043,353.469389,18.800782,14.915644,7.041895,"{""max_depth"": 11, ""n_estimators"": 100}"
8,RandomForestRegressor,0.247303,366.488803,19.143897,15.247822,7.174317,"{""max_depth"": 10, ""n_estimators"": 300}"


## 3.7 Polynomial Features

In [59]:
resul_pf = pd.DataFrame()
model = LinearRegression

for i in range(2, 6, 1):
    poly = PolynomialFeatures(degree=i)
    X_poly = poly.fit_transform(x_training)
    X_poly_test = poly.transform(x_test)
    
    data_poly = {'x': X_poly, 'y': y_training, 
                 'x_test': X_poly_test, 'y_test': y_test
                }

    param = {
        'fit_intercept': True,
    }

    result = classifier_evaluation_poli(model, param, data_poly, data)

    resul_pf = pd.concat([resul_pf,result]).reset_index(drop=True)

In [60]:
resul_pf.sort_values('RMSE')

Unnamed: 0,name,R2,MSE,RMSE,MAE,MAPE,param
0,LinearRegression,0.09007934,443.0413,21.048545,16.720535,8.242464,"{""fit_intercept"": true}"
1,LinearRegression,-0.2617516,614.3481,24.786046,17.178214,7.956229,"{""fit_intercept"": true}"
2,LinearRegression,-563.3125,274764.3,524.179649,40.303351,19.956478,"{""fit_intercept"": true}"
3,LinearRegression,-1454237.0,708070100.0,26609.585801,1524.866175,465.112363,"{""fit_intercept"": true}"


## 3.8 Polynomial Features Lasso

In [61]:
resul_pfl = pd.DataFrame()
model = Lasso
para = {
        'max_iter': [1000, 1500, 2000],
        'alpha': [1, 2, 3]
    }

for i in range(2, 6, 1):
    poly = PolynomialFeatures(degree=i)
    X_poly = poly.fit_transform(x_training)
    X_poly_test = poly.transform(x_test)
    
    data_poly = {'x': X_poly, 'y': y_training, 
                 'x_test': X_poly_test, 'y_test': y_test
                }

    param = {
        'alpha': i,
        'max_iter': random.choice(para['max_iter'])
    }

    result = classifier_evaluation_poli(model, param, data_poly, data)

    resul_pfl = pd.concat([resul_pfl, result]).reset_index(drop=True)

  model = cd_fast.enet_coordinate_descent(


In [62]:
resul_pfl.sort_values('RMSE')

Unnamed: 0,name,R2,MSE,RMSE,MAE,MAPE,param
0,Lasso,0.002095,485.880941,22.042707,17.529487,8.720824,"{""alpha"": 2, ""max_iter"": 2000}"
1,Lasso,0.001586,486.128803,22.048329,17.532303,8.716763,"{""alpha"": 3, ""max_iter"": 1000}"
3,Lasso,-0.038148,505.475034,22.482772,17.608314,8.721032,"{""alpha"": 5, ""max_iter"": 1500}"
2,Lasso,-1.125439,1034.87809,32.169521,17.927483,8.734078,"{""alpha"": 4, ""max_iter"": 1000}"


## 3.9 Polynomial Features Ridge

In [63]:
resul_pfr = pd.DataFrame()
model = Ridge
para = {
        'max_iter': [1000, 1500, 2000],
        'alpha': [1, 2, 3]
    }

for i in range(2, 6, 1):
    poly = PolynomialFeatures(degree=i)
    X_poly = poly.fit_transform(x_training)
    X_poly_test = poly.transform(x_test)
    
    data_poly = {'x': X_poly, 'y': y_training, 
                 'x_test': X_poly_test, 'y_test': y_test
                }

    param = {
        'alpha': i,
        'max_iter': random.choice(para['max_iter'])
    }

    result = classifier_evaluation_poli(model, param, data_poly, data)

    resul_pfr = pd.concat([resul_pfr, result]).reset_index(drop=True)

In [64]:
resul_pfr.sort_values('RMSE')

Unnamed: 0,name,R2,MSE,RMSE,MAE,MAPE,param
0,Ridge,0.088422,443.8481,21.067702,16.735629,8.304551,"{""alpha"": 2, ""max_iter"": 2000}"
1,Ridge,-0.022191,497.7057,22.309318,16.778435,8.16835,"{""alpha"": 3, ""max_iter"": 2000}"
2,Ridge,-101.703353,50006.36,223.621025,20.358712,8.139158,"{""alpha"": 4, ""max_iter"": 1000}"
3,Ridge,-7292.300888,3551115.0,1884.44032,49.094646,9.174958,"{""alpha"": 5, ""max_iter"": 2000}"


## 3.10 Polynomial Features Elastic Net

In [65]:
resul_pfe = pd.DataFrame()
model = ElasticNet
para = {
        'max_iter': [1000, 1500, 2000],
        'alpha': [1, 2, 3]
    }

for i in range(2, 6, 1):
    poly = PolynomialFeatures(degree=i)
    X_poly = poly.fit_transform(x_training)
    X_poly_test = poly.transform(x_test)
    
    data_poly = {'x': X_poly, 'y': y_training, 
                 'x_test': X_poly_test, 'y_test': y_test
                }

    param = {
        'alpha': i,
        'max_iter': random.choice(para['max_iter'])
    }

    result = classifier_evaluation_poli(model, param, data_poly, data)

    resul_pfe = pd.concat([resul_pfe, result]).reset_index(drop=True)

  model = cd_fast.enet_coordinate_descent(


In [66]:
resul_pfe.sort_values('RMSE')

Unnamed: 0,name,R2,MSE,RMSE,MAE,MAPE,param
0,ElasticNet,0.005895,484.030912,22.000703,17.483889,8.740537,"{""alpha"": 2, ""max_iter"": 1000}"
1,ElasticNet,-0.001066,487.419868,22.077587,17.517493,8.728824,"{""alpha"": 3, ""max_iter"": 1000}"
3,ElasticNet,-0.003134,488.426955,22.100384,17.539236,8.722945,"{""alpha"": 5, ""max_iter"": 1500}"
2,ElasticNet,-0.095934,533.611171,23.100025,17.634064,8.728394,"{""alpha"": 4, ""max_iter"": 2000}"
