## Machine Learning Develop

#### We Create a dataset to save the accuracies

In [66]:
import pandas as pd

accuracy_saves = pd.DataFrame(columns=['Modelo', 'Accuracy'])

def guardar_accuracy(modelo,accuracy):

    # Accedo a la variable accuracy saves donde guardaremos los resultados
    global accuracy_saves
    
    accuracy_saves.loc[len(accuracy_saves.index)] = [modelo, accuracy]


def actualizar_accuracy(modelo,accuracy):
    
    # Accedo a la variable accuracy saves donde actualizaremos los resultados
    global accuracy_saves
    accuracy_saves.loc[accuracy_saves['Modelo'] == modelo, 'Accuracy'] = accuracy

### Import the datasets


In [67]:
import pandas as pd

# load the csv data sets

# Con Index = False, no guardamos el indice en el archivo resultante
X_train = pd.read_csv("../data/processed/X_train.csv")
X_test = pd.read_csv("../data/processed/X_test.csv")
X_train_scaled = pd.read_csv("../data/processed/X_train_scaled.csv")
X_test_scaled = pd.read_csv("../data/processed/X_test_scaled.csv")

y_train = pd.read_csv("../data/processed/y_train.csv")
y_test = pd.read_csv("../data/processed/y_test.csv")


### Load the model

#### Prediction with Lasso (L1)

In [68]:
from sklearn.linear_model import Lasso

# Carga de los datos de train y test
# Estos datos deben haber sido normalizados y correctamente tratados en un EDA completo

lasso_model = Lasso(alpha = 0.1, max_iter = 1000)

lasso_model.fit(X_train_scaled, y_train)

In [69]:
y_pred_train = lasso_model.predict(X_train_scaled)
y_pred_train

array([13.72869751, 13.05410317, 12.18463078, ..., 15.84671437,
       16.04586601, 10.69142036])

In [70]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

target = y_train
predict = y_pred_train

texto_a_guardar = 'Lasso train'

# Calcula el Mean Absolute Error (MAE)
mae_text = 'Mean Absolute Error'
mae = mean_absolute_error(target, predict)
print(f'MAE: {mae}')
guardar_accuracy(f'{mae_text} {texto_a_guardar}' ,mae )

# Calcula el Mean Squared Error (MSE)
mse_text = 'Mean Squared Error'
mse = mean_squared_error(target, predict)
print(f'MSE: {mse}')
guardar_accuracy(f'{mse_text} {texto_a_guardar}' ,mse )

# Calcula el Root Mean Squared Error (RMSE)
rmse_text = 'Root Mean Squared Error'
rmse = np.sqrt(mse)
print(f'RMSE: {rmse}')
guardar_accuracy(f'{rmse_text} {texto_a_guardar}' ,rmse )

# Calcula el Coeficiente de Determinación (R²)
r2_text = 'R²'
r2 = r2_score(target, predict)
print(f'R²: {r2}')
guardar_accuracy(f'{r2_text} {texto_a_guardar}' ,r2 )



MAE: 0.086180130776197
MSE: 0.011964585236093107
RMSE: 0.10938274651924364
R²: 0.9983517111480121


In [71]:
y_pred_test = lasso_model.predict(X_test_scaled)


In [72]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

target = y_test
predict = y_pred_test

texto_a_guardar = 'Lasso test'

# Calcula el Mean Absolute Error (MAE)
mae_text = 'Mean Absolute Error'
mae = mean_absolute_error(target, predict)
print(f'MAE: {mae}')
guardar_accuracy(f'{mae_text} {texto_a_guardar}' ,mae )

# Calcula el Mean Squared Error (MSE)
mse_text = 'Mean Squared Error'
mse = mean_squared_error(target, predict)
print(f'MSE: {mse}')
guardar_accuracy(f'{mse_text} {texto_a_guardar}' ,mse )

# Calcula el Root Mean Squared Error (RMSE)
rmse_text = 'Root Mean Squared Error'
rmse = np.sqrt(mse)
print(f'RMSE: {rmse}')
guardar_accuracy(f'{rmse_text} {texto_a_guardar}' ,rmse )

# Calcula el Coeficiente de Determinación (R²)
r2_text = 'R²'
r2 = r2_score(target, predict)
print(f'R²: {r2}')
guardar_accuracy(f'{r2_text} {texto_a_guardar}' ,r2 )



MAE: 0.22325133381298345
MSE: 0.08591949676412967
RMSE: 0.2931202769583327
R²: 0.9893176832587962


Lasso Hyperparameters

In [73]:
# Obténgo la lista de parámetros válidos y sus valores actuales
params = lasso_model.get_params()
params

{'alpha': 0.1,
 'copy_X': True,
 'fit_intercept': True,
 'max_iter': 1000,
 'positive': False,
 'precompute': False,
 'random_state': None,
 'selection': 'cyclic',
 'tol': 0.0001,
 'warm_start': False}

In [74]:
from sklearn.model_selection import GridSearchCV

# Definir el modelo Lasso
lasso_model = Lasso()

# Definir los parámetros que deseas explorar en la cuadrícula
param_grid = {

 'alpha': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100],              #un valor más alto implica una regularización más fuerte, lo que puede llevar a la reducción de algunos coeficientes a cero
 'copy_X': [True],            #El parámetro copy_x indica si se debe realizar una copia de los datos originales antes de aplicar ciertas operaciones, evitando así modificar los datos originales durante el proceso.
 'fit_intercept': [True],     #determina si se debe ajustar un término de intercepción en el modelo de regresión lineal, permitiendo o evitando la estimación de un término independiente.
 'max_iter': [10000],          #especifica el número máximo de iteraciones que el algoritmo de optimización puede realizar para converger y encontrar la solución.
 'positive': [False],         #impone restricciones para que los coeficientes del modelo sean no negativos, lo que significa que solo se permiten valores positivos o cero para prevenir coeficientes negativos.
 'precompute': [True],        #indica si se deben precalcular productos internos para mejorar la velocidad del ajuste; si es True, se precalculan, y si es False (valor por defecto), se calculan durante la optimización.
 'random_state': [None],      #establece una semilla para la generación de números aleatorios, lo que garantiza que el proceso de ajuste del modelo sea reproducible
 'selection': ['cyclic'],     #determina el método utilizado para actualizar los coeficientes durante la optimización, donde "cyclic" sigue un orden cíclico y "random" selecciona aleatoriamente predictores en cada paso
 'tol': [0.0001],             #establece la tolerancia para la convergencia del algoritmo de optimización, indicando la precisión requerida para detener el proceso de ajuste cuando la mejora en el objetivo es pequeña
 'warm_start': [True]        #permite reutilizar la solución del modelo anterior como punto de inicio para el ajuste del modelo actual, lo que puede ser útil para realizar ajustes incrementales o iterativos en conjuntos de datos cambiantes.
 
 }


# Configurar la búsqueda de cuadrícula
grid_search = GridSearchCV(estimator=lasso_model, param_grid=param_grid, scoring='r2')

In [75]:
# Entrenar el modelo con todas las combinaciones de parámetros
grid_search.fit(X_train_scaled, y_train)

# Mostrar los mejores parámetros encontrados
print("Mejores parámetros:", grid_search.best_params_)

# Obtener el mejor modelo
best_lasso_model = grid_search.best_estimator_

Mejores parámetros: {'alpha': 0.0001, 'copy_X': True, 'fit_intercept': True, 'max_iter': 10000, 'positive': False, 'precompute': True, 'random_state': None, 'selection': 'cyclic', 'tol': 0.0001, 'warm_start': True}


In [76]:
y_pred_train = best_lasso_model.predict(X_train_scaled)
y_pred_train

array([13.76311093, 13.07289984, 12.15857857, ..., 15.96131804,
       16.15274707, 10.59882662])

In [77]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

target = y_train
predict = y_pred_train

texto_a_guardar = 'Lasso gridSearch'

# Calcula el Mean Absolute Error (MAE)
mae_text = 'Mean Absolute Error'
mae = mean_absolute_error(target, predict)
print(f'MAE: {mae}')
guardar_accuracy(f'{mae_text} {texto_a_guardar}' ,mae )

# Calcula el Mean Squared Error (MSE)
mse_text = 'Mean Squared Error'
mse = mean_squared_error(target, predict)
print(f'MSE: {mse}')
guardar_accuracy(f'{mse_text} {texto_a_guardar}' ,mse )

# Calcula el Root Mean Squared Error (RMSE)
rmse_text = 'Root Mean Squared Error'
rmse = np.sqrt(mse)
print(f'RMSE: {rmse}')
guardar_accuracy(f'{rmse_text} {texto_a_guardar}' ,rmse )

# Calcula el Coeficiente de Determinación (R²)
r2_text = 'R²'
r2 = r2_score(target, predict)
print(f'R²: {r2}')
guardar_accuracy(f'{r2_text} {texto_a_guardar}' ,r2 )



MAE: 0.03301591559565055
MSE: 0.0018549929760126387
RMSE: 0.04306962939256198
R²: 0.9997444487892774


In [78]:
y_pred_test = best_lasso_model.predict(X_test_scaled)
y_pred_test

array([14.57202606, 10.17823363, 18.65869644, 10.5286923 ,  9.36870214,
        9.51802785, 11.42790218, 17.5175672 , 11.57569291,  8.89036244,
       11.48467174, 16.18821586,  8.61230338, 14.38073297, 10.94024064,
       13.37166292, 13.17229311, 14.51808526, 16.62240152, 12.27787277,
       14.1343203 , 11.99764561, 10.81410348, 15.32296853, 13.22122311,
       10.60604335,  7.36287055, 11.04969834, 10.71113595, 17.85698886,
       20.03683326, 16.89870926, 14.14712223, 12.80691467, 11.36164325,
       15.2903374 , 14.56489975, 10.13664043, 11.95737321,  8.99659067,
       16.61060311, 10.70769959, 11.04022611, 12.65832715, 10.61610488,
       14.85343258, 13.80187045, 13.79399793, 16.61097202, 17.63473675,
       14.08560542, 23.23416242, 10.60552871, 16.75133184, 13.23759893,
       18.70594677, 11.37408174, 10.70968727, 13.19192386, 11.80715195,
        8.99472657, 13.80189739, 13.24638893, 10.08217718, 11.99614918,
       10.32940613, 11.38224961, 12.62555838, 12.55894733, 12.08

In [79]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

target = y_test
predict = y_pred_test

texto_a_guardar = 'Lasso gridSearch TEST'

# Calcula el Mean Absolute Error (MAE)
mae_text = 'Mean Absolute Error'
mae = mean_absolute_error(target, predict)
print(f'MAE: {mae}')
guardar_accuracy(f'{mae_text} {texto_a_guardar}' ,mae )

# Calcula el Mean Squared Error (MSE)
mse_text = 'Mean Squared Error'
mse = mean_squared_error(target, predict)
print(f'MSE: {mse}')
guardar_accuracy(f'{mse_text} {texto_a_guardar}' ,mse )

# Calcula el Root Mean Squared Error (RMSE)
rmse_text = 'Root Mean Squared Error'
rmse = np.sqrt(mse)
print(f'RMSE: {rmse}')
guardar_accuracy(f'{rmse_text} {texto_a_guardar}' ,rmse )

# Calcula el Coeficiente de Determinación (R²)
r2_text = 'R²'
r2 = r2_score(target, predict)
print(f'R²: {r2}')
guardar_accuracy(f'{r2_text} {texto_a_guardar}' ,r2 )



MAE: 0.1744520870535314
MSE: 0.0475828398112005
RMSE: 0.2181349119494642
R²: 0.9940840555932887


#### Prediction with Ridge (L2)

In [80]:
from sklearn.linear_model import Ridge

# Carga de los datos de train y test
# Estos datos deben haber sido normalizados y correctamente tratados en un EDA completo

ridge_model = Ridge(alpha = 0.1, max_iter = 1000)

ridge_model.fit(X_train_scaled, y_train)

In [81]:
y_pred_train = ridge_model.predict(X_train_scaled)
y_pred_train

array([[13.77498836],
       [13.08765696],
       [12.15799113],
       ...,
       [15.9530305 ],
       [16.16843567],
       [10.60003923]])

In [82]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

target = y_train
predict = y_pred_train

texto_a_guardar = 'Ridge train'

# Calcula el Mean Absolute Error (MAE)
mae_text = 'Mean Absolute Error'
mae = mean_absolute_error(target, predict)
print(f'MAE: {mae}')
guardar_accuracy(f'{mae_text} {texto_a_guardar}' ,mae )

# Calcula el Mean Squared Error (MSE)
mse_text = 'Mean Squared Error'
mse = mean_squared_error(target, predict)
print(f'MSE: {mse}')
guardar_accuracy(f'{mse_text} {texto_a_guardar}' ,mse )

# Calcula el Root Mean Squared Error (RMSE)
rmse_text = 'Root Mean Squared Error'
rmse = np.sqrt(mse)
print(f'RMSE: {rmse}')
guardar_accuracy(f'{rmse_text} {texto_a_guardar}' ,rmse )

# Calcula el Coeficiente de Determinación (R²)
r2_text = 'R²'
r2 = r2_score(target, predict)
print(f'R²: {r2}')
guardar_accuracy(f'{r2_text} {texto_a_guardar}' ,r2 )



MAE: 0.0329096516177228
MSE: 0.0017216266446838891
RMSE: 0.04149248901528913
R²: 0.9997628218655539


In [83]:
y_pred_test = ridge_model.predict(X_test_scaled)


In [84]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

target = y_test
predict = y_pred_test

texto_a_guardar = 'Ridge test'

# Calcula el Mean Absolute Error (MAE)
mae_text = 'Mean Absolute Error'
mae = mean_absolute_error(target, predict)
print(f'MAE: {mae}')
guardar_accuracy(f'{mae_text} {texto_a_guardar}' ,mae )

# Calcula el Mean Squared Error (MSE)
mse_text = 'Mean Squared Error'
mse = mean_squared_error(target, predict)
print(f'MSE: {mse}')
guardar_accuracy(f'{mse_text} {texto_a_guardar}' ,mse )

# Calcula el Root Mean Squared Error (RMSE)
rmse_text = 'Root Mean Squared Error'
rmse = np.sqrt(mse)
print(f'RMSE: {rmse}')
guardar_accuracy(f'{rmse_text} {texto_a_guardar}' ,rmse )

# Calcula el Coeficiente de Determinación (R²)
r2_text = 'R²'
r2 = r2_score(target, predict)
print(f'R²: {r2}')
guardar_accuracy(f'{r2_text} {texto_a_guardar}' ,r2 )


MAE: 0.17417978115159788
MSE: 0.04726686985971877
RMSE: 0.21740945209378265
R²: 0.9941233399376989


Ridge Hyperparameters

In [85]:
# Obténgo la lista de parámetros válidos y sus valores actuales
params = ridge_model.get_params()
params

{'alpha': 0.1,
 'copy_X': True,
 'fit_intercept': True,
 'max_iter': 1000,
 'positive': False,
 'random_state': None,
 'solver': 'auto',
 'tol': 0.0001}

In [86]:
from sklearn.model_selection import GridSearchCV

# Definir el modelo Ridge
ridge_model = Ridge()

# Definir los parámetros que deseas explorar en la cuadrícula
param_grid = {

 'alpha': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100],              #un valor más alto implica una regularización más fuerte, lo que puede llevar a la reducción de algunos coeficientes a cero
 'copy_X': [True],            #El parámetro copy_x indica si se debe realizar una copia de los datos originales antes de aplicar ciertas operaciones, evitando así modificar los datos originales durante el proceso.
 'fit_intercept': [True],     #determina si se debe ajustar un término de intercepción en el modelo de regresión lineal, permitiendo o evitando la estimación de un término independiente.
 'max_iter': [10000],          #especifica el número máximo de iteraciones que el algoritmo de optimización puede realizar para converger y encontrar la solución.
 'positive': [False],         #impone restricciones para que los coeficientes del modelo sean no negativos, lo que significa que solo se permiten valores positivos o cero para prevenir coeficientes negativos.
 'precompute': [True],        #indica si se deben precalcular productos internos para mejorar la velocidad del ajuste; si es True, se precalculan, y si es False (valor por defecto), se calculan durante la optimización.
 'random_state': [None],      #establece una semilla para la generación de números aleatorios, lo que garantiza que el proceso de ajuste del modelo sea reproducible
 'selection': ['cyclic'],     #determina el método utilizado para actualizar los coeficientes durante la optimización, donde "cyclic" sigue un orden cíclico y "random" selecciona aleatoriamente predictores en cada paso
 'tol': [0.0001],             #establece la tolerancia para la convergencia del algoritmo de optimización, indicando la precisión requerida para detener el proceso de ajuste cuando la mejora en el objetivo es pequeña
 'warm_start': [True]        #permite reutilizar la solución del modelo anterior como punto de inicio para el ajuste del modelo actual, lo que puede ser útil para realizar ajustes incrementales o iterativos en conjuntos de datos cambiantes.
 
 }


# Configurar la búsqueda de cuadrícula
grid_search = GridSearchCV(estimator=lasso_model, param_grid=param_grid, scoring='r2')

In [87]:
# Entrenar el modelo con todas las combinaciones de parámetros
grid_search.fit(X_train_scaled, y_train)

# Mostrar los mejores parámetros encontrados
print("Mejores parámetros:", grid_search.best_params_)

# Obtener el mejor modelo
best_ridge_model = grid_search.best_estimator_

Mejores parámetros: {'alpha': 0.0001, 'copy_X': True, 'fit_intercept': True, 'max_iter': 10000, 'positive': False, 'precompute': True, 'random_state': None, 'selection': 'cyclic', 'tol': 0.0001, 'warm_start': True}


In [98]:
import pickle

#Cambiar el objeto por el que queramos guardar
objeto = best_ridge_model

ruta_guardado = '../models/ridge_model.pkl'

with open(ruta_guardado, "wb") as archivo:
    pickle.dump(objeto, archivo)

In [88]:
y_pred_train = best_ridge_model.predict(X_train_scaled)
y_pred_train

array([13.76311093, 13.07289984, 12.15857857, ..., 15.96131804,
       16.15274707, 10.59882662])

In [89]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

target = y_train
predict = y_pred_train

texto_a_guardar = 'Ridge gridSearch'

# Calcula el Mean Absolute Error (MAE)
mae_text = 'Mean Absolute Error'
mae = mean_absolute_error(target, predict)
print(f'MAE: {mae}')
guardar_accuracy(f'{mae_text} {texto_a_guardar}' ,mae )

# Calcula el Mean Squared Error (MSE)
mse_text = 'Mean Squared Error'
mse = mean_squared_error(target, predict)
print(f'MSE: {mse}')
guardar_accuracy(f'{mse_text} {texto_a_guardar}' ,mse )

# Calcula el Root Mean Squared Error (RMSE)
rmse_text = 'Root Mean Squared Error'
rmse = np.sqrt(mse)
print(f'RMSE: {rmse}')
guardar_accuracy(f'{rmse_text} {texto_a_guardar}' ,rmse )

# Calcula el Coeficiente de Determinación (R²)
r2_text = 'R²'
r2 = r2_score(target, predict)
print(f'R²: {r2}')
guardar_accuracy(f'{r2_text} {texto_a_guardar}' ,r2 )



MAE: 0.03301591559565055
MSE: 0.0018549929760126387
RMSE: 0.04306962939256198
R²: 0.9997444487892774


In [90]:
y_pred_test = best_lasso_model.predict(X_test_scaled)
y_pred_test

array([14.57202606, 10.17823363, 18.65869644, 10.5286923 ,  9.36870214,
        9.51802785, 11.42790218, 17.5175672 , 11.57569291,  8.89036244,
       11.48467174, 16.18821586,  8.61230338, 14.38073297, 10.94024064,
       13.37166292, 13.17229311, 14.51808526, 16.62240152, 12.27787277,
       14.1343203 , 11.99764561, 10.81410348, 15.32296853, 13.22122311,
       10.60604335,  7.36287055, 11.04969834, 10.71113595, 17.85698886,
       20.03683326, 16.89870926, 14.14712223, 12.80691467, 11.36164325,
       15.2903374 , 14.56489975, 10.13664043, 11.95737321,  8.99659067,
       16.61060311, 10.70769959, 11.04022611, 12.65832715, 10.61610488,
       14.85343258, 13.80187045, 13.79399793, 16.61097202, 17.63473675,
       14.08560542, 23.23416242, 10.60552871, 16.75133184, 13.23759893,
       18.70594677, 11.37408174, 10.70968727, 13.19192386, 11.80715195,
        8.99472657, 13.80189739, 13.24638893, 10.08217718, 11.99614918,
       10.32940613, 11.38224961, 12.62555838, 12.55894733, 12.08

In [91]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

target = y_test
predict = y_pred_test

texto_a_guardar = 'Ridge gridSearch TEST'

# Calcula el Mean Absolute Error (MAE)
mae_text = 'Mean Absolute Error'
mae = mean_absolute_error(target, predict)
print(f'MAE: {mae}')
guardar_accuracy(f'{mae_text} {texto_a_guardar}' ,mae )

# Calcula el Mean Squared Error (MSE)
mse_text = 'Mean Squared Error'
mse = mean_squared_error(target, predict)
print(f'MSE: {mse}')
guardar_accuracy(f'{mse_text} {texto_a_guardar}' ,mse )

# Calcula el Root Mean Squared Error (RMSE)
rmse_text = 'Root Mean Squared Error'
rmse = np.sqrt(mse)
print(f'RMSE: {rmse}')
guardar_accuracy(f'{rmse_text} {texto_a_guardar}' ,rmse )

# Calcula el Coeficiente de Determinación (R²)
r2_text = 'R²'
r2 = r2_score(target, predict)
print(f'R²: {r2}')
guardar_accuracy(f'{r2_text} {texto_a_guardar}' ,r2 )



MAE: 0.1744520870535314
MSE: 0.0475828398112005
RMSE: 0.2181349119494642
R²: 0.9940840555932887


#### Prediction with ElasticNet

In [92]:
from sklearn.linear_model import ElasticNet

alpha = 0.5  # Factor de mezcla entre las penalizaciones L1 y L2, ajusta según tu necesidad
l1_ratio = 0.5  # Parámetro que controla la mezcla de L1 y L2, ajusta según tu necesidad

elastic_net_model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio)
elastic_net_model.fit(X_train_scaled, y_train)

In [93]:
y_pred_train = elastic_net_model.predict(X_train_scaled)
y_pred_train

array([13.95931935, 13.03855906, 11.92713958, ..., 15.80457317,
       15.97159214, 11.32447882])

In [94]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

target = y_train
predict = y_pred_train

texto_a_guardar = 'ElasticNet Train'

# Calcula el Mean Absolute Error (MAE)
mae_text = 'Mean Absolute Error'
mae = mean_absolute_error(target, predict)
print(f'MAE: {mae}')
guardar_accuracy(f'{mae_text} {texto_a_guardar}' ,mae )

# Calcula el Mean Squared Error (MSE)
mse_text = 'Mean Squared Error'
mse = mean_squared_error(target, predict)
print(f'MSE: {mse}')
guardar_accuracy(f'{mse_text} {texto_a_guardar}' ,mse )

# Calcula el Root Mean Squared Error (RMSE)
rmse_text = 'Root Mean Squared Error'
rmse = np.sqrt(mse)
print(f'RMSE: {rmse}')
guardar_accuracy(f'{rmse_text} {texto_a_guardar}' ,rmse )

# Calcula el Coeficiente de Determinación (R²)
r2_text = 'R²'
r2 = r2_score(target, predict)
print(f'R²: {r2}')
guardar_accuracy(f'{r2_text} {texto_a_guardar}' ,r2 )

MAE: 0.39302311587449107
MSE: 0.24468991617108676
RMSE: 0.49466141568863725
R²: 0.966290543879285


In [95]:
y_pred_test = elastic_net_model.predict(X_test_scaled)

In [96]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

target = y_test
predict = y_pred_test

texto_a_guardar = 'ElasticNet test'

# Calcula el Mean Absolute Error (MAE)
mae_text = 'Mean Absolute Error'
mae = mean_absolute_error(target, predict)
print(f'MAE: {mae}')
guardar_accuracy(f'{mae_text} {texto_a_guardar}' ,mae )

# Calcula el Mean Squared Error (MSE)
mse_text = 'Mean Squared Error'
mse = mean_squared_error(target, predict)
print(f'MSE: {mse}')
guardar_accuracy(f'{mse_text} {texto_a_guardar}' ,mse )

# Calcula el Root Mean Squared Error (RMSE)
rmse_text = 'Root Mean Squared Error'
rmse = np.sqrt(mse)
print(f'RMSE: {rmse}')
guardar_accuracy(f'{rmse_text} {texto_a_guardar}' ,rmse )

# Calcula el Coeficiente de Determinación (R²)
r2_text = 'R²'
r2 = r2_score(target, predict)
print(f'R²: {r2}')
guardar_accuracy(f'{r2_text} {texto_a_guardar}' ,r2 )

MAE: 0.4788394063612089
MSE: 0.39433363025135476
RMSE: 0.6279598954163831
R²: 0.950972748925453


Finally we see the accuracies obtained from different models

In [97]:
busqueda = 'R²'

accuracy_saves[accuracy_saves['Modelo'].str.contains(busqueda, case=False)]

Unnamed: 0,Modelo,Accuracy
3,R² Lasso train,0.998352
7,R² Lasso test,0.989318
11,R² Lasso gridSearch,0.999744
15,R² Lasso gridSearch TEST,0.994084
19,R² Ridge train,0.999763
23,R² Ridge test,0.994123
27,R² Ridge gridSearch,0.999744
31,R² Ridge gridSearch TEST,0.994084
35,R² ElasticNet Train,0.966291
39,R² ElasticNet test,0.950973


After searching through different models, we decided on either the Ridge or Lasso model after tuning the hyperparameters

After this practice, we have learned: 
- how to enhance linear regression with hyperparameters
- how L1 and L2 behave to reduce the complexity of the model, and prevent overfitting.