# Regresion Lineal Entrenamiento

In [15]:
import pandas as pd
import numpy as np
import time
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import RidgeCV
from sklearn.linear_model import Lasso
from sklearn.linear_model import LassoCV
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import ElasticNetCV
from sklearn import metrics
from math import sqrt
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import TimeSeriesSplit
# Para ignorar los warnings
from warnings import simplefilter
from sklearn.exceptions import ConvergenceWarning
simplefilter("ignore", category=ConvergenceWarning)

Como siempre, realizamos los cambios necesarios para poder trabajar con los datos que nos interesan:

In [16]:
wind_ava = pd.read_csv('wind_ava.csv.gz', compression="gzip", sep=",")

sotavento_columns = [col for col in wind_ava.columns if ".13" in col]
sotavento_columns.insert(0, "datetime")
sotavento_columns.append("energy")
wind_ava_sotavento = wind_ava[sotavento_columns]

wind_ava_sotavento = wind_ava_sotavento.rename(columns = {
    "energy": "Energy", 
    "datetime": "Datetime", 
    "p54.162.13": "Vertical integral of temperature", 
    "p55.162.13": "Vertical integral of water vapour", 
    "cape.13": "Convective available potential energy", 
    "p59.162.13": "Vertical integral of divergence of kinetic energy", 
    "lai_lv.13": "Leaf area index, low vegetation", 
    "lai_hv.13": "Leaf area index, high vegetation", 
    "u10n.13": "Neutral wind at 10 m u-component", 
    "v10n.13": "Neutral wind at 10 m v-component", 
    "sp.13": "Surface pressure",
    "stl1.13": "Soil temperature level 1",
    "u10.13": "10 metre U wind component",
    "v10.13": "10 metre V wind component",
    "t2m.13": "2 metre temperature", 
    "stl2.13": "Soil temperature level 2", 
    "stl3.13": "Soil temperature level 3", 
    "iews.13": "Instantaneous eastward turbulent surface stress",
    "inss.13": "Instantaneous northward turbulent surface", 
    "stl4.13": "Soil temperature level 4", 
    "fsr.13": "Forecast surface roughness", 
    "flsr.13": "Forecast logarithm of surface roughness for heat",
    "u100.13": "100 metre U wind component",
    "v100.13": "100 metre V wind component", })

wind_ava_sotavento["Datetime"] = pd.to_datetime(wind_ava_sotavento["Datetime"])
wind_ava_sotavento.dtypes

Datetime                                             datetime64[ns]
Vertical integral of temperature                            float64
Vertical integral of water vapour                           float64
Convective available potential energy                       float64
Vertical integral of divergence of kinetic energy           float64
Leaf area index, low vegetation                             float64
Leaf area index, high vegetation                            float64
Neutral wind at 10 m u-component                            float64
Neutral wind at 10 m v-component                            float64
Surface pressure                                            float64
Soil temperature level 1                                    float64
10 metre U wind component                                   float64
10 metre V wind component                                   float64
2 metre temperature                                         float64
Soil temperature level 2                        

In [17]:
duplicate_rows = wind_ava_sotavento[wind_ava_sotavento.duplicated()]
print("Número de filas duplicadas: ", duplicate_rows.shape)
null_rows = wind_ava_sotavento[wind_ava_sotavento.isnull().any(axis=1)]
print("Número de filas nulas: ", null_rows.shape)

Número de filas duplicadas:  (0, 24)
Número de filas nulas:  (0, 24)


Definimos el train y test:

In [18]:
import copy

# Dividir la columna 'Datetime' en dos nuevas columnas: 'fecha' y 'año'
wind_ava_sotavento['Month'] = wind_ava_sotavento['Datetime'].dt.month
wind_ava_sotavento['Year'] = wind_ava_sotavento['Datetime'].dt.year

# Eliminar la columna 'Datetime'
wind_ava_sotavento.drop(columns=['Datetime'], inplace=True)
# Ordenar el DataFrame wind_ava_sotavento por 'Datetime'
wind_ava_sotavento.sort_values(by=['Year', 'Month'], inplace=True)
# Crear una copia profunda del DataFrame wind_ava_sotavento
dataset_1 = copy.deepcopy(wind_ava_sotavento)
# Eliminar la columna 'Energy' de dataset_1
del dataset_1['Energy']
# Crear una copia profunda del DataFrame wind_ava_sotavento
dataset_3 = copy.deepcopy(wind_ava_sotavento)
dataset_2 = dataset_3["Energy"]
dataset_1.info()
dataset_2.info()

x_train, x_test, y_train, y_test = train_test_split(dataset_1, dataset_2, test_size=0.33, random_state=42)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4748 entries, 0 to 4747
Data columns (total 24 columns):
 #   Column                                             Non-Null Count  Dtype  
---  ------                                             --------------  -----  
 0   Vertical integral of temperature                   4748 non-null   float64
 1   Vertical integral of water vapour                  4748 non-null   float64
 2   Convective available potential energy              4748 non-null   float64
 3   Vertical integral of divergence of kinetic energy  4748 non-null   float64
 4   Leaf area index, low vegetation                    4748 non-null   float64
 5   Leaf area index, high vegetation                   4748 non-null   float64
 6   Neutral wind at 10 m u-component                   4748 non-null   float64
 7   Neutral wind at 10 m v-component                   4748 non-null   float64
 8   Surface pressure                                   4748 non-null   float64
 9   Soil tem

# Evaluación con hiperparametros por omisión

In [19]:
# Creamos el modelo de regresión lineal
t_inicio = time.time() # Guardamos el tiempo inicial
LR = LinearRegression()
LR.fit(x_train, y_train)  # Se entrena el modelo
accuracy_train_og = LR.score(x_test, y_test)  # Se obtiene el resultado de aciertos
t_fin = time.time() # Guardamos el tiempo final
print('El modelo de regresión lineal con hiperparámetros por omisión ha obtenido una media de resultado de {:.2f}'.format(accuracy_train_og))
# Miramos sus errores
prediction = LR.predict(x_test)
error_rmse_og = sqrt(mean_squared_error(y_test, prediction))
error_mae_og = mean_absolute_error(y_test, prediction)
print('Como errores:\n\tRMSE\t= {}\n\tMAE\t= {}'.format(error_rmse_og, error_mae_og))
t_og = t_fin - t_inicio
print('El tiempo de ejecucion ha sido de {}'.format(t_og))

El modelo de regresión lineal con hiperparámetros por omisión ha obtenido una media de resultado de 0.34
Como errores:
	RMSE	= 541.5647062294927
	MAE	= 420.53952472369485
El tiempo de ejecucion ha sido de 0.01052546501159668


# Hiperparametros

In [20]:
params = {'fit_intercept': [True, False],
          'positive': [True, False]}
# Creamos el modelo de regresión lineal con reducción Ridge
t_inicio = time.time() # Guardamos el tiempo inicial
LR = LinearRegression()
np.random.seed(42)  # Para la toma de decisión entre varios puntos
cv = TimeSeriesSplit(n_splits=3, test_size=(365*2))

LR_cv = GridSearchCV(LR, params, cv=cv)
LR_cv.fit(x_train, y_train)
accuracy_train_lrcv = LR_cv.score(x_test, y_test)  # Se obtiene el resultado de aciertos
t_fin = time.time() # Guardamos el tiempo final
print('El modelo de regresión lineal con regulación Ridge (fit_intercept = {} ; positive = {}) ha obtenido una media de resultado de {:.12f}'.format(LR_cv.best_params_['fit_intercept'], LR_cv.best_params_['positive'], accuracy_train_lrcv))
# Miramos sus errores
prediction = LR_cv.predict(x_test)
error_rmse_lrcv = sqrt(mean_squared_error(y_test, prediction))
error_mae_lrcv = mean_absolute_error(y_test, prediction)
print('Como errores:\n\tRMSE\t= {}\n\tMAE\t= {}'.format(error_rmse_lrcv, error_mae_lrcv))
t_lrcv = t_fin - t_inicio
print('El tiempo de ejecucion ha sido de {}'.format(t_lrcv))

El modelo de regresión lineal con regulación Ridge (fit_intercept = True ; positive = False) ha obtenido una media de resultado de 0.335192380817
Como errores:
	RMSE	= 541.5647062294927
	MAE	= 420.53952472369485
El tiempo de ejecucion ha sido de 0.07566952705383301


  s[P] = solve(AtA[P_ind[:, None], P_ind[None, :]], Atb[P],
  s[P] = solve(AtA[P_ind[:, None], P_ind[None, :]], Atb[P],
  s[P] = solve(AtA[P_ind[:, None], P_ind[None, :]], Atb[P],
  s[P] = solve(AtA[P_ind[:, None], P_ind[None, :]], Atb[P],
  s[P] = solve(AtA[P_ind[:, None], P_ind[None, :]], Atb[P],
  s[P] = solve(AtA[P_ind[:, None], P_ind[None, :]], Atb[P],
  s[P] = solve(AtA[P_ind[:, None], P_ind[None, :]], Atb[P],


In [21]:
dt_lrcv = pd.DataFrame(LR_cv.cv_results_)
dt_lrcv.style

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_fit_intercept,param_positive,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
0,0.004243,0.000717,0.002505,0.00071,True,True,"{'fit_intercept': True, 'positive': True}",0.218422,0.21647,0.266613,0.233835,0.023191,3
1,0.004551,0.000778,0.001761,0.000169,True,False,"{'fit_intercept': True, 'positive': False}",0.31027,0.248734,0.37017,0.309725,0.049578,1
2,0.002511,0.000429,0.002009,1.1e-05,False,True,"{'fit_intercept': False, 'positive': True}",0.21162,0.218195,0.268284,0.2327,0.025305,4
3,0.003647,0.000764,0.001399,0.000455,False,False,"{'fit_intercept': False, 'positive': False}",0.310973,0.248696,0.369228,0.309632,0.049216,2


# Regulacion Ridge

In [22]:
# Creamos el modelo de regresión lineal con reducción Ridge
t_inicio = time.time() # Guardamos el tiempo inicial
R = Ridge()
R.fit(x_train, y_train)  # Se entrena el modelo
accuracy_train_r = R.score(x_test, y_test)  # Se obtiene el resultado de aciertos
t_fin = time.time() # Guardamos el tiempo final
print('El modelo de regresión lineal con regulación Ridge ha obtenido una media de resultado de {:.2f}'.format(accuracy_train_r))
# Miramos sus errores
prediction = R.predict(x_test)
error_rmse_r = sqrt(mean_squared_error(y_test, prediction))
error_mae_r = mean_absolute_error(y_test, prediction)
print('Como errores:\n\tRMSE\t= {}\n\tMAE\t= {}'.format(error_rmse_r, error_mae_r))
t_r = t_fin - t_inicio
print('El tiempo de ejecucion ha sido de {}'.format(t_r))

El modelo de regresión lineal con regulación Ridge ha obtenido una media de resultado de 0.30
Como errores:
	RMSE	= 555.300206774558
	MAE	= 432.56248868785514
El tiempo de ejecucion ha sido de 0.004067659378051758


Buscamos entonces el alpha óptimo:

In [23]:
# Creamos 100 ejemplos de alphas que vayan desde 1e-10 hasta 1000
params = {'alpha': np.logspace(-10, 3, 100)}
# Creamos el modelo de regresión lineal con reducción Ridge
t_inicio = time.time() # Guardamos el tiempo inicial
R = Ridge()

cv = TimeSeriesSplit(n_splits=5)

R_cv = GridSearchCV(R, params, cv=cv)
R_cv.fit(x_train, y_train)
accuracy_train_rcv = R_cv.score(x_test, y_test)  # Se obtiene el resultado de aciertos
t_fin = time.time() # Guardamos el tiempo final
print('El modelo de regresión lineal con regulación Ridge (alpha = {}) ha obtenido una media de resultado de {:.2f}'.format(R_cv.best_params_['alpha'], accuracy_train_rcv))
# Miramos sus errores
prediction = R_cv.predict(x_test)
error_rmse_rcv = sqrt(mean_squared_error(y_test, prediction))
error_mae_rcv = mean_absolute_error(y_test, prediction)
print('Como errores:\n\tRMSE\t= {}\n\tMAE\t= {}'.format(error_rmse_rcv, error_mae_rcv))
t_rcv = t_fin - t_inicio
print('El tiempo de ejecucion ha sido de {}'.format(t_rcv))

  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
  return linalg.solve(A, 

El modelo de regresión lineal con regulación Ridge (alpha = 0.002257019719633926) ha obtenido una media de resultado de 0.34
Como errores:
	RMSE	= 541.6114823221181
	MAE	= 420.59782605318964
El tiempo de ejecucion ha sido de 2.119220495223999


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T


In [24]:
dt_grid = pd.DataFrame(R_cv.cv_results_)
dt_grid.style

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_alpha,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.002139,0.000785,0.001742,0.000752,0.0,{'alpha': 1e-10},0.318441,0.317182,0.292701,0.307692,0.352909,0.317785,0.019827,60
1,0.002408,0.000796,0.001602,0.000354,0.0,{'alpha': 1.3530477745798075e-10},0.318441,0.317182,0.292701,0.307692,0.352909,0.317785,0.019827,59
2,0.002692,0.000994,0.0013,0.000366,0.0,{'alpha': 1.8307382802953697e-10},0.318441,0.317182,0.292701,0.307692,0.352909,0.317785,0.019827,58
3,0.00202,6.2e-05,0.001616,0.000589,0.0,{'alpha': 2.477076355991714e-10},0.318441,0.317182,0.292701,0.307692,0.352909,0.317785,0.019827,57
4,0.012694,0.019567,0.002209,0.00086,0.0,{'alpha': 3.351602650938848e-10},0.318441,0.317182,0.292701,0.307692,0.352909,0.317785,0.019827,56
5,0.002381,0.000519,0.001678,0.000528,0.0,{'alpha': 4.5348785081285916e-10},0.318441,0.317182,0.292701,0.307692,0.352909,0.317785,0.019827,55
6,0.00203,0.000615,0.001294,0.000401,0.0,{'alpha': 6.135907273413163e-10},0.318441,0.317182,0.292701,0.307692,0.352909,0.317785,0.019827,54
7,0.002101,0.000706,0.001585,0.000508,0.0,{'alpha': 8.302175681319736e-10},0.318441,0.317182,0.292701,0.307692,0.352909,0.317785,0.019827,53
8,0.00218,0.000476,0.001603,0.0003,0.0,{'alpha': 1.1233240329780266e-09},0.318441,0.317182,0.292701,0.307692,0.352909,0.317785,0.019827,52
9,0.001872,0.000387,0.001397,0.000339,0.0,{'alpha': 1.519911082952933e-09},0.318441,0.317182,0.292701,0.307692,0.352909,0.317785,0.019827,51


Nueva evaluacion, ahora con el alpha generado esperando una mejora:

In [26]:
# Creamos el modelo de regresión lineal con reducción Ridge
t_inicio = time.time() # Guardamos el tiempo inicial
R = Ridge(alpha = 0.002257019719633926)
R.fit(x_train, y_train)  # Se entrena el modelo
accuracy_train_r = R.score(x_test, y_test)  # Se obtiene el resultado de aciertos
t_fin = time.time() # Guardamos el tiempo final
print('El modelo de regresión lineal con regulación Ridge ha obtenido una media de resultado de {:.2f}'.format(accuracy_train_r))
# Miramos sus errores
prediction = R.predict(x_test)
error_rmse_r = sqrt(mean_squared_error(y_test, prediction))
error_mae_r = mean_absolute_error(y_test, prediction)
print('Como errores:\n\tRMSE\t= {}\n\tMAE\t= {}'.format(error_rmse_r, error_mae_r))
t_r = t_fin - t_inicio
print('El tiempo de ejecucion ha sido de {}'.format(t_r))

El modelo de regresión lineal con regulación Ridge ha obtenido una media de resultado de 0.34
Como errores:
	RMSE	= 541.6114823221181
	MAE	= 420.59782605318964
El tiempo de ejecucion ha sido de 0.037305593490600586


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T


Se observa la mejora, pasamos de un 30% a un 34%

# Regulación Lasso

In [27]:
# Creamos el modelo de regresión lineal con reducción Lasso
t_inicio = time.time() # Guardamos el tiempo inicial
L = Lasso()
L.fit(x_train, y_train)  # Se entrena el modelo
accuracy_train_l = L.score(x_test, y_test)  # Se obtiene el resultado de aciertos
t_fin = time.time() # Guardamos el tiempo final
print('El modelo de regresión lineal con regulación Lasso ha obtenido una media de resultado de {:.2f}'.format(accuracy_train_l))
# Miramos sus errores
prediction = L.predict(x_test)
error_rmse_l = sqrt(mean_squared_error(y_test, prediction))
error_mae_l = mean_absolute_error(y_test, prediction)
print('Como errores:\n\tRMSE\t= {}\n\tMAE\t= {}'.format(error_rmse_l, error_mae_l))
t_l = t_fin - t_inicio
print('El tiempo de ejecucion ha sido de {}'.format(t_l))

El modelo de regresión lineal con regulación Lasso ha obtenido una media de resultado de 0.30
Como errores:
	RMSE	= 557.0228247499601
	MAE	= 434.6129491061683
El tiempo de ejecucion ha sido de 0.06743979454040527


In [28]:
# Creamos 100 ejemplos de alphas que vayan desde 1e-10 hasta 1000
params = {'alpha': np.logspace(-10, 3, 100)}
# Creamos el modelo de regresión lineal
t_inicio = time.time() # Guardamos el tiempo inicial
L = Lasso()
np.random.seed(42)  # Para la toma de decisión entre varios puntos
cv = TimeSeriesSplit(n_splits=5)

L_cv = GridSearchCV(L, params, cv=cv)
L_cv.fit(x_train, y_train)  # Se entrena el modelo
accuracy_train_lcv = L_cv.score(x_test, y_test)  # Se obtiene el resultado de aciertos
t_fin = time.time() # Guardamos el tiempo final
print('El modelo de regresión lineal con regulación Lasso (alpha = {}) ha obtenido una media de resultado de {:.2f}'.format(L_cv.best_params_['alpha'], accuracy_train_lcv))
# Miramos sus errores
prediction = L_cv.predict(x_test)
error_rmse_lcv = sqrt(mean_squared_error(y_test, prediction))
error_mae_lcv = mean_absolute_error(y_test, prediction)
print('Como errores:\n\tRMSE\t= {}\n\tMAE\t= {}'.format(error_rmse_lcv, error_mae_lcv))
t_lcv = t_fin - t_inicio
print('El tiempo de ejecucion ha sido de {}'.format(t_lcv))

El modelo de regresión lineal con regulación Lasso (alpha = 0.08497534359086473) ha obtenido una media de resultado de 0.33
Como errores:
	RMSE	= 541.941966811329
	MAE	= 421.31687943791763
El tiempo de ejecucion ha sido de 16.377796411514282


In [29]:
dt_grid = pd.DataFrame(L_cv.cv_results_)
dt_grid.style

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_alpha,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.037856,0.015041,0.001584,0.000449,0.0,{'alpha': 1e-10},0.319878,0.31622,0.293102,0.307941,0.353088,0.318046,0.019794,71
1,0.034146,0.016871,0.001728,0.000408,0.0,{'alpha': 1.3530477745798075e-10},0.319878,0.31622,0.293102,0.307941,0.353088,0.318046,0.019794,70
2,0.034665,0.016348,0.001707,0.000414,0.0,{'alpha': 1.8307382802953697e-10},0.319878,0.31622,0.293102,0.307941,0.353088,0.318046,0.019794,69
3,0.037241,0.02121,0.001919,0.000497,0.0,{'alpha': 2.477076355991714e-10},0.319878,0.31622,0.293102,0.307941,0.353088,0.318046,0.019794,68
4,0.037951,0.016685,0.001909,0.000494,0.0,{'alpha': 3.351602650938848e-10},0.319878,0.31622,0.293102,0.307941,0.353088,0.318046,0.019794,67
5,0.036305,0.019298,0.001617,0.000378,0.0,{'alpha': 4.5348785081285916e-10},0.319878,0.31622,0.293102,0.307941,0.353088,0.318046,0.019794,66
6,0.037714,0.018513,0.001445,0.000483,0.0,{'alpha': 6.135907273413163e-10},0.319878,0.31622,0.293102,0.307941,0.353088,0.318046,0.019794,65
7,0.035097,0.015921,0.001411,0.000552,0.0,{'alpha': 8.302175681319736e-10},0.319878,0.31622,0.293102,0.307941,0.353088,0.318046,0.019794,64
8,0.03239,0.015418,0.001559,0.000467,0.0,{'alpha': 1.1233240329780266e-09},0.319878,0.31622,0.293102,0.307941,0.353088,0.318046,0.019794,63
9,0.032008,0.014555,0.001828,0.000394,0.0,{'alpha': 1.519911082952933e-09},0.319878,0.31622,0.293102,0.307941,0.353088,0.318046,0.019794,62


# Regulacion Elastic net

In [30]:
# Creamos el modelo de regresión lineal con reducción Elastic net
t_inicio = time.time() # Guardamos el tiempo inicial
EN = ElasticNet()
EN.fit(x_train, y_train)  # Se entrena el modelo
accuracy_train_en = EN.score(x_test, y_test)  # Se obtiene el resultado de aciertos
t_fin = time.time() # Guardamos el tiempo final
print('El modelo de regresión lineal con regulación Elastic net ha obtenido una media de resultado de {:.2f}'.format(accuracy_train_en))
# Miramos sus errores
prediction = EN.predict(x_test)
error_rmse_en = sqrt(mean_squared_error(y_test, prediction))
error_mae_en = mean_absolute_error(y_test, prediction)
print('Como errores:\n\tRMSE\t= {}\n\tMAE\t= {}'.format(error_rmse_en, error_mae_en))
t_en = t_fin - t_inicio
print('El tiempo de ejecucion ha sido de {}'.format(t_en))

El modelo de regresión lineal con regulación Elastic net ha obtenido una media de resultado de 0.25
Como errores:
	RMSE	= 574.901142839115
	MAE	= 456.3650447244232
El tiempo de ejecucion ha sido de 0.033965349197387695


In [31]:
# Creamos 100 ejemplos de alphas que vayan desde 1e-10 hasta 10
params = {'alpha': np.logspace(-10, 3, 100),
          'l1_ratio': np.arange(0, 1, 0.05)}
# Creamos el modelo de regresión lineal con reducción Elastic net
t_inicio = time.time() # Guardamos el tiempo inicial
EN = ElasticNet()

cv = TimeSeriesSplit(n_splits=5)

EN_cv = GridSearchCV(EN, params, cv=cv)
EN_cv.fit(x_train, y_train)  # Se entrena el modelo
accuracy_train_encv = EN_cv.score(x_test, y_test)  # Se obtiene el resultado de aciertos
t_fin = time.time() # Guardamos el tiempo final
print('El modelo de regresión lineal con regulación Elastic net (alpha = {} ; l1_ratio = {}) ha obtenido una media de resultado de {:.2f}'.format(EN_cv.best_params_['alpha'], EN_cv.best_params_['l1_ratio'], accuracy_train_encv))
# Miramos sus errores
prediction = EN_cv.predict(x_test)
error_rmse_encv = sqrt(mean_squared_error(y_test, prediction))
error_mae_encv = mean_absolute_error(y_test, prediction)
print('Como errores:\n\tRMSE\t= {}\n\tMAE\t= {}'.format(error_rmse_encv, error_mae_encv))
t_encv = t_fin - t_inicio
print('El tiempo de ejecucion ha sido de {}'.format(t_encv))

El modelo de regresión lineal con regulación Elastic net (alpha = 7.220809018385471e-06 ; l1_ratio = 0.65) ha obtenido una media de resultado de 0.33
Como errores:
	RMSE	= 541.879579155234
	MAE	= 420.9355947919723
El tiempo de ejecucion ha sido de 308.89621686935425


In [32]:
# Creamos el modelo de regresión lineal con reducción Elastic net
t_inicio = time.time() # Guardamos el tiempo inicial
EN = ElasticNet(alpha=7.220809018385471e-06, l1_ratio=0.65)
np.random.seed(42)  # Para la toma de decisión entre varios puntos

EN.fit(x_train, y_train)  # Se entrena el modelo
accuracy_train_encv = EN.score(x_test, y_test)  # Se obtiene el resultado de aciertos
t_fin = time.time() # Guardamos el tiempo final
print('El modelo de regresión lineal con regulación Elastic net ha obtenido una media de resultado de {:.12f}'.format(accuracy_train_encv))
# Miramos sus errores
prediction = EN.predict(x_test)
error_rmse_encv = sqrt(mean_squared_error(y_test, prediction))
error_mae_encv = mean_absolute_error(y_test, prediction)
print('Como errores:\n\tRMSE\t= {}\n\tMAE\t= {}'.format(error_rmse_encv, error_mae_encv))
t_encv = t_fin - t_inicio
print('El tiempo de ejecucion ha sido de {}'.format(t_encv))

El modelo de regresión lineal con regulación Elastic net ha obtenido una media de resultado de 0.334419100094
Como errores:
	RMSE	= 541.879579155234
	MAE	= 420.9355947919723
El tiempo de ejecucion ha sido de 0.10938096046447754


# Conclusion

In [35]:
# Creamos el modelo de regresión lineal con reducción Ridge
t_inicio = time.time() # Guardamos el tiempo inicial
R = Ridge(alpha = 0.002257019719633926)
R.fit(x_train, y_train)  # Se entrena el modelo
accuracy_train_final = R.score(x_test, y_test)  # Se obtiene el resultado de aciertos
t_fin = time.time() # Guardamos el tiempo final
print('El modelo de regresión lineal con regulación Ridge ha obtenido una media de resultado de {:.2f}'.format(accuracy_train_r))
# Miramos sus errores
prediction = R.predict(x_test)
error_rmse_final = sqrt(mean_squared_error(y_test, prediction))
error_mae_final = mean_absolute_error(y_test, prediction)
print('Como errores:\n\tRMSE\t= {}\n\tMAE\t= {}'.format(error_rmse_r, error_mae_r))
t_final = t_fin - t_inicio
print('El tiempo de ejecucion ha sido de {}'.format(t_r))

El modelo de regresión lineal con regulación Ridge ha obtenido una media de resultado de 0.34
Como errores:
	RMSE	= 541.6114823221181
	MAE	= 420.59782605318964
El tiempo de ejecucion ha sido de 0.037305593490600586


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T


In [36]:
print('El modelo de regresión lineal con hiperparámetros por omisión ha obtenido una media de resultado de {:.12f}'.format(accuracy_train_og))
print('Como errores:\n\tRMSE\t= {}\n\tMAE\t= {}'.format(error_rmse_og, error_mae_og))
print('El tiempo de ejecucion con hiperparámetros por omisión ha sido de {}'.format(t_og))
print("")
print('El modelo de regresión lineal con regulación Ridge ha obtenido una media de resultado de {:.12f}'.format(accuracy_train_final))
print('Como errores:\n\tRMSE\t= {}\n\tMAE\t= {}'.format(error_rmse_final, error_mae_final))
print('El tiempo de ejecucion con hiperparámetros por omisión ha sido de {}'.format(t_final))

El modelo de regresión lineal con hiperparámetros por omisión ha obtenido una media de resultado de 0.335192380817
Como errores:
	RMSE	= 541.5647062294927
	MAE	= 420.53952472369485
El tiempo de ejecucion con hiperparámetros por omisión ha sido de 0.01052546501159668

El modelo de regresión lineal con regulación Ridge ha obtenido una media de resultado de 0.335077534168
Como errores:
	RMSE	= 541.6114823221181
	MAE	= 420.59782605318964
El tiempo de ejecucion con hiperparámetros por omisión ha sido de 0.011455059051513672
