# LTSM SIMPLE



## Importación de librerías y datos

Por medio de nuestra libería ESIOS_contoller.py importamos nuestro último dataset de datos y lo parseamos para su uso. Sirve tanto como para Drive como jupiter.

In [2]:
import json, urllib, datetime, pickle, time
import pandas as pd
import numpy as np
import seaborn as sns 
import matplotlib.pyplot as plt
from sklearn.model_selection import *
from sklearn.preprocessing import *
from sklearn.metrics import *
from keras.models import *
from keras.layers import *
from keras.optimizers import *
from scipy.stats import *
from importlib.machinery import SourceFileLoader


try:
  from google.colab import drive
  drive.mount('/content/drive')
  path = '/content/drive/My Drive/TFM/Utils/ESIOS_contoller.py'
  in_colab = True
except:
  path = '../utils/ESIOS_contoller.py'
  in_colab = False
  

esios_assembler = SourceFileLoader('esios', path).load_module()

esios_controller = esios_assembler.ESIOS(in_colab)
data_consumo = esios_controller.get_data('non-secuencial')

Using TensorFlow backend.


Mostrando los datos de data_total_for_non_serial.csv
(30555, 33)
________________________________________________________________________________


## Preparación de los datos

In [0]:
x_data = esios_controller.get_data_real_time()
y_data = esios_controller.get_target_data()

# Split the data
x_train, x_valid, y_train, y_valid = train_test_split(x_data, y_data, test_size=0.33, shuffle=False)

#Reshape for the LSTM
x_train = x_train.to_numpy()
x_valid = x_valid.to_numpy()
y_train = y_train.to_numpy()
y_valid = y_valid.to_numpy()

print('Xtrain_dim:', x_train.shape)
print('Ytrain_dim:', y_train.shape)

Xtrain_dim: (20471, 23)
Ytrain_dim: (20471, 1)


##Modelo

In [0]:
from sklearn.tree import DecisionTreeRegressor

regressor = DecisionTreeRegressor(random_state=0)
regressor.fit(x_train, y_train)

DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,
                      max_leaf_nodes=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      presort=False, random_state=0, splitter='best')

In [0]:
from sklearn.model_selection import cross_val_score

cross_val_score(regressor, x_valid, y_valid, cv=10)

array([0.78775875, 0.52461888, 0.6820827 , 0.72300306, 0.59406513,
       0.80706985, 0.73176272, 0.76880528, 0.7622391 , 0.5381068 ])

In [0]:
print('R2 en entrenamiento es: ', regressor.score(x_train, y_train))
print('R2 en validación es: ', regressor.score(x_valid, y_valid))

print('MAE: ', mean_absolute_error(regressor.predict(x_valid), y_valid))
print('MSE: ', mean_squared_error(regressor.predict(x_valid), y_valid))
#print('RMSE: ', mean_squared_log_error(regressor.predict(x_valid), y_valid))
print('Variance: ', explained_variance_score(regressor.predict(x_valid), y_valid))
print('R2: ', r2_score(regressor.predict(x_valid), y_valid))
#print(regressor.coef_)

R2 en entrenamiento es:  1.0
R2 en validación es:  0.8140189333930462
MAE:  3.5193722729075763
MSE:  23.547518117810395
Variance:  0.8228015232487185
R2:  0.8218263409334737


## Normalizados

In [0]:
scaler = StandardScaler()
x_train_est = scaler.fit_transform(x_train)
y_train_est = scaler.fit_transform(y_train)
x_valid_est = scaler.fit_transform(x_valid)
y_valid_est = scaler.fit_transform(y_valid)

In [0]:
regressor_standarized = DecisionTreeRegressor(random_state=0)
regressor_standarized.fit(x_train_est, y_train_est)

DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,
                      max_leaf_nodes=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      presort=False, random_state=0, splitter='best')

In [0]:
print('R2 en entrenamiento es: ', regressor_standarized.score(x_train_est, y_train_est))
print('R2 en validación es: ', regressor_standarized.score(x_valid_est, y_valid_est))

print('MAE: ', mean_absolute_error(regressor_standarized.predict(x_valid_est), y_valid_est))
print('MSE: ', mean_squared_error(regressor_standarized.predict(x_valid_est), y_valid_est))
#print('RMSE: ', mean_squared_log_error(regressor_standarized.predict(x_valid_est), y_valid_est))
print('Variance: ', explained_variance_score(regressor_standarized.predict(x_valid_est), y_valid_est))
print('R2: ', r2_score(regressor_standarized.predict(x_valid_est), y_valid_est))

R2 en entrenamiento es:  0.9999999971664847
R2 en validación es:  0.8564415424845535
MAE:  0.2715218174119526
MSE:  0.14355845751544644
Variance:  0.8473509162286471
R2:  0.8472343601302845


## Optimización de modelos

In [0]:
param_grid = {"criterion": ["mse", "mae"],
              "min_samples_split": [10, 20, 40],
              "max_depth": [2, 6, 8],
              "min_samples_leaf": [20, 40, 100],
              "max_leaf_nodes": [5, 20, 100],
              }

clf = GridSearchCV(regressor, param_grid, cv=5)
clf.fit(x_train, y_train)

GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=DecisionTreeRegressor(criterion='mse', max_depth=None,
                                             max_features=None,
                                             max_leaf_nodes=None,
                                             min_impurity_decrease=0.0,
                                             min_impurity_split=None,
                                             min_samples_leaf=1,
                                             min_samples_split=2,
                                             min_weight_fraction_leaf=0.0,
                                             presort=False, random_state=0,
                                             splitter='best'),
             iid='warn', n_jobs=None,
             param_grid={'criterion': ['mse', 'mae'], 'max_depth': [2, 6, 8],
                         'max_leaf_nodes': [5, 20, 100],
                         'min_samples_leaf': [20, 40, 100],
                    

In [0]:
print(clf.best_params_)


{'criterion': 'mse', 'max_depth': 8, 'max_leaf_nodes': 100, 'min_samples_leaf': 40, 'min_samples_split': 10}


## Try best model

In [0]:
regressor = DecisionTreeRegressor(criterion='mse', max_depth= 8, max_leaf_nodes= 200, min_samples_leaf= 40, min_samples_split= 2)
model_fit = regressor.fit(x_train, y_train)

In [0]:
from sklearn.model_selection import cross_val_score

cross_val_score(regressor, x_valid, y_valid, cv=10)

array([0.89486021, 0.72979586, 0.80117989, 0.81741218, 0.76559692,
       0.88774078, 0.83484429, 0.86445979, 0.83767378, 0.71632333])

In [0]:
print('R2 en entrenamiento es: ', regressor.score(x_train, y_train))
print('R2 en validación es: ', regressor.score(x_valid, y_valid))

print('MAE: ', mean_absolute_error(regressor.predict(x_valid), y_valid))
print('MSE: ', mean_squared_error(regressor.predict(x_valid), y_valid))
#print('RMSE: ', mean_squared_log_error(regressor.predict(x_valid), y_valid))
print('Variance: ', explained_variance_score(regressor.predict(x_valid), y_valid))
print('R2: ', r2_score(regressor.predict(x_valid), y_valid))
#print(regressor.coef_)

R2 en entrenamiento es:  0.949767732603002
R2 en validación es:  0.9163412188583834
MAE:  2.3705676905981328
MSE:  10.592243073911355
Variance:  0.9072843005214764
R2:  0.905683241767359
