In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import keras
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler #scaling de los datos entre 0 y 1
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.metrics import make_scorer
from sklearn.metrics import r2_score
from os import listdir
from os.path import isfile, join
# Plot styling: apply the built-in 'seaborn' style, then the settings found in 'matplotlibrc.py'.
# NOTE(review): 'matplotlibrc.py' is presumably a local style file next to the notebook — confirm it exists.
plt.style.use('seaborn')
plt.style.use('matplotlibrc.py')

Using TensorFlow backend.


In [2]:
  # Maps each data-file name (as listed from the data directory) to the
  # human-readable locality name used for plot titles and saved figure names.
  dic_localidades = {
        'RiesgoBariloche':'Bariloche',
        'RiesgoBuenosAires':'Buenos Aires',
        'RiesgoCABACABANA':'CABA',
        'RiesgoChacoNA':'Chaco',
        'RiesgoCórdobaCórdoba':'Córdoba',
        'RiesgoEntreRiosRíos':'Entre Ríos',
        'RiesgoJujuyJujuy':'Jujuy',
        'RiesgoLaRiojaRioja':'La Rioja',
        'RiesgoMendozaMendoza':'Mendoza',
        'RiesgoNeuquénNeuquén':'Neuquén',
        'RiesgoRioNegro':'Río Negro',
        'RiesgoSaltaSalta':'Salta',
        'RiesgoSantaCruzSantaCruz':'Santa Cruz',
        'RiesgoSantaFeSantaFe':'Santa Fe',
        'RiesgoTierradelFuegoTierradel':'Tierra del Fuego',
        'RiesgoTucumanTucuman':'Tucumán'
    }

In [3]:
def split_sequence(sequence, n_steps_in, n_steps_out):
    """Cut a sequence into sliding (input, target) window pairs.

    Window ``i`` uses ``sequence[i:i + n_steps_in]`` as input and the
    ``n_steps_out`` items that immediately follow as target. Windows advance
    one position at a time and stop as soon as the target would run past the
    end of the sequence.

    Args:
        sequence: Sliceable sequence (list, NumPy array, ...).
        n_steps_in: Number of items in each input window.
        n_steps_out: Number of items in each target window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays built from the input windows and the
        target windows respectively. Both are empty arrays when the sequence
        is too short to hold a single complete pair.
    """
    length = len(sequence)
    span = n_steps_in + n_steps_out
    # Number of start positions whose full input+target span fits in the sequence.
    n_windows = min(length, max(0, length - span + 1))
    X = [sequence[start:start + n_steps_in] for start in range(n_windows)]
    y = [sequence[start + n_steps_in:start + span] for start in range(n_windows)]
    return np.array(X), np.array(y)

In [4]:
def graph_predictions(l,df_original,y_train_pr,y_test_pr,name):
    """Plot the original series with the train and test predictions on top.

    The figure is titled with the locality name looked up in
    ``dic_localidades``, saved as ``<locality>_fit_2.pdf`` and then shown.

    Args:
        l: Offset applied to the x positions of the prediction curves
            (presumably the window length — TODO confirm with the caller).
        df_original: Original series, plotted against its own index.
        y_train_pr: Predictions on the training split.
        y_test_pr: Predictions on the test split.
        name: Key into ``dic_localidades`` (converted with ``str``).
    """
    plt.plot(df_original)
    n_train = len(y_train_pr)
    # x positions of the training predictions: shifted right by `l`
    x_train_axis = np.arange(l, n_train + l)
    # x positions of the test predictions: begin after the training block plus
    # another `l` + 1 positions and end one position before the series end
    x_test_axis = np.arange(n_train + (2*l) + 1, len(df_original) - 1)
    plt.plot(x_train_axis, y_train_pr, label='train')
    plt.plot(x_test_axis, y_test_pr, label='test')
    locality = dic_localidades[str(name)]
    plt.title(locality)
    plt.xlabel('Días')
    plt.ylabel('Casos/100 mil hab')
    plt.legend(fontsize=12)
    plt.savefig(locality + '_fit_2.pdf')
    plt.show()

In [5]:
# Fix the seed of NumPy's global random number generator.
# NOTE(review): this cell seeds NumPy only; nothing here seeds Keras/TensorFlow — confirm whether that is intended.
seed = 7
np.random.seed(seed)

In [6]:
# Directory that holds one data file per locality.
mypath = 'Datos'
# os.listdir returns entries in arbitrary order; sorting makes every loop over
# `files` visit the localities in the same, reproducible order on any platform.
files = sorted(f for f in listdir(mypath) if isfile(join(mypath, f))) #get all file names of that path
df_train_total = [] # training values of every file are pooled here

In [7]:
# Scaler that maps values into the [0, 1] range.
# NOTE: `scaler` is instantiated again (and fitted) in a later cell before it is first used.
scaler = MinMaxScaler(feature_range=(0, 1))
n_steps_in = 7  # length of each input window given to split_sequence / the model
n_steps_out = 7  # length of each target window, i.e. how many values the model predicts

# armo scaler para los datos

In [8]:
# Pool the training portion of every file so a single scaler can be fitted on it.
# The accumulator is reset here so that re-running this cell neither duplicates
# the data nor fails after `df_train_total` has been rebound to an array.
df_train_total = []
for file in files:
    print(file)
    data = pd.read_csv(join(mypath, file),sep=",",quotechar='"',na_values=[''])
    data = data["incidenciaAcum14d"]
    df = pd.DataFrame(data)
    df = df.to_numpy()
    # the last 20 rows are left out of the train/test split
    df = df[:-20]
    total_size = df.shape[0]
    # chronological split: first 80% of the rows for training, the rest for testing
    train_size = int(0.8*total_size)
    test_size = total_size - train_size
    train_data = df[:train_size]
    df_train_total.extend(list(train_data.flatten()))

RiesgoBariloche
RiesgoBuenosAires
RiesgoCABACABANA
RiesgoChacoNA
RiesgoCórdobaCórdoba
RiesgoEntreRiosRíos
RiesgoJujuyJujuy
RiesgoLaRiojaRioja
RiesgoMendozaMendoza
RiesgoNeuquénNeuquén
RiesgoRioNegro
RiesgoSaltaSalta
RiesgoSantaCruzSantaCruz
RiesgoSantaFeSantaFe
RiesgoTierradelFuegoTierradel
RiesgoTucumanTucuman


In [9]:
# Turn the pooled training values into a single-column array, fit the [0, 1]
# scaler on it and keep the scaled values under the same name.
df_train_total = np.array(df_train_total).reshape(-1, 1)
scaler = MinMaxScaler(feature_range=(0, 1)).fit(df_train_total)
df_train_total = scaler.transform(df_train_total)

# ahora armo los datos de train, test y forecast

In [10]:
df_forecast = [] # the data kept aside for forecasting will be stored here

In [11]:
# Build the pooled train/test window sets from every file.
# The accumulators are (re)created here, so the cell does not depend on which
# file is processed first and can be re-run without duplicating data.
df_forecast = []
x_train_parts, y_train_parts = [], []
x_test_parts, y_test_parts = [], []
for file in files:
    print(file)
    data = pd.read_csv(join(mypath, file),sep=",",quotechar='"',na_values=[''])
    data = data["incidenciaAcum14d"]
    df = pd.DataFrame(data)
    df = df.to_numpy()
    # rows -20..-11 are kept aside for forecasting; the last 20 rows are
    # excluded from the train/test split
    df_forecast.append(df[-20:-10])
    df = df[:-20]
    total_size = df.shape[0]
    # chronological split: first 80% of the rows for training, the rest for testing
    train_size = int(0.8*total_size)
    test_size = total_size - train_size
    train_data = df[:train_size]
    test_data = df[train_size:]
    # scale both splits with the already fitted scaler
    train_data = scaler.transform(train_data.reshape(-1,1))
    test_data = scaler.transform(test_data.reshape(-1,1))
    # sliding (input, target) windows for this file
    x_train, y_train = split_sequence(train_data, n_steps_in, n_steps_out)
    x_test, y_test = split_sequence(test_data, n_steps_in, n_steps_out)
    x_train_parts.append(x_train)
    y_train_parts.append(y_train)
    x_test_parts.append(x_test)
    y_test_parts.append(y_test)

# stack the per-file windows once, in file order
x_train_total = np.vstack(x_train_parts)
y_train_total = np.vstack(y_train_parts)
x_test_total = np.vstack(x_test_parts)
y_test_total = np.vstack(y_test_parts)
            

RiesgoBariloche
RiesgoBuenosAires
RiesgoCABACABANA
RiesgoChacoNA
RiesgoCórdobaCórdoba
RiesgoEntreRiosRíos
RiesgoJujuyJujuy
RiesgoLaRiojaRioja
RiesgoMendozaMendoza
RiesgoNeuquénNeuquén
RiesgoRioNegro
RiesgoSaltaSalta
RiesgoSantaCruzSantaCruz
RiesgoSantaFeSantaFe
RiesgoTierradelFuegoTierradel
RiesgoTucumanTucuman


In [12]:
# Inputs become (samples, 1, n_steps_in) and targets (samples, n_steps_out).
# The target shapes are spelled out explicitly (instead of being derived from
# the arrays' current shape) so that re-running this cell is a no-op rather
# than an error.
x_train_total = x_train_total.reshape(-1, 1, n_steps_in)
x_test_total = x_test_total.reshape(-1, 1, n_steps_in)
y_train_total = y_train_total.reshape(-1, n_steps_out)
y_test_total = y_test_total.reshape(-1, n_steps_out)

In [None]:
# Two stacked LSTM layers followed by a dense layer that outputs n_steps_out values.
model = keras.Sequential([
    # the first LSTM returns the full sequence so the second LSTM can consume it
    keras.layers.LSTM(units=128, activation='relu', return_sequences=True, input_shape=(1, n_steps_in)),
    keras.layers.LSTM(units=128, activation='relu'),
    keras.layers.Dense(units=n_steps_out),
])
model.compile(loss=keras.losses.MSE, optimizer='adam', metrics=['mse'])
model.summary()
# The test windows are passed as validation data, so val_loss is measured on the test split.
history = model.fit(
    x_train_total,
    y_train_total,
    validation_data=(x_test_total, y_test_total),
    epochs=400,
    batch_size=128,
    verbose=2,
)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 1, 128)            69632     
_________________________________________________________________
lstm_2 (LSTM)                (None, 128)               131584    
_________________________________________________________________
dense_1 (Dense)              (None, 7)                 903       
Total params: 202,119
Trainable params: 202,119
Non-trainable params: 0
_________________________________________________________________
Train on 3334 samples, validate on 686 samples
Epoch 1/400
 - 1s - loss: 0.0411 - mse: 0.0411 - val_loss: 0.6628 - val_mse: 0.6628
Epoch 2/400
 - 0s - loss: 0.0162 - mse: 0.0162 - val_loss: 3.6861 - val_mse: 3.6861
Epoch 3/400
 - 0s - loss: 0.0044 - mse: 0.0044 - val_loss: 3.0585 - val_mse: 3.0585
Epoch 4/400
 - 0s - loss: 0.0029 - mse: 0.0029 - val_loss: 1.2310 - val_mse: 1.

In [None]:
# Learning curves: loss per epoch on the training data and on the validation (test) data.
plt.xlabel('Épocas')
plt.ylabel('Loss')
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend(fontsize=12)

In [None]:
# Predict on both splits.
y_train_pr = model.predict(x_train_total)
y_test_pr = model.predict(x_test_total)
# Undo the scaling on predictions and targets so both are in the original units.
y_train_pr = scaler.inverse_transform(y_train_pr)
y_test_pr = scaler.inverse_transform(y_test_pr)
y_train =  scaler.inverse_transform(y_train_total)
y_test = scaler.inverse_transform(y_test_total)
# Score the predictions against the true targets (y_true first, y_pred second).
print('train mse squared:',mean_squared_error(y_train,y_train_pr))
print('test mse squared:',mean_squared_error(y_test,y_test_pr))

# Veo cómo se ajustan las predicciones a los datos de train y test para distintas localidades

In [None]:
# NOTE(review): despite the name `mae`, this object is Keras' mean absolute *percentage* error loss.
mae = tf.keras.losses.MeanAbsolutePercentageError()

In [None]:
# For every locality: feed the model the n_steps_in values that precede the
# held-out horizon and compare its forecast with the true values.
for file in files:
    print(file)
    data = pd.read_csv(join(mypath, file),sep=",",quotechar='"',na_values=[''])
    data = data["incidenciaAcum14d"]
    df = pd.DataFrame(data)
    df = df.to_numpy()
    # the last 10 rows are not used here
    df = df[:-10]
    # True values for the forecast horizon. The model outputs n_steps_out
    # values, so the horizon is sliced with n_steps_out (not n_steps_in).
    y_forecast = np.copy(df[-n_steps_out:])
    df = df[:-n_steps_out]
    # input window: the n_steps_in values right before the horizon
    x_toforecast = np.copy(df[-n_steps_in:])
    x_toforecast = scaler.transform(x_toforecast.reshape(-1,1))
    x_toforecast = x_toforecast.flatten()
    # a single sample in the (samples, 1, n_steps_in) layout the model expects
    x_toforecast = x_toforecast.reshape(1,1,n_steps_in)
    y_forecasted = model.predict(x_toforecast)
    # back to the original units
    y_forecasted = scaler.inverse_transform(y_forecasted)
    plt.title(dic_localidades[str(file)])
    plt.plot(y_forecasted.flatten(),'o',label='forecasted')
    plt.plot(y_forecast,label='true data')
    # same axis labels as graph_predictions so the figure stands alone
    plt.xlabel('Días')
    plt.ylabel('Casos/100 mil hab')
    plt.legend()
    plt.show()