In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import keras
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler #scaling de los datos entre 0 y 1
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.metrics import make_scorer
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
from os import listdir
from os.path import isfile, join
plt.style.use('seaborn')
plt.style.use('matplotlibrc.py')

Using TensorFlow backend.


In [2]:
  # Maps each data-file name (used as the lookup key) to the human-readable
  # locality name shown in plot titles and used in saved figure file names.
  dic_localidades = {
        'RiesgoBariloche':'Bariloche',
        'RiesgoBuenosAires':'Buenos Aires',
        'RiesgoCABACABANA':'CABA',
        'RiesgoChacoNA':'Chaco',
        'RiesgoCórdobaCórdoba':'Córdoba',
        'RiesgoEntreRiosRíos':'Entre Ríos',
        'RiesgoJujuyJujuy':'Jujuy',
        'RiesgoLaRiojaRioja':'La Rioja',
        'RiesgoMendozaMendoza':'Mendoza',
        'RiesgoNeuquénNeuquén':'Neuquén',
        'RiesgoRioNegro':'Río Negro',
        'RiesgoSaltaSalta':'Salta',
        'RiesgoSantaCruzSantaCruz':'Santa Cruz',
        'RiesgoSantaFeSantaFe':'Santa Fe',
        'RiesgoTierradelFuegoTierradel':'Tierra del Fuego',
        'RiesgoTucumanTucuman':'Tucumán'
    }

In [3]:
def split_sequence(sequence, n_steps_in, n_steps_out):
    """Cut a series into sliding (input window, output window) pairs.

    For every start index whose combined window still fits inside
    ``sequence``, the first ``n_steps_in`` items become one input sample and
    the following ``n_steps_out`` items become the matching target.

    Args:
        sequence: Sliceable series (list or array) supporting ``len``.
        n_steps_in: Number of consecutive items in each input window.
        n_steps_out: Number of consecutive items in each target window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays holding the stacked input windows
        and target windows. Both are empty arrays when the series is too
        short to hold a single combined window.
    """
    total = len(sequence)
    window = n_steps_in + n_steps_out
    # Start positions for which input + output windows stay within bounds.
    starts = [start for start in range(total) if start + window <= total]
    inputs = [sequence[start:start + n_steps_in] for start in starts]
    targets = [sequence[start + n_steps_in:start + window] for start in starts]
    return np.array(inputs), np.array(targets)

In [4]:
def graph_predictions(l,df_original,y_train_pr,y_test_pr,name):
    """Plot a series with its train/test predictions and save the figure.

    Args:
        l: Offset applied to the x positions of the predictions
            (presumably the window length -- confirm with the caller).
        df_original: Full original series, drawn first as the reference curve.
        y_train_pr: Predictions for the training part; drawn from x = l.
        y_test_pr: Predictions for the test part; drawn from
            x = len(y_train_pr) + 2*l + 1 up to len(df_original) - 2.
        name: Key into ``dic_localidades``; the mapped value is used as the
            title and as the prefix of the saved ``*_fit_2.pdf`` file.
    """
    plt.plot(df_original)
    n_train = len(y_train_pr)
    train_axis = np.arange(l, n_train + l)  # x positions of the training predictions
    test_axis = np.arange(n_train + (2 * l) + 1, len(df_original) - 1)  # x positions of the test predictions
    curves = (
        (train_axis, y_train_pr, 'train'),
        (test_axis, y_test_pr, 'test'),
    )
    for x_values, y_values, tag in curves:
        plt.plot(x_values, y_values, label=tag)
    locality = dic_localidades[str(name)]
    plt.title(locality)
    plt.xlabel('Días')
    plt.ylabel('Casos/100 mil hab')
    plt.legend(fontsize=12)
    plt.savefig(locality + '_fit_2.pdf')
    plt.show()

In [5]:
# Fix the seed of NumPy's global random generator.
# NOTE(review): only NumPy is seeded here; the Keras/TensorFlow backend presumably
# keeps its own random state -- confirm and seed it too if reproducible training is needed.
seed = 7
np.random.seed(seed)

In [6]:
# Directory that holds one CSV file per locality.
mypath = 'Datos'
# Names of all regular files in that directory. os.listdir returns entries in
# arbitrary order, so sort them: later cells iterate over `files` and rely on
# a stable, reproducible order.
files = sorted(f for f in listdir(mypath) if isfile(join(mypath, f)))
# Accumulator for the pooled training values of every locality.
df_train_total = []

In [7]:
# Min-max scaler mapping values into the [0, 1] range.
# NOTE(review): a fresh MinMaxScaler is created again right before fitting,
# so this instance looks unused -- confirm before removing.
scaler = MinMaxScaler(feature_range=(0, 1))
# Length of the input window (past points given to the model).
n_steps_in = 10
# Length of the output window (points the model predicts).
n_steps_out = 10

# armo scaler para los datos

In [8]:
# Pool the training portion of every locality so a single scaler can be fitted on it.
# The accumulator is rebuilt here so that re-running this cell neither appends the
# same series twice nor fails after df_train_total has been replaced by an ndarray.
df_train_total = []
for file in files:
    print(file)
    data = pd.read_csv(join(mypath, file),sep=",",quotechar='"',na_values=[''])
    data = data["incidenciaAcum14d"]
    df = pd.DataFrame(data).to_numpy()
    # Hold out the last 20 points (presumably n_steps_in + n_steps_out, reserved
    # for the forecasting check -- confirm).
    df = df[:-20]
    total_size = df.shape[0]
    train_size = int(0.8*total_size)  # first 80% of the remaining points
    test_size = total_size - train_size
    # Slice from the front: same rows as df[:-test_size], without the
    # negative-zero pitfall when test_size is 0.
    train_data = df[:train_size]
    df_train_total.extend(train_data.flatten().tolist())

RiesgoBariloche
RiesgoBuenosAires
RiesgoCABACABANA
RiesgoChacoNA
RiesgoCórdobaCórdoba
RiesgoEntreRiosRíos
RiesgoJujuyJujuy
RiesgoLaRiojaRioja
RiesgoMendozaMendoza
RiesgoNeuquénNeuquén
RiesgoRioNegro
RiesgoSaltaSalta
RiesgoSantaCruzSantaCruz
RiesgoSantaFeSantaFe
RiesgoTierradelFuegoTierradel
RiesgoTucumanTucuman


In [9]:
# Fit the min-max scaler on the pooled training values (one column) and
# replace df_train_total with its scaled version.
# NOTE(review): df_train_total is overwritten with the scaled array, so running
# this cell a second time would fit the scaler on already-scaled values.
scaler = MinMaxScaler(feature_range=(0, 1))
df_train_total = scaler.fit_transform(np.array(df_train_total).reshape(-1,1))

# ahora armo los datos de train, test y forecast

In [10]:
df_forecast = [] # will hold, per file, the slice of data set aside for forecasting

In [11]:
# Build the scaled train/test windows of every locality and stack them.
# Per-file pieces are collected in lists and concatenated once at the end, so the
# result no longer depends on a specific file name being processed first, and
# re-running the cell starts from a clean state.
df_forecast = []
x_train_parts, y_train_parts = [], []
x_test_parts, y_test_parts = [], []
for file in files:
    print(file)
    data = pd.read_csv(join(mypath, file),sep=",",quotechar='"',na_values=[''])
    data = data["incidenciaAcum14d"]
    df = pd.DataFrame(data).to_numpy()
    # Keep the 10 points preceding the final 10 for the forecasting step.
    df_forecast.append(df[-20:-10])
    # The last 20 points are excluded from both train and test.
    df = df[:-20]
    total_size = df.shape[0]
    train_size = int(0.8*total_size)  # first 80% -> train, remainder -> test
    test_size = total_size - train_size
    # Scale both parts with the scaler fitted on the pooled training values.
    train_data = scaler.transform(df[:train_size].reshape(-1,1))
    test_data = scaler.transform(df[train_size:].reshape(-1,1))
    x_train, y_train = split_sequence(train_data, n_steps_in, n_steps_out)
    x_test, y_test = split_sequence(test_data, n_steps_in, n_steps_out)
    # A series too short for a single window yields empty arrays; skip those
    # instead of breaking the concatenation below.
    if len(x_train):
        x_train_parts.append(x_train)
        y_train_parts.append(y_train)
    if len(x_test):
        x_test_parts.append(x_test)
        y_test_parts.append(y_test)

# Stack the windows of all localities along the sample axis.
x_train_total = np.concatenate(x_train_parts, axis=0)
y_train_total = np.concatenate(y_train_parts, axis=0)
x_test_total = np.concatenate(x_test_parts, axis=0)
y_test_total = np.concatenate(y_test_parts, axis=0)
            

RiesgoBariloche
RiesgoBuenosAires
RiesgoCABACABANA
RiesgoChacoNA
RiesgoCórdobaCórdoba
RiesgoEntreRiosRíos
RiesgoJujuyJujuy
RiesgoLaRiojaRioja
RiesgoMendozaMendoza
RiesgoNeuquénNeuquén
RiesgoRioNegro
RiesgoSaltaSalta
RiesgoSantaCruzSantaCruz
RiesgoSantaFeSantaFe
RiesgoTierradelFuegoTierradel
RiesgoTucumanTucuman


In [12]:
# Arrange inputs as (samples, 1 time step, n_steps_in features), the layout the
# model's input_shape=(1, n_steps_in) expects, and targets as (samples, n_steps_out).
# The target shapes are spelled out explicitly (instead of being derived from the
# current shape) so that running this cell more than once is harmless.
x_train_total = x_train_total.reshape(x_train_total.shape[0], 1, n_steps_in)
x_test_total = x_test_total.reshape(x_test_total.shape[0], 1, n_steps_in)
y_train_total = y_train_total.reshape(y_train_total.shape[0], n_steps_out)
y_test_total = y_test_total.reshape(y_test_total.shape[0], n_steps_out)

In [None]:
# Two stacked 256-unit LSTM layers followed by a dense layer that emits the
# n_steps_out predicted values in one shot.
model = keras.Sequential()
for layer in (
    keras.layers.LSTM(units=256, activation='relu', return_sequences=True, input_shape=(1, n_steps_in)),
    keras.layers.LSTM(units=256, activation='relu', return_sequences=False),
    keras.layers.Dense(units=n_steps_out),
):
    model.add(layer)

model.compile(optimizer='adam', loss=keras.losses.MSE, metrics=['mse'])
model.summary()

# Train on the pooled windows; the test windows are used as validation data.
history = model.fit(
    x_train_total,
    y_train_total,
    epochs=500,
    batch_size=512,
    validation_data=(x_test_total, y_test_total),
    verbose=2,
)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 1, 256)            273408    
_________________________________________________________________
lstm_2 (LSTM)                (None, 256)               525312    
_________________________________________________________________
dense_1 (Dense)              (None, 10)                2570      
Total params: 801,290
Trainable params: 801,290
Non-trainable params: 0
_________________________________________________________________
Train on 3238 samples, validate on 590 samples
Epoch 1/500
 - 1s - loss: 0.0364 - mse: 0.0364 - val_loss: 0.2652 - val_mse: 0.2652
Epoch 2/500
 - 0s - loss: 0.0095 - mse: 0.0095 - val_loss: 0.7894 - val_mse: 0.7894
Epoch 3/500
 - 0s - loss: 0.0046 - mse: 0.0046 - val_loss: 0.4396 - val_mse: 0.4396
Epoch 4/500
 - 0s - loss: 0.0032 - mse: 0.0032 - val_loss: 0.0607 - val_mse: 0.

Epoch 83/500
 - 0s - loss: 5.1690e-04 - mse: 5.1690e-04 - val_loss: 0.0991 - val_mse: 0.0991
Epoch 84/500
 - 0s - loss: 5.2806e-04 - mse: 5.2806e-04 - val_loss: 0.1064 - val_mse: 0.1064
Epoch 85/500
 - 0s - loss: 5.2289e-04 - mse: 5.2289e-04 - val_loss: 0.0884 - val_mse: 0.0884
Epoch 86/500
 - 0s - loss: 5.4930e-04 - mse: 5.4930e-04 - val_loss: 0.1095 - val_mse: 0.1095
Epoch 87/500
 - 0s - loss: 5.0878e-04 - mse: 5.0878e-04 - val_loss: 0.1161 - val_mse: 0.1161
Epoch 88/500
 - 0s - loss: 5.1016e-04 - mse: 5.1016e-04 - val_loss: 0.1098 - val_mse: 0.1098
Epoch 89/500
 - 0s - loss: 5.6591e-04 - mse: 5.6591e-04 - val_loss: 0.1021 - val_mse: 0.1021
Epoch 90/500
 - 0s - loss: 5.4515e-04 - mse: 5.4515e-04 - val_loss: 0.1054 - val_mse: 0.1054
Epoch 91/500
 - 0s - loss: 5.1615e-04 - mse: 5.1615e-04 - val_loss: 0.1068 - val_mse: 0.1068
Epoch 92/500
 - 0s - loss: 5.4181e-04 - mse: 5.4181e-04 - val_loss: 0.0928 - val_mse: 0.0928
Epoch 93/500
 - 0s - loss: 5.9299e-04 - mse: 5.9299e-04 - val_loss: 0.

Epoch 171/500
 - 0s - loss: 4.5067e-04 - mse: 4.5067e-04 - val_loss: 0.0951 - val_mse: 0.0951
Epoch 172/500
 - 0s - loss: 4.5425e-04 - mse: 4.5425e-04 - val_loss: 0.0969 - val_mse: 0.0969
Epoch 173/500
 - 0s - loss: 4.4748e-04 - mse: 4.4748e-04 - val_loss: 0.0942 - val_mse: 0.0942
Epoch 174/500
 - 0s - loss: 4.4549e-04 - mse: 4.4549e-04 - val_loss: 0.0891 - val_mse: 0.0891
Epoch 175/500
 - 0s - loss: 4.4702e-04 - mse: 4.4702e-04 - val_loss: 0.0951 - val_mse: 0.0951
Epoch 176/500
 - 0s - loss: 4.4928e-04 - mse: 4.4928e-04 - val_loss: 0.0814 - val_mse: 0.0814
Epoch 177/500
 - 0s - loss: 4.3453e-04 - mse: 4.3453e-04 - val_loss: 0.1035 - val_mse: 0.1035
Epoch 178/500
 - 0s - loss: 4.7962e-04 - mse: 4.7962e-04 - val_loss: 0.0931 - val_mse: 0.0931
Epoch 179/500
 - 0s - loss: 4.2728e-04 - mse: 4.2728e-04 - val_loss: 0.0948 - val_mse: 0.0948
Epoch 180/500
 - 0s - loss: 4.6104e-04 - mse: 4.6104e-04 - val_loss: 0.1009 - val_mse: 0.1009
Epoch 181/500
 - 0s - loss: 4.7148e-04 - mse: 4.7148e-04 - v

Epoch 259/500
 - 0s - loss: 4.0860e-04 - mse: 4.0860e-04 - val_loss: 0.0833 - val_mse: 0.0833
Epoch 260/500
 - 0s - loss: 4.1082e-04 - mse: 4.1082e-04 - val_loss: 0.1034 - val_mse: 0.1034
Epoch 261/500
 - 0s - loss: 4.0391e-04 - mse: 4.0391e-04 - val_loss: 0.0896 - val_mse: 0.0896
Epoch 262/500
 - 0s - loss: 3.9781e-04 - mse: 3.9781e-04 - val_loss: 0.0974 - val_mse: 0.0974
Epoch 263/500
 - 0s - loss: 3.8636e-04 - mse: 3.8636e-04 - val_loss: 0.0974 - val_mse: 0.0974
Epoch 264/500
 - 0s - loss: 3.9264e-04 - mse: 3.9264e-04 - val_loss: 0.0961 - val_mse: 0.0961
Epoch 265/500
 - 0s - loss: 3.8519e-04 - mse: 3.8519e-04 - val_loss: 0.0925 - val_mse: 0.0925
Epoch 266/500
 - 0s - loss: 4.2029e-04 - mse: 4.2028e-04 - val_loss: 0.1040 - val_mse: 0.1040
Epoch 267/500
 - 0s - loss: 4.6344e-04 - mse: 4.6344e-04 - val_loss: 0.1063 - val_mse: 0.1063
Epoch 268/500
 - 0s - loss: 4.5244e-04 - mse: 4.5244e-04 - val_loss: 0.0957 - val_mse: 0.0957
Epoch 269/500
 - 0s - loss: 4.0682e-04 - mse: 4.0682e-04 - v

Epoch 347/500
 - 0s - loss: 3.8024e-04 - mse: 3.8024e-04 - val_loss: 0.1102 - val_mse: 0.1102
Epoch 348/500
 - 0s - loss: 3.7633e-04 - mse: 3.7633e-04 - val_loss: 0.1117 - val_mse: 0.1117
Epoch 349/500
 - 0s - loss: 3.8976e-04 - mse: 3.8976e-04 - val_loss: 0.1077 - val_mse: 0.1077
Epoch 350/500
 - 0s - loss: 3.7248e-04 - mse: 3.7248e-04 - val_loss: 0.1055 - val_mse: 0.1055
Epoch 351/500
 - 0s - loss: 3.8888e-04 - mse: 3.8888e-04 - val_loss: 0.1069 - val_mse: 0.1069
Epoch 352/500
 - 0s - loss: 4.0610e-04 - mse: 4.0610e-04 - val_loss: 0.1199 - val_mse: 0.1199
Epoch 353/500
 - 0s - loss: 4.5160e-04 - mse: 4.5160e-04 - val_loss: 0.1113 - val_mse: 0.1113
Epoch 354/500
 - 0s - loss: 4.5420e-04 - mse: 4.5420e-04 - val_loss: 0.0930 - val_mse: 0.0930
Epoch 355/500
 - 0s - loss: 3.8983e-04 - mse: 3.8983e-04 - val_loss: 0.1059 - val_mse: 0.1059
Epoch 356/500
 - 0s - loss: 3.7992e-04 - mse: 3.7992e-04 - val_loss: 0.1082 - val_mse: 0.1082
Epoch 357/500
 - 0s - loss: 3.7398e-04 - mse: 3.7398e-04 - v

In [None]:
# Training vs. validation loss per epoch.
loss_curves = history.history
for metric_key, tag in (('loss', 'train'), ('val_loss', 'test')):
    plt.plot(loss_curves[metric_key], label=tag)
plt.xlabel('Épocas')
plt.ylabel('Loss')
plt.legend(fontsize=12)

In [None]:
# Predict on the train and test windows and bring predictions and targets
# back to the original scale.
y_train_pr = scaler.inverse_transform(model.predict(x_train_total))
y_test_pr = scaler.inverse_transform(model.predict(x_test_total))
y_train = scaler.inverse_transform(y_train_total)
y_test = scaler.inverse_transform(y_test_total)
# Compare the true targets with the model predictions, both in original units.
# (The previous version compared the scaled targets with their own un-scaled
# copy, so the predictions never entered the metric.)
# The printed values are plain mean squared errors.
print('train mse squared:',mean_squared_error(y_train,y_train_pr))
print('test mse squared:',mean_squared_error(y_test,y_test_pr))

# veo cómo se ajustan a los train y test data para distintas localidades

In [None]:
# For every locality: forecast n_steps_out points from the preceding
# n_steps_in points and plot the forecast against the true values.
for file in files:
    print(file)
    data = pd.read_csv(join(mypath, file),sep=",",quotechar='"',na_values=[''])
    data = data["incidenciaAcum14d"]
    df = pd.DataFrame(data).to_numpy()
    # Drop the last 10 points of the series (not used by this check).
    df = df[:-10]
    # True values for the forecast horizon: the model emits n_steps_out points,
    # so the reference window must have that length (not n_steps_in).
    y_forecast = np.copy(df[-n_steps_out:])
    df = df[:-n_steps_out]
    # Model input: the n_steps_in points right before the horizon, scaled and
    # laid out as (1 sample, 1 time step, n_steps_in features).
    x_toforecast = scaler.transform(df[-n_steps_in:].reshape(-1,1))
    x_toforecast = x_toforecast.reshape(1,1,n_steps_in)
    # Predict and return to the original scale.
    y_forecasted = scaler.inverse_transform(model.predict(x_toforecast))
    plt.title(dic_localidades[str(file)])
    plt.plot(y_forecasted.flatten(),'o',label='forecasted')
    plt.plot(y_forecast,label='true data')
    plt.legend()
    plt.show()