## Library

In [None]:
#@title
!pip install rarfile
import requests, zipfile, io
import rarfile
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf

Collecting rarfile
  Downloading https://files.pythonhosted.org/packages/95/f4/c92fab227c7457e3b76a4096ccb655ded9deac869849cb03afbe55dfdc1e/rarfile-4.0-py3-none-any.whl
Installing collected packages: rarfile
Successfully installed rarfile-4.0


## Data reading 

In [None]:
# Download the INS bulletin archive (snapshot of 2021-06-23) into memory.
# The timeout keeps the cell from hanging forever on an unresponsive server.
get = requests.get('https://www.ins.gov.co/BoletinesCasosCOVID19Colombia/2021-06-23.rar?', timeout=60)
# Fail here on an HTTP error instead of handing an error page to the RAR parser.
get.raise_for_status()
# Open the archive from the in-memory bytes and unpack it into the working directory.
rar_file = rarfile.RarFile(io.BytesIO(get.content))
rar_file.extractall()

In [None]:
# Decode as Latin-1: with "latin2" the letter Ñ was rendered as Ń
# (e.g. "ESPAŃA", "NARIŃO" in the previews), which is the ISO-8859-2 reading
# of the same byte. low_memory=False parses the file in one pass.
DATA_COLOMBIA_FULL = pd.read_csv('Salida_Datos_Abiertos.csv', header=0, sep=',', encoding="latin-1", low_memory=False)

In [None]:
# Preview the first three raw rows to check how the CSV was parsed.
DATA_COLOMBIA_FULL.head(3)

Unnamed: 0,fecha_hoy_casos,Caso,Fecha Not,Departamento,Departamento_nom,Ciudad_municipio,Ciudad_municipio_nom,Edad,unidad_medida,Sexo,Fuente_tipo_contagio,Ubicacion,Estado,Pais_viajo_1_cod,Pais_viajo_1_nom,Recuperado,Fecha_inicio_sintomas,Fecha_muerte,Fecha_diagnostico,Fecha_recuperado,Tipo_recuperacion,per_etn_,nom_grupo_
0,6/3/2020 0:00:00,1,2/3/2020 0:00:00,11,BOGOTA,11001,BOGOTA,19,1,F,Importado,Casa,Leve,380,ITALIA,Recuperado,27/2/2020 0:00:00,,6/3/2020 0:00:00,13/3/2020 0:00:00,PCR,6.0,
1,9/3/2020 0:00:00,2,6/3/2020 0:00:00,76,VALLE,76111,BUGA,34,1,M,Importado,Casa,Leve,724,ESPAŃA,Recuperado,4/3/2020 0:00:00,,9/3/2020 0:00:00,19/3/2020 0:00:00,PCR,5.0,
2,9/3/2020 0:00:00,3,7/3/2020 0:00:00,5,ANTIOQUIA,5001,MEDELLIN,50,1,F,Importado,Casa,Leve,724,ESPAŃA,Recuperado,29/2/2020 0:00:00,,9/3/2020 0:00:00,15/3/2020 0:00:00,PCR,6.0,


## Data cleansing

### Cleansing

In [None]:
# Replace the source headers with project names. The assignment is positional,
# so the order below must match the column order of the CSV exactly.
DATA_COLOMBIA_FULL.columns = [
    # report / case identification
    "Fecha_reporte", "ID", "Fecha_notificacion",
    # location
    "Codigo_departamento", "Nombre_departamento", "Codigo_ciudad", "Nombre_ciudad",
    # patient
    "Edad", "Unidad_medida", "Sexo",
    # contagion and status
    "Fuente_contagio", "Ubicacion", "Estado", "Codigo_pais", "Nombre_pais", "Recuperado",
    # clinical dates
    "Fecha_inicio_sintomas", "Fecha_muerte", "Fecha_diagnostico", "Fecha_recuperado",
    # recovery type and ethnicity
    "Tipo_recuperacion", "Pertenencia_etnica", "Nombre_grupo_etnico",
]
DATA_COLOMBIA_FULL.head(3)

Unnamed: 0,Fecha_reporte,ID,Fecha_notificacion,Codigo_departamento,Nombre_departamento,Codigo_ciudad,Nombre_ciudad,Edad,Unidad_medida,Sexo,Fuente_contagio,Ubicacion,Estado,Codigo_pais,Nombre_pais,Recuperado,Fecha_inicio_sintomas,Fecha_muerte,Fecha_diagnostico,Fecha_recuperado,Tipo_recuperacion,Pertenencia_etnica,Nombre_grupo_etnico
0,6/3/2020 0:00:00,1,2/3/2020 0:00:00,11,BOGOTA,11001,BOGOTA,19,1,F,Importado,Casa,Leve,380,ITALIA,Recuperado,27/2/2020 0:00:00,,6/3/2020 0:00:00,13/3/2020 0:00:00,PCR,6.0,
1,9/3/2020 0:00:00,2,6/3/2020 0:00:00,76,VALLE,76111,BUGA,34,1,M,Importado,Casa,Leve,724,ESPAŃA,Recuperado,4/3/2020 0:00:00,,9/3/2020 0:00:00,19/3/2020 0:00:00,PCR,5.0,
2,9/3/2020 0:00:00,3,7/3/2020 0:00:00,5,ANTIOQUIA,5001,MEDELLIN,50,1,F,Importado,Casa,Leve,724,ESPAŃA,Recuperado,29/2/2020 0:00:00,,9/3/2020 0:00:00,15/3/2020 0:00:00,PCR,6.0,


In [None]:
# Columns kept for the analysis. The six date columns come first on purpose:
# a later cell addresses them by position (iloc[:, 0:6]).
colnew_data = [
    "Fecha_reporte", "Fecha_notificacion", "Fecha_inicio_sintomas",
    "Fecha_muerte", "Fecha_diagnostico", "Fecha_recuperado",
    "Recuperado", "Nombre_departamento", "Nombre_ciudad", "Sexo", "Edad",
]
DATA_COLOMBIA = DATA_COLOMBIA_FULL.loc[:, colnew_data]

In [None]:
# List the distinct raw values of each categorical column to spot
# inconsistent casing and stray whitespace before cleaning.
for _column in ("Recuperado", "Nombre_departamento", "Sexo"):
    print(_column + ":\n\n", DATA_COLOMBIA[_column].unique(), '\n')

Recuperado:

 ['Recuperado' 'Fallecido' nan 'fallecido' 'Fallecido ' 'Recuperado '
 'Activo' 'Activo '] 

Nombre_departamento:

 ['BOGOTA' 'VALLE' 'ANTIOQUIA' 'CARTAGENA' 'HUILA' 'META' 'RISARALDA'
 'NORTE SANTANDER' 'CALDAS' 'CUNDINAMARCA' 'BARRANQUILLA' 'SANTANDER'
 'QUINDIO' 'TOLIMA' 'CAUCA' 'STA MARTA D.E.' 'CESAR' 'SAN ANDRES'
 'CASANARE' 'NARIŃO' 'ATLANTICO' 'BOYACA' 'CORDOBA' 'BOLIVAR' 'SUCRE'
 'MAGDALENA' 'GUAJIRA' 'CHOCO' 'AMAZONAS' 'CAQUETA' 'PUTUMAYO' 'ARAUCA'
 'VAUPES' 'GUAINIA' 'VICHADA' 'GUAVIARE' 'Tolima' 'Caldas'] 

Sexo:

 ['F' 'M' 'f' 'M ' 'm' 'F ' 'F  '] 



In [None]:
# Normalise the categorical text columns: trim surrounding whitespace first,
# then capitalise. The order matters: capitalising an untrimmed value such as
# " bogota" leaves the first letter lowercase, because the first character is
# a space.
DATA_COLOMBIA = DATA_COLOMBIA.assign(
    Recuperado = DATA_COLOMBIA["Recuperado"].str.strip().str.capitalize(),
    Nombre_departamento = DATA_COLOMBIA["Nombre_departamento"].str.strip().str.capitalize(),
    Sexo = DATA_COLOMBIA["Sexo"].str.strip().str.capitalize(),
)

In [None]:
# Re-list the distinct values to confirm that the normalisation collapsed
# the casing and whitespace variants.
for _column in ("Recuperado", "Nombre_departamento", "Sexo"):
    print(_column + ":\n\n", DATA_COLOMBIA[_column].unique(), '\n')

Recuperado:

 ['Recuperado' 'Fallecido' nan 'Activo'] 

Nombre_departamento:

 ['Bogota' 'Valle' 'Antioquia' 'Cartagena' 'Huila' 'Meta' 'Risaralda'
 'Norte santander' 'Caldas' 'Cundinamarca' 'Barranquilla' 'Santander'
 'Quindio' 'Tolima' 'Cauca' 'Sta marta d.e.' 'Cesar' 'San andres'
 'Casanare' 'Narińo' 'Atlantico' 'Boyaca' 'Cordoba' 'Bolivar' 'Sucre'
 'Magdalena' 'Guajira' 'Choco' 'Amazonas' 'Caqueta' 'Putumayo' 'Arauca'
 'Vaupes' 'Guainia' 'Vichada' 'Guaviare'] 

Sexo:

 ['F' 'M'] 



### Filter data: Bogota

In [None]:
# Keep only the rows whose department is Bogota.
_is_bogota = DATA_COLOMBIA['Nombre_departamento'] == 'Bogota'
DATA_BOGOTA = DATA_COLOMBIA[_is_bogota]

# Work on a copy and strip the constant " 0:00:00" time suffix from the six
# leading date columns. astype(str) turns missing dates into the literal
# string 'nan', which the aggregation cell filters on.
DATA_BOGOTA_TIME = DATA_BOGOTA.copy()
_date_block = DATA_BOGOTA.iloc[:,0:6]
DATA_BOGOTA_TIME.iloc[:,0:6] = _date_block.apply(
    lambda column: column.astype(str).str.replace(" 0:00:00","")
)

In [None]:
# Preview the Bogota subset after the time suffix was removed from the date columns.
DATA_BOGOTA_TIME.head(3)

Unnamed: 0,Fecha_reporte,Fecha_notificacion,Fecha_inicio_sintomas,Fecha_muerte,Fecha_diagnostico,Fecha_recuperado,Recuperado,Nombre_departamento,Nombre_ciudad,Sexo,Edad
0,6/3/2020,2/3/2020,27/2/2020,,6/3/2020,13/3/2020,Recuperado,Bogota,BOGOTA,F,19
7,11/3/2020,9/3/2020,6/3/2020,,11/3/2020,21/3/2020,Recuperado,Bogota,BOGOTA,F,22
8,11/3/2020,8/3/2020,7/3/2020,,11/3/2020,23/3/2020,Recuperado,Bogota,BOGOTA,F,28


In [None]:
def _daily_counts(frame, date_col, count_col, cumulative_col, extra_mask=None):
  """Count rows per calendar date and add a running total.

  Args:
    frame: DataFrame whose ``date_col`` holds day-first date strings, with
      missing dates encoded as the literal string 'nan'.
    date_col: name of the date column to aggregate on.
    count_col: name given to the per-date row count.
    cumulative_col: name given to the cumulative sum of ``count_col``.
    extra_mask: optional boolean Series; when given, only rows where it is
      True (and the date is not 'nan') are counted.

  Returns:
    DataFrame with columns ``['Fecha', count_col, cumulative_col]``, one row
    per date present in the data, sorted by date. Dates with no rows are not
    added, and values that fail to parse are dropped.
  """
  keep = frame[date_col] != 'nan'
  if extra_mask is not None:
    keep = keep & extra_mask
  # errors='coerce' maps unparseable strings to NaT, which groupby discards.
  dates = pd.to_datetime(frame.loc[keep, date_col], errors='coerce', dayfirst=True)
  daily = dates.to_frame(name='Fecha').groupby('Fecha').size().reset_index(name=count_col)
  return daily.assign(**{cumulative_col: daily[count_col].cumsum()})


# New infections, keyed by report date.
DATA_BOGOTA_TIME_NI = _daily_counts(DATA_BOGOTA_TIME, 'Fecha_reporte',
                                    'Nuevos_infectados', 'Infectados_acumulados')
# New recoveries, keyed by recovery date.
DATA_BOGOTA_TIME_NR = _daily_counts(DATA_BOGOTA_TIME, 'Fecha_recuperado',
                                    'Nuevos_recuperados', 'Recuperados_acumulados')
# New deaths: only rows whose status is 'Fallecido', keyed by death date.
DATA_BOGOTA_TIME_ND = _daily_counts(DATA_BOGOTA_TIME, 'Fecha_muerte',
                                    'Nuevas_muertes', 'Muertes_acumuladas',
                                    extra_mask = DATA_BOGOTA_TIME['Recuperado'] == 'Fallecido')

## Descriptive statistics

In [None]:
# Summary statistics (rounded to 3 decimals) for the infections, recoveries
# and deaths series, in that order.
for _series in (DATA_BOGOTA_TIME_NI, DATA_BOGOTA_TIME_NR, DATA_BOGOTA_TIME_ND):
    _summary = _series.describe()
    print(round(_summary, 3), '\n')

       Nuevos_infectados  Infectados_acumulados
count            471.000                471.000
mean            2510.401             375494.437
std             2425.318             324051.184
min                1.000                  1.000
25%              804.500              37066.000
50%             1805.000             321377.000
75%             3554.000             655414.000
max            12490.000            1182399.000 

       Nuevos_recuperados  Recuperados_acumulados
count             462.000                 462.000
mean             2332.457              335094.658
std              2629.974              301164.512
min                 1.000                   1.000
25%               495.750               19228.250
50%              1541.000              281506.000
75%              3342.000              609619.000
max             23374.000             1077595.000 

       Nuevas_muertes  Muertes_acumuladas
count         453.000             453.000
mean           49.508         

In [None]:
def graph_covid(data, name_x1, name_y1, title1, name_x2, name_y2, title2):
  """Show two area charts of ``data`` side by side in one dark-themed figure.

  Args:
    data: DataFrame holding the columns named below.
    name_x1, name_y1: x and y column names for the left panel.
    title1: subplot title of the left panel.
    name_x2, name_y2: x and y column names for the right panel.
    title2: subplot title of the right panel.

  Returns:
    None. The figure is displayed with ``fig.show()``.
  """
  palette = ['rgb(40, 116, 166)']
  fig = make_subplots(rows=1, cols=2, subplot_titles=(title1, title2), shared_xaxes=False)

  # Build each panel with plotly express and move its single trace into the grid.
  panels = ((name_x1, name_y1), (name_x2, name_y2))
  for column_number, (x_name, y_name) in enumerate(panels, start=1):
    area = px.area(data, x = x_name, y = y_name, color_discrete_sequence = palette)
    fig.add_trace(area['data'][0], row=1, col=column_number)

  # The figure "title" is used as a credits line placed near the bottom (y=0.07).
  credits = {'text':'Análisis y diseño gráfico: @DavidNT96 | Fuente de datos: Instituto Nacional de Salud', 'y':0.07,'x':0.5}
  text_font = dict(family="sans-serif", color="white", size=10)
  fig.update_layout(template="plotly_dark", title=credits, font=text_font)
  fig.show()

In [None]:
# Daily (left) and cumulative (right) reported infections in Bogota.
graph_covid(
    DATA_BOGOTA_TIME_NI,
    name_x1 = "Fecha", name_y1 = "Nuevos_infectados",
    title1 = "Número de nuevos infectados COVID-19 en Bogotá D.C",
    name_x2 = "Fecha", name_y2 = "Infectados_acumulados",
    title2 = "Número acumulado de infectados COVID-19 en Bogotá D.C",
)

In [None]:
# Daily (left) and cumulative (right) recoveries in Bogota.
graph_covid(
    DATA_BOGOTA_TIME_NR,
    name_x1 = "Fecha", name_y1 = "Nuevos_recuperados",
    title1 = "Número de nuevos recuperados COVID-19 en Bogotá D.C",
    name_x2 = "Fecha", name_y2 = "Recuperados_acumulados",
    title2 = "Número acumulado de recuperados COVID-19 en Bogotá D.C",
)

In [None]:
# Daily (left) and cumulative (right) deaths in Bogota.
graph_covid(
    DATA_BOGOTA_TIME_ND,
    name_x1 = "Fecha", name_y1 = "Nuevas_muertes",
    title1 = "Número de nuevos fallecidos COVID-19 en Bogotá D.C",
    name_x2 = "Fecha", name_y2 = "Muertes_acumuladas",
    title2 = "Número acumulado de fallecidos COVID-19 en Bogotá D.C",
)

## Data processing

### Window

In [None]:
# Shared scaler that maps values into [0, 1]; it is fitted in the training-set
# cell and reused for the validation inputs and for inverting the forecast.
scaler = MinMaxScaler(feature_range=(0, 1))
def series_window(series, time_step, series_size):
  """Turn a 2-D series into sliding-window inputs and next-step targets.

  Args:
    series: 2-D array indexed as ``series[row, 0]``; only column 0 is read.
    time_step: number of consecutive past observations in each input window.
    series_size: exclusive upper bound on the rows used. One sample is built
      for every target row ``time_step .. series_size - 1``.

  Returns:
    Tuple ``(x_data, y_data)`` where ``x_data`` has shape
    ``(n, time_step, 1)``, ``y_data`` has shape ``(n,)`` and
    ``n = max(series_size - time_step, 0)``. ``y_data[k]`` is the value that
    immediately follows window ``x_data[k]``.
  """
  x_data, y_data = [], []
  for i in range(time_step, series_size):
    x_data.append(series[i-time_step:i, 0])
    y_data.append(series[i, 0])
  # Bug fix: the targets used to be built from x_data, so the model was
  # trained to reproduce its own input instead of predicting the next value.
  y_data = np.array(y_data)
  # An explicit target shape also makes the no-window case return an empty
  # (0, time_step, 1) array instead of failing on x_data.shape[1].
  x_data = np.reshape(np.array(x_data), (len(x_data), time_step, 1))
  return x_data, y_data

### Training set

In [None]:
# Modelling input: the date plus the daily new-infection count ...
DATA_DEEP_FULL = DATA_BOGOTA_TIME_NI.loc[:, ['Fecha','Nuevos_infectados']]
# ... and the count alone as a one-column frame, the shape the scaler expects.
DATA_DEEP = DATA_DEEP_FULL['Nuevos_infectados'].to_frame()

In [None]:
alpha = 0.90      # fraction of the series used for training
time_step = 1     # number of past observations per input window

series_values = DATA_DEEP.values
train_size = int(len(series_values)*alpha)

# Fit the scaler on the training slice only. Fitting it on the whole series
# leaked the validation period's min/max into training. Validation values may
# therefore fall outside [0, 1] after scaling, which is expected.
scaler.fit(series_values[:train_size])
scaled_data = scaler.transform(series_values)

# Windows are built from the first train_size rows only.
x_train, y_train = series_window(scaled_data, time_step, train_size)

### Validation set

In [None]:
valid_size = len(DATA_DEEP) - train_size

# Start time_step rows before the validation period so that the first
# validation target still has a full input window.
_first_row = len(DATA_DEEP) - valid_size - time_step
inputs_valid = DATA_DEEP.iloc[_first_row:].values.reshape(-1,1)

# Reuse the already-fitted scaler (transform only), then build the windows.
inputs_valid = scaler.transform(inputs_valid)
x_valid, y_valid = series_window(inputs_valid, time_step, inputs_valid.shape[0])

### Dataset sizes and shapes

In [None]:
batch_size = 32

# One "label value" line per setting or array shape, as a sanity check of the split.
_report = (
    ('Batch size:', batch_size),
    ('Alpha:', alpha),
    ('Train size:', train_size),
    ('Valid size:', valid_size),
    ('Train size + Valid size:', train_size+valid_size),
    ('Size data:', len(DATA_DEEP)),
    ('Time step:', time_step),
    ('x train:', x_train.shape),
    ('y train:', y_train.shape),
    ('x valid:', x_valid.shape),
    ('y valid:', y_valid.shape),
)
for _label, _value in _report:
    print(_label, _value)

Batch size: 32
Alpha: 0.9
Train size: 423
Valid size: 48
Train size + Valid size: 471
Size data: 471
Time step: 1
x train: (422, 1, 1)
y train: (422, 1)
x valid: (48, 1, 1)
y valid: (48, 1)


In [None]:
def graph_split(data, name_y, plot = None):
  """Label each row as training or validation and optionally plot the split.

  Rows are labelled by position using the module-level ``train_size``: the
  first ``train_size`` rows get "Entrenamiento", the rest "Validacion".

  Args:
    data: DataFrame with a 'Fecha' column and the ``name_y`` column.
    name_y: name of the column drawn on the y axis.
    plot: NOTE the flag is inverted relative to its name and is kept that way
      for existing callers. ``plot=True`` returns the labelled frame WITHOUT
      drawing; any other value draws the figure.

  Returns:
    A copy of ``data`` with an added 'Grupo' column when ``plot == True``;
    otherwise None, after showing the figure.
  """
  # A plain comprehension replaces the former row-wise DataFrame.apply: the
  # label depends only on the row position.
  grupos = ["Entrenamiento" if position < train_size else "Validacion"
            for position in range(len(data))]
  SPLIT = data.copy().assign(Grupo = grupos)
  if plot == True:
    return SPLIT

  color_discrete_me = ["#2874A6", "#B03A2E"]
  fig = px.line(SPLIT, x="Fecha", y=name_y, color = "Grupo", color_discrete_sequence=color_discrete_me)
  fig.update_layout(autosize = False,
                height = 500,
                width=1150,
                font=dict(family="sans-serif", size = 13),
                margin=dict(l=100, r=100, b=80, t=100, pad = 4),
                template="plotly_dark",
                title={'text':'<b>Conjuntos de datos de entrenamiento y validacion </b><br><sub>Número de nuevos infectados COVID-19 en Bogotá D.C | Elaboracion: @DavidNT96 | Fuente: INS',
                       'yref': 'paper',
                       'x':0.1})
  fig['layout']['xaxis']['title']='Time'
  fig['layout']['yaxis']['title']='Value'
  # Traces are already named after their 'Grupo' value by plotly express, so
  # the former positional renaming is dropped; it raised IndexError whenever
  # only one group was present.
  fig.show()

In [None]:
# Draw the training/validation split of the daily new-infection series.
graph_split(DATA_DEEP_FULL, name_y = "Nuevos_infectados")

## Model: Conv1D + GRU

### Model structure

In [None]:
# Stop when validation MAE has not improved for 20 epochs, and roll the model
# back to the best epoch. Without restore_best_weights the model would keep
# the weights from 20 epochs after its best validation score.
callback = tf.keras.callbacks.EarlyStopping(monitor='val_mae', patience=20, restore_best_weights=True)

# Reset Keras global state so that re-running this cell starts from a clean
# graph and the layer names start again from "conv1d", "gru", ...
tf.keras.backend.clear_session()

# Stack of 1x1 convolutions and GRU layers over windows of shape
# (time_step, 1). Every recurrent layer returns the full sequence so the next
# layer still receives 3-D input; Flatten + Dense(1) gives one value per window.
model = tf.keras.models.Sequential([tf.keras.layers.Conv1D(468,1,input_shape=(x_train.shape[1],1)),
                                    tf.keras.layers.GRU(468, return_sequences=True),
                                    tf.keras.layers.GRU(468, return_sequences=True),
                                    tf.keras.layers.Conv1D(468,1),
                                    tf.keras.layers.Conv1D(468,1),
                                    tf.keras.layers.GRU(468, return_sequences=True),
                                    tf.keras.layers.GRU(468, return_sequences=True),
                                    tf.keras.layers.Conv1D(156,1),
                                    tf.keras.layers.Conv1D(156,1),
                                    tf.keras.layers.Flatten(),
                                    tf.keras.layers.Dense(1)
                                    ])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d (Conv1D)              (None, 1, 468)            936       
_________________________________________________________________
gru (GRU)                    (None, 1, 468)            1316952   
_________________________________________________________________
gru_1 (GRU)                  (None, 1, 468)            1316952   
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 1, 468)            219492    
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 1, 468)            219492    
_________________________________________________________________
gru_2 (GRU)                  (None, 1, 468)            1316952   
_________________________________________________________________
gru_3 (GRU)                  (None, 1, 468)            1

In [None]:
#learning_rate=0.00001, momentum=0.9, epsilon=1e-08
# Adam with a small learning rate. Huber loss is used for training and MAE is
# tracked as a metric; the early-stopping callback monitors 'val_mae'.
optimizer = tf.optimizers.Adam(learning_rate=0.0001)
model.compile(
    loss=tf.keras.losses.Huber(),
    optimizer=optimizer,
    metrics=["mae"],
)

In [None]:
num_epochs = 50  # upper bound; early stopping may end training sooner

# Pass the batch size declared in the sizes cell explicitly, so the printed
# configuration is the one actually used for training.
history = model.fit(x_train, y_train,
                    epochs = num_epochs,
                    batch_size = batch_size,
                    validation_data = (x_valid, y_valid),
                    callbacks = [callback])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50


### Model evaluation

In [None]:
# Index of the epochs that actually ran (fewer than num_epochs if early
# stopping ended training).
_epochs = range(len(history.history['loss']))

# Training vs. validation loss (label typo 'Validatation Loss' fixed).
Evaluation1 = {'Epochs': _epochs,
               'Loss': history.history['loss'],
               'Validation Loss': history.history['val_loss']}
# Training vs. validation MAE.
Evaluation2 = {'Epochs': _epochs,
               'mae': history.history['mae'],
               'validation mae': history.history['val_mae']}
metric1 = pd.DataFrame(Evaluation1)
metric2 = pd.DataFrame(Evaluation2)

# Long format (Epochs, variable, value) so plotly express can colour by 'variable'.
metric1_melt = pd.melt(metric1, id_vars=['Epochs'])
metric2_melt = pd.melt(metric2, id_vars=['Epochs'])

In [None]:
#@title
# Two-colour palette, defined at module level because later cells reuse it.
color_discrete_me = ["#2874A6", "#B03A2E"]

# Training vs. validation loss per epoch.
fig = px.line(metric1_melt, x="Epochs", y="value", color = "variable",color_discrete_sequence=color_discrete_me)
_loss_layout = dict(
    autosize = False,
    height = 500,
    width=1150,
    font=dict(family="sans-serif", size = 13),
    margin=dict(l=100, r=100, b=80, t=100, pad = 4),
    template="plotly_dark",
    title={'text':'<b>Loss</b><br><sub>Model COVID-19',
           'yref': 'paper',
           'x':0.1},
)
fig.update_layout(**_loss_layout)
fig['layout']['xaxis']['title']='Epoch'
fig['layout']['yaxis']['title']='Loss'
# Legend labels, assigned by trace position (training first, validation second).
for _position, _legend in enumerate(('Loss', 'Validation loss')):
    fig['data'][_position]['name'] = _legend
fig.show()

In [None]:
#@title
# Training vs. validation MAE per epoch (reuses the palette from the loss cell).
fig = px.line(metric2_melt, x="Epochs", y="value", color = "variable",color_discrete_sequence=color_discrete_me)
fig.update_layout(autosize = False,
                  height = 500,
                  width=1150,
                  font=dict(family="sans-serif", size = 13),
                  margin=dict(l=100, r=100, b=80, t=100, pad = 4),
                  template="plotly_dark",
                  title={'text':'<b>MAE</b><br><sub>Model COVID-19',
                         'yref': 'paper',
                         'x':0.1})
fig['layout']['xaxis']['title']='Epoch'
# Fix: this chart shows MAE; the axis was labelled 'Loss' by copy-paste.
fig['layout']['yaxis']['title']='MAE'
fig['data'][0]['name'] = 'MAE'
fig['data'][1]['name'] = 'Validation MAE'
fig.show()

## FORECAST

In [None]:
# Predict on the validation windows and map the output back to case counts.
forecast = scaler.inverse_transform(model.predict(x_valid))

# plot=True makes graph_split return the labelled frame instead of drawing it.
DATA_SPLIT = graph_split(data = DATA_DEEP_FULL, name_y = "Nuevos_infectados", plot=True)

# Observed values of the validation period, for comparison with the forecast.
_validation_rows = DATA_SPLIT[DATA_SPLIT['Grupo'] == 'Validacion']
validation = np.array(_validation_rows['Nuevos_infectados'])

In [None]:
# The two lengths must match for the forecast to line up with the validation dates.
for _label, _values in (('Forecast size:', forecast), ('Validation size:', validation)):
    print(_label, len(_values))

Forecast size: 48
Validation size: 48


In [None]:
# Build the forecast rows on the validation dates, using the same column
# names as DATA_SPLIT. np.ravel flattens the 2-D forecast into the 1-D column
# a DataFrame expects.
_is_validation = DATA_SPLIT['Grupo'] == 'Validacion'
FORESCAST_DATA = DATA_SPLIT.loc[_is_validation, ['Fecha']].assign(
    Nuevos_infectados = np.ravel(forecast),
    Grupo = 'Pronostico',
)

# Stack observed and forecast rows. DataFrame.append was removed in pandas 2.0;
# pd.concat is the supported equivalent and, like append, keeps the original index.
SPLIT_FORESCAST = pd.concat([DATA_SPLIT, FORESCAST_DATA])

In [None]:
def graph_validation(data, name_y):
  """Plot training, validation and forecast series on one dark-themed chart.

  Args:
    data: long-format DataFrame with a 'Fecha' column, the ``name_y`` column
      and a 'Grupo' column; one line is drawn per 'Grupo' value.
    name_y: name of the column drawn on the y axis.

  Returns:
    None. The figure is displayed with ``fig.show()``.
  """
  # Use this function's own four-colour palette. It used to read the global
  # two-colour list, so the third trace (forecast) repeated the training colour.
  color_discrete_scale = ["rgb(55,126,184)", "rgb(228,26,28)", "rgb(255,127,0)", "rgb(77,175,74)"]
  fig = px.line(data, x="Fecha", y=name_y, color = "Grupo",color_discrete_sequence=color_discrete_scale)
  fig.update_layout(autosize = False,
                  height = 500,
                  width=1150,
                  font=dict(family="sans-serif", size = 13),
                  margin=dict(l=100, r=100, b=80, t=100, pad = 4),
                  template="plotly_dark",
                  # Title fixed: it was a leftover 'FB Stock Price / Facebook'.
                  title={'text':'<b>Pronostico del modelo frente a los datos de validacion</b><br><sub>Número de nuevos infectados COVID-19 en Bogotá D.C | Elaboracion: @DavidNT96 | Fuente: INS',
                         'yref': 'paper',
                         'x':0.1})
  fig['layout']['xaxis']['title']='Tiempo'
  fig['layout']['yaxis']['title']='Valor'
  # Traces are already named after their 'Grupo' value by plotly express, so
  # the former positional renaming is dropped; it would mislabel the lines if
  # the group order differed and raised IndexError with fewer than three groups.
  fig.show()

In [None]:
# Compare the forecast with the observed training and validation series.
graph_validation(SPLIT_FORESCAST, name_y = 'Nuevos_infectados')