In [None]:
# Este código está basado en el código del repositorio https://github.com/k-sys/covid-19
# Este repositorio fue clonado el 2020-04-25 en https://github.com/coronamex/covid-20
# Tiene pequeñas modificaciones para realizar estimaciones con los datos de México distribuidos
# a través de CoronaMex.
# El código tomado de https://github.com/k-sys/covid-19 está en el dominio público.
# El resto se distribuye con una licencia GPL-3

# (C) Copyright 2020 Sur Herrera Paredes

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# For some reason Theano is unhappy when I run the GP, need to disable future warnings
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import os
import requests
import pymc3 as pm
import pandas as pd
import numpy as np
import theano
import theano.tensor as tt
import arviz

from matplotlib import pyplot as plt
from matplotlib import dates as mdates
from matplotlib import ticker

from datetime import date
from datetime import datetime

from IPython.display import clear_output

%config InlineBackend.figure_format = 'retina'

In [None]:
def confirmed_to_onset(confirmed, p_delay):
    """Spread a series of confirmed counts backwards in time using a delay distribution.

    Parameters
    ----------
    confirmed : pandas.Series
        Counts per date; must not contain missing values. Its last index
        label is used as the final date of the result.
    p_delay : array-like
        Weights convolved with the time-reversed counts
        (presumably ``p_delay[k]`` is the probability of a k-day delay).

    Returns
    -------
    pandas.Series
        The convolved counts, of length ``len(confirmed) + len(p_delay) - 1``,
        indexed by consecutive days ending on ``confirmed.index[-1]``.
    """
    assert not confirmed.isna().any()

    # Convolving the time-reversed counts pushes each count into the past.
    backwards = np.convolve(confirmed.values[::-1], p_delay)

    # The full convolution is longer than the input, so the date range is
    # extended into the past while keeping the same final date.
    onset_dates = pd.date_range(end=confirmed.index[-1],
                                periods=backwards.size)

    # Undo the reversal so values run in chronological order again.
    return pd.Series(backwards[::-1], index=onset_dates)

In [None]:
def adjust_onset_for_right_censorship(onset, p_delay):
    """Scale up recent onset counts by the cumulative delay distribution.

    The cumulative sum of ``p_delay`` is aligned so that its first element
    applies to the LAST entry of ``onset``, its second element to the
    second-to-last entry, and so on. Entries of ``onset`` older than the
    length of ``p_delay`` are divided by 1.

    Parameters
    ----------
    onset : pandas.Series
        Onset counts in chronological order.
    p_delay : pandas.Series or numpy.ndarray
        Delay distribution (anything exposing ``.cumsum()``).

    Returns
    -------
    adjusted : pandas.Series
        ``onset`` divided element-wise by the aligned cumulative delay.
    cumulative_p_delay : numpy.ndarray
        The aligned cumulative delay, same length as ``onset``.
    """
    cumulative_p_delay = np.asarray(p_delay.cumsum())
    n_days = len(onset)

    if len(cumulative_p_delay) >= n_days:
        # onset is no longer than the delay distribution: only the first
        # n_days cumulative values can apply. Padding with a negative width
        # would make np.pad raise, so truncate instead.
        cumulative_p_delay = cumulative_p_delay[:n_days]
    else:
        # Pad with ones so shapes match: days older than the longest
        # delay are treated as fully observed.
        cumulative_p_delay = np.pad(
            cumulative_p_delay,
            (0, n_days - len(cumulative_p_delay)),
            constant_values=1)

    # Flip so the smallest cumulative probability lines up with the most
    # recent day.
    cumulative_p_delay = np.flip(cumulative_p_delay)

    # Adjusts observed onset values to expected terminal onset values
    adjusted = onset / cumulative_p_delay

    return adjusted, cumulative_p_delay


In [None]:
class MCMCModel(object):
    """PyMC3 model that infers a time-varying ``r_t`` from an onset series.

    The latent quantity ``theta`` is a per-day log growth factor: each day's
    expected count is yesterday's delay-corrected count, times today's
    cumulative delay probability, times ``exp(theta)``. ``r_t`` is derived as
    ``theta * serial_interval + 1``.

    Parameters
    ----------
    region : hashable
        Label stored for identification only.
    onset : pandas.Series
        Onset counts in chronological order.
    cumulative_p_delay : sequence
        Cumulative delay probabilities aligned with ``onset``
        (assumed to have the same length as ``onset`` -- not checked here).
    window : int, default 50
        Only the last ``window`` entries of both inputs are modelled.
    """

    def __init__(self, region, onset, cumulative_p_delay, window=50):

        # Just for identification purposes
        self.region = region

        # For the model, we'll only look at the last N
        self.onset = onset.iloc[-window:]
        self.cumulative_p_delay = cumulative_p_delay[-window:]

        # Where we store the results. The first day has no "yesterday",
        # so estimates exist only from the second day of the window on.
        self.trace = None
        self.trace_index = self.onset.index[1:]

    def _add_likelihood(self, theta):
        """Add serial interval, ``r_t`` and the Poisson likelihood to the active model.

        Must be called inside a ``with pm.Model():`` block. ``theta`` needs
        one entry per day of the window except the first.
        """
        # Let the serial interval be a random variable and calculate r_t
        serial_interval = pm.Gamma('serial_interval', alpha=6, beta=1.5)
        gamma = 1.0 / serial_interval
        pm.Deterministic('r_t', theta / gamma + 1)

        # Undo the right-censoring of yesterday's count, then re-apply
        # today's censoring to obtain what we expect to have observed today.
        inferred_yesterday = self.onset.values[:-1] / self.cumulative_p_delay[:-1]
        expected_today = inferred_yesterday * self.cumulative_p_delay[1:] * pm.math.exp(theta)

        # Ensure cases stay above zero for poisson
        mu = pm.math.maximum(.1, expected_today)
        observed = self.onset.round().values[1:]
        pm.Poisson('cases', mu=mu, observed=observed)

    def run(self, chains=3, tune=1000, draws=1000, target_accept=.95,
            random_seed=None):
        """Sample the model with a Gaussian random-walk prior on ``theta``.

        Parameters
        ----------
        chains, tune, draws, target_accept
            Forwarded to ``pm.sample``.
        random_seed : optional
            Forwarded to ``pm.sample``; pass a value to make runs repeatable.
            ``None`` keeps PyMC3's default behaviour.

        Returns
        -------
        MCMCModel
            ``self``, with ``self.trace`` populated.
        """
        with pm.Model():

            # Random walk magnitude
            step_size = pm.HalfNormal('step_size', sigma=.03)

            # Theta random walk: an initial value followed by scaled steps,
            # accumulated into a path with len(onset) - 1 points.
            theta_raw_init = pm.Normal('theta_raw_init', 0.1, 0.1)
            theta_raw_steps = pm.Normal('theta_raw_steps', shape=len(self.onset)-2) * step_size
            theta_raw = tt.concatenate([[theta_raw_init], theta_raw_steps])
            theta = pm.Deterministic('theta', theta_raw.cumsum())

            self._add_likelihood(theta)

            self.trace = pm.sample(
                chains=chains,
                tune=tune,
                draws=draws,
                target_accept=target_accept,
                random_seed=random_seed)

        return self

    def run_gp(self, chains=1, tune=1000, draws=1000, target_accept=.8,
               random_seed=None):
        """Sample the model with a latent Gaussian-process prior on ``theta``.

        The defaults reproduce the sampler settings that used to be
        hard-coded in this method.

        Parameters
        ----------
        chains, tune, draws, target_accept
            Forwarded to ``pm.sample``.
        random_seed : optional
            Forwarded to ``pm.sample``; ``None`` keeps PyMC3's default.

        Returns
        -------
        MCMCModel
            ``self``, with ``self.trace`` populated.
        """
        with pm.Model():
            gp_shape = len(self.onset) - 1

            length_scale = pm.Gamma("length_scale", alpha=3, beta=.4)

            # Fixed amplitude for the squared-exponential covariance.
            eta = .05
            cov_func = eta**2 * pm.gp.cov.ExpQuad(1, length_scale)

            gp = pm.gp.Latent(mean_func=pm.gp.mean.Constant(c=0),
                              cov_func=cov_func)

            # Place a GP prior over the function f.
            theta = gp.prior("theta", X=np.arange(gp_shape)[:, None])

            self._add_likelihood(theta)

            self.trace = pm.sample(
                chains=chains,
                tune=tune,
                draws=draws,
                target_accept=target_accept,
                random_seed=random_seed)

        return self

In [None]:
def df_from_model(model):
    """Summarise the sampled ``r_t`` of a fitted model as a DataFrame.

    Parameters
    ----------
    model : object
        Must expose ``trace`` (indexable by ``'r_t'``), ``region`` and
        ``trace_index``, as ``MCMCModel`` does after sampling.

    Returns
    -------
    pandas.DataFrame
        Indexed by a (region, date) MultiIndex with columns ``mean``,
        ``median``, ``lower_90``, ``upper_90``, ``lower_50``, ``upper_50``.
        The interval columns take whatever ``arviz.hdi`` returns for each
        probability (assumed to be one lower/upper pair per date).
    """
    samples = model.trace['r_t']

    # Statistics are taken over axis 0 (presumably the draws axis),
    # leaving one value per entry of model.trace_index.
    summary = np.column_stack([
        np.mean(samples, axis=0),
        np.median(samples, axis=0),
        arviz.hdi(samples, hdi_prob=0.9),
        arviz.hdi(samples, hdi_prob=0.5),
    ])

    row_index = pd.MultiIndex.from_product(
        [[model.region], model.trace_index],
        names=['region', 'date'])

    column_names = ['mean', 'median',
                    'lower_90', 'upper_90',
                    'lower_50', 'upper_50']

    return pd.DataFrame(data=summary, index=row_index, columns=column_names)

def create_and_run_model(name, state, p_delay=None):
    """Build the onset series for one region and run the MCMC model on it.

    Parameters
    ----------
    name : hashable
        Region label passed to ``MCMCModel``.
    state : object
        Must expose a cumulative ``positive`` Series; daily counts are
        obtained by differencing it.
    p_delay : array-like, optional
        Delay distribution. When omitted, the notebook-level ``p_retraso``
        is used (looked up at call time, so the cell that computes it must
        have been run first). The notebook defines no global called
        ``p_delay``, which is why it is a parameter here.

    Returns
    -------
    MCMCModel
        The fitted model (default window and sampler settings).
    """
    if p_delay is None:
        p_delay = p_retraso

    confirmed = state.positive.diff().dropna()
    onset = confirmed_to_onset(confirmed, p_delay)
    # Only the aligned cumulative delay is needed; the model performs the
    # adjustment itself from the raw onset series.
    _, cumulative_p_delay = adjust_onset_for_right_censorship(onset, p_delay)
    return MCMCModel(name, onset, cumulative_p_delay).run()

In [None]:
def calcular_p_retraso(serie_sintomas, serie_confirmacion):
    """Estimate the delay distribution between symptom onset and case confirmation.

    Both inputs are cumulative time series. For every row ``i`` the function
    finds the first row ``j >= i`` at which the cumulative confirmed count
    reaches the cumulative symptom-onset count of row ``i``; the new onsets
    of row ``i`` are then attributed to a delay of ``j - i`` rows.

    Parameters
    ----------
    serie_sintomas : str or file-like
        CSV (anything ``pandas.read_csv`` accepts) with columns ``fecha``
        and ``sintomas_acumulados``.
    serie_confirmacion : str or file-like
        CSV with columns ``fecha`` and ``casos_acumulados``.

    Returns
    -------
    pandas.Series
        Probability of each delay (position = delay in rows, assumed to be
        days), trimmed after the last non-zero entry. Sums to 1.

    Raises
    ------
    ValueError
        If no onsets could be matched to a confirmation date, so no
        distribution can be formed.
    """
    # Read tables, keeping only the columns that are used.
    sintomas = pd.read_csv(serie_sintomas)[['fecha', 'sintomas_acumulados']]
    confirmacion = pd.read_csv(serie_confirmacion)[['fecha', 'casos_acumulados']]

    # Join tables on date (outer join: either series may cover extra dates).
    Dat = pd.concat(
        [sintomas.set_index('fecha'), confirmacion.set_index('fecha')],
        axis=1, sort=False).reset_index()
    Dat.columns = ['fecha', 'sintomas_acumulados', 'casos_acumulados']
    Dat['fecha'] = pd.to_datetime(Dat['fecha'])

    # The matching below relies on chronological order.
    Dat = Dat.sort_values('fecha').reset_index(drop=True)

    # The series are cumulative: a gap keeps the previous total, and only
    # dates before a series starts are zero.
    acumulados = Dat[['sintomas_acumulados', 'casos_acumulados']].ffill().fillna(0)
    sintomas_acum = acumulados['sintomas_acumulados'].to_numpy(dtype=float)
    casos_acum = acumulados['casos_acumulados'].to_numpy(dtype=float)

    n_filas = len(Dat)
    # New onsets per row; the first row contributes its full cumulative value.
    nuevos = np.diff(sintomas_acum, prepend=0.0)

    # One slot per possible delay, 0 .. n_filas - 1 inclusive.
    conteo = np.zeros(n_filas)
    for i in range(n_filas):
        # Offsets (relative to i) at which confirmations have caught up.
        alcanzado = np.flatnonzero(casos_acum[i:] >= sintomas_acum[i])
        if alcanzado.size:
            conteo[alcanzado[0]] += nuevos[i]

    # Compute probabilities.
    total = conteo.sum()
    if not total > 0:
        raise ValueError(
            "cannot estimate the delay distribution: no symptom onsets "
            "could be matched to a confirmation date")
    p_retraso = pd.Series(conteo / total)

    # Trim trailing zeros, keeping the last non-zero delay itself so the
    # distribution still sums to 1.
    ultimo = np.flatnonzero(p_retraso.to_numpy() > 0)[-1]
    p_retraso = p_retraso.iloc[:ultimo + 1]

    return p_retraso

In [None]:
# Input and output locations used by the rest of the notebook.
# Alternative locations (one directory level further up), kept for reference:
# serie_sintomas_nacional = "../../datos/datos_abiertos/serie_tiempo_nacional_confirmados.csv.gz"
# serie_confirmacion_nacional = "../../datos/datos_abiertos/serie_tiempo_nacional_fecha_confirmacion.csv.gz"
# serie_confirmacion_estados = "../../datos/datos_abiertos/serie_tiempo_estados_fecha_confirmacion.csv.gz"
# dir_estimado = "../estimados/"

# National series passed to calcular_p_retraso (symptoms, then confirmation).
serie_sintomas_nacional = "../datos/datos_abiertos/serie_tiempo_nacional_confirmados.csv.gz"
serie_confirmacion_nacional = "../datos/datos_abiertos/serie_tiempo_nacional_fecha_confirmacion.csv.gz"
# Per-state series that the models are fitted on.
serie_confirmacion_estados = "../datos/datos_abiertos/serie_tiempo_estados_fecha_confirmacion.csv.gz"
# Directory where the estimates CSV is written.
dir_estimado = "estimados/"

print("Parámetros leídos")

In [None]:
# Compute the detection delay distribution (symptom onset -> confirmation)
# from the national series.
p_retraso = calcular_p_retraso(serie_sintomas=serie_sintomas_nacional, serie_confirmacion=serie_confirmacion_nacional)

In [None]:
# Read the per-state time series by confirmation date.
Dat = pd.read_csv(serie_confirmacion_estados)

In [None]:
# ent = "Ciudad de México"

In [None]:
# Fit one model per state; results are keyed by state name.
modelos = {}
for ent in Dat.estado.unique():
    # Daily new cases of this state, indexed by date.
    casos = (
        Dat.loc[Dat.estado == ent, ['fecha', 'casos_nuevos_um']]
        .rename(columns={'fecha': 'date', 'casos_nuevos_um': 'positives'})
        .set_index('date')
    )

    # Only states with more than 500 cases in total are modelled.
    if not casos.positives.sum() > 500:
        continue
    print(ent)

    # Smooth with a centred 7-day mean; the window edges come out as NaN
    # and are dropped.
    casos = casos.rolling(window=7, center=True).mean()
    casos = casos[casos.positives.notna()]

    # Convert confirmations to onsets and get the right-censoring factors.
    inicio = confirmed_to_onset(casos.positives, p_retraso)
    ajustados, p_retraso_acumulado = adjust_onset_for_right_censorship(inicio, p_retraso)

    # Keep only the days whose adjusted value is finite (no NaN / +-inf),
    # applying the same mask to the onsets and to the cumulative delay.
    finito = np.isfinite(ajustados)
    p_retraso_acumulado = p_retraso_acumulado[finito.values]

    # Run the model on the last 100 days.
    m1 = MCMCModel(ent, inicio[finito], p_retraso_acumulado, window=100).run()
    modelos[ent] = m1

In [None]:
# Combine the per-state summary tables into one long table.
# Tables are collected in a list and concatenated once, instead of
# re-copying the accumulated frame on every iteration.
tablas = []
for k, m1 in modelos.items():
    print(k)
    # Drop the 'region' index level and turn 'date' into a regular column.
    tab = df_from_model(m1).droplevel(0).reset_index()
    tab['estado'] = k
    tablas.append(tab)

# pd.concat rejects an empty list, so fall back to an empty frame when no
# state was modelled.
Tab = pd.concat(tablas) if tablas else pd.DataFrame()
# Stamp every row with the date on which the estimate was produced.
Tab['fecha_estimado'] = date.today().isoformat()

In [None]:
# Write the combined estimates as CSV inside dir_estimado.
# The directory is created if missing so that the write does not fail after
# all the sampling has finished; os.path.join avoids a doubled separator
# when dir_estimado already ends with '/'.
os.makedirs(dir_estimado, exist_ok=True)
archivo = os.path.join(dir_estimado, 'rt_live_estimados.csv')
Tab.to_csv(archivo, index=False)