# COVID19 - District Region

In [1]:
from environs import Env
# Reader object used for the typed environment-variable lookups below.
env = Env()
# NOTE(review): the return values of the four lookups below are discarded.
# environs getters presumably only read a variable (using the second argument
# as the default) and do not write to os.environ, so these lines likely do not
# export any setting -- confirm whether os.environ assignments were intended.
env.str("CUDA_DEVICE_ORDER",'PCI_BUS_ID')
env.int("CUDA_VISIBLE_DEVICES",1)
env.int("NUMBA_ENABLE_CUDASIM",1)
env.bool("OMPI_MCA_opal_cuda_support",True)

import os
import ray
# Byte-size helpers used to express the Ray memory budgets.
MB = 1024 * 1024
GB = MB * 1024

# Start the local Ray runtime with fixed memory, CPU and GPU budgets.
# ignore_reinit_error=True is passed so re-running this cell does not raise.
ray.init(
    object_store_memory=1 * GB,
    memory=1 * GB,
    driver_object_store_memory=500 * MB,
    num_gpus=5,
    num_cpus=5,
    ignore_reinit_error=True,
)

@ray.remote(num_gpus=1)
def use_gpu():
    """Print the first GPU id Ray reports for this task and the task's CUDA_VISIBLE_DEVICES value."""
    print(f"ray.get_gpu_ids(): {ray.get_gpu_ids()[0]}")
    print(f"CUDA_VISIBLE_DEVICES: {os.environ['CUDA_VISIBLE_DEVICES']}")

# Submit the check as a remote task; the returned reference is the cell's output.
use_gpu.remote()

2020-07-27 11:16:27,050	INFO resource_spec.py:212 -- Starting Ray with 0.98 GiB memory available for workers and up to 1.0 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).
2020-07-27 11:16:27,394	INFO services.py:1170 -- View the Ray dashboard at [1m[32mlocalhost:8265[39m[22m


ObjectID(45b95b1c8bd3a9c4ffffffff010000c801000000)

In [2]:
import urllib.request
import pandas as pd
import numpy as np
from datetime import datetime,timedelta

In [3]:
# Download data
import get_data
# Switch for the download step: get_data.get_data() is only called when this is True.
LoadData=True

if LoadData:
    get_data.get_data()

Baixando arquivos brasil.io...
[2m[36m(pid=53369)[0m ray.get_gpu_ids(): 4
[2m[36m(pid=53369)[0m CUDA_VISIBLE_DEVICES: 4


In [4]:
# Load the municipality-level table for São Paulo state.
dfSP = pd.read_csv("data/dados_municipios_SP.csv")
# Display only the rows of one district region as a quick sanity check.
# (The former bare `dfSP` line in the middle of the cell displayed nothing and was removed.)
dfSP.query('DRS == "DRS 01 - Grande São Paulo"')

Unnamed: 0,date,state,city,place_type,confirmed,deaths,order_for_place,is_last,popEst,city_ibge_code,confirmed_per_100k_inhabitants,death_rate,DRS
3314,2020-07-26,SP,Arujá,city,960,56,121,True,89824.0,3503901.0,1068.75668,0.0583,DRS 01 - Grande São Paulo
3315,2020-07-25,SP,Arujá,city,958,56,120,False,89824.0,3503901.0,1066.53010,0.0585,DRS 01 - Grande São Paulo
3316,2020-07-24,SP,Arujá,city,924,55,119,False,89824.0,3503901.0,1028.67830,0.0595,DRS 01 - Grande São Paulo
3317,2020-07-23,SP,Arujá,city,890,55,118,False,89824.0,3503901.0,990.82651,0.0618,DRS 01 - Grande São Paulo
3318,2020-07-22,SP,Arujá,city,856,54,117,False,89824.0,3503901.0,952.97471,0.0631,DRS 01 - Grande São Paulo
...,...,...,...,...,...,...,...,...,...,...,...,...,...
53242,2020-03-30,SP,Vargem Grande Paulista,city,2,1,5,False,52597.0,3556453.0,3.80250,0.5000,DRS 01 - Grande São Paulo
53243,2020-03-27,SP,Vargem Grande Paulista,city,2,1,4,False,52597.0,3556453.0,3.80250,0.5000,DRS 01 - Grande São Paulo
53244,2020-03-26,SP,Vargem Grande Paulista,city,1,1,3,False,52597.0,3556453.0,1.90125,1.0000,DRS 01 - Grande São Paulo
53245,2020-03-25,SP,Vargem Grande Paulista,city,0,1,2,False,52597.0,3556453.0,,0.0000,DRS 01 - Grande São Paulo


In [5]:
# Model

In [6]:
# List the district-region (DRS) names found in the data, leaving out the
# "Indefinido" placeholder. Filtering (instead of list.remove) keeps the cell
# working when the placeholder is not present in the file.
DRS = [name for name in dfSP["DRS"].unique() if name != "Indefinido"]
DRS

['DRS 09 - Marília',
 'DRS 15 - São José do Rio Preto',
 'DRS 14 - São João da Boa Vista',
 'DRS 06 - Bauru',
 'DRS 16 - Sorocaba',
 'DRS 11 - Presidente Prudente',
 'DRS 05 - Barretos',
 'DRS 13 - Ribeirão Preto',
 'DRS 02 - Araçatuba',
 'DRS 07 - Campinas',
 'DRS 03 - Araraquara',
 'DRS 10 - Piracicaba',
 'DRS 17 - Taubaté',
 'DRS 08 - Franca',
 'DRS 01 - Grande São Paulo',
 'DRS 12 - Registro',
 'DRS 04 - Baixada Santista']

# SEAIR-D Model Equations

$$\begin{array}{l}\frac{d s}{d t}=-[\beta i(t) + \beta_2 a(t)-\mu] \cdot s(t)\\ 
\frac{d e}{d t}=[\beta i(t) + \beta_2 a(t)] \cdot s(t) -(\sigma+\mu) \cdot e(t)\\ 
\frac{d a}{d t}=\sigma e(t) \cdot (1-p)-(\gamma+\mu) \cdot a(t) \\
\frac{d i}{d t}=\sigma e(t) \cdot p - (\gamma + \sigma_2 + \sigma_3 + \mu) \cdot i(t)\\ 
\frac{d r}{d t}=(b + \sigma_2) \cdot i(t) + \gamma \cdot a(t) - \mu \cdot r(t)\\
\frac{d k}{d t}=(a + \sigma_3 - \mu) \cdot d(t)
\end{array}$$

The last equation does not need to be solved explicitly because:

$$\frac{d k}{d t}=-(\frac{d s}{d t}+\frac{d e}{d t}+\frac{d a}{d t}+\frac{d i}{d t}+\frac{d r}{d t})$$

The sum of all rates is equal to zero! This equation is important because it guarantees that the rates are conserved.


## Parameters

$\beta$: Effective contact rate [1/min]
    
$\gamma$: Recovery(+Mortality) rate $\gamma=(a+b)$ [1/min]

$a$: mortality of healed  [1/min]

$b$: recovery rate  [1/min]

$\sigma$: is the rate at which individuals move from the exposed to the infectious classes. Its reciprocal ($1/\sigma$) is the average latent (exposed) period.

$\sigma_2$: is the rate at which individuals move from the infectious to the healed classes. Its reciprocal ($1/\sigma_2$) is the average time an individual spends in the infectious class before moving to the healed class.

$\sigma_3$: is the rate at which individuals move from the infectious to the dead classes. Its reciprocal ($1/\sigma_3$) is the average time an individual spends in the infectious class before moving to the dead class.
    
$p$: is the fraction of the exposed which become symptomatic infectious sub-population.

$(1-p)$: is the fraction of the exposed which becomes asymptomatic infectious sub-population.

In [7]:
#objective function Odeint solver
from scipy.integrate import odeint

#objective function Odeint solver
def lossOdeint(point, data, death, s_0, e_0, a_0, i_0, r_0, d_0, startNCases, ratioRecovered, weigthCases, weigthRecov, p=0.2):
    """Weighted RMSE between a simulated SEAIR-D trajectory and observed series.

    The ODE system is integrated with ``odeint`` on the integer grid
    ``0 .. len(data) - 1``. The simulation is compared with the observations
    only at the positions where ``data`` is greater than ``startNCases``.

    Parameters
    ----------
    point : sequence of 8 floats
        ``beta, beta2, sigma, sigma2, sigma3, gamma, b, mu``, in that order.
    data : array-like
        Observed series compared with the simulated I compartment. It also
        selects the compared positions and defines the recovered target.
    death : array-like
        Observed series compared with the simulated D compartment. Only its
        first ``len(data)`` entries are used.
    s_0, e_0, a_0, i_0, r_0, d_0 : float
        Initial values of the S, E, A, I, R and D compartments.
    startNCases : float
        Threshold on ``data``; positions at or below it are ignored.
    ratioRecovered : float
        The recovered target is ``min(1e6, data * ratioRecovered)``.
    weigthCases, weigthRecov : float
        Weights of the I and R error terms. The D term gets
        ``max(0, 1 - weigthCases - weigthRecov)``.
    p : float, optional
        Fraction of the flow leaving E that enters I; the rest enters A.
        Defaults to 0.2, the value that used to be hard-coded.

    Returns
    -------
    float
        ``u*rmse_I + v*rmse_D + w*rmse_R``; 0 when no position is selected.
    """
    beta, beta2, sigma, sigma2, sigma3, gamma, b, mu = point

    # Convert once instead of touching `.values` on every loop iteration.
    cases = np.asarray(data, dtype=float)
    size = len(cases)
    deaths = np.asarray(death, dtype=float)[:size]

    def SEAIRD(y, t):
        S = y[0]
        E = y[1]
        A = y[2]
        I = y[3]
        R = y[4]
        y0 = -(beta2*A+beta*I)*S-mu*S  # S
        y1 = (beta2*A+beta*I)*S-sigma*E-mu*E  # E
        y2 = sigma*E*(1-p)-gamma*A-mu*A  # A
        y3 = sigma*E*p-gamma*I-sigma2*I-sigma3*I-mu*I  # I
        y4 = b*I+gamma*A+sigma2*I-mu*R  # R
        # D is defined so that the six rates always sum to zero.
        y5 = (-(y0+y1+y2+y3+y4))  # D
        return [y0, y1, y2, y3, y4, y5]

    y0 = [s_0, e_0, a_0, i_0, r_0, d_0]
    tspan = np.arange(0, size, 1)
    res = odeint(SEAIRD, y0, tspan)

    # Only positions whose observed value exceeds the threshold contribute.
    selected = cases > startNCases
    tot = max(1, int(np.count_nonzero(selected)))
    sim = res[selected]
    obsCases = cases[selected]
    targetRecovered = np.minimum(1e6, obsCases*ratioRecovered)

    l1 = np.sqrt(np.sum((sim[:, 3] - obsCases)**2)/tot)
    l2 = np.sqrt(np.sum((sim[:, 5] - deaths[selected])**2)/tot)
    l3 = np.sqrt(np.sum((sim[:, 4] - targetRecovered)**2)/tot)

    # weight for cases
    u = weigthCases
    # weight for recovered
    w = weigthRecov
    # weight for deaths: whatever is left, never negative
    v = max(0, 1. - u - w)

    return u*l1 + v*l2 + w*l3

In [8]:
# Notebook switches.
paramOpt = 1          # parameter set to load: 0 -> data/param.csv, 1 -> Yabox-optimised file
changeCSV = False     # only referenced by commented-out code in the parameter-loading cell
adjustParam = False   # when True, the s0 column of the parameter table is rescaled
paramSave = False     # only referenced by commented-out code in the parameter-loading cell
selectStates = False  # not read by any cell visible in this notebook

In [9]:
# Each paramOpt value maps to (parameter file, version tag, model label).
_PARAM_SETS = {
    0: ("data/param.csv", "1", "ManualIC"),
    1: ("data/param_optimized_Yabox_HistMin.csv", "105", "YaboxIC"),
}

if paramOpt not in _PARAM_SETS:
    # Fail here rather than with a NameError (or stale values) in a later cell.
    raise ValueError(
        "paramOpt must be one of {}, got {!r}".format(sorted(_PARAM_SETS), paramOpt)
    )

paramFile, version, model = _PARAM_SETS[paramOpt]

In [10]:
# Load the per-district parameter table and drop rows with missing values.
dfparam = pd.read_csv(paramFile)
dfparam = dfparam.dropna()

# From here on DRS is the district column of the parameter table
# (it replaces the list built from dfSP in an earlier cell).
DRS=dfparam.DRS

# if changeCSV:
#     dfparam.loc[dfparam.state=='PI','d0'] = 0
#     dfparam.loc[dfparam.state=='RR','d0'] = 0
#     dfparam.loc[dfparam.state=='AP','d0'] = 0
#     dfparam.loc[dfparam.state=='SP','WCASES'] = 0.65

# if paramSave:
#     dfparam.to_csv(paramFile)

if adjustParam:
    # One multiplicative correction per row of the parameter table.
    sCorrect=[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
    # sCorrect=[0.85,1.25,1.0,1.25,1.5,0.95,1.25,1.15,1.11,0.85,0.9]
    #103 [1.25,1.3,1.3,1.3,1.3,1.3,1.3,1.3,1.0,1.0,1.1]
    # The slice length used to come from `states`, a name this notebook never
    # defines; the number of parameter rows is what the multiply needs.
    dfparam["s0"] = dfparam["s0"].multiply(sCorrect[:len(dfparam)], axis=0)

dfparam

Unnamed: 0,DRS,start-date,prediction-range,s0,e0,a0,i0,r0,d0,START,RATIO,WCASES,WREC
0,DRS 01 - Grande São Paulo,2020-03-19,200,2921449,0,0,1486,300,420,0,0.15,0.3075,0.0231
1,DRS 02 - Araçatuba,2020-04-01,150,737,0,0,0,0,0,0,0.1,0.304,0.0693
2,DRS 03 - Araraquara,2020-04-02,150,2136,0,0,0,0,0,0,0.1,0.5902,0.0231
3,DRS 04 - Baixada Santista,2020-03-31,150,9117,0,0,0,0,0,0,0.1,0.3442,0.0311
4,DRS 05 - Barretos,2020-04-02,150,2940,0,0,0,0,0,0,0.1,0.4106,0.0338
5,DRS 06 - Bauru,2020-04-02,150,13039,0,0,2,0,0,0,0.1,0.3072,0.0651
6,DRS 07 - Campinas,2020-04-02,150,16998,0,0,23,0,0,0,0.1,0.5317,0.0284
7,DRS 08 - Franca,2020-04-01,150,1044,0,0,0,0,0,0,0.1,0.7238,0.0538
8,DRS 09 - Marília,2020-04-02,150,6917,0,0,0,0,0,0,0.1,0.6089,0.033
9,DRS 10 - Piracicaba,2020-04-02,150,13995,0,0,0,0,0,0,0.1,0.4776,0.062


# Initial parameter optimization

In [11]:
%reload_ext autoreload
%autoreload 2
import LearnerYabox_v2 as Learner #Yabox

2020-07-27 11:16:49,535	INFO resource_spec.py:212 -- Starting Ray with 155.32 GiB memory available for workers and up to 70.58 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).
2020-07-27 11:16:49,818	INFO services.py:1170 -- View the Ray dashboard at [1m[32mlocalhost:8265[39m[22m


In [12]:
%%javascript
// Replace the output area's scroll check with one that always answers true.
IPython.OutputArea.prototype._should_scroll = function (lines) {
    return true;
};

<IPython.core.display.Javascript object>

In [13]:
# Run switches for the optimisation cell below.
allDistricts = True      # True: submit every district in DRS; False: submit a single district
cleanRecovered = False   # forwarded unchanged to the Learner constructor
version = "8"            # overrides the version tag chosen together with the parameter file

display(DRS)

0          DRS 01 - Grande São Paulo
1                 DRS 02 - Araçatuba
2                DRS 03 - Araraquara
3          DRS 04 - Baixada Santista
4                  DRS 05 - Barretos
5                     DRS 06 - Bauru
6                  DRS 07 - Campinas
7                    DRS 08 - Franca
8                   DRS 09 - Marília
9                DRS 10 - Piracicaba
10      DRS 11 - Presidente Prudente
11                 DRS 12 - Registro
12           DRS 13 - Ribeirão Preto
13    DRS 14 - São João da Boa Vista
14    DRS 15 - São José do Rio Preto
15                 DRS 16 - Sorocaba
16                  DRS 17 - Taubaté
Name: DRS, dtype: object

In [None]:
# Choose what to train: every district in DRS, or one fixed district.
if allDistricts:
    districtsToTrain = list(DRS)
else:
    # Any other name listed in DRS can be used here, e.g. 'DRS 08 - Franca'.
    districtsToTrain = ["DRS 01 - Grande São Paulo"]

results=[]
# The loop variable is intentionally named districtRegion: later cells read
# the value it holds after this cell has run.
for districtRegion in districtsToTrain:
    # Parameter row of this district.
    query = dfparam.query('DRS == "{}"'.format(districtRegion)).reset_index()
    # Every column from position 2 of the reset frame onward is forwarded
    # positionally to the Learner constructor.
    parameters = np.array(query.iloc[:, 2:])[0]
    f=Learner.Learner.remote(districtRegion, *parameters, cleanRecovered, version)
    result = f.train.remote()
    results.append(result)

# Wait for all submitted training calls and replace the references by their results.
results = ray.get(results)

  0%|          | 0/6125000 [00:00<?, ?it/s]
  0%|          | 0/6125000 [00:00<?, ?it/s]
  0%|          | 0/6125000 [00:00<?, ?it/s]
  0%|          | 0/6125000 [00:00<?, ?it/s]
  0%|          | 0/6125000 [00:00<?, ?it/s]
  0%|          | 0/6125000 [00:00<?, ?it/s]
  0%|          | 0/6125000 [00:00<?, ?it/s]
  0%|          | 0/6125000 [00:00<?, ?it/s]
  0%|          | 0/6125000 [00:00<?, ?it/s]
  0%|          | 0/6125000 [00:00<?, ?it/s]
  0%|          | 0/6125000 [00:00<?, ?it/s]
  0%|          | 0/6125000 [00:00<?, ?it/s]
  0%|          | 0/6125000 [00:00<?, ?it/s]
  0%|          | 0/6125000 [00:00<?, ?it/s]
  0%|          | 0/6125000 [00:00<?, ?it/s]
  0%|          | 0/6125000 [00:00<?, ?it/s]
  0%|          | 0/6125000 [00:00<?, ?it/s]
  0%|          | 0/6125000 [00:00<?, ?it/s]
  0%|          | 0/6125000 [00:00<?, ?it/s]
  0%|          | 0/6125000 [00:00<?, ?it/s]
  0%|          | 0/6125000 [00:00<?, ?it/s]
  0%|          | 0/6125000 [00:00<?, ?it/s]
  0%|          | 0/6125000 [00:0

  0%|          | 78/6125000 [00:02<29:20:08, 58.00it/s]
  0%|          | 78/6125000 [00:02<35:14:27, 48.28it/s]
  0%|          | 91/6125000 [00:02<32:57:25, 51.62it/s]
  0%|          | 91/6125000 [00:02<27:58:15, 60.83it/s]
  0%|          | 66/6125000 [00:02<38:11:58, 44.54it/s]
  0%|          | 105/6125000 [00:02<26:14:35, 64.83it/s]
  0%|          | 91/6125000 [00:02<28:03:36, 60.63it/s]
  0%|          | 91/6125000 [00:02<27:27:55, 61.95it/s]
  0%|          | 91/6125000 [00:02<28:26:57, 59.80it/s]
  0%|          | 91/6125000 [00:02<34:15:59, 49.65it/s]
  0%|          | 91/6125000 [00:02<25:06:08, 67.78it/s]
  0%|          | 105/6125000 [00:02<32:11:59, 52.84it/s]
  0%|          | 136/6125000 [00:02<22:27:27, 75.76it/s]
  0%|          | 78/6125000 [00:02<31:08:12, 54.64it/s]
  0%|          | 105/6125000 [00:02<23:18:45, 72.98it/s]
  0%|          | 105/6125000 [00:02<27:55:16, 60.93it/s]
  0%|          | 120/6125000 [00:02<25:52:05, 65.77it/s]
  0%|          | 120/6125000 [00:02<22:32:

  0%|          | 630/6125000 [00:03<3:54:53, 434.55it/s]
  0%|          | 528/6125000 [00:03<4:38:22, 366.69it/s]
  0%|          | 630/6125000 [00:03<3:50:26, 442.96it/s]
  0%|          | 703/6125000 [00:03<3:37:31, 469.24it/s]
  0%|          | 780/6125000 [00:03<3:11:14, 533.72it/s]
  0%|          | 861/6125000 [00:03<3:01:13, 563.19it/s]
  0%|          | 666/6125000 [00:03<3:39:47, 464.42it/s]
  0%|          | 741/6125000 [00:03<3:34:02, 476.88it/s]
  0%|          | 595/6125000 [00:03<4:04:27, 417.54it/s]
  0%|          | 820/6125000 [00:03<3:19:46, 510.90it/s]
  0%|          | 1035/6125000 [00:03<2:34:21, 661.20it/s]
  0%|          | 861/6125000 [00:03<2:52:01, 593.34it/s]
  0%|          | 780/6125000 [00:03<3:27:20, 492.30it/s]
  0%|          | 561/6125000 [00:03<4:23:58, 386.68it/s]
  0%|          | 703/6125000 [00:03<3:27:34, 491.73it/s]
  0%|          | 703/6125000 [00:03<3:26:04, 495.32it/s]
  0%|          | 820/6125000 [00:03<3:03:05, 557.46it/s]
  0%|          | 990/6125000 [

In [None]:
# Shut down the Ray runtime once the training results have been collected.
ray.shutdown()

In [None]:
# Save data as csv
import glob
import os

path = './results/data'
# Sorted so the row order of the merged file does not depend on directory listing order.
files = sorted(glob.glob(os.path.join(path, "*.csv")))
if not files:
    raise ValueError("No result CSV files found in {!r}".format(path))


def _drs_label(csv_path):
    """Return the text after the last ' - ' of the file name, cut at the first dot.

    Working on the base name (not the full path) keeps the leading './' of the
    directory from producing an empty label when the name contains no ' - '.
    """
    return os.path.basename(csv_path).split(" - ")[-1].split(".")[0]


# Tag each result file with its district label, then stack all files.
df = (pd.read_csv(f).assign(DRS = _drs_label(f)) for f in files)
df_all_drs = pd.concat(df, ignore_index=True)
df_all_drs.index.name = 'index'
df_all_drs.to_csv('./data/SEAIRD_sigmaOpt_AllDRS'+'.csv', sep=",")

# Plots

In [None]:
%matplotlib inline 
import matplotlib.pyplot as plt
import pandas as pd
#%load_ext autoreload
%reload_ext autoreload
%autoreload 2
import covid_plots_v2 as covid_plots

In [None]:
def loadDataFrame(filename):
    """Read a pickled DataFrame and normalise its column labels.

    Each label is lower-cased, spaces become underscores and parentheses
    are removed. The frame read from ``filename`` is returned with the new
    labels.
    """
    # NOTE(review): unpickling can execute arbitrary code; only load trusted files.
    frame = pd.read_pickle(filename)
    frame.columns = [
        label.lower().replace(' ', '_').replace('(', '').replace(')', '')
        for label in frame.columns
    ]
    return frame

In [None]:
#DRS 01 - Grande São Paulo
#DRS 02 - Araçatuba
#DRS 03 - Araraquara
#DRS 04 - Baixada Santista
#DRS 05 - Barretos
#DRS 06 - Bauru
#DRS 07 - Campinas
#DRS 08 - Franca
#DRS 09 - Marília
#DRS 10 - Piracicaba
#DRS 11 - Presidente Prudente
#DRS 12 - Registro
#DRS 13 - Ribeirão Preto
#DRS 14 - São João da Boa Vista
#DRS 15 - São José do Rio Preto
#DRS 16 - Sorocaba
#DRS 17 - Taubaté

# Districts drawn together in the comparison plots.
# NOTE(review): districtRegion is not assigned in this cell; it holds whatever
# an earlier cell left behind, so this cell needs that cell to have run first.
districts4Plot = [
    'DRS 01 - Grande São Paulo',
    'DRS 04 - Baixada Santista',
    'DRS 07 - Campinas',
    'DRS 05 - Barretos',
    districtRegion,
]

# Main district region for analysis (override currently disabled)
#districtRegion = "DRS 01 - Grande São Paulo"

# Plot selector:
#   opt=0 all plots
#   opt=1 corona log plot
#   opt=2 logistic model prediction
#   opt=3 bar plot with growth rate
#   opt=4 log plot + bar plot
#   opt=5 SEAIR-D Model
opt = 5

# Number of cases to start plotting model in log graph - real data = 100
startCase = 1

In [None]:
#do not allow the scrolling of the plots

In [None]:
%%javascript
// Replace the output area's scroll check with one that always answers false,
// so the plots below are not placed in a scrolling box.
IPython.OutputArea.prototype._should_scroll = function (lines) {
    return false;
};

In [None]:
#plots one district or all districts
allDistricts=True

# Maximum date shown in the plots: last date in the data plus 100 days
# (100 days is used as the maximum for the death-derivative calculations).
# It does not depend on the district, so it is computed once.
lastDate=dfSP.date.max()
maxDate= datetime.strptime(lastDate, "%Y-%m-%d") + timedelta(days = 100) #"2020-08-31"
maxDateStr = maxDate.strftime("%Y-%m-%d")

# Either every district of the parameter table, or only the current districtRegion.
districtsToPlot = list(DRS) if allDistricts else [districtRegion]

for districtRegion in districtsToPlot:
    # Plot settings stored for this district in the parameter table.
    query = dfparam.query('DRS == "{}"'.format(districtRegion)).reset_index()
    ratio = query['RATIO'][0]
    startCase = query['START'][0]
    startdate = query['start-date'][0]
    predict_range = query['prediction-range'][0]

    covid_plots.covid_plots(districtRegion, districts4Plot, startdate, predict_range,
                            startCase, opt, version, show=True, ratio=ratio,
                            maxDate=maxDateStr, model=model)