# COVID19 - Brazil States

# import Ray and test CUDA

In [1]:
from environs import Env
# Create an environs reader and query four CUDA / Numba / Open MPI related
# variables, giving each call a fallback value as its second argument.
env = Env()
# NOTE(review): every return value below is discarded and nothing in this
# cell writes to os.environ. If the intent was to *set* these variables for
# the Ray workers, explicit os.environ assignments are needed -- confirm.
env.str("CUDA_DEVICE_ORDER",'PCI_BUS_ID')
env.int("CUDA_VISIBLE_DEVICES",512)
env.int("NUMBA_ENABLE_CUDASIM",1)
env.bool("OMPI_MCA_opal_cuda_support",True)

import os
import ray
# Byte-size units used for the Ray memory limits below.
MB = 1024 ** 2
GB = 1024 * MB

# Stop any Ray session left over from a previous run of this cell, then start
# a fresh one with one CPU and 1 GiB each for worker memory and object store.
ray.shutdown()
ray.init(
    num_cpus=1,
    num_gpus=None,
    memory=GB,
    object_store_memory=GB,
    driver_object_store_memory=500 * MB,
    lru_evict=True,
    ignore_reinit_error=True,
)  # , include_webui=False, ignore_reinit_error=True)

# Alternative configurations kept for reference:
# ray.init(lru_evict=True,object_store_memory=2*GB,memory=4*GB,
#          num_gpus=312,num_cpus=10, include_webui=False, ignore_reinit_error=True)

# ray.shutdown()
# ray.init(num_cpus=10,num_gpus=None)

@ray.remote(num_gpus=1)
def use_gpu():
    """Print the first GPU id Ray reports for this task and the worker's
    CUDA_VISIBLE_DEVICES environment variable.

    Raises:
        IndexError: if ``ray.get_gpu_ids()`` returns an empty list.
        KeyError: if CUDA_VISIBLE_DEVICES is not set in the worker process.
    """
    first_gpu_id = ray.get_gpu_ids()[0]
    print("ray.get_gpu_ids(): {}".format(first_gpu_id))
    visible_devices = os.environ["CUDA_VISIBLE_DEVICES"]
    print("CUDA_VISIBLE_DEVICES: {}".format(visible_devices))


# Submit the probe task; the returned object reference is the cell's output.
use_gpu.remote()

2020-07-04 21:38:10,687	INFO resource_spec.py:212 -- Starting Ray with 0.98 GiB memory available for workers and up to 1.0 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).
2020-07-04 21:38:11,032	INFO services.py:1170 -- View the Ray dashboard at [1m[32mlocalhost:8269[39m[22m


ObjectID(45b95b1c8bd3a9c4ffffffff010000c801000000)

# Import Modules

In [2]:
import urllib.request
import pandas as pd
import numpy as np

# Download data

In [3]:
import get_data
# Set to True to call get_data.get_data() before the CSV is read in the next
# cell; left False here, so that call is skipped.
LoadData=False

if LoadData:
    get_data.get_data()

In [4]:
# Load the per-state totals CSV into `df`; the bare `df` expression makes the
# notebook render the frame as the cell output.
df = pd.read_csv("data/dados_total_estados.csv")
df

Unnamed: 0,index,date,state,city,place_type,confirmed,deaths,order_for_place,is_last,popEst,city_ibge_code,confirmed_per_100k_inhabitants,death_rate
0,0,2020-07-04,AP,TOTAL,state,29809,441,106,True,845731.0,16.0,3524.64318,0.0148
1,1,2020-07-03,AP,TOTAL,state,29574,438,105,False,845731.0,16.0,3496.85657,0.0148
2,2,2020-07-02,AP,TOTAL,state,29153,427,104,False,845731.0,16.0,3447.07714,0.0146
3,3,2020-07-01,AP,TOTAL,state,28927,420,103,False,845731.0,16.0,3420.35470,0.0145
4,4,2020-06-30,AP,TOTAL,state,28492,417,102,False,845731.0,16.0,3368.91990,0.0146
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3001,262118,2020-02-29,SP,TOTAL,state,2,0,5,False,45919049.0,35.0,0.00436,0.0000
3002,262119,2020-02-28,SP,TOTAL,state,2,0,4,False,45919049.0,35.0,0.00436,0.0000
3003,262120,2020-02-27,SP,TOTAL,state,1,0,3,False,45919049.0,35.0,0.00218,0.0000
3004,262121,2020-02-26,SP,TOTAL,state,1,0,2,False,45919049.0,35.0,0.00218,0.0000


# Model

# SEAIR-D Model Equations

$$\begin{array}{l}\frac{d s}{d t}=-[\beta i(t) + \beta_2 a(t)-\mu] \cdot s(t)\\ 
\frac{d e}{d t}=[\beta i(t) + \beta_2 a(t)] \cdot s(t) -(\sigma+\mu) \cdot e(t)\\ 
\frac{d a}{d t}=\sigma e(t) \cdot (1-p)-(\gamma_2+\mu) \cdot a(t) \\
\frac{d i}{d t}=\sigma e(t) \cdot p - (\gamma + \sigma_2 + \sigma_3 + \mu) \cdot i(t)\\ 
\frac{d r}{d t}=(b + \sigma_2) \cdot i(t) + d \cdot a(t) - \mu \cdot r(t)\\
\frac{d k}{d t}=(a + \sigma_3 - \mu) \cdot d(t) + c \cdot a(t)
\end{array}$$

$$\gamma = a + b$$

$$\gamma_2 = c + d$$

$$p=0.2$$

The last equation does not need to be solved because:

$$s + e + i + a + r + k = N$$ 

$N$ is the total population of the region, state or country. If you differentiate this identity, treating the population as constant during the simulated period, you get an equation for $k$ that does not depend on $N$.

$$\frac{d k}{d t}=-\left(\frac{d s}{d t}+\frac{d e}{d t}+\frac{d a}{d t}+\frac{d i}{d t}+\frac{d r}{d t}\right)$$

The sum of all rates is equal to zero! This equation is important because it keeps the rates balanced, i.e. it conserves the total population.

## Parameters

$\beta$: Effective contact rate [1/min]
    
$\gamma$: from infected Recovery(+Mortality) rate $\gamma=(a+b)$ [1/min]

$\gamma_2$: from asymptomatic Recovery(+Mortality) rate $\gamma_2=(c+d)$ [1/min]

$a$: mortality of healed from infected  [1/min]

$b$: recovery rate from infected [1/min]

$c$: mortality of healed from asymptomatic [1/min]

$d$: recovery rate from asymptomatic [1/min]

$\sigma$: is the rate at which individuals move from the exposed to the infectious classes. Its reciprocal ($1/\sigma$) is the average latent (exposed) period.

$\sigma_2$: is the rate at which individuals move from the infectious to the healed classes. Its reciprocal ($1/\sigma_2$) is the average time an individual spends in the infectious class before moving to the healed class.

$\sigma_3$: is the rate at which individuals move from the infectious to the dead classes. Its reciprocal ($1/\sigma_3$) is the average time an individual spends in the infectious class before moving to the dead class.
    
$p$: is the fraction of the exposed which become symptomatic infectious sub-population.

$(1-p)$: is the fraction of the exposed which becomes asymptomatic infectious sub-population.

# Initial parameters

In [5]:
# --- Run switches -----------------------------------------------------------
paramOpt=4          # selects the parameter CSV and its version/model tags (table below)
changeCSV=True      # apply the manual per-state overrides listed below
adjustParam=False   # rescale the s0 column by the sCorrect factors
paramSave=False     # write dfparam back to paramFile

# paramOpt -> (paramFile, version, model)
_PARAM_SOURCES = {
    0: ("data/param.csv", "1", "ManualIC"),
    1: ("data/param_optimized_DE_Scipy.csv", "1002", "DE_ScipyIC"),
    2: ("data/param_optimized_DE_SciPy_HistMin.csv", "1002", "DE_ScipyIC"),
    3: ("data/param_optimized_Yabox.csv", "104", "YaboxIC"),
    4: ("data/param_optimized_Yabox_HistMin.csv", "104", "YaboxIC"),
    5: ("data/param_optimized_Brute_Force.csv", "12", "BruteForceIC"),
    6: ("data/param_optimized_Brute_Force_HistMin.csv", "12", "BruteForceIC"),
}

# Fail loudly on an unsupported option: otherwise paramFile/version/model would
# be undefined, or silently keep the values of an earlier run of this cell.
if paramOpt not in _PARAM_SOURCES:
    raise ValueError(
        "paramOpt must be one of {}, got {!r}".format(sorted(_PARAM_SOURCES), paramOpt)
    )
paramFile, version, model = _PARAM_SOURCES[paramOpt]

# Load the parameters, dropping any row that contains a missing value.
dfparam = pd.read_csv(paramFile)
dfparam = dfparam.dropna()
states=dfparam.state

# Manual overrides applied on top of the loaded file: (state, column, value).
_STATE_OVERRIDES = [
    ('AM', 'd0', 20),
    ('PA', 'd0', 50),
    ('PI', 'd0', 0),
    ('RR', 'd0', 0),
    ('AP', 'd0', 0),
    ('RJ', 'd0', 10),
    ('PE', 'd0', 50),
    ('MG', 'd0', 20),
    ('SP', 'd0', 105),
    ('SP', 'WCASES', 0.65),
    ('PI', 'i0', 750),
    ('PE', 'i0', 500),
    ('RR', 'i0', 550),
    ('AP', 'i0', 650),
]

if changeCSV:
    # Underscore-prefixed loop names so the notebook-level `state` is not clobbered.
    for _state, _column, _value in _STATE_OVERRIDES:
        dfparam.loc[dfparam.state == _state, _column] = _value

if paramSave:
    # NOTE(review): to_csv also writes the index as an extra leading column by
    # default; code that slices columns by position may depend on that
    # layout -- confirm before changing it.
    dfparam.to_csv(paramFile)

if adjustParam:
    # One correction factor per row, in file order; the list holds 11 factors.
    # Runs after the optional save above, so the rescaled s0 is never written
    # back by this cell.
    sCorrect=[1.25,1.4,1.3,1.25,1.2,1.45,1.45,1.2,1.0,1.0,1.1]
    dfparam.s0=dfparam.s0.multiply(sCorrect[:len(states)], axis=0)

# Show the resulting parameter table as the cell output.
dfparam

Unnamed: 0,state,start-date,prediction-range,s0,e0,a0,i0,r0,d0,START,RATIO,WCASES,WREC
0,SP,2020-03-15,200,11474640.0,0,0,599,300,105,100,0.15,0.65,0.019385
1,ES,2020-04-01,200,469728.6,0,0,44,250,50,100,0.15,0.639412,0.007067
2,MG,2020-04-01,200,1475001.0,0,0,251,250,20,100,0.15,0.612272,0.005835
3,RJ,2020-03-20,200,813015.1,0,0,624,250,10,100,0.15,0.635137,0.004899
4,CE,2020-03-20,200,664783.5,0,0,512,250,50,100,0.15,0.643318,0.025334
5,PE,2020-03-20,200,905455.4,0,0,500,250,50,100,0.15,0.664496,0.01213
6,AM,2020-03-20,200,896659.8,0,0,563,250,20,100,0.15,0.694524,0.016627
7,PA,2020-03-20,200,800540.6,0,0,929,250,50,100,0.15,0.498993,0.02414
8,PI,2020-03-20,200,886027.9,0,0,750,250,0,100,0.15,0.671877,0.048074
9,RR,2020-03-20,200,1037288.0,0,0,550,250,0,100,0.15,0.611768,0.046058


# Define States Array and Import Solver

In [6]:
# Show the states that have parameters loaded.
display(states)
import ray
import cupy
from numba import cuda
# Number of states; only referenced by the commented-out ray.init below.
ncpu=len(states)
# ray.init(num_cpus=ncpu)

# Load solver
# The autoreload magics are IPython-only; presumably enabled so edits to the
# solver module are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2
import LearnerYabox_v3 as Learner #Yabox

[2m[36m(pid=138783)[0m ray.get_gpu_ids(): 255
[2m[36m(pid=138783)[0m CUDA_VISIBLE_DEVICES: 255


0     SP
1     ES
2     MG
3     RJ
4     CE
5     PE
6     AM
7     PA
8     PI
9     RR
10    AP
Name: state, dtype: object

# Solve for States Array

In [7]:
%%javascript
// Override the output area's scroll predicate so that it returns true for
// every output, whatever line count it is given.
IPython.OutputArea.prototype._should_scroll = function(lines){
    return true;}

<IPython.core.display.Javascript object>

In [8]:
# --- Run switches -----------------------------------------------------------
allStates=False        # True: fit every entry of `states`; False: fit only "AM"
cleanRecovered=False   # forwarded unchanged to the Learner constructor
version="20"           # forwarded to the Learner constructor

# States to fit in this run.
statesToSolve = states if allStates else ["AM"]

results=[]
for state in statesToSolve:
    query = dfparam.query('state == "{}"'.format(state)).reset_index()
    if query.empty:
        # Without this check the row lookup below fails with a bare IndexError.
        raise ValueError('state "{}" not found in dfparam'.format(state))
    # Positional slice: the first row's values from the third column of the
    # re-indexed frame onward, passed as positional Learner arguments.
    parameters = np.array(query.iloc[:, 2:])[0]
    f=Learner.Learner.remote(state, *parameters, cleanRecovered, version)
    result = f.train.remote()
    results.append(result)

# Wait for every submitted train() call and replace the references with
# their returned values.
results = ray.get(results)

  0%|          | 0/3500000 [00:00<?, ?it/s]
  0%|          | 0/3500000 [00:02<?, ?it/s]
  0%|          | 1/3500000 [00:03<1171:57:09,  1.21s/it]
  0%|          | 3/3500000 [00:05<998:00:39,  1.03s/it] 
  0%|          | 6/3500000 [00:06<818:43:05,  1.19it/s]
  0%|          | 10/3500000 [00:07<662:57:36,  1.47it/s]
  0%|          | 15/3500000 [00:08<537:48:14,  1.81it/s]
  0%|          | 21/3500000 [00:10<437:34:49,  2.22it/s]
  0%|          | 28/3500000 [00:11<356:44:42,  2.73it/s]
  0%|          | 36/3500000 [00:12<293:44:25,  3.31it/s]
  0%|          | 45/3500000 [00:13<245:14:56,  3.96it/s]
  0%|          | 55/3500000 [00:14<207:40:28,  4.68it/s]
  0%|          | 66/3500000 [00:16<178:23:48,  5.45it/s]
  0%|          | 78/3500000 [00:17<154:50:44,  6.28it/s]
  0%|          | 91/3500000 [00:18<135:31:37,  7.17it/s]
  0%|          | 105/3500000 [00:19<120:39:08,  8.06it/s]


KeyboardInterrupt: 

In [None]:
# Stop the Ray runtime once the solve step is finished (or was interrupted).
ray.shutdown()

# Plots

In [None]:
# IPython magic: render matplotlib figures inside the notebook output.
%matplotlib inline 
import matplotlib.pyplot as plt
import pandas as pd
#%load_ext autoreload
# IPython-only autoreload magics; presumably so edits to covid_plots are
# picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2
import covid_plots

In [None]:
%%javascript
// Override the output area's scroll predicate so that it returns false for
// every output, whatever line count it is given.
IPython.OutputArea.prototype._should_scroll = function(lines){
    return false;}

In [None]:
# States drawn together on the log plot.
states4Plot = ['MG', 'SP', 'ES', 'CE', 'RJ']

# Plot selector -- choose one:
#   0  all plots
#   1  corona log plot
#   2  logistic model prediction
#   3  bar plot with growth rate
#   4  log plot + bar plot
#   5  SEAIR-D Model
opt = 5

# Number of cases at which the model starts being plotted on the log graph
# (real data = 100).
startCase = 100

In [None]:
# Plot one state, or every state of the parameter file for the SEAIR-D plot.
from datetime import datetime,timedelta
dfparam = pd.read_csv(paramFile)
plotStates=False

# Select every state of the parameter file for plotting.
# Each state must have been run (solved) before it can be plotted.
if plotStates:
    states=dfparam.state
    allStates=True

# Main state for the log graph or the single-state SEAIR-D plot.
state="SP"

# Any plot other than SEAIR-D (opt == 5) is drawn for one state only.
# The flag must be spelled `allStates` for the check below to see it.
if opt != 5:
    allStates=False

# Maximum date of the plots: 100 days after the newest date in `df`.
# 100 days is used as the maximum for the death-derivative calculations.
lastDate=df.date.max()
maxDate= datetime.strptime(lastDate, '%Y-%m-%d') + timedelta(days = 100) #"2020-08-31"
maxDateStr = maxDate.strftime("%Y-%m-%d")

# Either every state, or just the main state chosen above.
statesToPlot = states if allStates else [state]

for state in statesToPlot:
    # Per-state plotting settings come from that state's row of the parameter file.
    query = dfparam.query('state == "{}"'.format(state)).reset_index()
    ratio = query['RATIO'][0]
    startCase = query['START'][0]
    startdate = query['start-date'][0]
    predict_range = query['prediction-range'][0]

    # When all states are plotted, opt is necessarily 5 (see the guard above).
    covid_plots.covid_plots(state, states4Plot, startdate, predict_range,
                            startCase, opt, version, show=True, ratio=ratio,
                            maxDate=maxDateStr, model=model)