# Optimize Initial Conditions
## Yabox

In [1]:
import numpy as np
from datetime import datetime,timedelta
import pandas as pd
from yabox import DE

# Initialize Ray

In [2]:
from environs import Env
env = Env()
env.str("CUDA_DEVICE_ORDER",'PCI_BUS_ID')
env.str("CUDA_VISIBLE_DEVICES","96")
env.int("NUMBA_ENABLE_CUDASIM",1)
env.bool("OMPI_MCA_opal_cuda_support",True)

import os
import ray
MB=1024*1024
GB=MB*1024
ray.shutdown()
ray.init(num_gpus=96,num_cpus=17,
         ignore_reinit_error=True)

@ray.remote(num_gpus=1)
def use_gpu():
    print("ray.get_gpu_ids(): {}".format(ray.get_gpu_ids()[0]))
    print("CUDA_VISIBLE_DEVICES: {}".format(os.environ["CUDA_VISIBLE_DEVICES"]))

use_gpu.remote()

2021-03-02 11:27:48,872	INFO services.py:1092 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8268[39m[22m


ObjectRef(df5a1a828c9685d3ffffffff0100000001000000)

# Load New and Process Data from website data.brasil.io

In [3]:
%reload_ext autoreload
%autoreload 2
import get_data_v2 as gd
LoadData=True

if LoadData:
    gd.get_data()

Baixando arquivos brasil.io...
[2m[36m(pid=36836)[0m ray.get_gpu_ids(): 95
[2m[36m(pid=36836)[0m CUDA_VISIBLE_DEVICES: 95
   state      popEst
0     AC    881935.0
1     AL   3337357.0
2     AM   4144597.0
3     AP    845731.0
4     BA  14873064.0
5     CE   9132078.0
6     DF   3015268.0
7     ES   4018650.0
8     GO   7018354.0
9     MA   7075181.0
10    MG  21168791.0
11    MS   2778986.0
12    MT   3484466.0
13    PA   8602865.0
14    PB   4018127.0
15    PE   9557071.0
16    PI   3273227.0
17    PR  11433957.0
18    RJ  17264943.0
19    RN   3506853.0
20    RO   1777225.0
21    RR    605761.0
22    RS  11377239.0
23    SC   7164788.0
24    SE   2298696.0
25    SP  45919049.0
26    TO   1572866.0


# Functions to Load Processed Data

In [4]:
def load_confirmed(districtRegion, start_date):
    dateparse = lambda x: datetime.strptime(x, '%Y-%m-%d')
    df = pd.read_csv('./data/confirmados.csv',delimiter=',',parse_dates=True, date_parser=dateparse)
    y=[]
    x=[]
    start=datetime.strptime(start_date, "%Y-%m-%d")+timedelta(days=40)
    start2=start.strftime("%Y-%m-%d")
    for i in range(0,len(df.date)):
        y.append(df[districtRegion].values[i])
        x.append(df.date.values[i])
    df2=pd.DataFrame(data=y,index=x,columns=[""])
    df2 =df2.apply (pd.to_numeric, errors='coerce')
    df2[start2:] = df2[start2:].replace({0:np.nan})
    df2 = df2.dropna()
    df2.index = pd.DatetimeIndex(df2.index)
    #interpolate missing data
    df2 = df2.reindex(pd.date_range(df2.index.min(), df2.index.max()), fill_value=np.nan)
    df2 = df2.interpolate(method='akima', axis=0).ffill().bfill()
    #string type for dates and integer for data
    df2 = df2.astype(int)
    df2.index = df2.index.astype(str)
    #select dates
    df2=df2[start_date:]
    del x,y,df,dateparse
    return df2

def load_dead(districtRegion, start_date):
    dateparse = lambda x: datetime.strptime(x, '%Y-%m-%d')
    df = pd.read_csv('./data/mortes.csv',delimiter=',',parse_dates=True, date_parser=dateparse)
    y=[]
    x=[]
    start=datetime.strptime(start_date, "%Y-%m-%d")+timedelta(days=40)
    start2=start.strftime("%Y-%m-%d")
    for i in range(0,len(df.date)):
        y.append(df[districtRegion].values[i])
        x.append(df.date.values[i])
    df2=pd.DataFrame(data=y,index=x,columns=[""])
    df2 =df2.apply (pd.to_numeric, errors='coerce')
    df2[start2:] = df2[start2:].replace({0:np.nan})
    df2 = df2.dropna()
    df2.index = pd.DatetimeIndex(df2.index)
    #interpolate missing data
    df2 = df2.reindex(pd.date_range(df2.index.min(), df2.index.max()), fill_value=np.nan)
    df2 = df2.interpolate(method='akima', axis=0).ffill().bfill()
    #string type for dates and integer for data
    df2 = df2.astype(int)
    df2.index = df2.index.astype(str)
    #select dates
    df2=df2[start_date:]
    del x,y,df,dateparse
    return df2

# Load solver

In [5]:
%reload_ext autoreload
%autoreload 2
import LearnerICRayNoLoadBH_v2 as L 

# Data for States

In [6]:
dfparam = pd.read_csv("data/param.csv")
dfPopEst = pd.read_csv("data/popEst.csv")
dfparam['popEst']=dfPopEst.popEst
dfparam['RATIO']=0
display(dfparam)

Unnamed: 0,state,start-date,prediction-range,s0,e0,a0,i0,r0,d0,START,RATIO,WCASES,WREC,popEst
0,SP,2020-03-15,200,8000000.0,0,0,800,300,250,100,0,0.55,0.01,881935.0
1,ES,2020-04-01,200,475000.0,0,0,50,250,50,100,0,0.65,0.01,3337357.0
2,MG,2020-04-01,200,1100000.0,0,0,200,250,40,100,0,0.55,0.01,4144597.0
3,RJ,2020-03-20,200,700000.0,0,0,800,250,50,100,0,0.5,0.01,845731.0
4,CE,2020-03-20,200,800000.0,0,0,800,250,50,100,0,0.5,0.01,14873064.0
5,PE,2020-03-20,200,700000.0,0,0,800,250,100,100,0,0.5,0.01,9132078.0
6,AM,2020-03-20,200,700000.0,0,0,800,250,100,100,0,0.5,0.01,3015268.0
7,PA,2020-03-20,200,700000.0,0,0,800,250,100,100,0,0.5,0.01,4018650.0
8,PI,2020-03-20,200,700000.0,0,0,800,250,100,100,0,0.5,0.01,7018354.0
9,RR,2020-03-20,200,700000.0,0,0,800,250,100,100,0,0.5,0.01,7075181.0


In [7]:
df = pd.read_csv("data/dados_total_estados.csv",compression='gzip')
df

Unnamed: 0,index,date,state,city,place_type,confirmed,deaths,order_for_place,is_last,popEst,estimated_population,city_ibge_code,confirmed_per_100k_inhabitants,death_rate
0,0,2021-03-01,AP,TOTAL,state,83885,1142,343,True,845731.0,861773.0,16.0,9734.00188,0.0136
1,1,2021-02-28,AP,TOTAL,state,83663,1140,342,False,845731.0,861773.0,16.0,9708.24103,0.0136
2,2,2021-02-27,AP,TOTAL,state,83505,1139,341,False,845731.0,861773.0,16.0,9689.90674,0.0136
3,3,2021-02-26,AP,TOTAL,state,83279,1136,340,False,845731.0,861773.0,16.0,9663.68174,0.0136
4,4,2021-02-25,AP,TOTAL,state,83062,1135,339,False,845731.0,861773.0,16.0,9638.50109,0.0137
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9448,1369627,2020-02-29,SP,TOTAL,state,2,0,5,False,45919049.0,46289333.0,35.0,0.00432,0.0000
9449,1369628,2020-02-28,SP,TOTAL,state,2,0,4,False,45919049.0,46289333.0,35.0,0.00432,0.0000
9450,1369629,2020-02-27,SP,TOTAL,state,1,0,3,False,45919049.0,46289333.0,35.0,0.00216,0.0000
9451,1369630,2020-02-26,SP,TOTAL,state,1,0,2,False,45919049.0,46289333.0,35.0,0.00216,0.0000


# Functions for Optimization

In [8]:
from scipy.integrate import odeint
import sys
import io
import gc

def create_f(state,e0,a0,r0,date, ratio, predict_range, version):
                
    def fobjective(point):
        
        cleanRecovered=False
        s0, deltaDate, i0, d0, startNCases, weigthCases = point
        weigthRecov=0
        ratio=0
        dead = load_dead(state,date)
        data = load_confirmed(state,date)*(1-ratio)-dead
        end_date=datetime.strptime(date, "%Y-%m-%d") + timedelta(days=deltaDate)
        f=L.Learner.remote(state, end_date.strftime("%Y-%m-%d"), predict_range,\
                           s0, e0, a0, i0, r0, d0, startNCases, ratio, weigthCases, weigthRecov, \
                           cleanRecovered, version, data, dead, savedata=False)
        result = f.train.remote() 
        result = ray.get(result) 

        del end_date,cleanRecovered, point,f         

        gc.collect()

        return result
    return fobjective

In [9]:
@ray.remote(num_cpus=0,num_gpus=4) #, max_calls=1)
def opt(state,s0,i0,e0,a0,r0,d0,wcases,date,startNCases, 
        ratio, predict_range, popEst, version):

    bounds=[(0.3*popEst,0.99*popEst),(-4,4),(0,250), (0,250), (0,250),\
              (0.19,0.85)]
    maxiterations=500
    runDate=datetime.today()
    f=create_f(state,e0,a0,r0,date, ratio, predict_range, version)
    de = DE(f, bounds, maxiters=maxiterations)
    for step in de.geniterator():
        try:
            idx = step.best_idx
            norm_vector = step.population[idx]
            best_params = de.denormalize([norm_vector])
            del norm_vector, idx
        except:
            print("error in function evaluation")
    p=best_params[0]
    
    return p

# Main Code

In [10]:
%%javascript
IPython.OutputArea.prototype._should_scroll = function(lines){
    return true;}

<IPython.core.display.Javascript object>

In [11]:
states=dfparam.state
display(states)
version="119"
gc.enable()

optimal=[]
for state in states:
    #remove previous history file
    strFile='./results/history_'+state+version+'.csv'
    if os.path.isfile(strFile):
        os.remove(strFile)
    query = dfparam.query('state == "{}"'.format(state)).reset_index()
    parameters = np.array(query.iloc[:, 2:])[0]
    date,predict_range,s0,e0,a0,i0,r0,d0,startNCases,ratio,wcases,wrec,popEst = parameters
    optimal.append(opt.remote(state,s0,i0,e0,a0,r0,d0,wcases,date,startNCases, \
                              ratio, predict_range, popEst, version))           

0     SP
1     ES
2     MG
3     RJ
4     CE
5     PE
6     AM
7     PA
8     PI
9     RR
10    AP
Name: state, dtype: object

In [None]:
optimal=ray.get(optimal)



[2m[36m(pid=36825)[0m basinhopping step 0: f 1.72614e+10
[2m[36m(pid=36823)[0m basinhopping step 0: f 1.67341e+10
[2m[36m(pid=1818)[0m basinhopping step 0: f 2.72282e+10
[2m[36m(pid=36825)[0m basinhopping step 1: f 1.72614e+10 trial_f 1.7395e+10 accepted 0  lowest_f 1.72614e+10
[2m[36m(pid=36824)[0m basinhopping step 0: f 1.57183e+10
[2m[36m(pid=1814)[0m basinhopping step 0: f 4.0469e+10
[2m[36m(pid=1818)[0m basinhopping step 1: f 2.72282e+10 trial_f 2.77134e+10 accepted 0  lowest_f 2.72282e+10




[2m[36m(pid=1814)[0m basinhopping step 1: f 4.0469e+10 trial_f 6.15223e+10 accepted 0  lowest_f 4.0469e+10
[2m[36m(pid=36825)[0m basinhopping step 2: f 1.72614e+10 trial_f 2.12575e+10 accepted 0  lowest_f 1.72614e+10
[2m[36m(pid=36825)[0m basinhopping step 3: f 1.72614e+10 trial_f 1.75066e+10 accepted 0  lowest_f 1.72614e+10
[2m[36m(pid=36803)[0m basinhopping step 0: f 2.46152e+11
[2m[36m(pid=1832)[0m basinhopping step 0: f 2.20545e+10
[2m[36m(pid=36825)[0m basinhopping step 4: f 1.72614e+10 trial_f 1.7291e+10 accepted 0  lowest_f 1.72614e+10
[2m[36m(pid=36825)[0m basinhopping step 5: f 1.72614e+10 trial_f 1.75066e+10 accepted 0  lowest_f 1.72614e+10
[2m[36m(pid=1832)[0m basinhopping step 1: f 2.20241e+10 trial_f 2.20241e+10 accepted 1  lowest_f 2.20241e+10
[2m[36m(pid=1832)[0m found new global minimum on step 1 with function value 2.20241e+10
[2m[36m(pid=36804)[0m basinhopping step 0: f 7.93707e+09
[2m[36m(pid=36824)[0m basinhopping step 1: f 1.57183e+

In [None]:
for i in range(0,len(states)):    

    state=states[i]
    query = dfparam.query('state == "{}"'.format(state)).reset_index()
    parameters = np.array(query.iloc[:, 2:])[0]
    startdate,predict_range,s0,e0,a0,i0,r0,d0,startNCases,ratio,wcases,wrec = parameters
    Date = datetime.strptime(startdate, "%Y-%m-%d")
    end_date = Date + timedelta(days=+int(optimal[i][1]))
    dateStr=end_date.strftime("%Y-%m-%d")

    j = query['index'].values[0]
    dfparam.at[j, "s0"] = optimal[i][0]
    dfparam.at[j, "start-date"] = dateStr
    dfparam.at[j, "i0"] = optimal[i][2]
    dfparam.at[j, "WCASES"] = optimal[i][3]
    dfparam.at[j, "WREC"] = optimal[i][4]

    dfparam.to_csv("data/param_optimized_Yabox.csv", sep=",", index=False)
    dfparam
    