# Optimize Initial Conditions
## Yabox

In [1]:
import numpy as np
from datetime import datetime,timedelta
import pandas as pd
from yabox import DE

# Initialize Ray

In [2]:
# Environment / Ray bootstrap for the whole notebook.
from environs import Env
env = Env()
# NOTE(review): the return values of the four calls below are discarded. As far
# as environs documents, Env.str/int/bool only READ an environment variable
# (falling back to the given default) and never write to os.environ, so these
# lines look like no-ops. If the intent was to SET the variables for CUDA /
# numba / OpenMPI, os.environ must be assigned before those libraries are
# imported -- confirm the intent before changing anything.
env.str("CUDA_DEVICE_ORDER",'PCI_BUS_ID')
env.int("CUDA_VISIBLE_DEVICES",1)
env.int("NUMBA_ENABLE_CUDASIM",1)
env.bool("OMPI_MCA_opal_cuda_support",True)

import os
import ray
# Byte-size helpers used for the Ray memory limits below.
MB=1024*1024
GB=MB*1024
# Stop any Ray instance left over from a previous run of this cell so that
# ray.init starts from a clean state.
ray.shutdown()
# Start a local Ray instance with hard-coded limits: 1 GB object store,
# 220 GB for workers, 500 MB driver object store, one GPU and one CPU.
# NOTE(review): memory=220*GB is specific to the machine this was written on.
ray.init(object_store_memory=1*GB,memory=220*GB,
         #lru_evict=True,
         driver_object_store_memory=500*MB,num_gpus=1,num_cpus=1,
         ignore_reinit_error=True) # , include_webui=False, ignore_reinit_error=True)

@ray.remote(num_gpus=1)
def use_gpu():
    """Print the first GPU id Ray assigned to this task and the worker's CUDA_VISIBLE_DEVICES."""
    assigned_gpu = ray.get_gpu_ids()[0]
    print("ray.get_gpu_ids(): {}".format(assigned_gpu))
    visible_devices = os.environ["CUDA_VISIBLE_DEVICES"]
    print("CUDA_VISIBLE_DEVICES: {}".format(visible_devices))

# Fire-and-forget smoke test; the prints appear in the Ray worker log output.
use_gpu.remote()

2020-09-28 13:31:52,477	INFO resource_spec.py:212 -- Starting Ray with 219.97 GiB memory available for workers and up to 1.0 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).
2020-09-28 13:31:52,798	INFO services.py:1170 -- View the Ray dashboard at [1m[32mlocalhost:8265[39m[22m


ObjectID(45b95b1c8bd3a9c4ffffffff010000c801000000)

# Load and Process New Data from the data.brasil.io Website

In [3]:
# Reload edited project modules automatically before each cell execution.
%reload_ext autoreload
%autoreload 2
import get_data
# Switch for the refresh step below; set to False to skip calling
# get_data.get_data() when re-running the notebook.
LoadData=True

# NOTE(review): presumably get_data.get_data() downloads and pre-processes the
# CSV files read later from data/ -- confirm in get_data.py.
if LoadData:
    get_data.get_data()

[2m[36m(pid=18428)[0m ray.get_gpu_ids(): 0
[2m[36m(pid=18428)[0m CUDA_VISIBLE_DEVICES: 0


# Functions to Load Processed Data

In [4]:
def _load_country_series(csv_path, country, start_date=None):
    """Return the first row of ``csv_path`` whose 'Country/Region' is ``country``.

    Parameters
    ----------
    csv_path : str
        CSV file with a 'Country/Region' column; the remaining columns are
        used as labels of the returned Series.
    country : str
        Value to match in the 'Country/Region' column.
    start_date : str, optional
        Column label to start from. When given, only the labels from
        ``start_date`` (inclusive) to the last column are returned; when
        ``None`` the whole row, including 'Country/Region', is returned.

    Returns
    -------
    pandas.Series
        The matching row, optionally sliced by label.

    Raises
    ------
    IndexError
        If no row matches ``country``.
    KeyError
        If ``start_date`` is not one of the column labels.
    """
    df = pd.read_csv(csv_path)
    matches = df[df['Country/Region'] == country]
    row = matches.iloc[0]
    if start_date is None:
        return row
    return row.loc[start_date:]


def load_confirmed(country,start_date=None):
    """Load the confirmed-cases row for ``country`` (see ``_load_country_series``)."""
    return _load_country_series('data/time_series_19-covid-Confirmed-country.csv',
                                country, start_date)


def load_recovered(country,start_date=None):
    """Load the recovered-cases row for ``country`` (see ``_load_country_series``)."""
    return _load_country_series('data/time_series_19-covid-Recovered-country.csv',
                                country, start_date)


def load_dead(country,start_date=None):
    """Load the deaths row for ``country`` (see ``_load_country_series``)."""
    return _load_country_series('data/time_series_19-covid-Deaths-country.csv',
                                country, start_date)

# Load solver

In [5]:
%reload_ext autoreload
%autoreload 2
import LearnerICRayNoLoadBH as L 

# Data for Countries

In [6]:
dfparam = pd.read_csv("data/param.csv")
countries=dfparam.country
popEst = pd.read_csv("data/WPP2019_TotalPopulationBySex.csv")
# Use item assignment: `popEst.popTotal = ...` only sets a Python attribute on
# the DataFrame (pandas warns about it) and the coerced values were never used.
popEst["PopTotal"] = pd.to_numeric(popEst["PopTotal"], errors='coerce')

# Share of the total population used as the initial guess / upper bound for s0.
S0_POPULATION_FRACTION = 0.3

for country in countries:
    # param.csv uses "US"; the WPP file lists the country under its full name.
    country2 = "United States of America" if country=="US" else country
    # First 2020 entry for the country. The original read it positionally as
    # column 8, which is the PopTotal column of the WPP file.
    pop_rows = popEst.loc[(popEst.Location==country2) & (popEst.Time==2020), "PopTotal"]
    if pop_rows.empty:
        raise IndexError(
            "no 2020 population estimate found for {!r}".format(country2))
    # The stored value is scaled by 1000 to get head counts
    # (NOTE(review): WPP reports PopTotal in thousands -- confirm).
    pop_total = pop_rows.iloc[0]*1000
    row_mask = dfparam.country==country
    dfparam.loc[row_mask,'popTotal']=pop_total
    dfparam.loc[row_mask,'s0']=pop_total*S0_POPULATION_FRACTION
    
display(dfparam)
    

  after removing the cwd from sys.path.


Unnamed: 0,country,start-date,prediction-range,s0,e0,a0,i0,r0,d0,START,WCASES,WREC,popTotal
0,Brazil,3/2/20,200,63767822.7,0.0001,0.0001,200,100,50,50,0.15,0.05,212559400.0
1,China,1/28/20,200,431797132.2,0.0001,0.0001,200,100,50,50,0.15,0.05,1439324000.0
2,Italy,2/28/20,200,18138548.4,0.0001,0.0001,200,100,50,50,0.15,0.05,60461830.0
3,US,2/20/20,200,99300794.1,0.0001,0.0001,200,100,50,50,0.15,0.05,331002600.0
4,India,3/10/20,200,414001315.5,0.0001,0.0001,200,100,50,50,0.15,0.05,1380004000.0


# Functions for Optimization

In [7]:
from scipy.integrate import odeint
import sys
import io
import gc

def create_f(country,e0,a0,r0,date, predict_range, version):
    """Build the objective function that the optimiser evaluates for one country.

    Parameters
    ----------
    country : str
        Country name used to select the rows of the processed CSV files.
    e0, a0, r0 :
        Fixed initial conditions forwarded unchanged to ``L.Learner``.
    date : str
        Nominal start date in ``%m/%d/%y`` format; the observed series are
        loaded from this date onwards.
    predict_range, version :
        Forwarded unchanged to ``L.Learner``.

    Returns
    -------
    callable
        ``fobjective(point)`` where ``point`` unpacks to
        ``(s0, deltaDate, i0, d0, startNCases, weigthCases, weigthRecov)``.
    """
    # The observed series depend only on (country, date), so they are read once
    # here instead of re-parsing three CSV files on every objective evaluation.
    dead = load_dead(country, date)
    recovered = load_recovered(country, date)
    data = load_confirmed(country, date) - recovered - dead
    start = datetime.strptime(date, "%m/%d/%y")
    cleanRecovered = False

    def fobjective(point):
        """Train one ``L.Learner`` actor for ``point`` and return what ``train`` returns."""
        s0, deltaDate, i0, d0, startNCases, weigthCases, weigthRecov = point
        # deltaDate shifts the nominal start date; it may be fractional, and
        # strftime below keeps only the calendar day of the shifted datetime.
        end_date = start + timedelta(days=deltaDate)
        learner = L.Learner.remote(country, end_date.strftime("%m/%d/%y"), predict_range,
                                   s0, e0, a0, i0, r0, d0, startNCases, weigthCases, weigthRecov,
                                   cleanRecovered, version, data, dead, recovered, savedata=False)
        result = ray.get(learner.train.remote())

        # Drop the actor handle before collecting, as the original code did.
        del learner
        gc.collect()

        return result
    return fobjective

In [8]:
@ray.remote(memory=50 * 1024 * 1024, max_calls=1)
def opt(country,s0,i0,e0,a0,r0,d0,wcases,wrec,date,startNCases,
        predict_range, version):
    """Search the initial conditions of one country with yabox differential evolution.

    Parameters
    ----------
    country, e0, a0, r0, date, predict_range, version :
        Forwarded to ``create_f`` to build the objective function.
    s0 :
        Upper bound of the search interval for the susceptible population.
    i0, d0, wcases, wrec, startNCases :
        Accepted so existing callers keep working; not used by the search,
        whose intervals for these quantities are fixed below.

    Returns
    -------
    The denormalised best vector of the last generation, ordered as
    ``(s0, deltaDate, i0, d0, startNCases, weigthCases, weigthRecov)``.

    Raises
    ------
    RuntimeError
        If no generation produced a best vector.
    """
    # One (low, high) pair per optimised value, in the order the objective
    # function unpacks them: s0, deltaDate, i0, d0, startNCases,
    # weigthCases, weigthRecov.
    bounds=[(5e3,s0),(-2,2),(0,350), (0,250), (0,150),
            (0.1,0.35),(0.001,.2)]
    maxiterations=500
    f=create_f(country,e0,a0,r0,date, predict_range, version)
    de = DE(f, bounds, maxiters=maxiterations)

    best_params = None
    for step in de.geniterator():
        try:
            best_params = de.denormalize([step.population[step.best_idx]])
        except Exception as err:
            # Keep the best-effort behaviour, but report what went wrong and
            # no longer swallow KeyboardInterrupt/SystemExit.
            print("error in function evaluation:", err)

    if best_params is None:
        raise RuntimeError(
            "differential evolution produced no result for {}".format(country))

    # The original ended with `del f, bounds, data, dead, best_params`; `data`
    # and `dead` are never bound in this function, so that statement raised
    # UnboundLocalError and the result was never returned.
    return best_params[0]

# Main Code

In [9]:
%%javascript
// Replace the notebook front-end hook so that _should_scroll always returns true,
// regardless of the number of output lines passed in.
// NOTE(review): presumably meant to keep the long optimiser logs inside a
// scrollable output area; relies on the classic-notebook `IPython` global -- confirm.
IPython.OutputArea.prototype._should_scroll = function(lines){
    return true;}

<IPython.core.display.Javascript object>

In [10]:
countries=dfparam.country
display(countries)
# True: optimise every country listed in dfparam; False: only "Brazil".
allCountries=True
version="003"
gc.enable()

# Both modes share the same per-country steps, so pick the list first and run
# a single loop instead of two copies of the same code.
selected_countries = countries if allCountries else ["Brazil"]

# Ray object references, one per submitted country, in submission order.
optimal=[]
for country in selected_countries:
    #remove previous history file
    strFile='./results/history_'+country+version+'.csv'
    if os.path.isfile(strFile):
        os.remove(strFile)
    query = dfparam.query('country == "{}"'.format(country)).reset_index()
    # Positional unpack: after reset_index the first two columns are skipped
    # and the remaining twelve must be in exactly this order.
    # NOTE(review): this depends on the column order of data/param.csv.
    parameters = np.array(query.iloc[:, 2:])[0]
    date,predict_range,s0,e0,a0,i0,r0,d0,startNCases,wcases,wrec,pop = parameters
    optimal.append(opt.remote(country,s0,i0,e0,a0,r0,d0,wcases,wrec,date,startNCases,
                              predict_range, version))

0    Brazil
1     China
2     Italy
3        US
4     India
Name: country, dtype: object



In [None]:
# Block until every submitted `opt` task has finished and replace the list of
# Ray object references with the values those tasks returned.
# NOTE(review): `optimal` is rebound here, so this cell is not idempotent --
# re-running it without re-running the submission cell passes results, not
# references, to ray.get.
optimal=ray.get(optimal)




[2m[36m(pid=18473)[0m basinhopping step 0: f -7.82403e+12
[2m[36m(pid=18473)[0m basinhopping step 1: f -7.82403e+12 trial_f 3.99631e+13 accepted 0  lowest_f -7.82403e+12
[2m[36m(pid=18525)[0m basinhopping step 0: f -2.61395e+13
[2m[36m(pid=18499)[0m basinhopping step 0: f -6.62631e+15
[2m[36m(pid=18577)[0m basinhopping step 0: f 7.88185e+10
[2m[36m(pid=18525)[0m basinhopping step 1: f -4.67517e+13 trial_f -4.67517e+13 accepted 1  lowest_f -4.67517e+13
[2m[36m(pid=18525)[0m found new global minimum on step 1 with function value -4.67517e+13
[2m[36m(pid=18473)[0m basinhopping step 2: f -7.82403e+12 trial_f 3.99631e+13 accepted 0  lowest_f -7.82403e+12
[2m[36m(pid=18473)[0m basinhopping step 3: f -7.82403e+12 trial_f 3.99632e+13 accepted 0  lowest_f -7.82403e+12
[2m[36m(pid=18525)[0m basinhopping step 2: f -4.67517e+13 trial_f 2.61302e+09 accepted 0  lowest_f -4.67517e+13
[2m[36m(pid=18525)[0m basinhopping step 3: f -4.67517e+13 trial_f 3.04782e+09 accepte

[2m[36m(pid=18473)[0m basinhopping step 17: f -7.82403e+12 trial_f 7.48025e+11 accepted 0  lowest_f -7.82403e+12
[2m[36m(pid=18551)[0m basinhopping step 11: f -7.16687e+12 trial_f 8.56668e+11 accepted 0  lowest_f -7.16687e+12
[2m[36m(pid=18551)[0m basinhopping step 12: f -7.16687e+12 trial_f 2.8018e+13 accepted 0  lowest_f -7.16687e+12
[2m[36m(pid=18473)[0m basinhopping step 18: f -2.03237e+13 trial_f -2.03237e+13 accepted 1  lowest_f -2.03237e+13
[2m[36m(pid=18473)[0m found new global minimum on step 18 with function value -2.03237e+13
[2m[36m(pid=18525)[0m basinhopping step 13: f -6.9352e+13 trial_f 6.13624e+11 accepted 0  lowest_f -6.9352e+13
[2m[36m(pid=18551)[0m basinhopping step 13: f -7.16687e+12 trial_f 1.23327e+12 accepted 0  lowest_f -7.16687e+12
[2m[36m(pid=18577)[0m basinhopping step 9: f -5.63367e+15 trial_f 2.29093e+12 accepted 0  lowest_f -5.63367e+15
[2m[36m(pid=18525)[0m basinhopping step 14: f -6.9352e+13 trial_f 3.04433e+09 accepted 0  lowe

[2m[36m(pid=18473)[0m basinhopping step 30: f -2.03237e+13 trial_f 1.28495e+11 accepted 0  lowest_f -2.03237e+13
[2m[36m(pid=18551)[0m basinhopping step 25: f -1.44444e+13 trial_f 1.01805e+12 accepted 0  lowest_f -1.44444e+13
[2m[36m(pid=18551)[0m basinhopping step 26: f -1.44444e+13 trial_f -2.96742e+12 accepted 0  lowest_f -1.44444e+13
[2m[36m(pid=18577)[0m basinhopping step 19: f -5.63367e+15 trial_f 1.53858e+13 accepted 0  lowest_f -5.63367e+15
[2m[36m(pid=18499)[0m basinhopping step 27: f -6.62631e+15 trial_f 1.09125e+09 accepted 0  lowest_f -6.62631e+15
[2m[36m(pid=18551)[0m basinhopping step 27: f -1.44444e+13 trial_f 1.82486e+16 accepted 0  lowest_f -1.44444e+13
[2m[36m(pid=18473)[0m basinhopping step 31: f -2.03237e+13 trial_f 6.62414e+10 accepted 0  lowest_f -2.03237e+13
[2m[36m(pid=18525)[0m basinhopping step 24: f -6.9352e+13 trial_f -6.60048e+13 accepted 0  lowest_f -6.9352e+13
[2m[36m(pid=18551)[0m basinhopping step 28: f -1.44444e+13 trial_f 1.

In [None]:
# Write the optimised values back into dfparam and save the table.
#
# Each entry of `optimal` is ordered like the search bounds in `opt` and the
# unpacking in the objective function:
#   (s0, deltaDate, i0, d0, startNCases, weigthCases, weigthRecov)
# The original cell wrote entries 3 and 4 (d0, startNCases) into WCASES/WREC
# and unpacked 11 names from the 12 parameter columns, which raises ValueError.
#
# NOTE(review): results are matched to `countries` by position. In
# single-country mode only the first country is updated, which is correct only
# if that country is the first row of dfparam.
# NOTE(review): this cell shifts "start-date" in place, so running it twice
# applies the shift twice.
for country, best in zip(countries, optimal):
    s0_opt, delta_days, i0_opt, d0_opt, start_opt, wcases_opt, wrec_opt = best

    row_mask = dfparam.country == country
    startdate = dfparam.loc[row_mask, "start-date"].iloc[0]
    Date = datetime.strptime(startdate, "%m/%d/%y")
    # Same shift as the objective function (fractional days, then keep the
    # calendar day); int() truncated toward zero and could pick another day.
    end_date = Date + timedelta(days=float(delta_days))
    dateStr = end_date.strftime("%m/%d/%y")

    dfparam.loc[row_mask, "s0"] = s0_opt
    dfparam.loc[row_mask, "start-date"] = dateStr
    dfparam.loc[row_mask, "i0"] = i0_opt
    dfparam.loc[row_mask, "d0"] = d0_opt
    dfparam.loc[row_mask, "START"] = start_opt
    dfparam.loc[row_mask, "WCASES"] = wcases_opt
    dfparam.loc[row_mask, "WREC"] = wrec_opt

# Save once, after all rows are updated, and show the final table.
dfparam.to_csv("data/param_optimized_Yabox.csv", sep=",", index=False)
dfparam
    