# Optimize Initial Conditions
## Yabox

In [1]:
import numpy as np
from datetime import datetime,timedelta
import pandas as pd
from yabox import DE

# Initialize Ray

In [2]:
# Environment / Ray bootstrap for the notebook.
from environs import Env
env = Env()
# NOTE(review): each call below parses a variable (falling back to the given
# default) and the returned value is discarded. These calls do not appear to
# export anything to os.environ -- confirm against the environs documentation
# whether `os.environ[...] = ...` was intended instead.
env.str("CUDA_DEVICE_ORDER",'PCI_BUS_ID')
env.int("CUDA_VISIBLE_DEVICES",1)
env.int("NUMBA_ENABLE_CUDASIM",1)
env.bool("OMPI_MCA_opal_cuda_support",True)

import os
import ray
# Byte-size helpers used for the Ray memory limits below.
MB=1024*1024
GB=MB*1024
# Shut down any Ray instance left over from a previous run of this cell so the
# init below starts from a clean state when the cell is re-executed.
ray.shutdown()
# Start Ray with 1 GB of object store, 220 GB of worker memory, 500 MB of
# driver object store, 5 GPUs and 1 CPU declared as schedulable resources.
# NOTE(review): these figures are machine specific; adjust before running elsewhere.
ray.init(object_store_memory=1*GB,memory=220*GB,
         lru_evict=True,
         driver_object_store_memory=500*MB,num_gpus=5,num_cpus=1,
         ignore_reinit_error=True) # , include_webui=False)

@ray.remote(num_gpus=1)
def use_gpu():
    """Report which GPU this Ray task was given.

    Prints the first id returned by ``ray.get_gpu_ids()`` followed by the
    worker's ``CUDA_VISIBLE_DEVICES`` environment variable.
    """
    gpu_line = "ray.get_gpu_ids(): {}".format(ray.get_gpu_ids()[0])
    print(gpu_line)
    env_line = "CUDA_VISIBLE_DEVICES: {}".format(os.environ["CUDA_VISIBLE_DEVICES"])
    print(env_line)

# Fire-and-forget smoke test: the task's prints arrive asynchronously in the
# notebook output; the returned object id is only displayed, never awaited.
use_gpu.remote()

2020-10-29 15:17:00,893	INFO resource_spec.py:212 -- Starting Ray with 219.97 GiB memory available for workers and up to 1.0 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).
2020-10-29 15:17:01,206	INFO services.py:1170 -- View the Ray dashboard at [1m[32mlocalhost:8266[39m[22m


ObjectID(45b95b1c8bd3a9c4ffffffff010000c801000000)

# Load and Process New Data from data.brasil.io

In [3]:
# Re-import edited project modules automatically before each cell execution.
%reload_ext autoreload
%autoreload 2
# Project-local module (not shown in this notebook).
import get_data
# Set to False to skip the refresh below and reuse whatever is already on disk.
LoadData=True

# NOTE(review): presumably get_data.get_data() downloads and preprocesses the
# source data into the CSV files under data/ that later cells read -- confirm
# against get_data.py.
if LoadData:
    get_data.get_data()

[2m[36m(pid=33996)[0m ray.get_gpu_ids(): 4
[2m[36m(pid=33996)[0m CUDA_VISIBLE_DEVICES: 4


# Functions to Load Processed Data

In [4]:
def _load_country_series(csv_path, country, start_date=None, end_date=None):
    """Return the first row of ``csv_path`` whose 'Country/Region' equals ``country``.

    Parameters
    ----------
    csv_path : str
        CSV file with a 'Country/Region' column and one column per date.
    country : str
        Value to match in the 'Country/Region' column.
    start_date, end_date : str or None
        Column labels delimiting an inclusive label slice of the row. When
        ``start_date`` is None the complete row is returned and ``end_date``
        is ignored.

    Returns
    -------
    pandas.Series
        The matching row, optionally restricted to ``start_date:end_date``.

    Raises
    ------
    IndexError
        If no row matches ``country``.
    """
    df = pd.read_csv(csv_path)
    country_df = df[df['Country/Region'] == country]
    if country_df.empty:
        # Same exception type that `.iloc[0]` on an empty frame would raise,
        # but with a message that names the actual problem.
        raise IndexError(
            "country {!r} not found in {}".format(country, csv_path))
    row = country_df.iloc[0]
    if start_date is None:
        return row
    return row.loc[start_date:end_date]


def load_confirmed(country,start_date=None,end_date=None):
    """Load the confirmed-cases row for ``country`` (see ``_load_country_series``)."""
    return _load_country_series('data/time_series_19-covid-Confirmed-country.csv',
                                country, start_date, end_date)


def load_recovered(country,start_date=None,end_date=None):
    """Load the recovered-cases row for ``country`` (see ``_load_country_series``)."""
    return _load_country_series('data/time_series_19-covid-Recovered-country.csv',
                                country, start_date, end_date)


def load_dead(country,start_date=None,end_date=None):
    """Load the deaths row for ``country`` (see ``_load_country_series``)."""
    return _load_country_series('data/time_series_19-covid-Deaths-country.csv',
                                country, start_date, end_date)

# Load solver

In [5]:
%reload_ext autoreload
%autoreload 2
import LearnerICRayNoLoadBH_v4NewModel as L 

# Data for Countries

In [6]:
# Per-country optimisation parameters and the UN WPP 2019 population table.
dfparam = pd.read_csv("data/param.csv")
countries=dfparam.country
popEst = pd.read_csv("data/WPP2019_TotalPopulationBySex.csv")
# Numeric copy of PopTotal; unparseable entries become NaN.
popEst['popTotal']=pd.to_numeric(popEst.PopTotal, errors='coerce')

# Country names in param.csv that differ from the WPP "Location" spelling.
WPP_LOCATION_ALIASES = {"US": "United States of America"}

# Filter once instead of once per country.
popEst2020 = popEst.loc[popEst.Time == 2020]

for country in countries:
    location = WPP_LOCATION_ALIASES.get(country, country)
    # Look the value up by column name rather than by positional index, and
    # take the first matching row as before.
    matches = popEst2020.loc[popEst2020.Location == location, 'popTotal']
    if matches.empty:
        raise ValueError(
            "no 2020 population estimate found for {!r}".format(location))
    # Scale by 1000 (presumably WPP reports population in thousands -- confirm).
    population = matches.iloc[0] * 1000
    # Total population is used both as popTotal and as the initial value of s0.
    dfparam.loc[dfparam.country == country, 'popTotal'] = population
    dfparam.loc[dfparam.country == country, 's0'] = population

display(dfparam)
    

Unnamed: 0,country,start-date,prediction-range,s0,e0,a0,i0,r0,d0,START,WCASES,WREC,WDTH,popTotal
0,Brazil,3/2/20,200,212559400.0,0.0001,0.0001,200,100,50,50,0.15,0.05,0.8,212559400.0
1,China,1/28/20,200,1439324000.0,0.0001,0.0001,200,100,50,50,0.15,0.05,0.8,1439324000.0
2,Italy,2/28/20,200,60461830.0,0.0001,0.0001,200,100,50,50,0.15,0.05,0.8,60461830.0
3,US,2/20/20,200,331002600.0,0.0001,0.0001,200,100,50,50,0.15,0.05,0.8,331002600.0
4,India,3/10/20,200,1380004000.0,0.0001,0.0001,200,100,50,50,0.15,0.05,0.8,1380004000.0


# Functions for Optimization

In [7]:
from scipy.integrate import odeint
import sys
import io
import gc

def create_f(country,e0,a0,date, end_dateFirstWave, predict_range, version):
    """Build the objective function minimised by the optimiser for one country.

    Parameters
    ----------
    country : str
        Country whose series are loaded.
    e0, a0 : float
        Values forwarded unchanged to ``L.Learner``.
    date : str
        Nominal start date, ``%m/%d/%y``; also the first column loaded.
    end_dateFirstWave : str
        Last date column loaded from the CSV files.
    predict_range, version
        Forwarded unchanged to ``L.Learner``.

    Returns
    -------
    callable
        ``fobjective(point)`` where ``point`` unpacks as
        ``(s0, deltaDate, i0, d0, r0, startNCases)``; it returns whatever
        ``Learner.train`` returns.
    """
    # The input series depend only on the arguments above, so read the CSV
    # files once here instead of on every objective evaluation.
    dead = load_dead(country, date, end_dateFirstWave)
    recovered = load_recovered(country, date, end_dateFirstWave)
    # confirmed - recovered - dead
    data = load_confirmed(country, date, end_dateFirstWave) - recovered - dead
    base_date = datetime.strptime(date, "%m/%d/%y")
    cleanRecovered = False

    def fobjective(point):
        """Train one Learner actor for ``point`` and return its result."""
        s0, deltaDate, i0, d0, r0, startNCases = point
        # Shift the start date by the (possibly fractional) number of days
        # proposed by the optimiser.
        shifted_start = base_date + timedelta(days=deltaDate)
        # NOTE(review): the recorded run of this notebook failed on this call
        # with "TypeError: missing a required argument: 'data'", so the
        # positional arguments may not line up with Learner's constructor in
        # the imported module version -- verify against its signature.
        learner = L.Learner.remote(country, shifted_start.strftime("%m/%d/%y"), predict_range,
                                   s0, e0, a0, i0, r0, d0, startNCases,
                                   cleanRecovered, version, data, dead, recovered, savedata=False)
        result = ray.get(learner.train.remote())

        # Drop the actor handle before collecting so per-evaluation objects
        # can be reclaimed promptly.
        del learner
        gc.collect()

        return result
    return fobjective

In [8]:
@ray.remote(memory=50 * 1024 * 1024, max_calls=1)
def opt(country,s0,i0,e0,a0,r0,d0,date,end_date,startNCases, 
        predict_range, version):
    """Run differential evolution for one country and return the best point.

    Parameters
    ----------
    country, e0, a0, date, end_date, predict_range, version
        Forwarded to ``create_f`` to build the objective.
    s0 : float
        Upper bound of the first search dimension.
    i0, r0, d0, startNCases
        Accepted for interface compatibility; not used here because those
        quantities are search dimensions with fixed bounds.

    Returns
    -------
    numpy.ndarray
        Best denormalised point of the final generation, ordered as
        ``(s0, deltaDate, i0, d0, r0, startNCases)`` to match the unpacking in
        the objective built by ``create_f``.

    Raises
    ------
    RuntimeError
        If the optimiser yields no generation at all.
    """
    # One (low, high) pair per search dimension, in the order listed above.
    bounds=[(5e3,s0),(-2,2),(0,250), (0,250),(0,250),(0,250)]
    maxiterations=500
    f=create_f(country,e0,a0,date,end_date, predict_range, version)
    de = DE(f, bounds, maxiters=maxiterations)

    best_params = None
    # The previous bare `except:` only wrapped this bookkeeping (the objective
    # is evaluated while the generator advances, outside that try), so it hid
    # real errors without protecting anything; failures now propagate.
    for step in de.geniterator():
        best_params = de.denormalize([step.population[step.best_idx]])

    if best_params is None:
        raise RuntimeError(
            "differential evolution produced no generations for {}".format(country))

    # The former `del f, bounds, data, dead, best_params` referenced `data`
    # and `dead`, which are never bound in this function, and therefore raised
    # after the optimisation had already finished. Locals are released on return.
    return best_params[0]

# Main Code

In [9]:
%%javascript
// Override the classic-notebook scroll heuristic so it always answers true,
// regardless of the number of output lines.
// NOTE(review): presumably this forces every output area into a scroll box;
// it relies on a private IPython front-end API and may have no effect in
// JupyterLab -- confirm.
IPython.OutputArea.prototype._should_scroll = function(lines){
    return true;}

<IPython.core.display.Javascript object>

In [10]:
countries=dfparam.country
display(countries)
version="015"
gc.enable()
# Last date to load for each country, aligned by position with `countries`.
firstWave=['10/1/20','6/1/20','8/1/20','10/25/20','10/25/20']
if len(firstWave) != len(countries):
    raise ValueError(
        "firstWave has {} entries but there are {} countries".format(
            len(firstWave), len(countries)))

# One pending Ray task per country, in the order of `countries`.
optimal=[]

for country, first_wave_end in zip(countries, firstWave):
    #remove previous history file
    strFile='./results/history_'+country+version+'.csv'
    if os.path.isfile(strFile):
        os.remove(strFile)
    query = dfparam.query('country == "{}"'.format(country)).reset_index()
    # Skip the first two columns of the reset frame; the remainder unpacks below.
    parameters = np.array(query.iloc[:, 2:])[0]
    endDate = datetime.strptime(first_wave_end, '%m/%d/%y')
    # Non-padded month/day built by hand: the '%-m/%-d' strftime directives
    # are a platform-specific extension and are not available everywhere.
    end_date = '{}/{}/{}'.format(endDate.month, endDate.day, endDate.strftime('%y'))
    date,predict_range,s0,e0,a0,i0,r0,d0,startNCases,wcases,wrec,wdth, pop = parameters
    optimal.append(opt.remote(country,s0,i0,e0,a0,r0,d0,date,end_date,startNCases, 
                                        predict_range, version)) 

0    Brazil
1     China
2     Italy
3        US
4     India
Name: country, dtype: object

In [11]:
# Block until every per-country task finishes and replace the list of pending
# object ids with the list of results (same order as `countries`).
# NOTE(review): rebinding `optimal` makes this cell non-idempotent -- running
# it a second time passes results, not object ids, to ray.get.
optimal=ray.get(optimal)


RayTaskError(TypeError): [36mray::__main__.opt()[39m (pid=33996, ip=192.168.0.104)
  File "/home/ats4i/anaconda3/envs/geo_env/lib/python3.6/inspect.py", line 2997, in bind
    return args[0]._bind(args[1:], kwargs)
  File "/home/ats4i/anaconda3/envs/geo_env/lib/python3.6/inspect.py", line 2912, in _bind
    raise TypeError(msg) from None
TypeError: missing a required argument: 'data'

During handling of the above exception, another exception occurred:

[36mray::__main__.opt()[39m (pid=33996, ip=192.168.0.104)
  File "python/ray/_raylet.pyx", line 459, in ray._raylet.execute_task
  File "python/ray/_raylet.pyx", line 462, in ray._raylet.execute_task
  File "python/ray/_raylet.pyx", line 463, in ray._raylet.execute_task
  File "<ipython-input-8-1ca64d264f44>", line 9, in opt
  File "/home/ats4i/anaconda3/envs/geo_env/lib/python3.6/site-packages/yabox/algorithms/de.py", line 170, in geniterator
    it = self.iterator()
  File "/home/ats4i/anaconda3/envs/geo_env/lib/python3.6/site-packages/yabox/algorithms/de.py", line 167, in iterator
    return iter(DEIterator(self))
  File "/home/ats4i/anaconda3/envs/geo_env/lib/python3.6/site-packages/yabox/algorithms/de.py", line 9, in __init__
    self.fitness = de.evaluate(self.population)
  File "/home/ats4i/anaconda3/envs/geo_env/lib/python3.6/site-packages/yabox/algorithms/de.py", line 161, in evaluate
    return self.evaluate_denormalized(PD)
  File "/home/ats4i/anaconda3/envs/geo_env/lib/python3.6/site-packages/yabox/algorithms/de.py", line 164, in evaluate_denormalized
    return [self.fobj(ind) for ind in PD]
  File "/home/ats4i/anaconda3/envs/geo_env/lib/python3.6/site-packages/yabox/algorithms/de.py", line 164, in <listcomp>
    return [self.fobj(ind) for ind in PD]
  File "<ipython-input-7-54ddbc6977ff>", line 18, in fobjective
  File "/home/ats4i/anaconda3/envs/geo_env/lib/python3.6/site-packages/ray/actor.py", line 378, in remote
    return self._remote(args=args, kwargs=kwargs)
  File "/home/ats4i/anaconda3/envs/geo_env/lib/python3.6/site-packages/ray/actor.py", line 544, in _remote
    kwargs)
  File "/home/ats4i/anaconda3/envs/geo_env/lib/python3.6/site-packages/ray/signature.py", line 117, in flatten_args
    raise TypeError(str(exc))
TypeError: missing a required argument: 'data'

In [None]:
# Columns that receive real-valued optimiser output. Cast them to float first
# so the fractional part is kept even if the CSV parsed them as integers
# (presumably some pandas versions truncate or warn on such writes -- confirm).
optimized_columns = ["s0", "i0", "d0", "r0", "START"]
dfparam[optimized_columns] = dfparam[optimized_columns].astype(float)

for i in range(0,len(countries)):    

    country=countries[i]
    query = dfparam.query('country == "{}"'.format(country)).reset_index()
    parameters = np.array(query.iloc[:, 2:])[0]
    # Only the start date is needed here. The previous 11-name tuple unpack
    # did not match the 13 values the launch cell unpacks from this slice.
    startdate = parameters[0]
    # Layout of an optimiser point, as unpacked by the objective function:
    # (s0, deltaDate, i0, d0, r0, startNCases)
    best = optimal[i]
    Date = datetime.strptime(startdate, "%m/%d/%y")
    # Apply the shift exactly as the objective function does (float days, then
    # formatted as a date) so the stored date is the one that was evaluated;
    # int() truncation disagrees with that for negative shifts.
    end_date = Date + timedelta(days=float(best[1]))
    dateStr=end_date.strftime("%m/%d/%y")

    j = query['index'].values[0]
    dfparam.at[j, "s0"] = best[0]
    dfparam.at[j, "start-date"] = dateStr
    dfparam.at[j, "i0"] = best[2]
    # Components 3-5 are d0, r0 and startNCases; they were previously written
    # to the WCASES / WREC weight columns, and startNCases was dropped.
    dfparam.at[j, "d0"] = best[3]
    dfparam.at[j, "r0"] = best[4]
    dfparam.at[j, "START"] = best[5]

# Write once, after every country has been updated, then display the frame.
dfparam.to_csv("data/param_optimized_Yabox.csv", sep=",", index=False)
dfparam
    