# Optimize Initial Conditions
## Yabox

In [1]:
import numpy as np
from datetime import datetime,timedelta,date
import pandas as pd
from scipy.optimize import basinhopping

# Initialize Ray

In [2]:
from environs import Env
env = Env()
env.str("CUDA_DEVICE_ORDER",'PCI_BUS_ID')
env.int("CUDA_VISIBLE_DEVICES",1)
env.int("NUMBA_ENABLE_CUDASIM",1)
env.bool("OMPI_MCA_opal_cuda_support",True)

import os
import ray
MB=1024*1024
GB=MB*1024
ray.shutdown()
ray.init(object_store_memory=1*GB,memory=220*GB,
         lru_evict=True,
         driver_object_store_memory=500*MB,num_gpus=5,num_cpus=1,
         ignore_reinit_error=True) # , include_webui=False)

@ray.remote(num_gpus=1)
def use_gpu():
    print("ray.get_gpu_ids(): {}".format(ray.get_gpu_ids()[0]))
    print("CUDA_VISIBLE_DEVICES: {}".format(os.environ["CUDA_VISIBLE_DEVICES"]))

use_gpu.remote()

2020-11-05 14:40:48,633	INFO resource_spec.py:212 -- Starting Ray with 219.97 GiB memory available for workers and up to 1.0 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).
2020-11-05 14:40:49,071	INFO services.py:1170 -- View the Ray dashboard at [1m[32mlocalhost:8267[39m[22m


ObjectID(45b95b1c8bd3a9c4ffffffff010000c801000000)

# Load New and Process Data from website data.brasil.io

In [3]:
%reload_ext autoreload
%autoreload 2
import get_data
LoadData=True

if LoadData:
    get_data.get_data()

[2m[36m(pid=17506)[0m ray.get_gpu_ids(): 4
[2m[36m(pid=17506)[0m CUDA_VISIBLE_DEVICES: 4


# Functions to Load Processed Data

In [4]:
def load_confirmed(country,start_date=None,end_date=None):
    df = pd.read_csv('data/time_series_19-covid-Confirmed-country.csv')
    country_df = df[df['Country/Region'] == country]
    if start_date==None:
        return country_df.iloc[0]
    else: 
        return country_df.iloc[0].loc[start_date:end_date]

def load_recovered(country,start_date=None,end_date=None):
    df = pd.read_csv('data/time_series_19-covid-Recovered-country.csv')
    country_df = df[df['Country/Region'] == country]
    if start_date==None:
        return country_df.iloc[0]
    else: 
        return country_df.iloc[0].loc[start_date:end_date]

def load_dead(country,start_date=None,end_date=None):
    df = pd.read_csv('data/time_series_19-covid-Deaths-country.csv')
    country_df = df[df['Country/Region'] == country]
    if start_date==None:
        return country_df.iloc[0]
    else: 
        return country_df.iloc[0].loc[start_date:end_date]

# Load solver

In [5]:
%reload_ext autoreload
%autoreload 2
import LearnerICRayNoLoadBH_v3NewModel as L 

# Data for Countries

In [6]:
modelHist = "YaboxAndBasinHopping"
dfparam = pd.read_csv("data/param_optimized_"+modelHist+"_HistMin.csv")
countries=dfparam.country
popEst = pd.read_csv("data/WPP2019_TotalPopulationBySex.csv")
popEst['popTotal']=pd.to_numeric(popEst.PopTotal, errors='coerce')

for country in countries:
    if country=="US":
        country2="United States of America"    
    else:
        country2=country
    dfparam.loc[dfparam.country==country,'popTotal']=popEst.loc[popEst.Location==country2].loc[popEst.Time==2020].iloc[0,8]*1000
#     dfparam.loc[dfparam.country==country,'s0']=popEst.loc[popEst.Location==country2].loc[popEst.Time==2020].iloc[0,8]*1000
    
display(dfparam)
    

Unnamed: 0,country,start-date,prediction-range,s0,e0,a0,i0,r0,d0,START,WCASES,WREC,WDTH,popTotal
0,Brazil,03/01/20,200,9088665,0.0,0.0,231,307,404,264,1.0,1.0,1.0,212559400.0
1,China,01/29/20,200,170883,0.0,0.0,365,162,56,372,1.0,1.0,1.0,1439324000.0
2,Italy,02/27/20,200,486349,0.0,0.0,4,374,73,107,1.0,1.0,1.0,60461830.0
3,US,02/18/20,200,15342759,0.0,0.0,51,355,312,276,1.0,1.0,1.0,331002600.0
4,India,03/08/20,200,16760693,0.0,0.0,9,297,262,378,1.0,1.0,1.0,1380004000.0


# Functions for Optimization

In [7]:
from scipy.integrate import odeint
import sys
import io
import gc

def create_f(country,e0,a0,date, end_dateFirstWave, wcases, wrec, wdth, predict_range, version):
                
    def fobjective(point):
        
        dead=  load_dead(country,date, end_dateFirstWave)
        recovered = load_recovered(country,date, end_dateFirstWave)
        data = load_confirmed(country,date, end_dateFirstWave)-recovered-dead
        cleanRecovered=False
        s0, deltaDate, i0, d0, r0, startNCases  = point
        end_date=datetime.strptime(date, "%m/%d/%y") + timedelta(days=deltaDate)
        f=L.Learner.remote(country, end_date.strftime("%m/%d/%y"), predict_range,\
                           s0, e0, a0, i0, r0, d0, startNCases, wcases, wrec, wdth,\
                           cleanRecovered, version, data, dead, recovered, savedata=False)
        result = f.train.remote() 
        result = ray.get(result) 

        del end_date,cleanRecovered, data, dead, point,f         

        gc.collect()

        return result
    return fobjective

In [8]:
@ray.remote(memory=50 * 1024 * 1024, max_calls=1)
def opt(country,s0,i0,e0,a0,r0,d0,date,end_date,startNCases, wcases, wrec, wdth,
        predict_range, version):

    bnds=[(s0/1.5,s0*1.5),(0,0),(i0/1.2,i0*1.2),(r0/1.2,r0*1.2),(d0/1.2,d0*1.2),(startNCases/1.2,startNCases*1.2)]
    f=create_f(country,e0,a0,date,end_date, wcases, wrec, wdth, predict_range, version)
    x0 = [s0, 0, i0,r0,d0,startNCases]
    minimizer_kwargs = { "method": "L-BFGS-B","bounds":bnds }
    optimal = basinhopping(f, x0, minimizer_kwargs=minimizer_kwargs,niter=10,disp=True)  
    del f, bnds, x0
    
    return optimal.x

# Main Code

In [9]:
%%javascript
IPython.OutputArea.prototype._should_scroll = function(lines){
    return true;}

<IPython.core.display.Javascript object>

In [10]:
flagFirstWave=True
finalDate=date.today()+ timedelta(days=-1)
finalDateStr= datetime.strftime(finalDate, '%-m/%-d/%y')

if flagFirstWave:
    #'10/1/20' Brazil
    #'10/25/20' US, India
    firstWave=[finalDateStr,'6/1/20','8/1/20',finalDateStr,finalDateStr]
else:
    firstWave=[finalDateStr,finalDateStr,finalDateStr,finalDateStr,finalDateStr]

In [11]:
countries=dfparam.country
display(countries)
version="114"
gc.enable()

optimal=[]
i=0

for country in countries:
    #remove previous history file
    strFile='./results/history_'+country+version+'.csv'
    if os.path.isfile(strFile):
        os.remove(strFile)
    query = dfparam.query('country == "{}"'.format(country)).reset_index()
    parameters = np.array(query.iloc[:, 2:])[0]
    endDate = datetime.strptime(firstWave[i], '%m/%d/%y')
    end_dateStr= datetime.strftime(endDate, '%-m/%-d/%y')
    date,predict_range,s0,e0,a0,i0,r0,d0,startNCases,wcases,wrec,wdth, pop = parameters
    dateD = datetime.strptime(date, '%m/%d/%y')
    dateStr= datetime.strftime(dateD, '%-m/%-d/%y')
    optimal.append(opt.remote(country,s0,i0,e0,a0,r0,d0,dateStr,end_dateStr,startNCases, wcases, wrec, wdth,
                                        predict_range, version)) 
    i+=1        

0    Brazil
1     China
2     Italy
3        US
4     India
Name: country, dtype: object

In [None]:
optimal=ray.get(optimal)




[2m[36m(pid=17550)[0m basinhopping step 0: f 2.12927e+11




[2m[36m(pid=17654)[0m basinhopping step 0: f 3.16061e+11
[2m[36m(pid=17576)[0m basinhopping step 0: f 6.91445e+08
[2m[36m(pid=17628)[0m basinhopping step 0: f 1.71494e+12
[2m[36m(pid=17654)[0m basinhopping step 1: f 3.16061e+11 trial_f 7.53532e+12 accepted 0  lowest_f 3.16061e+11
[2m[36m(pid=17550)[0m basinhopping step 1: f 2.12927e+11 trial_f 1.78907e+13 accepted 0  lowest_f 2.12927e+11
[2m[36m(pid=17628)[0m basinhopping step 1: f 1.71494e+12 trial_f 4.3715e+13 accepted 0  lowest_f 1.71494e+12
[2m[36m(pid=17576)[0m basinhopping step 1: f 5.15679e+08 trial_f 5.15679e+08 accepted 1  lowest_f 5.15679e+08
[2m[36m(pid=17576)[0m found new global minimum on step 1 with function value 5.15679e+08
[2m[36m(pid=17654)[0m basinhopping step 2: f 3.16061e+11 trial_f 6.82153e+12 accepted 0  lowest_f 3.16061e+11
[2m[36m(pid=17602)[0m basinhopping step 0: f 5.92934e+08
[2m[36m(pid=17654)[0m basinhopping step 3: f 3.16061e+11 trial_f 9.85118e+12 accepted 0  lowest_f 3.1



[2m[36m(pid=17924)[0m basinhopping step 0: f 3.2246e+11
[2m[36m(pid=17628)[0m basinhopping step 10: f 1.71494e+12 trial_f 8.21078e+12 accepted 0  lowest_f 1.71494e+12




[2m[36m(pid=17602)[0m basinhopping step 4: f 5.92934e+08 trial_f 7.08207e+09 accepted 0  lowest_f 5.92934e+08
[2m[36m(pid=17550)[0m basinhopping step 8: f 2.12927e+11 trial_f 2.90686e+13 accepted 0  lowest_f 2.12927e+11
[2m[36m(pid=17924)[0m basinhopping step 1: f 3.2246e+11 trial_f 7.72015e+12 accepted 0  lowest_f 3.2246e+11
[2m[36m(pid=17550)[0m basinhopping step 9: f 2.12927e+11 trial_f 6.46276e+11 accepted 0  lowest_f 2.12927e+11


In [None]:
for i in range(0,len(countries)):    

    #s0, deltaDate, i0, d0, r0, startNCases  = point
    # deltaDate not used at all
    j = query['index'].values[0]
    dfparam.at[j, "s0"] = optimal[i][0]
    dfparam.at[j, "i0"] = optimal[i][2]
    dfparam.at[j, "r0"] = optimal[i][3]
    dfparam.at[j, "d0"] = optimal[i][4]
    dfparam.at[j, "startNCases"] = optimal[i][5]

    dfparam.to_csv("data/param_optimized_FineTune.csv", sep=",", index=False)
    display(dfparam)
    