In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import style
import time
import ipywidgets as wg
from ipywidgets import interact
from IPython.display import display
import networkx as nx
from src.environment import Environment
from src.runner import runner
from src.helpers import confidence_interval
from sklearn import preprocessing
import random
import json

# Calibration

## 1 Setting the parameters

In [5]:
# Age-group labels used as dictionary keys for the age-dependent parameters:
# eight ten-year bands from 0 up to 80, followed by one open-ended band.
age_groups = ['age_{}_{}'.format(lower, lower + 10) for lower in range(0, 80, 10)]
age_groups.append('age_80_plus')

In [33]:
# Number of simulated agents.
AGENTS = 100000

# Number of simulated days: one per calendar day in the calibration window
# (29 March 2020 up to and including 14 June 2020).
# This used to be the number of rows of `mobility_data` inside the same window,
# but `mobility_data` is only created in a later cell, so this cell raised a
# NameError after "Restart Kernel -> Run All".
# NOTE(review): assumes the mobility report has exactly one row per day in this
# window for the selected region - confirm against the input file.
TIME = len(pd.date_range('2020-03-29', '2020-06-14'))

In [34]:
# Load the mobility report once and keep only the rows for the Western Cape
# province of South Africa. (The CSV used to be parsed twice: once for the data
# and a second time only to build the country mask.)
mobility_data = pd.read_csv('input_data/Global_Mobility_Report.csv').loc[
    lambda report: (report['country_region_code'] == 'ZA')
    & (report['sub_region_1'] == 'Western Cape')
]
# Index the rows by their date so that a date window can be selected with .loc.
mobility_data.index = mobility_data['date']
# Drop the first five columns and keep the rest.
# NOTE(review): presumably the first five are the region/date descriptor columns
# and the remainder are the mobility measurements - verify against the CSV
# header, since this selection is positional.
mobility_data = mobility_data[mobility_data.columns[5:]]

In [35]:
# Average all mobility columns per day, restrict the result to the window
# 29 March - 14 June 2020, and convert the window's mean into a multiplier
# around 1.
# NOTE(review): the division by 100 presumes the mobility values are percentage
# changes relative to a baseline - verify against the mobility report.
window_mobility = mobility_data.mean(axis=1).loc['2020-03-29':'2020-06-14']
travel_multiplier = 1 + window_mobility.mean() / 100
travel_multiplier

0.5387393162393163

In [36]:
# Upper bound on contacts at gatherings, based on the minibus-taxi regulations:
# (15 * 0.7) passengers plus the driver per trip, for 2 trips.
contacts_per_trip = int(round(15 * 0.7 + 1))
gathering_max_contacts = 2 * contacts_per_trip
gathering_max_contacts

24

In [45]:
# Percentage of infections that is assumed to be detected.
perc_infections_detects = 14

# Scale the 310 reported cases from the real population (3,740,026) down to the
# simulated population, then correct for undetected infections by dividing by
# the detection rate. At least 20 agents are always seeded.
people_per_agent = 3740026 / AGENTS
detected_agents = 310 / people_per_agent
initial_agents = max(round(detected_agents * 100 / perc_infections_detects), 20)
initial_agents

59

In [46]:
# All model parameters, collected in one dictionary that is written to
# parameters/parameters.json further down. The key order is kept stable because
# it determines the order of the entries in the exported file.
parameters = {
    # 1 general simulation parameters
    "time": TIME,
    "number_of_agents": AGENTS,
    "monte_carlo_runs": 1,

    # 2 COVID-19 parameters
    # average number of days without symptoms and being able to infect others
    "exposed_days": 4,
    # average number of days agents are infected but do not have symptoms
    "asymptom_days": 10,
    # average number of days agents have mild symptoms
    "symptom_days": 10,
    # average number of days agents are in critical condition
    "critical_days": 8,
    # determines whether an agent becomes a symptomatic or an asymptomatic spreader
    "probability_symptomatic": 0.6165,
    # the increase in probability if a critical agent cannot go to the hospital
    # SOURCE: Zhou et al. 2020
    "no_hospital_multiplier": 1.79,
    # should be estimated to replicate a realistic R0 number
    "probability_transmission": 0.00335,

    # probability that an agent enters a critical stage of the disease, per age
    # group. SOURCE: Verity et al.
    "probability_critical": dict(zip(
        age_groups,
        [0.001, 0.003, 0.012, 0.032, 0.049, 0.102, 0.166, 0.244, 0.273],
    )),
    # probability to die per age group in the critical stage. SOURCE: Verity et al.
    "probability_to_die": dict(zip(
        age_groups,
        [0.005, 0.021, 0.053, 0.126, 0.221, 0.303, 0.565, 0.653, 0.765],
    )),

    # Cape Town specific parameters
    # total agents infected in CT, stored as the list 0 .. initial_agents - 1
    "total_initial_infections": list(range(initial_agents)),
    # 3433 acute beds in CT / 3740026 population
    "health_system_capacity": 0.0009179,

    # Policy parameters
    # general
    # one entry per simulated day, all None here. Original note: in the baseline
    # this is 0; 5 March was the first reported case, 27 March was the start of
    # the lockdown (35 days).
    "lockdown_days": [None] * TIME,

    # Specific policy parameters
    # (1) physical distancing measures such as increased hygiene & face mask adoption
    # based on a study of face masks on hamsters by Yuen et al. (2020)
    "physical_distancing_multiplier": 0.31,
    # (2) reducing travel e.g. by reducing it for work, school or all
    # based on travel data
    "visiting_recurring_contacts_multiplier": travel_multiplier,
    # (3) Testing and general awareness
    # li2020early; this will be increased through testing, track & trace and coviid
    "likelihood_awareness": 0.54,
    # (4) limiting mass contact e.g. forbidding large events, outside household.
    # based on the regulations for mini bus taxis --> (15 * 0.7) + driver
    "gathering_max_contacts": gathering_max_contacts,

    # additional parameter used to switch off informal districts
    # setting this parameter at 0 will mean the lockdown is equally effective
    # anywhere, alternative = 1
    "informality_dummy": 1.0,

    # additional parameter that can later be used to test large scale testing
    # i1 can be added if there is large scale testing, this is optional
    "aware_status": ["i2"],

    # Technical parameters
    # to calculate R0
    "init_infected_agent": 0,
    # 'csv' or 'network', or 'False'
    "data_output": "csv",
    # detection percentage expressed as a fraction
    "perc_infections_detects": perc_infections_detects / 100.0,

    # Deprecated parameters (can be used later)
    # probability that the agent will again be susceptible after having recovered
    "probability_susceptible": 0.000,
}

Next, we store these parameters in a .json file.

In [47]:
# Serialise the parameter dictionary to parameters/parameters.json,
# overwriting any existing file.
with open('parameters/parameters.json', 'w') as parameters_file:
    json.dump(parameters, parameters_file)

## 2 Generate input data files

Then, we generate a file that adds per-Ward (district) data on: 

- informality of the Ward,
- initial cases per ward.

to the already existing features in the population csv that has 

- distribution of population size across wards,
- the age distribution within wards.

In [40]:
# Ward-level population features.
population = pd.read_csv('input_data/population.csv')

# Reported cases, indexed by the file's first column.
# (The variable name keeps its original spelling because other cells use it.)
inital_infections = pd.read_csv(
    'input_data/Cases_With_Subdistricts.csv',
    index_col=0,
)

# Informality data; the last row of the file is discarded.
# NOTE(review): presumably the last row is a total/summary row - confirm.
informal_residential = pd.read_csv('input_data/Informal_Residential.csv').iloc[:-1]

The informality score of the Wards is then normalized, so that the scores can be compared in the model, and added to the population DataFrame.

In [41]:
# Rescale the informality score to the [0, 1] range with a min-max scaler.
# The scaler expects a 2-D array, hence the double brackets (one column).
informality_raw = informal_residential[['Informal_residential']].values.astype(float)
min_max_scaler = preprocessing.MinMaxScaler()
informality_scaled = min_max_scaler.fit_transform(informality_raw)

# Write the scaled scores back over the raw ones. The assignment aligns on the
# row index, so this relies on `informal_residential` having a default 0..n-1 index.
informal_residential['Informal_residential'] = pd.DataFrame(informality_scaled)

# Copy the scaled score onto the population table. Rows are matched by index
# label, not by ward identifier.
# NOTE(review): assumes both files list the wards in the same order - confirm.
population['Informal_residential'] = informal_residential['Informal_residential']

Because the number of agents is smaller than the actual population of Cape Town, we filter out some wards so that every remaining ward is populated with at least one agent. To do so, we calculate the smallest size a neighbourhood should have as follows.

In [42]:
# Number of real inhabitants represented by a single agent. Wards whose
# population does not exceed this value are left out of the district data.
total_population = population['Population'].sum()
smallest_size = total_population / parameters['number_of_agents']

We then combine this data into one neighbourhood data set,

In [43]:
def _as_builtin(value):
    """Return NumPy scalars as the equivalent built-in Python scalar.

    Values taken out of a DataFrame with ``.iloc`` / ``.loc`` are NumPy scalars.
    ``json.dump`` cannot serialise NumPy integers, so they are converted here;
    any value that is not a NumPy scalar is returned unchanged.
    """
    return value.item() if isinstance(value, np.generic) else value


# Ward-level columns that are copied one-to-one into the district records.
ward_columns = ['Population', 'Density', 'lon', 'lat', 'Informal_residential']

# Build one [ward id, feature dict] record for every ward that is large enough
# to be populated (population strictly above `smallest_size`).
districts_data = []
for i in range(len(population)):
    if population['Population'].iloc[i] > smallest_size:
        ward_id = population['WardID'].iloc[i]
        ward_features = {column: _as_builtin(population[column].iloc[i])
                         for column in ward_columns}
        # Cases per ward are looked up by ward id in the cases table.
        ward_features['Cases_With_Subdistricts'] = _as_builtin(
            inital_infections.loc[ward_id]['Cases_03292020'])
        districts_data.append([int(ward_id), ward_features])
max_neighbourhoods = len(districts_data)

and export it to a json file. 

In [44]:
# Number of districts to export. By default all of them; this can be lowered by
# hand to study the dynamics in only the first few districts.
max_districts = len(districts_data)
exported_districts = districts_data[:max_districts]

# Serialise the selected districts to parameters/district_data_100k.json,
# overwriting any existing file.
with open('parameters/district_data_100k.json', 'w') as districts_file:
    json.dump(exported_districts, districts_file)