In [1]:
# These imports are all for the notebooks purpose
import os
import json
import difflib
import pandas as pd
import tempfile
from copy import deepcopy
from IPython.core.display import display, HTML

# here are our demographics imports
from dtk.tools.demographics.generator.DemographicsNodeGeneratorConcern import GenericMortalityDistributionConcern, WorldBankBirthRateNodeConcern, EquilibriumAgeDistributionConcern 
from dtk.tools.demographics.DemographicsGenerator import DemographicsGenerator
# Input files are resolved relative to the current working directory.
input_path = os.getcwd()
# Alternative when this code is run as a script (__file__ is not defined inside a notebook):
#input_path = os.path.dirname(__file__)

def pretty_json(demo):
    """Serialize ``demo`` to a JSON string with sorted keys and 4-space indentation.

    Sorting the keys makes the text stable regardless of dictionary insertion
    order, which keeps line-based comparisons of two documents meaningful.
    """
    serialized = json.dumps(demo, sort_keys=True, indent=4)
    return serialized

def display_diff(a, b, fromdesc='', todesc='', context=False, numlines=5):
    """Render an HTML diff of two JSON-serializable objects in the notebook.

    Both objects are pretty-printed with ``pretty_json`` (sorted keys, 4-space
    indent) and compared line by line, so key order does not affect the diff.

    :param a: the "before" object
    :param b: the "after" object
    :param fromdesc: optional column header for ``a``
    :param todesc: optional column header for ``b``
    :param context: when True, show only the changed lines plus surrounding
        context instead of both documents in full
    :param numlines: number of context lines shown around each change when
        ``context`` is True
    """
    before_lines = pretty_json(a).split('\n')
    after_lines = pretty_json(b).split('\n')
    # The keyword defaults above mirror HtmlDiff.make_file's own defaults, so
    # display_diff(a, b) produces exactly the same output as before.
    diff = difflib.HtmlDiff().make_file(before_lines, after_lines,
                                        fromdesc=fromdesc, todesc=todesc,
                                        context=context, numlines=numlines)
    display(HTML(diff))

# Path to our grid file: 'test_grid.csv' located directly under input_path
grid_file = os.path.join(input_path, 'test_grid.csv')

Above we have imported all our required libraries and set our input path to the current directory.

Now we are going to generate a demographics file from the following grid file in CSV format:


In [2]:
# Preview the raw grid file as a DataFrame; the table is shown as this cell's output.
# NOTE(review): the output contains 'Unnamed: 0'-style column(s), so the CSV appears to
# carry a saved index column -- confirm whether that is intended.
pd.read_csv(grid_file)

Unnamed: 0.1,Unnamed: 0,lat,lon,gcid,population,country
0,0,12.026335,-1.607123,1,1500,China
1,1,12.030855,-1.607123,2,1763,
2,2,12.035375,-1.607123,3,343,


Now let's load the grid file without specifying any concerns

In [3]:
# Generate demographics from the grid file, passing nothing but the file path
# (no concerns and no population column name).
current = DemographicsGenerator.from_grid_file(grid_file)
# Bare expression on the last line so the notebook displays the result.
current

{'Nodes': [{'NodeID': 1,
   'NodeAttributes': {'Latitude': 12.0263354369855,
    'Longitude': -1.60712345544773,
    'InitialPopulation': 1000,
    'FacilityName': 1402941396},
   'IndividualAttributes': {}},
  {'NodeID': 2,
   'NodeAttributes': {'Latitude': 12.0308553124896,
    'Longitude': -1.60712345544773,
    'InitialPopulation': 1000,
    'FacilityName': 1402941396},
   'IndividualAttributes': {}},
  {'NodeID': 3,
   'NodeAttributes': {'Latitude': 12.0353751865335,
    'Longitude': -1.60712345544773,
    'InitialPopulation': 1000,
    'FacilityName': 1402941397},
   'IndividualAttributes': {}}],
 'Defaults': {},
 'Metadata': {'Author': 'idm',
  'Tool': 'dtk-tools',
  'IdReference': 'Custom user',
  'DateCreated': '2018-12-14 02:13:30.822494',
  'NodeCount': 3,
  'Resolution': 30}}

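Notice that our population column did not get loaded: every node shows an InitialPopulation of 1000 instead of the values in the CSV. That is because the column is called 'population' and not 'pop'. We can load the population from our CSV by specifying the column name, like so: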

In [4]:
# Snapshot the previous result so it can be compared with the new one.
prev = deepcopy(current)
# Regenerate, this time naming the CSV column that holds each node's population.
current = DemographicsGenerator.from_grid_file(grid_file, population_column_name='population')
# Render an HTML diff of the pretty-printed JSON of both versions.
display_diff(prev, current)

0,1,2,3,4,5
f,1,{,f,1,{
,2,"""Defaults"": {},",,2,"""Defaults"": {},"
,3,"""Metadata"": {",,3,"""Metadata"": {"
,4,"""Author"": ""idm"",",,4,"""Author"": ""idm"","
n,5,"""DateCreated"": ""2018-12-14 02:13:30.822494"",",n,5,"""DateCreated"": ""2018-12-14 02:13:30.847472"","
,6,"""IdReference"": ""Custom user"",",,6,"""IdReference"": ""Custom user"","
,7,"""NodeCount"": 3,",,7,"""NodeCount"": 3,"
,8,"""Resolution"": 30,",,8,"""Resolution"": 30,"
,9,"""Tool"": ""dtk-tools""",,9,"""Tool"": ""dtk-tools"""
,10,"},",,10,"},"

Legends,Legends.1
Colors Added Changed Deleted,Links (f)irst change (n)ext change (t)op

Colors
Added
Changed
Deleted

Links,Links.1
(f)irst change,
(n)ext change,
(t)op,


For a more realistic configuration, we want to add some other configuration options to our demographics file. For one, we want the file to use World Bank population data for our country. In addition, it would also be nice to create a population that is already in equilibrium, allowing us to do burn-ins for immunity establishment only.

To do that, we can use *Demographics Concerns*. These are sets of demographics configuration elements that are logically grouped together. For example, we have simple concerns like GenericDefaultsAgeDistubrution, but we also have more complex ones like DefaultWorldBankEquilibriumConcern, which combines a series of concerns related to one class.

For our example, we will use **EquilibriumAgeDistributionConcern** and the **WorldBankBirthRateNodeConcern**

In [5]:
# Birth-rate concern configured for India with the 2016 World Bank birth-rate year.
br_concern = WorldBankBirthRateNodeConcern(country="India", birthrate_year=2016)

# The age-distribution concern is given the default birth rate exposed by
# br_concern, so both concerns work from the same value.
age_concern = EquilibriumAgeDistributionConcern(default_birth_rate=br_concern.default_birth_rate)

# Concerns to hand to the generator, in this order.
chain = [br_concern, age_concern]

Notice that to properly build a demographics file using the World Bank's birth rate, we must be sure to pass that value to our EquilibriumAgeDistributionConcern.

Now let's use that chain as our concerns when loading our grid file and see how the output changes.

In [6]:
# Same grid file and population column as before, now with the concern chain
# passed as node_concern.
current = DemographicsGenerator.from_grid_file(grid_file, population_column_name='population', node_concern=chain)
# Display the resulting demographics.
current

{'Nodes': [{'NodeID': 1,
   'NodeAttributes': {'Latitude': 12.0263354369855,
    'Longitude': -1.60712345544773,
    'InitialPopulation': 1500,
    'FacilityName': 1402941396,
    'BirthRate': 0.07813561643835616,
    'Metadata': {'CountryBirthRate': 19.012999999999998,
     'BirthRateSource': 'World Bank',
     'World Bank Year': '2016'}},
   'IndividualAttributes': {'AgeDistribution': {'DistributionValues': [[0.0,
       0.05349793456807252,
       0.10456146724957095,
       0.15330136968882704,
       0.19982337335370082,
       0.2441798387815855,
       0.286566004890429,
       0.327023404976325,
       0.3656398029619375,
       0.4024989694572181,
       0.4376808033683345,
       0.47126140442333603,
       0.5033139332724507,
       0.5338744974788288,
       0.5630778430780281,
       0.5909521978420574,
       0.6175579526608965,
       0.6429530225228447,
       0.6671924967943497,
       0.6903289582407516,
       0.7124125216513417,
       0.7334679905069765,
       0.7

TODO explain the above demographics file

Notice that the first node is using the birth rate from India to produce the age distribution, but we have marked the node as being located in China in the country column.

The World Bank generator will store the country birth rate on the node if it detects a Country column in the node attributes. EquilibriumAgeDistributionConcern will look for any node-specific birth rates, like the per-country birth rate, and use them to calculate the node-specific equilibrium age distributions.

In [7]:
# Keep the previous (chain-only) result for comparison.
prev = deepcopy(current)

# Regenerate with the 'country' CSV column requested via include_columns.
# Presumably load_other_columns_as_attributes=True copies that column into each
# node's attributes -- confirm against from_grid_file.
current = DemographicsGenerator.from_grid_file(grid_file, population_column_name='population', 
                                               node_concern=chain, 
                                               load_other_columns_as_attributes=True, 
                                               include_columns=['country'])
# Show what changed between the two demographics.
display_diff(prev, current)

0,1,2,3,4,5
f,1.0,{,f,1.0,{
,2.0,"""Defaults"": {",,2.0,"""Defaults"": {"
,3.0,"""IndividualAttributes"": {",,3.0,"""IndividualAttributes"": {"
,4.0,"""ImmunityDistribution1"": 1,",,4.0,"""ImmunityDistribution1"": 1,"
,5.0,"""ImmunityDistribution2"": 0,",,5.0,"""ImmunityDistribution2"": 0,"
,6.0,"""ImmunityDistributionFlag"": 0,",,6.0,"""ImmunityDistributionFlag"": 0,"
,7.0,"""MigrationHeterogeneityDistribution1"": 1,",,7.0,"""MigrationHeterogeneityDistribution1"": 1,"
,8.0,"""MigrationHeterogeneityDistribution2"": 0,",,8.0,"""MigrationHeterogeneityDistribution2"": 0,"
,9.0,"""MigrationHeterogeneityDistributionFlag"": 0,",,9.0,"""MigrationHeterogeneityDistributionFlag"": 0,"
,10.0,"""MortalityDistribution"": {",,10.0,"""MortalityDistribution"": {"

Legends,Legends.1
Colors Added Changed Deleted,Links (f)irst change (n)ext change (t)op

Colors
Added
Changed
Deleted

Links,Links.1
(f)irst change,
(n)ext change,
(t)op,
