In [1]:
import os
import pandas as pd
from dtk.tools.demographics.generator.DemographicsNodeGeneratorConcern import GenericMortalityDistributionConcern, WorldBankBirthRateNodeConcern, EquilibriumAgeDistributionConcern 
from dtk.tools.demographics.DemographicsGenerator import DemographicsGenerator
# Input files are read from the directory the notebook is run from
input_path = os.getcwd()

# Full path of the CSV grid file used to generate the demographics
grid_file = os.path.join(
    input_path,
    'test_grid.csv',
)

Above we have imported all our required libraries and set our input path to the current directory.

Now we are going to generate a demographics file from the following grid file, which is in CSV format:


In [2]:
# Preview the raw grid CSV as a DataFrame so its columns can be inspected
pd.read_csv(grid_file)

Unnamed: 0.1,Unnamed: 0,lat,lon,gcid,population,country
0,0,12.026335,-1.607123,1,1500,China
1,1,12.030855,-1.607123,2,1763,
2,2,12.035375,-1.607123,3,343,
3,3,12.039895,-1.607123,4,12,
4,4,12.044415,-1.607123,5,989,
5,5,12.048935,-1.607123,6,1567,
6,6,12.053455,-1.607123,7,1785,
7,7,12.057975,-1.607123,8,2242,
8,8,12.062494,-1.607123,9,343,
9,9,12.067014,-1.607123,10,12,


Now let's load the grid file without specifying any concerns

In [3]:
# Build demographics from the grid file, passing only the file path
# (no population column name and no node concerns are specified).
# NOTE(review): the output below lists some NodeID values twice for
# different coordinates - confirm this is expected.
DemographicsGenerator.from_grid_file(grid_file)

{'Nodes': [{'NodeID': 1402941396,
   'NodeAttributes': {'Latitude': 12.0263354369855,
    'Longitude': -1.60712345544773,
    'InitialPopulation': 1000,
    'FacilityName': 1402941396},
   'IndividualAttributes': {}},
  {'NodeID': 1402941396,
   'NodeAttributes': {'Latitude': 12.0308553124896,
    'Longitude': -1.60712345544773,
    'InitialPopulation': 1000,
    'FacilityName': 1402941396},
   'IndividualAttributes': {}},
  {'NodeID': 1402941397,
   'NodeAttributes': {'Latitude': 12.0353751865335,
    'Longitude': -1.60712345544773,
    'InitialPopulation': 1000,
    'FacilityName': 1402941397},
   'IndividualAttributes': {}},
  {'NodeID': 1402941397,
   'NodeAttributes': {'Latitude': 12.0398950591167,
    'Longitude': -1.60712345544773,
    'InitialPopulation': 1000,
    'FacilityName': 1402941397},
   'IndividualAttributes': {}},
  {'NodeID': 1402941398,
   'NodeAttributes': {'Latitude': 12.0444149302386,
    'Longitude': -1.60712345544773,
    'InitialPopulation': 1000,
    'Facili

Notice that our population column did not get loaded: every node shows an InitialPopulation of 1000. That is because the column is called 'population' and not 'pop'. We can load the population from our CSV by passing the column name, like so:

In [4]:
# Name the CSV column that holds each node's population so it is used
# for InitialPopulation
DemographicsGenerator.from_grid_file(
    grid_file,
    population_column_name='population',
)

{'Nodes': [{'NodeID': 1402941396,
   'NodeAttributes': {'Latitude': 12.0263354369855,
    'Longitude': -1.60712345544773,
    'InitialPopulation': 1500,
    'FacilityName': 1402941396},
   'IndividualAttributes': {}},
  {'NodeID': 1402941396,
   'NodeAttributes': {'Latitude': 12.0308553124896,
    'Longitude': -1.60712345544773,
    'InitialPopulation': 1763,
    'FacilityName': 1402941396},
   'IndividualAttributes': {}},
  {'NodeID': 1402941397,
   'NodeAttributes': {'Latitude': 12.0353751865335,
    'Longitude': -1.60712345544773,
    'InitialPopulation': 343,
    'FacilityName': 1402941397},
   'IndividualAttributes': {}},
  {'NodeID': 1402941397,
   'NodeAttributes': {'Latitude': 12.0398950591167,
    'Longitude': -1.60712345544773,
    'InitialPopulation': 12,
    'FacilityName': 1402941397},
   'IndividualAttributes': {}},
  {'NodeID': 1402941398,
   'NodeAttributes': {'Latitude': 12.0444149302386,
    'Longitude': -1.60712345544773,
    'InitialPopulation': 989,
    'FacilityNa

For a more realistic configuration, we want to add some other configuration options to our demographics file. For one, we want the file to use World Bank population data for our country. In addition, it would also be nice to create a population that is already in equilibrium, allowing us to do burn-ins for immunity establishment only.

To do that, we can use *Demographics Concerns*. These are sets of demographics configuration elements that are logically grouped together. For example, we have simple concerns like GenericDefaultsAgeDistribution, but we also have the more complex DefaultWorldBankEquilibriumConcern, which combines a series of concerns related to one class.

For our example, we will use **EquilibriumAgeDistributionConcern** and the **WorldBankBirthRateNodeConcern**

In [5]:
# Concern that supplies the World Bank birth rate for the given country and year
br_concern = WorldBankBirthRateNodeConcern(
    country="India",
    birthrate_year=2016,
)

# Ordered list of concerns: the birth-rate concern first, followed by the
# equilibrium age distribution concern seeded with that concern's default birth rate
chain = [br_concern]
chain.append(
    EquilibriumAgeDistributionConcern(default_birth_rate=br_concern.default_birth_rate)
)

Notice that to properly build a demographics file using the World Bank's birth rate, we must be sure to pass that value to our EquilibriumAgeDistributionConcern.

Now let's use that chain as our concerns when loading our grid file and see how the output changes.

In [6]:
# Generate the demographics again, this time applying the concern chain to each node
DemographicsGenerator.from_grid_file(
    grid_file,
    population_column_name='population',
    node_concern=chain,
)

{'Nodes': [{'NodeID': 1402941396,
   'NodeAttributes': {'Latitude': 12.0263354369855,
    'Longitude': -1.60712345544773,
    'InitialPopulation': 1000,
    'FacilityName': 1402941396,
    'BirthRate': 0.052090410958904106,
    'CountryBirthRate': 19.012999999999998},
   'IndividualAttributes': {'MortalityDistribution': {'NumDistributionAxes': 2,
     'AxisNames': ['gender', 'age'],
     'AxisUnits': ['male=0,female=1', 'years'],
     'AxisScaleFactors': [1, 365],
     'NumPopulationGroups': [2, 1],
     'PopulationGroups': [[0, 1], [0]],
     'ResultUnits': 'annual deaths per 1000 individuals',
     'ResultScaleFactor': 2.74e-06,
     'ResultValues': [[19.012999999999998], [19.012999999999998]]},
    'AgeDistribution': {'DistributionValues': [[0.0,
       0.053497934568072524,
       0.10456146724957095,
       0.15330136968882707,
       0.19982337335370084,
       0.24417983878158556,
       0.2865660048904291,
       0.3270234049763251,
       0.3656398029619376,
       0.402498969

TODO explain the above demographics file

Notice that the first node is using the birth rate from India to produce the age distribution, but we have marked the node as being located in China in the country column.

The World Bank generator will store the country birth rate on the node if it detects a country column in the node attributes. EquilibriumAgeDistributionConcern will look for any node-specific birth rates, such as the per-country birth rate, and use them to calculate the node-specific equilibrium age distributions. To make the country column available, we load the other CSV columns as node attributes:

In [8]:
# Apply the concern chain and also load the remaining CSV columns as node
# attributes, so that per-node values such as 'country' are available.
# 'population' is listed in exclude_columns - presumably so it is not repeated
# as an extra attribute alongside InitialPopulation (confirm against the
# from_grid_file documentation).
DemographicsGenerator.from_grid_file(
    grid_file,
    population_column_name='population',
    node_concern=chain,
    load_other_columns_as_attributes=True,
    exclude_columns=['population'],
)

{'Nodes': [{'NodeID': 1402941396,
   'NodeAttributes': {'Latitude': 12.0263354369855,
    'Longitude': -1.60712345544773,
    'InitialPopulation': 1500,
    'FacilityName': 1402941396,
    'gcid': '1',
    'country': 'China',
    'BirthRate': 0.049315068493150684,
    'CountryBirthRate': 12.0},
   'IndividualAttributes': {'MortalityDistribution': {'NumDistributionAxes': 2,
     'AxisNames': ['gender', 'age'],
     'AxisUnits': ['male=0,female=1', 'years'],
     'AxisScaleFactors': [1, 365],
     'NumPopulationGroups': [2, 1],
     'PopulationGroups': [[0, 1], [0]],
     'ResultUnits': 'annual deaths per 1000 individuals',
     'ResultScaleFactor': 2.74e-06,
     'ResultValues': [[12.0], [12.0]]},
    'AgeDistribution': {'DistributionValues': [[0.0,
       0.0414530017600223,
       0.08170527490144032,
       0.12079159821513838,
       0.15874574317867804,
       0.19555986156802654,
       0.2313480316912453,
       0.26609956061296663,
       0.29984447438159745,
       0.3326119293