# Pipeline example, mortality

This notebook uses the same demographic components as `pipeline_001_mortality`, and it runs five simulations in parallel using `multiprocessing`.

In [1]:
import datetime
import matplotlib.pyplot as plt
import multiprocessing
import numpy as np
import pandas as pd
from pathlib import Path
import time

from vivarium import InteractiveContext
from vivarium.framework.configuration import build_simulation_configuration
from vivarium.config_tree import ConfigTree

from vivarium_public_health.population import Mortality
from vivarium_public_health.population.spenser_population import TestPopulation
from vivarium_public_health.population.spenser_population import build_mortality_table
from vivarium_public_health.population.spenser_population import transform_rate_table
from vivarium_public_health.population.spenser_population import prepare_dataset

from vivarium_public_health.utilities import read_config_file

## Base plugins for simulation

In [2]:
def base_plugins_simulation():
    """Return the plugin configuration shared by every simulation.

    Only the ``required.data`` plugin is specified: its controller is the
    mock artifact manager and its builder interface is the artifact
    interface, both given as import paths.

    Returns
    -------
    ConfigTree
        Tree wrapping the ``required -> data`` plugin description.
    """
    data_plugin = {
        'controller': 'vivarium_public_health.testing.mock_artifact.MockArtifactManager',
        'builder_interface': 'vivarium.framework.artifact.ArtifactInterface',
    }
    return ConfigTree({'required': {'data': data_plugin}})

## Configure a simulation

In [3]:
def config_simulation(inp_file):
    """Build the simulation configuration from a parsed model specification.

    Parameters
    ----------
    inp_file : mapping
        Parsed model specification. The entries read are
        ``configuration`` (``population``, ``time``, ``randomness``,
        ``input_data``) and ``paths`` (``path_to_pop_file``,
        ``path_to_mortality_file``).

    Returns
    -------
    The object created by ``build_simulation_configuration()`` after both
    updates below have been applied.
    """
    settings = inp_file['configuration']
    population = settings['population']

    # A non-positive population_size means "use every row of the population file".
    pop_size = population['population_size']
    if pop_size <= 0:
        pop_size = len(pd.read_csv(inp_file['paths']['path_to_pop_file']))

    config = build_simulation_configuration()

    # Time, randomness and input-data settings go into the 'model_override' layer.
    time_settings = {key: settings['time'][key] for key in ('start', 'end', 'step_size')}
    config.update(
        {
            'time': time_settings,
            'randomness': settings['randomness'],
            'input_data': settings['input_data'],
        },
        layer='model_override',
    )

    # File locations and population bounds are written without an explicit layer.
    paths = inp_file['paths']
    config.update(
        {
            'path_to_pop_file': paths['path_to_pop_file'],
            'path_to_mortality_file': paths['path_to_mortality_file'],
            'population': {
                'population_size': pop_size,
                'age_start': population['age_start'],
                'age_end': population['age_end'],
            },
        },
    )
    return config

## All steps required to do one simulation

In [4]:
def run_simulation(mortality_multiply=1.):
    """Run one mortality simulation with uniformly scaled mortality rates.

    The model specification is read from
    ``../config/model_specification_pipeline_001.yaml``. The mortality table
    is restricted to LAD code ``E08000032``, passed through
    ``transform_rate_table``, scaled, and written to the simulation's data
    store before the simulation is set up and run for 740 days
    (2 * 365 + 10).

    Parameters
    ----------
    mortality_multiply : float, optional
        Factor applied to the ``mean_value`` column of the transformed
        mortality table. Defaults to 1 (rates left unchanged).

    Returns
    -------
    The population table returned by ``simulation.get_population()`` once
    the run has finished.
    """
    inp_file = read_config_file("../config/model_specification_pipeline_001.yaml")

    base_plugins = base_plugins_simulation()
    config = config_simulation(inp_file=inp_file)
    # NOTE(review): every entry of `list_components` is executed with eval().
    # Only use specification files from a trusted source.
    components = [eval(x) for x in inp_file["list_components"]]
    sim = InteractiveContext(
        components=components,
        configuration=config,
        plugin_configuration=base_plugins,
        setup=False,
    )

    all_rates = pd.read_csv(config.path_to_mortality_file)
    # To save time, only keep the location that exists in the test dataset.
    local_rates = all_rates[all_rates['LAD.code'] == 'E08000032']

    mortality_table = transform_rate_table(
        local_rates,
        2011,
        2012,
        config.population.age_start,
        config.population.age_end,
    )

    # Scale every rate by the scenario multiplier before handing it to the simulation.
    mortality_table["mean_value"] *= mortality_multiply
    sim._data.write("cause.all_causes.cause_specific_mortality_rate", mortality_table)

    print(f"Simulation set up -- {multiprocessing.current_process().name}")
    sim.setup()
    sim.run_for(duration=pd.Timedelta(days=365*2 + 10))
    return sim.get_population()

In [5]:
# Prepare the data only once
inp_file = read_config_file("../config/model_specification_pipeline_001.yaml")

# All preparation settings live under the `prepare_data` section of the specification.
prepare_settings = inp_file['prepare_data']

if prepare_settings['prepare']:
    # Read a dataset (normally from daedalus) and change its columns so that
    # vivarium can read it. The function saves the result at `output_path`.
    prepare_dataset(
        dataset_path=prepare_settings['path_to_dataset'],
        output_path=prepare_settings['path_to_output'],
        lookup_ethnicity=prepare_settings['path_to_lookup_ethnicity'],
        columns_map=prepare_settings['columns_map'],
        location_code=prepare_settings['location_code'],
    )




Write the dataset at: ../persistant_data/test_ssm_E08000032_MSOA11_ppp_2011.csv


## Simulations

Five simulations are defined by `mortality_multiply_list = [0.5, 0.8, 1, 1.2, 2]` (next cell). Each simulation multiplies all the input mortality rates by one of these factors: 0.5, 0.8, 1, 1.2 or 2.

In [None]:
sim_start = time.time()

# One scenario per entry: every input mortality rate is multiplied by the factor.
mortality_multiply_list = [0.5, 0.8, 1, 1.2, 2]

# Start one worker per scenario so that all simulations run at the same time
# and no idle worker is created. The context manager terminates the workers
# even when a simulation raises, so a failed run does not leave orphaned
# processes attached to the notebook kernel.
with multiprocessing.Pool(processes=len(mortality_multiply_list)) as proc_pool:
    # `map` blocks until every simulation has returned; results keep the
    # order of `mortality_multiply_list`.
    results = proc_pool.map(run_simulation, mortality_multiply_list)
    proc_pool.close()
    proc_pool.join()

sim_end = time.time()

Simulation set up -- ForkPoolWorker-5


2020-07-22 11:19:32.209 | DEBUG    | vivarium.framework.values:register_value_modifier:373 - Registering metrics.1.population_manager.metrics as modifier to metrics


Simulation set up -- ForkPoolWorker-1Simulation set up -- ForkPoolWorker-2

Simulation set up -- ForkPoolWorker-4


2020-07-22 11:19:32.234 | DEBUG    | vivarium.framework.values:register_value_modifier:373 - Registering metrics.1.population_manager.metrics as modifier to metrics
2020-07-22 11:19:32.235 | DEBUG    | vivarium.framework.values:register_value_modifier:373 - Registering metrics.1.population_manager.metrics as modifier to metrics


Simulation set up -- ForkPoolWorker-3

2020-07-22 11:19:32.240 | DEBUG    | vivarium.framework.values:register_value_modifier:373 - Registering metrics.1.population_manager.metrics as modifier to metrics





2020-07-22 11:19:32.255 | DEBUG    | vivarium.framework.values:register_value_modifier:373 - Registering metrics.1.population_manager.metrics as modifier to metrics
2020-07-22 11:19:39.888 | DEBUG    | vivarium.framework.values:_register_value_producer:323 - Registering value pipeline mortality_rate
2020-07-22 11:19:39.893 | DEBUG    | vivarium.framework.values:_register_value_producer:323 - Registering value pipeline metrics
2020-07-22 11:19:39.925 | DEBUG    | vivarium.framework.values:_register_value_producer:323 - Registering value pipeline mortality_rate
2020-07-22 11:19:39.931 | DEBUG    | vivarium.framework.values:_register_value_producer:323 - Registering value pipeline metrics
2020-07-22 11:19:39.953 | DEBUG    | vivarium.framework.values:_register_value_producer:323 - Registering value pipeline mortality_rate
2020-07-22 11:19:39.958 | DEBUG    | vivarium.framework.values:_register_value_producer:323 - Registering value pipeline metrics
2020-07-22 11:19:39.968 | DEBUG    | viv

In [None]:
# Sanity check: `results` holds one population table per entry of
# `mortality_multiply_list`, so this should display the number of scenarios.
len(results)

In [None]:
# Report the wall-clock time of the parallel run, then the final number of
# alive and not-alive simulants for each mortality multiplier.
print(f"Total time: {sim_end - sim_start}")
for multiplier, pop in zip(mortality_multiply_list, results):
    is_alive = pop["alive"] == "alive"
    print("----")
    print(multiplier)
    print(f'#alive: {is_alive.sum()}')
    print(f'#dead: {(~is_alive).sum()}')

## Plot results

In [None]:
# Fixed plotting window. The start is given as an ISO date string (it could
# instead be derived from the earliest `entrance_time` of a population) and
# the end as a datetime object.
min_time = "2011-01-01"
max_time = datetime.datetime(2012, 12, 31)

print(f"min_time: {min_time}")
print(f"max_time: {max_time}")

In [None]:
# Register pandas' date/time converters with matplotlib so that datetime
# values can be used directly on plot axes.
pd.plotting.register_matplotlib_converters()

In [None]:
# --- input
# spacing between two plotted time points (in days)
interval_in_days = 10
# list of ethnicities counted as a separate sub-population
sel_ethnicity = ["WBI", "WHO"]

if interval_in_days <= 0:
    # A non-positive step would never reach max_time and loop forever.
    raise ValueError("interval_in_days must be a positive number of days")

# The sampling dates are identical for every simulation, so build them once.
time_axis = []
curr_time = datetime.datetime.strptime(min_time, "%Y-%m-%d")
while curr_time <= max_time:
    time_axis.append(curr_time)
    curr_time += datetime.timedelta(days=interval_in_days)

plt.figure(figsize=(15, 10))

# Two extra shades are generated and the first two are skipped (colors[i+2]
# below) so that no curve is drawn with the lightest greys.
cmap = plt.get_cmap('Greys')
colors = [cmap(i) for i in np.linspace(0, 1, len(results)+2)]

for i, pop in enumerate(results):
    # Group membership does not change over time: compute the masks once
    # per simulation instead of at every sampling date.
    in_sel_ethnicity = pop["ethnicity"].isin(sel_ethnicity)
    is_male = pop["sex"] == 1
    is_female = pop["sex"] == 2

    # Number of people still in the population at each sampling date
    # (total, selected ethnicities, male, female).
    population_axis = []
    population_ETH_axis = []
    population_M_axis = []
    population_F_axis = []

    for sample_time in time_axis:
        # People whose exit time is on or before the sampling date have left
        # the population. Assumes `exit_time` is a datetime column that is
        # empty for people who are still alive -- TODO confirm.
        has_exited = pop["exit_time"] <= sample_time.strftime("%Y-%m-%d")
        still_alive = ~has_exited

        population_axis.append(int(still_alive.sum()))
        population_ETH_axis.append(int((still_alive & in_sel_ethnicity).sum()))
        population_M_axis.append(int((still_alive & is_male).sum()))
        population_F_axis.append(int((still_alive & is_female).sum()))

    # Only the total population is drawn; the sub-population series are
    # collected but not plotted in this cell.
    plt.plot(time_axis, population_axis,
             c=colors[i+2], lw=4, marker="o",
             label=f"{mortality_multiply_list[i]}"
            )
plt.xlabel("Time", size=32)
plt.ylabel("Population", size=32)
plt.xticks(size=24, rotation=90)
plt.yticks(size=24)
plt.grid()
plt.legend(fontsize=24)
plt.show()