In [2]:
# Automatically re-import edited modules (e.g. the local birth_facility
# module imported below) before each cell executes, so changes to the
# module code are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Goal: Generate verification and validation (V&V) targets for the facility choice model

The targets are output in a `.csv` file at the end of the notebook.

## Requirements:

If you want to run this notebook or any of the code in the
`birth_facility.py` or `solution_finding.py` modules, you will need the right environment.
You can create it with these steps (on 64-bit Linux):

- `conda create --name facility_choice --file environment_lock_conda.txt`
- `conda activate facility_choice`
- `pip install -r environment_lock_pip.txt`

If you're not on 64-bit Linux, replacing the first command with `conda create --name facility_choice python=3.10`
should approximately recreate the environment.

If you want to update all the packages in the environment, create it using `conda env create --name facility_choice --file environment.yaml` (no need for any `pip` commands).
Then, run the following commands inside the environment you created to update the lock files:

- `conda list --explicit > environment_lock_conda.txt`
- `pip freeze | grep -v 'file:///' | grep -v '\-e' > environment_lock_pip.txt`

You will also need to be on the IHME cluster to run this notebook, as the causal model
nanosim loads LBWSG data from an Artifact for the MNCNH Portfolio model.

In [3]:
import pandas as pd
import numpy as np
import birth_facility as bf

# Record provenance for this run: timestamp, user, host, working directory,
# Python version, active conda environment, and versions of key packages
!date
!whoami
!uname -a
!pwd
!python --version
!conda info --envs | grep '\*'
!conda list | grep -e pandas -e numpy -e scipy -e statsmodels -e matplotlib -e gbd

Fri Dec 12 13:31:03 PST 2025
lutzes
Linux long-slurm-sarchive-p0033 5.4.0-216-generic #236-Ubuntu SMP Fri Apr 11 19:53:21 UTC 2025 x86_64 x86_64 x86_64 GNU/Linux
/mnt/share/homes/lutzes/vivarium_gates_mncnh/src/vivarium_gates_mncnh/data/facility_choice
Python 3.10.19
facility_choice       *  /ihme/homes/lutzes/.conda/envs/facility_choice
gbd                       4.37.1                   pypi_0    pypi
gbd-mapping               5.0.0                    pypi_0    pypi
matplotlib-inline         0.1.7                    pypi_0    pypi
numpy                     1.26.4                   pypi_0    pypi
pandas                    1.5.3                    pypi_0    pypi
pandas-stubs              2.2.3.250308             pypi_0    pypi
scipy                     1.15.3                   pypi_0    pypi
statsmodels               0.14.5                   pypi_0    pypi


In [4]:
# Import a few specific things for convenience
from birth_facility import (
    # Classes for causal model nanosim
    BirthFacilityChoiceData, BirthFacilityModelWithUltrasoundAndSimpleGAError,
    # String indicating fake draw column that's the mean of the real draws
    MEAN_DRAW,
    # Constants for column names
    ANC, LBWSG_CAT, FACILITY, TERM_STATUS, BELIEVED_TERM_STATUS, ULTRASOUND,
)

# Part 1: Run a nanosim for each country

The 5 input parameters are the outputs previously found by the causal
model optimization. The 5 inputs are 3 correlations and 2 causal
probabilities. These same parameters should be used in Vivarium.

# Load results of optimization for reference

In [5]:
# Read the optimized parameters (one row per parameter, one column per
# country); these are the inputs to the causal model nanosims below
optimization_results = pd.read_csv(
    'facility_choice_optimization_results.csv',
    index_col='parameter_name',
)
optimization_results

Unnamed: 0_level_0,Ethiopia,Nigeria,Pakistan
parameter_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"corr(anc, lbwsg_category)",0.2,0.2,0.2
"corr(anc, facility)",0.689604,0.417728,0.351232
"corr(lbwsg_category, facility)",0.2,0.2,0.2
prob_home_given_believed_preterm,0.297958,0.374638,0.165997
prob_home_given_believed_term,0.504472,0.533572,0.250153



# Load data for the 3 countries

In [6]:
# Use the fake "mean of the real draws" draw for every country
draw = MEAN_DRAW

# Load the input data for each country (same order as before:
# Pakistan, Ethiopia, Nigeria)
data_p, data_e, data_n = (
    BirthFacilityChoiceData(location_name, draw=draw)
    for location_name in ('pakistan', 'ethiopia', 'nigeria')
)

# Define propensity correlations, create models for all 3 countries, and sample propensities



In [7]:
# Choose a large population size to reduce stochastic variation for
# validation targets
pop_size = 1_000_000


def make_correlation_map(country):
    """Return the three propensity correlations for one country.

    Parameters
    ----------
    country : str
        Column label in ``optimization_results`` (e.g. 'Ethiopia').

    Returns
    -------
    dict
        Maps each pair of variable names to the optimized correlation
        read from ``optimization_results`` for that country.
    """
    return {
        (ANC, FACILITY): optimization_results.loc['corr(anc, facility)', country],
        (ANC, LBWSG_CAT): optimization_results.loc['corr(anc, lbwsg_category)', country],
        (FACILITY, LBWSG_CAT): optimization_results.loc['corr(lbwsg_category, facility)', country],
    }


def make_model(data, correlation_map, seed):
    """Create a 2-facility-type causal model nanosim for one country.

    Parameters
    ----------
    data : BirthFacilityChoiceData
        Input data for the country.
    correlation_map : dict
        Propensity correlations, as returned by ``make_correlation_map``.
    seed : int
        Seed passed through to the model.

    Returns
    -------
    BirthFacilityModelWithUltrasoundAndSimpleGAError
        A model with ``pop_size`` simulants.
    """
    return BirthFacilityModelWithUltrasoundAndSimpleGAError(
        data=data,
        population_size=pop_size,
        num_facility_types=2,
        correlation_map=correlation_map,
        seed=seed,
    )


# These maps specify the three propensity correlations for each country
correlation_map_e = make_correlation_map('Ethiopia')
correlation_map_n = make_correlation_map('Nigeria')
correlation_map_p = make_correlation_map('Pakistan')

# Create a causal model nanosim for each country, each with its own
# fixed seed
model_e = make_model(
    data_e, correlation_map_e, seed=303916847607905435497113962748943192257)
model_n = make_model(
    data_n, correlation_map_n, seed=291258168795962949552887306725641490503)
model_p = make_model(
    data_p, correlation_map_p, seed=106517498220144615105209854865147031104)

# Display correlation matrix for Ethiopia:
# The entries should match the correlations specified in correlation_map
# above
model_e.correlation_matrix

Unnamed: 0,sex,anc,ultrasound,lbwsg_category,gestational_age,ga_error,facility
sex,1.0,0.0,0.0,0.0,0.0,0.0,0.0
anc,0.0,1.0,0.0,0.2,0.0,0.0,0.689604
ultrasound,0.0,0.0,1.0,0.0,0.0,0.0,0.0
lbwsg_category,0.0,0.2,0.0,1.0,0.0,0.0,0.2
gestational_age,0.0,0.0,0.0,0.0,1.0,0.0,0.0
ga_error,0.0,0.0,0.0,0.0,0.0,1.0,0.0
facility,0.0,0.689604,0.0,0.2,0.0,0.0,1.0


## Sample propensities and display result for Ethiopia

Then verify that the empirical correlations match the inputs.

In [8]:
# Sample propensities for every country's nanosim (Ethiopia, Nigeria,
# Pakistan, in that order)
for nanosim in (model_e, model_n, model_p):
    nanosim.sample_propensities()

# Show the sampled propensities for Ethiopia
model_e.propensity

Unnamed: 0_level_0,sex,anc,ultrasound,lbwsg_category,gestational_age,ga_error,facility
simulant_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.821092,0.999734,0.462921,0.917647,0.717700,0.995695,0.995406
1,0.730453,0.768388,0.971235,0.828540,0.112967,0.882391,0.374172
2,0.063495,0.862503,0.545431,0.226184,0.641785,0.298074,0.971005
3,0.274937,0.523107,0.210120,0.935051,0.875211,0.004025,0.756663
4,0.950339,0.016989,0.327721,0.314351,0.374498,0.115858,0.081712
...,...,...,...,...,...,...,...
999995,0.841760,0.589113,0.343493,0.892584,0.342121,0.159309,0.366436
999996,0.448645,0.951002,0.077141,0.368116,0.692791,0.884578,0.852628
999997,0.834919,0.032777,0.737128,0.128997,0.399556,0.648388,0.249334
999998,0.458436,0.048702,0.481687,0.559591,0.791476,0.111460,0.149538


In [9]:
# Sanity check: the empirical correlations of the sampled propensities
# should approximately match the correlation matrix above.
# The model also has a method for this (empirical_propensity_correlations),
# but the computation is spelled out here to make it clear that it uses
# the sampled propensities.
propensity_e = model_e.propensity
empirical_correlations = np.corrcoef(propensity_e, rowvar=False)
pd.DataFrame(
    empirical_correlations,
    index=propensity_e.columns,
    columns=propensity_e.columns,
).round(2)


Unnamed: 0,sex,anc,ultrasound,lbwsg_category,gestational_age,ga_error,facility
sex,1.0,0.0,0.0,-0.0,-0.0,-0.0,0.0
anc,0.0,1.0,-0.0,0.19,-0.0,0.0,0.67
ultrasound,0.0,-0.0,1.0,-0.0,0.0,0.0,-0.0
lbwsg_category,-0.0,0.19,-0.0,1.0,0.0,0.0,0.19
gestational_age,-0.0,-0.0,0.0,0.0,1.0,0.0,-0.0
ga_error,-0.0,0.0,0.0,0.0,0.0,1.0,0.0
facility,0.0,0.67,-0.0,0.19,-0.0,0.0,1.0


# Define causal facility choice probabilities, and initialize populations

In [10]:
# Pair each nanosim with its column in optimization_results
models_and_countries = (
    (model_e, 'Ethiopia'),
    (model_n, 'Nigeria'),
    (model_p, 'Pakistan'),
)

# Set the causal probabilities P(home|do(believed preterm)) and
# P(home|do(believed term)), passed in that order
for nanosim, country in models_and_countries:
    nanosim.set_facility_choice_probabilities(
        optimization_results.loc['prob_home_given_believed_preterm', country],
        optimization_results.loc['prob_home_given_believed_term', country],
    )

# Assign population variables only after all probabilities have been set
for nanosim, country in models_and_countries:
    nanosim.assign_population_variables()

# Display population for Ethiopia
model_e.population

Unnamed: 0_level_0,sex,anc,ultrasound,lbwsg_category,gestational_age,term_status,ga_error,estimated_gestational_age,believed_term_status,facility
simulant_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,Female,anc1,standard_ultrasound,cat54,39.435400,term,3.753110,43.188510,believed_term,in_facility
1,Female,anc1,standard_ultrasound,cat54,38.225934,term,1.695752,39.921686,believed_term,at_home
2,Male,anc1,standard_ultrasound,cat31,41.283571,term,-0.757069,40.526502,believed_term,in_facility
3,Male,anc1,no_ultrasound,cat54,39.750423,term,-5.299931,34.450492,believed_preterm,in_facility
4,Female,anc0,no_ultrasound,cat42,37.374498,term,-2.391899,34.982599,believed_preterm,at_home
...,...,...,...,...,...,...,...,...,...,...
999995,Female,anc1,no_ultrasound,cat54,38.684242,term,-1.994601,36.689640,believed_preterm,in_facility
999996,Male,anc1,no_ultrasound,cat42,37.692791,term,2.396371,40.089162,believed_term,in_facility
999997,Female,anc0,no_ultrasound,cat45,36.399556,preterm,0.761941,37.161497,believed_term,at_home
999998,Male,anc0,no_ultrasound,cat48,37.791476,term,-2.437603,35.353873,believed_preterm,at_home


# Part 2: Compute validation targets

Some validation targets are inputs to the causal model optimization,
and some are computed from the simulated population after running the
nanosims above.

# Display the optimization targets for the 3 countries

These are the conditional probabilities that the optimization tries to
match. When we run the facility choice model in Vivarium using the five
inputs specified above, the simulated population should approximately
reproduce these values.

In [11]:
# Collect the nanosims and their location names in a fixed order that
# the remaining cells reuse
models = [model_e, model_n, model_p]
locations = [nanosim.data.location for nanosim in models]

# One column of optimization targets per location
targets_by_location = [nanosim.targets for nanosim in models]
optimization_targets = pd.concat(
    targets_by_location, axis=1, keys=locations, names=['location'])
optimization_targets

Unnamed: 0_level_0,location,Ethiopia,Nigeria,Pakistan
probability_of,given,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
in_facility,anc0,0.183369,0.309069,0.556322
in_facility,anc1,0.664786,0.584168,0.795407
preterm,at_home,0.163579,0.172663,0.194529
preterm,in_facility,0.163486,0.153,0.155164


In [12]:
# Reshape for concatenation with other validation targets:
# move "probability_of" from the row index into the columns, giving the
# conditional probability table layout that `prob_y_given_x` outputs
# (see below), then apply the stacking function to get a single-level
# index with appropriate labels.
reshaped_optimization_targets = (
    optimization_targets
    .unstack('probability_of')
    .pipe(bf.stack_conditional_probability_table)
)
reshaped_optimization_targets

location,Ethiopia,Nigeria,Pakistan
probability_of,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
in_facility_given_anc0,0.183369,0.309069,0.556322
in_facility_given_anc1,0.664786,0.584168,0.795407
preterm_given_at_home,0.163579,0.172663,0.194529
preterm_given_in_facility,0.163486,0.153,0.155164


### For comparison, display the empirical target values achieved by the nanosim populations:

They look pretty close, but not perfect. This should give some idea of
how close Vivarium should get to the target values.

In [13]:
# Target values recomputed from each nanosim's population, one column per
# location
empirical_targets_by_location = [nanosim.calculate_targets() for nanosim in models]
pd.concat(
    empirical_targets_by_location, axis=1, keys=locations, names=['location'])

Unnamed: 0_level_0,location,Ethiopia,Nigeria,Pakistan
probability_of,given,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
in_facility,anc0,0.185071,0.311632,0.557005
in_facility,anc1,0.664991,0.584307,0.795885
preterm,at_home,0.163649,0.172964,0.193565
preterm,in_facility,0.163826,0.153301,0.155795


# Display input probabilities for the 3 countries

These are the exact probabilities that were used as input somewhere in
the model, directly or indirectly.

In [14]:
# One column of input probabilities per location; this table is already
# in the right shape for concatenation with the other validation targets
input_probabilities_by_location = [
    nanosim.data.input_probabilities for nanosim in models
]
input_probabilities = pd.concat(
    input_probabilities_by_location, axis=1, keys=locations, names=['location'])
input_probabilities

location,Ethiopia,Nigeria,Pakistan
subpopulation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Male,0.515312,0.51245,0.519811
Female,0.484688,0.48755,0.480189
anc0,0.21989,0.257001,0.075919
anc1,0.78011,0.742999,0.924081
preterm,0.163527,0.162567,0.163933
term,0.836473,0.837433,0.836067
at_home,0.441073,0.486533,0.222744
in_facility,0.558927,0.513467,0.777256
BEmONC,0.089922,0.002271,0.264677
CEmONC,0.469005,0.511196,0.512579


### For comparison, display the empirical population proportions in the nanosims:

Note that I'm running the 2-facility-type model, which doesn't include
BEmONC and CEmONC, so those two rows are NaN in the table below.

In [15]:
# Subpopulation proportions measured in each nanosim, one column per
# location
proportions_by_location = [
    nanosim.get_population_proportions() for nanosim in models
]
pd.concat(
    proportions_by_location, axis=1, keys=locations, names=['location'])

location,Ethiopia,Nigeria,Pakistan
subpopulation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Male,0.515319,0.513159,0.519799
Female,0.484681,0.486841,0.480201
anc0,0.220451,0.256668,0.075748
anc1,0.779549,0.743332,0.924252
preterm,0.163748,0.162851,0.164188
term,0.836252,0.837149,0.835812
at_home,0.440808,0.48568,0.22221
in_facility,0.559192,0.51432,0.77779
BEmONC,,,
CEmONC,,,


# Display observed probabilities of IFD status given believed preterm status

These probabilities, $\Pr(\text{IFD status} \mid \text{believed preterm status})$
(IFD = in-facility delivery, i.e. `in_facility` vs. `at_home`), are computed
from the simulated population and should match what is observed in the
Vivarium sim.

These probabilities are different from the corresponding causal
probabilities $\Pr(\text{IFD status} \mid
\operatorname{do}(\text{believed preterm status}))$ that were input
above, because of the nonzero correlations of IFD status with ANC
attendance and LBWSG category.

In [16]:
# Observed facility choice probabilities from each nanosim, placed side
# by side under a top column level named 'location'
observed_facility_choice_tables = [
    nanosim.get_observed_facility_choice_probabilities() for nanosim in models
]
observed_ifd_probabilities_given_believed_term_status = pd.concat(
    observed_facility_choice_tables, axis=1, keys=locations, names=['location'])
observed_ifd_probabilities_given_believed_term_status

location,Ethiopia,Ethiopia,Nigeria,Nigeria,Pakistan,Pakistan
facility,at_home,in_facility,at_home,in_facility,at_home,in_facility
believed_term_status,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
believed_preterm,0.354554,0.645446,0.432403,0.567597,0.204393,0.795607
believed_term,0.477713,0.522287,0.508279,0.491721,0.229595,0.770405


In [17]:
# Flatten to a single-level index so this table can be concatenated with
# the other validation targets
reshaped_observed_ifd_probabilities_given_believed_term_status = (
    observed_ifd_probabilities_given_believed_term_status
    .pipe(bf.stack_conditional_probability_table)
)
reshaped_observed_ifd_probabilities_given_believed_term_status

location,Ethiopia,Nigeria,Pakistan
probability_of,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
at_home_given_believed_preterm,0.354554,0.432403,0.204393
in_facility_given_believed_preterm,0.645446,0.567597,0.795607
at_home_given_believed_term,0.477713,0.508279,0.229595
in_facility_given_believed_term,0.522287,0.491721,0.770405


### For comparison, display the causal probabilities we input when we initialized the populations:

These are different from the observed values, as expected.

In [18]:
# Causal facility choice probabilities stored on each nanosim, combined
# across locations and flattened to a single-level index
causal_facility_choice_tables = [
    nanosim.prob_facility_given_believed_term_status for nanosim in models
]
bf.stack_conditional_probability_table(
    pd.concat(
        causal_facility_choice_tables, axis=1, keys=locations, names=['location'])
)

location,Ethiopia,Nigeria,Pakistan
probability_of,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
at_home_given_believed_preterm,0.297958,0.374638,0.165997
in_facility_given_believed_preterm,0.702042,0.625362,0.834003
at_home_given_believed_term,0.504472,0.533572,0.250153
in_facility_given_believed_term,0.495528,0.466428,0.749847


# Display confusion matrices of term status vs. believed term status

I'm using the term [confusion
matrix](https://en.wikipedia.org/wiki/Confusion_matrix) somewhat
loosely: each row is normalized to conditional probabilities rather than
showing raw counts.

These probabilities, $\Pr(\text{believed preterm status} \mid \text{preterm status})$, are computed
from the simulated population and should match what is observed in the
Vivarium sim.

The values here will vary by location because of differing ultrasound
rates.

These probabilities depend on the [AI Ultrasound
module](https://vivarium-research.readthedocs.io/en/latest/models/concept_models/vivarium_mncnh_portfolio/ai_ultrasound_module/module_document.html#vivarium-mncnh-portfolio-ai-ultrasound-module)
and the LBWSG exposure but don't involve the facility choice probabilities.

In [19]:
# Conditional probability table of believed term status given term
# status, computed from each nanosim's simulated population
term_status_confusion = [
    bf.prob_y_given_x(
        model.population[BELIEVED_TERM_STATUS],
        model.population[TERM_STATUS]
    ) for model in models
]

# Name the top column level 'location', as in the earlier per-location
# tables, so the country level is labeled instead of unnamed
prob_believed_term_status_given_term_status = pd.concat(
    term_status_confusion, axis=1, keys=locations, names=['location'])
prob_believed_term_status_given_term_status


Unnamed: 0_level_0,Ethiopia,Ethiopia,Nigeria,Nigeria,Pakistan,Pakistan
believed_term_status,believed_preterm,believed_term,believed_preterm,believed_term,believed_preterm,believed_term
term_status,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
preterm,0.847705,0.152295,0.846768,0.153232,0.856719,0.143281
term,0.19234,0.80766,0.191057,0.808943,0.182314,0.817686


In [20]:
# Flatten to a single-level index so this table can be concatenated with
# the other validation targets
reshaped_prob_believed_term_status_given_term_status = (
    prob_believed_term_status_given_term_status
    .pipe(bf.stack_conditional_probability_table)
)
reshaped_prob_believed_term_status_given_term_status

Unnamed: 0_level_0,Ethiopia,Nigeria,Pakistan
probability_of,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
believed_preterm_given_preterm,0.847705,0.846768,0.856719
believed_term_given_preterm,0.152295,0.153232,0.143281
believed_preterm_given_term,0.19234,0.191057,0.182314
believed_term_given_term,0.80766,0.808943,0.817686


# Display P(believed term status | term status, ultrasound)

These probabilities, $\Pr(\text{believed preterm status} \mid
\text{preterm status, ultrasound type})$, are computed
from the simulated population and should match what is observed in the
Vivarium sim.

**Open question:** Should these values be independent of location? They look
close enough that the differences could be due to stochastic variation, but
I think they may not be exactly the same because of differences in the
gestational age distribution between locations.

These probabilities depend on the [AI Ultrasound
module](https://vivarium-research.readthedocs.io/en/latest/models/concept_models/vivarium_mncnh_portfolio/ai_ultrasound_module/module_document.html#vivarium-mncnh-portfolio-ai-ultrasound-module)
and the LBWSG exposure but don't involve the facility choice probabilities.

In [21]:
# Conditional probability table of believed term status given both term
# status and ultrasound type, computed from each nanosim's population
ultrasound_tables = [
    bf.prob_y_given_x(
        model.population[BELIEVED_TERM_STATUS],
        model.population[[TERM_STATUS, ULTRASOUND]],
    ) for model in models
]

# Name the top column level 'location', as in the earlier per-location
# tables, so the country level is labeled instead of unnamed
prob_believed_term_status_given_term_status_and_ultrasound = pd.concat(
    ultrasound_tables, axis=1, keys=locations, names=['location'])
prob_believed_term_status_given_term_status_and_ultrasound


Unnamed: 0_level_0,Unnamed: 1_level_0,Ethiopia,Ethiopia,Nigeria,Nigeria,Pakistan,Pakistan
Unnamed: 0_level_1,believed_term_status,believed_preterm,believed_term,believed_preterm,believed_term,believed_preterm,believed_term
term_status,ultrasound,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
preterm,no_ultrasound,0.833808,0.166192,0.83387,0.16613,0.837591,0.162409
preterm,standard_ultrasound,0.867109,0.132891,0.868305,0.131695,0.870455,0.129545
term,no_ultrasound,0.220492,0.779508,0.217945,0.782055,0.217747,0.782253
term,standard_ultrasound,0.162334,0.837666,0.157915,0.842085,0.160951,0.839049


In [22]:
# Flatten to a single-level index so this table can be concatenated with
# the other validation targets
reshaped_prob_believed_term_status_given_term_status_and_ultrasound = (
    prob_believed_term_status_given_term_status_and_ultrasound
    .pipe(bf.stack_conditional_probability_table)
)
reshaped_prob_believed_term_status_given_term_status_and_ultrasound


Unnamed: 0_level_0,Ethiopia,Nigeria,Pakistan
probability_of,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
believed_preterm_given_preterm_and_no_ultrasound,0.833808,0.83387,0.837591
believed_term_given_preterm_and_no_ultrasound,0.166192,0.16613,0.162409
believed_preterm_given_preterm_and_standard_ultrasound,0.867109,0.868305,0.870455
believed_term_given_preterm_and_standard_ultrasound,0.132891,0.131695,0.129545
believed_preterm_given_term_and_no_ultrasound,0.220492,0.217945,0.217747
believed_term_given_term_and_no_ultrasound,0.779508,0.782055,0.782253
believed_preterm_given_term_and_standard_ultrasound,0.162334,0.157915,0.160951
believed_term_given_term_and_standard_ultrasound,0.837666,0.842085,0.839049


In [23]:
# Compute the element-wise average of the per-country tables. Dividing by
# the number of tables (rather than a literal 3) keeps the average correct
# if countries are added to or removed from `models`.
sum(ultrasound_tables) / len(ultrasound_tables)

Unnamed: 0_level_0,believed_term_status,believed_preterm,believed_term
term_status,ultrasound,Unnamed: 2_level_1,Unnamed: 3_level_1
preterm,no_ultrasound,0.83509,0.16491
preterm,standard_ultrasound,0.868623,0.131377
term,no_ultrasound,0.218728,0.781272
term,standard_ultrasound,0.1604,0.8396


# Concatenate all the validation targets

In [24]:
# All validation target tables, in the order their rows should appear in
# the output file
tables = [
    reshaped_optimization_targets,
    input_probabilities,
    reshaped_observed_ifd_probabilities_given_believed_term_status,
    reshaped_prob_believed_term_status_given_term_status,
    reshaped_prob_believed_term_status_given_term_status_and_ultrasound,
]

# Stack the tables row-wise, lowercase every row label (e.g. 'Male' ->
# 'male'), and give both axes consistent names
stacked_tables = pd.concat(tables)
validation_targets = (
    stacked_tables
    .rename(index=str.lower)
    .rename_axis(index='probability_of', columns='location')
)
validation_targets

location,Ethiopia,Nigeria,Pakistan
probability_of,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
in_facility_given_anc0,0.183369,0.309069,0.556322
in_facility_given_anc1,0.664786,0.584168,0.795407
preterm_given_at_home,0.163579,0.172663,0.194529
preterm_given_in_facility,0.163486,0.153,0.155164
male,0.515312,0.51245,0.519811
female,0.484688,0.48755,0.480189
anc0,0.21989,0.257001,0.075919
anc1,0.78011,0.742999,0.924081
preterm,0.163527,0.162567,0.163933
term,0.836473,0.837433,0.836067


# Save validation targets to .csv and verify format by reading back in

The "location" name for the column axis gets dropped in the round trip
(the `.csv` header row has no place to store it), but we don't actually
need it.

In [25]:
# Write the targets to a .csv file in the current working directory
output_filename = "facility_choice_validation_targets.csv"
validation_targets.to_csv(path_or_buf=output_filename)

# Read the file back, using its first column as the index, to verify the
# saved format
pd.read_csv(filepath_or_buffer=output_filename, index_col=0)

Unnamed: 0_level_0,Ethiopia,Nigeria,Pakistan
probability_of,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
in_facility_given_anc0,0.183369,0.309069,0.556322
in_facility_given_anc1,0.664786,0.584168,0.795407
preterm_given_at_home,0.163579,0.172663,0.194529
preterm_given_in_facility,0.163486,0.153,0.155164
male,0.515312,0.51245,0.519811
female,0.484688,0.48755,0.480189
anc0,0.21989,0.257001,0.075919
anc1,0.78011,0.742999,0.924081
preterm,0.163527,0.162567,0.163933
term,0.836473,0.837433,0.836067
