In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 8)

from scipy import stats
import collections

import warnings
# warnings.filterwarnings('ignore')
from matplotlib.backends.backend_pdf import PdfPages

from pathlib import Path

import db_queries as db
import vivarium_helpers.id_helper as idh
import gbd_mapping
from vivarium import Artifact

# Add the repo directory vivarium_research_ciff_sam/ to sys.path
import os, sys
repo_path = os.path.abspath('../..')
sys.path.append(repo_path)
# Assumes vivarium_research_ciff_sam/ is in sys.path
# import model_validation.vivarium_transformed_output as vto
# import model_validation.vivarium_raw_output as vro
import model_validation.vivarium_output_processing as vp
import model_validation.ciff_sam_results as csr
import model_validation.ciff_sam_plots as csp

%load_ext autoreload
%autoreload 2

!pwd
!whoami
!date

/ihme/homes/ndbs/vivarium_research_ciff_sam/nathaniel/scratch
ndbs
Fri Oct 22 22:09:50 PDT 2021


# Goal: Create a class whose methods are the same as the functions in the vivarium_output_processing module, but whose global variables are instance variables instead

## I attempted to implement this using a function wrapper (decorator, but without the syntactic sugar) to override the global variables, but it doesn't seem to work

Class written inside the `vivarium_output_processing` module:

```Python
class VivariumOutputProcessor:
    """Class to implement all the functions of the vivarium_output_processing module as methods
    and store global variables as instance variables, so that different instances can be created
    with different global parameters.
    """
    def __init__(
        self,
        value_column=VALUE_COLUMN,
        draw_column=DRAW_COLUMN,
        scenario_column=SCENARIO_COLUMN,
        measure_column=MEASURE_COLUMN,
        index_columns=None,
    ):
        self.value_column=value_column
        self.draw_column=draw_column
        self.scenario_column=scenario_column
        self.measure_column=measure_column
        self.index_columns = [self.draw_column, self.scenario_column] if index_columns is None else index_columns
        module_function_members = inspect.getmembers(sys.modules[__name__], inspect.isfunction)
        to_omit = ['set_global_index_columns']
        for function_name, function in module_function_members:
            if function_name not in to_omit:
                setattr(self, function_name, self._use_instance_variables(function))

    def _use_instance_variables(self, func):
        """Damn, I don't think this will work unless I override *all* the function names (not just column names)
        that get called from other functions , but I don't think there's a way
        to automatically assign local variable names after looking them up with getmembers.
        Instead I'd have to manually assign each one, which is not very maintainable if I add more functions
        or refactor things.
        """
        def instance_variables_wrapped_func(*args, **kwargs):
            VALUE_COLUMN = self.value_column
            DRAW_COLUMN  = self.draw_column
            SCENARIO_COLUMN = self.scenario_column
            MEASURE_COLUMN = self.measure_column
            INDEX_COLUMNS = self.index_columns
            value = self.value
            marginalize = self.marginalize
            stratify = self.stratify
            print(INDEX_COLUMNS)
#             ratio = self.ratio
#             difference = self.difference
#             averted = self.averted
#             describe = self.describe
            return func(*args, **kwargs)
        return instance_variables_wrapped_func
```

## A better solution is to rewrite the module as a class, and then have a single global instance of the class to implement the module functions (i.e. the reverse strategy of what I tried above -- define the module from the class rather than the class from the module)

In [2]:
import inspect

In [4]:
inspect.getmembers(vp, inspect.isfunction)

[('_ensure_columns_not_levels',
  <function model_validation.vivarium_output_processing._ensure_columns_not_levels(df, column_list=None)>),
 ('_ensure_iterable',
  <function model_validation.vivarium_output_processing._ensure_iterable(colnames, df, default=None)>),
 ('aggregate_mean_lower_upper',
  <function model_validation.vivarium_output_processing.aggregate_mean_lower_upper(df_or_groupby, lower_rank=0.025, upper_rank=0.975)>),
 ('assert_values_equal',
  <function model_validation.vivarium_output_processing.assert_values_equal(df1, df2, **kwargs)>),
 ('averted',
  <function model_validation.vivarium_output_processing.averted(measure: pandas.core.frame.DataFrame, baseline_scenario: str, scenario_col=None)>),
 ('compare_values',
  <function model_validation.vivarium_output_processing.compare_values(df1, df2, **kwargs)>),
 ('describe',
  <function model_validation.vivarium_output_processing.describe(df, **describe_kwargs)>),
 ('difference',
  <function model_validation.vivarium_output_

In [9]:
sys.modules['model_validation.vivarium_output_processing']

<module 'model_validation.vivarium_output_processing' from '/ihme/homes/ndbs/vivarium_research_ciff_sam/model_validation/vivarium_output_processing.py'>

In [10]:
dir(vp)

['DRAW_COLUMN',
 'INDEX_COLUMNS',
 'MEASURE_COLUMN',
 'SCENARIO_COLUMN',
 'VALUE_COLUMN',
 'VivariumOutputprocessor',
 '__builtins__',
 '__cached__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 '_ensure_columns_not_levels',
 '_ensure_iterable',
 'aggregate_mean_lower_upper',
 'assert_values_equal',
 'averted',
 'collections',
 'compare_values',
 'describe',
 'difference',
 'get_mean_lower_upper',
 'inspect',
 'list_columns',
 'marginalize',
 'pd',
 'ratio',
 'set_global_index_columns',
 'stratify',
 'value']

In [12]:
fcns = vp.get_module_functions()
fcns

[('_ensure_columns_not_levels',
  <function model_validation.vivarium_output_processing._ensure_columns_not_levels(df, column_list=None)>),
 ('_ensure_iterable',
  <function model_validation.vivarium_output_processing._ensure_iterable(colnames, df, default=None)>),
 ('aggregate_mean_lower_upper',
  <function model_validation.vivarium_output_processing.aggregate_mean_lower_upper(df_or_groupby, lower_rank=0.025, upper_rank=0.975)>),
 ('assert_values_equal',
  <function model_validation.vivarium_output_processing.assert_values_equal(df1, df2, **kwargs)>),
 ('averted',
  <function model_validation.vivarium_output_processing.averted(measure: pandas.core.frame.DataFrame, baseline_scenario: str, scenario_col=None)>),
 ('compare_values',
  <function model_validation.vivarium_output_processing.compare_values(df1, df2, **kwargs)>),
 ('describe',
  <function model_validation.vivarium_output_processing.describe(df, **describe_kwargs)>),
 ('difference',
  <function model_validation.vivarium_output_

In [17]:
list(zip(*fcns))

[('_ensure_columns_not_levels',
  '_ensure_iterable',
  'aggregate_mean_lower_upper',
  'assert_values_equal',
  'averted',
  'compare_values',
  'describe',
  'difference',
  'get_mean_lower_upper',
  'get_module_functions',
  'list_columns',
  'marginalize',
  'ratio',
  'set_global_index_columns',
  'stratify',
  'value'),
 (<function model_validation.vivarium_output_processing._ensure_columns_not_levels(df, column_list=None)>,
  <function model_validation.vivarium_output_processing._ensure_iterable(colnames, df, default=None)>,
  <function model_validation.vivarium_output_processing.aggregate_mean_lower_upper(df_or_groupby, lower_rank=0.025, upper_rank=0.975)>,
  <function model_validation.vivarium_output_processing.assert_values_equal(df1, df2, **kwargs)>,
  <function model_validation.vivarium_output_processing.averted(measure: pandas.core.frame.DataFrame, baseline_scenario: str, scenario_col=None)>,
  <function model_validation.vivarium_output_processing.compare_values(df1, df2, 

In [19]:
list(map(lambda x: x[0], fcns))

['_ensure_columns_not_levels',
 '_ensure_iterable',
 'aggregate_mean_lower_upper',
 'assert_values_equal',
 'averted',
 'compare_values',
 'describe',
 'difference',
 'get_mean_lower_upper',
 'get_module_functions',
 'list_columns',
 'marginalize',
 'ratio',
 'set_global_index_columns',
 'stratify',
 'value']

In [32]:
p = vp.VivariumOutputProcessor(index_columns=['draw_column', 'scenario_column', 'x_factor_effect'])
dir(p)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_ensure_columns_not_levels',
 '_ensure_iterable',
 '_use_instance_column_names',
 'aggregate_mean_lower_upper',
 'assert_values_equal',
 'averted',
 'compare_values',
 'describe',
 'difference',
 'draw_column',
 'get_mean_lower_upper',
 'get_module_functions',
 'index_columns',
 'list_columns',
 'marginalize',
 'measure_column',
 'ratio',
 'scenario_column',
 'stratify',
 'value',
 'value_column']

In [31]:
p.__gt__(3)

NotImplemented

In [33]:
p.index_columns

['draw_column', 'scenario_column', 'x_factor_effect']

In [34]:
locals()

{'__name__': '__main__',
 '__doc__': 'Automatically created module for IPython interactive environment',
 '__package__': None,
 '__loader__': None,
 '__spec__': None,
 '__builtin__': <module 'builtins' (built-in)>,
 '__builtins__': <module 'builtins' (built-in)>,
 '_ih': ['',
  'import inspect',
  'inspect.getmembers(vp)',
  'inspect.getmembers(vp, inspect.isfunction)',
  'sys.modules',
  "sys.modules['vp']",
  'sys.modules',
  'sys.modules.keys()',
  "sys.modules['model_validation.vivarium_output_processing']",
  'dir(vp)',
  'vp.get_module_functions()',
  'fcns = vp.get_module_functions()\nfcns',
  'zip(*zip(fcns))',
  'list(zip(*zip(fcns)))',
  'list(*zip(fcns))',
  'list(zip(fcns))',
  'list(zip(*fcns))',
  'map(lambda x: x[0], fcns)',
  'list(map(lambda x: x[0], fcns))',
  "p = vp.VivariumOutputProcessor(index_columns=['draw_column', 'scenario_column', 'x_factor_effect'])\np",
  'p.index_columns',
  'dir(P)',
  'dir(p)',
  'for x in range(4) if x != 3:',
  'for x in range(4) if x 

In [43]:
def print_locals():
    print(locals())

def f():
    print(locals())
    locals()['y'] = 5
    print(locals())
    print(y)

In [44]:
print_locals()
f()

{}
{}
{'y': 5}


NameError: name 'y' is not defined

In [45]:
vp.INDEX_COLUMNS

['input_draw', 'scenario']

In [46]:
vp.VALUE_COLUMN

<property at 0x2ad5bd336ae0>

In [47]:
vp.VALUE_COLUMN()

TypeError: 'property' object is not callable

In [48]:
vp.VALUE_COLUMN.fget

<function model_validation.vivarium_output_processing.VALUE_COLUMN()>

In [49]:
vp.VALUE_COLUMN.fget()

'value'

In [52]:
vp.VALUE_COLUMN.fset()

TypeError: 'NoneType' object is not callable

In [55]:
c = vp.C()

In [56]:
c.VALUE_COLUMN

'value'

# Useful notes for implementing a class with the same functionality as my `vivarium_output_processing` module:

https://stackoverflow.com/questions/2933470/how-do-i-call-setattr-on-the-current-module

https://stackoverflow.com/questions/880530/can-modules-have-properties-the-same-way-that-objects-can

https://stackoverflow.com/questions/2447353/getattr-on-a-module

# Test my class implementation

It doesn't work -- the methods seem to still be using global variables rather than instance variables.

In [58]:
data = csr.VivariumResults.cleaned_from_model_spec('4.5.2')
data.table_names()

['wasting_transition_count',
 'wasting_state_person_time',
 'deaths',
 'stunting_state_person_time',
 'population',
 'ylls',
 'ylds',
 'person_time',
 'cause_state_person_time',
 'cause_transition_count']

In [70]:
p = vp.VivariumOutputProcessor(index_columns=['draw_column', 'scenario_column', 'x_factor_effect'])
dir(p)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_ensure_columns_not_levels',
 '_ensure_iterable',
 '_use_instance_variables',
 'aggregate_mean_lower_upper',
 'assert_values_equal',
 'averted',
 'compare_values',
 'describe',
 'difference',
 'draw_column',
 'get_mean_lower_upper',
 'get_module_functions',
 'index_columns',
 'list_columns',
 'marginalize',
 'measure_column',
 'ratio',
 'scenario_column',
 'stratify',
 'value',
 'value_column']

In [60]:
data.wasting_state_person_time

Unnamed: 0,sex,year,wasting_state,measure,input_draw,scenario,x_factor_effect,value,x_factor,sq_lns,wasting_treatment,age
0,female,2022,mild_child_wasting,state_person_time,29,baseline,1.1,0.000000,cat2,covered,covered,early_neonatal
1,female,2022,mild_child_wasting,state_person_time,29,baseline,1.1,0.000000,cat1,covered,covered,early_neonatal
2,female,2022,mild_child_wasting,state_person_time,29,baseline,1.1,0.000000,cat2,covered,uncovered,early_neonatal
3,female,2022,mild_child_wasting,state_person_time,29,baseline,1.1,0.000000,cat1,covered,uncovered,early_neonatal
...,...,...,...,...,...,...,...,...,...,...,...,...
345596,male,2026,susceptible_to_child_wasting,state_person_time,946,wasting_treatment,1.5,16475.270363,cat2,uncovered,covered,2_to_4
345597,male,2026,susceptible_to_child_wasting,state_person_time,946,wasting_treatment,1.5,3419.826146,cat1,uncovered,covered,2_to_4
345598,male,2026,susceptible_to_child_wasting,state_person_time,946,wasting_treatment,1.5,1704.976044,cat2,uncovered,uncovered,2_to_4
345599,male,2026,susceptible_to_child_wasting,state_person_time,946,wasting_treatment,1.5,384.646133,cat1,uncovered,uncovered,2_to_4


In [71]:
df = p.ratio(
    data.wasting_state_person_time,
    data.wasting_state_person_time,
    strata=[],
    numerator_broadcast='wasting_state'
)
df

['draw_column', 'scenario_column', 'x_factor_effect']


Unnamed: 0,input_draw,scenario,wasting_state,value,numerator_measure,denominator_measure,multiplier
0,29,baseline,mild_child_wasting,0.214319,state_person_time,state_person_time,1
1,29,baseline,moderate_acute_malnutrition,0.079999,state_person_time,state_person_time,1
2,29,baseline,severe_acute_malnutrition,0.019436,state_person_time,state_person_time,1
3,29,baseline,susceptible_to_child_wasting,0.686246,state_person_time,state_person_time,1
...,...,...,...,...,...,...,...
140,946,wasting_treatment,mild_child_wasting,0.219033,state_person_time,state_person_time,1
141,946,wasting_treatment,moderate_acute_malnutrition,0.071468,state_person_time,state_person_time,1
142,946,wasting_treatment,severe_acute_malnutrition,0.017188,state_person_time,state_person_time,1
143,946,wasting_treatment,susceptible_to_child_wasting,0.692312,state_person_time,state_person_time,1


In [72]:
p.marginalize(df, 'wasting_state')

['draw_column', 'scenario_column', 'x_factor_effect']


Unnamed: 0,denominator_measure,input_draw,multiplier,numerator_measure,scenario,value
0,state_person_time,29,1,state_person_time,baseline,1.0
1,state_person_time,29,1,state_person_time,sqlns,1.0
2,state_person_time,29,1,state_person_time,wasting_treatment,1.0
3,state_person_time,223,1,state_person_time,baseline,1.0
...,...,...,...,...,...,...
32,state_person_time,829,1,state_person_time,wasting_treatment,1.0
33,state_person_time,946,1,state_person_time,baseline,1.0
34,state_person_time,946,1,state_person_time,sqlns,1.0
35,state_person_time,946,1,state_person_time,wasting_treatment,1.0


In [69]:
p.value(data.wasting_state_person_time, include='wasting_state')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,value
wasting_state,input_draw,scenario,Unnamed: 3_level_1
mild_child_wasting,29,baseline,0.000000
mild_child_wasting,29,baseline,0.000000
mild_child_wasting,29,baseline,0.000000
mild_child_wasting,29,baseline,0.000000
...,...,...,...
susceptible_to_child_wasting,946,wasting_treatment,16475.270363
susceptible_to_child_wasting,946,wasting_treatment,3419.826146
susceptible_to_child_wasting,946,wasting_treatment,1704.976044
susceptible_to_child_wasting,946,wasting_treatment,384.646133


In [67]:
p.index_columns

['draw_column', 'scenario_column', 'x_factor_effect']

In [73]:
p.stratify(data.wasting_state_person_time, 'age')

['draw_column', 'scenario_column', 'x_factor_effect']


Unnamed: 0,age,input_draw,scenario,value
0,1-5_months,29,baseline,223857.070500
1,1-5_months,29,sqlns,223857.070500
2,1-5_months,29,wasting_treatment,223857.070500
3,1-5_months,223,baseline,224011.779603
...,...,...,...,...
212,late_neonatal,829,wasting_treatment,31013.894593
213,late_neonatal,946,baseline,30594.175222
214,late_neonatal,946,sqlns,30594.175222
215,late_neonatal,946,wasting_treatment,30594.175222
