In [1]:
from datetime import datetime, timedelta
from pathlib import Path

import itertools
import matplotlib
from vivarium import Artifact, InteractiveContext
import ipywidgets
import pandas as pd, numpy as np
import seaborn as sns
pd.set_option('display.max_rows', 8)

In [2]:
import pickle
from vivarium_ciff_sam.constants import data_keys, metadata

The purpose of this notebook is to verify:

1. if the age-specific state table relative risks are equivalent to the pipeline relative risks
2. if the pipeline relative risk updates appropriately upon transition from the early to late neonatal age group
3. if the pipeline relative risk value does not change across timesteps other than for the age group transition

In [3]:
path = Path('/ihme/homes/alibow/vivarium_ciff_sam/src/vivarium_ciff_sam/model_specifications/ciff_sam.yaml')
path

PosixPath('/ihme/homes/alibow/vivarium_ciff_sam/src/vivarium_ciff_sam/model_specifications/ciff_sam.yaml')

In [4]:
# manually updated the following parameters in the model specification file because it threw an error otherwise
    # population_size=100000
    # age_end=0.0767123287671233
    # exit_age=0.0767123287671233

sim = InteractiveContext(Path(path))

2022-01-18 09:43:16.968 | DEBUG    | vivarium.framework.values:register_value_modifier:375 - Registering metrics.1.population_manager.metrics as modifier to metrics
2022-01-18 09:43:16.995 | DEBUG    | vivarium.framework.artifact.manager:_load_artifact:65 - Running simulation from artifact located at /ihme/costeffectiveness/artifacts/vivarium_ciff_sam/ethiopia.hdf.
2022-01-18 09:43:16.996 | DEBUG    | vivarium.framework.artifact.manager:_load_artifact:66 - Artifact base filter terms are ['draw == 0'].
2022-01-18 09:43:16.997 | DEBUG    | vivarium.framework.artifact.manager:_load_artifact:67 - Artifact additional filter terms are None.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(ilocs[0], value, pi)
A value is trying to be set on a copy of a 

2022-01-18 09:43:23.721 | DEBUG    | vivarium.framework.values:_register_value_producer:325 - Registering value pipeline low_birth_weight_and_short_gestation.propensity
2022-01-18 09:43:23.722 | DEBUG    | vivarium.framework.values:_register_value_producer:325 - Registering value pipeline low_birth_weight_and_short_gestation.exposure
2022-01-18 09:43:23.900 | DEBUG    | vivarium.framework.values:_register_value_producer:325 - Registering value pipeline risk_factor.low_birth_weight_and_short_gestation.exposure_parameters
2022-01-18 09:43:23.901 | DEBUG    | vivarium.framework.values:_register_value_producer:325 - Registering value pipeline low_birth_weight.propensity
2022-01-18 09:43:23.902 | DEBUG    | vivarium.framework.values:_register_value_producer:325 - Registering value pipeline low_birth_weight.exposure
2022-01-18 09:43:23.914 | DEBUG    | vivarium.framework.values:_register_value_producer:325 - Registering value pipeline short_gestation.propensity
2022-01-18 09:43:23.915 | DEBU

## Capture birthweight at initialization
For the early and late neonatal age groups

In [5]:
pop0 = sim.get_population()
pop0

Unnamed: 0,tracked,sex,location,alive,age,exit_time,entrance_time,years_of_life_lost,cause_of_death,diarrheal_diseases,...,susceptible_to_lower_respiratory_infections_event_count,susceptible_to_lower_respiratory_infections_event_time,lower_respiratory_infections_event_count,lower_respiratory_infections_event_time,effect_of_low_birth_weight_and_short_gestation_on_early_neonatal_diarrheal_diseases_relative_risk,effect_of_low_birth_weight_and_short_gestation_on_late_neonatal_diarrheal_diseases_relative_risk,effect_of_low_birth_weight_and_short_gestation_on_early_neonatal_lower_respiratory_infections_relative_risk,effect_of_low_birth_weight_and_short_gestation_on_late_neonatal_lower_respiratory_infections_relative_risk,effect_of_low_birth_weight_and_short_gestation_on_early_neonatal_affected_unmodeled_relative_risk,effect_of_low_birth_weight_and_short_gestation_on_late_neonatal_affected_unmodeled_relative_risk
0,True,Male,Ethiopia,alive,0.044994,NaT,2021-12-31 12:00:00,0.0,not_dead,diarrheal_diseases,...,0,NaT,0,NaT,4.861793,4.244653,4.861793,4.244653,4.861793,4.244653
1,True,Male,Ethiopia,alive,0.046464,NaT,2021-12-31 12:00:00,0.0,not_dead,susceptible_to_diarrheal_diseases,...,0,NaT,0,NaT,1.000728,1.019543,1.000728,1.019543,1.000728,1.019543
2,True,Female,Ethiopia,alive,0.052629,NaT,2021-12-31 12:00:00,0.0,not_dead,susceptible_to_diarrheal_diseases,...,0,NaT,0,NaT,1.696371,2.288050,1.696371,2.288050,1.696371,2.288050
3,True,Female,Ethiopia,alive,0.075327,NaT,2021-12-31 12:00:00,0.0,not_dead,susceptible_to_diarrheal_diseases,...,0,NaT,0,NaT,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9996,True,Female,Ethiopia,alive,0.060705,NaT,2021-12-31 12:00:00,0.0,not_dead,susceptible_to_diarrheal_diseases,...,0,NaT,0,NaT,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
9997,True,Female,Ethiopia,alive,0.017234,NaT,2021-12-31 12:00:00,0.0,not_dead,susceptible_to_diarrheal_diseases,...,0,NaT,0,NaT,1.108534,1.452202,1.108534,1.452202,1.108534,1.452202
9998,True,Female,Ethiopia,alive,0.068694,NaT,2021-12-31 12:00:00,0.0,not_dead,susceptible_to_diarrheal_diseases,...,0,NaT,0,NaT,1.000000,1.367767,1.000000,1.367767,1.000000,1.367767
9999,True,Male,Ethiopia,alive,0.066979,NaT,2021-12-31 12:00:00,0.0,not_dead,susceptible_to_diarrheal_diseases,...,0,NaT,0,NaT,1.140750,1.432534,1.140750,1.432534,1.140750,1.432534


In [6]:
art = Artifact('/ihme/costeffectiveness/artifacts/vivarium_ciff_sam/ethiopia.hdf',
               filter_terms=['year_start == 2019', f'age_end <= 5'])

In [67]:
interpolators = art.load(data_keys.LBWSG.RELATIVE_RISK_INTERPOLATOR)['draw_0'].reset_index()
interpolators = (
    # isolate RRs for target and drop non-neonatal age groups since they have RR == 1.0
    interpolators[interpolators['age_end'] < 0.5]
    .drop(columns=['age_end', 'year_start', 'year_end'])
    .set_index(['sex', 'draw_0'])
    .apply(lambda row: (metadata.AGE_GROUP.EARLY_NEONATAL_ID if row['age_start'] == 0.0
                        else metadata.AGE_GROUP.LATE_NEONATAL_ID), axis=1)
    .rename('age_group_id')
    .reset_index()
    .set_index(['sex', 'age_group_id'])
)['draw_0']

interpolators = interpolators.apply(lambda x: pickle.loads(bytes.fromhex(x)))
interpolators

sex     age_group_id
Female  2               <scipy.interpolate.fitpack2.RectBivariateSplin...
        3               <scipy.interpolate.fitpack2.RectBivariateSplin...
Male    2               <scipy.interpolate.fitpack2.RectBivariateSplin...
        3               <scipy.interpolate.fitpack2.RectBivariateSplin...
Name: draw_0, dtype: object

In [68]:
# inputs for interpolator function
is_tmrel = (sim.get_value('low_birth_weight_and_short_gestation.exposure')(pop0.index)
            .isin(data_keys.LBWSG.TMREL_CATEGORIES))
is_male = pop0['sex']=='Male'
is_enn = pop0['age']<7/365
gestational_age = sim.get_value('short_gestation.exposure')(pop0.index)
birth_weight = sim.get_value('low_birth_weight.exposure')(pop0.index)

In [69]:
log_relative_risk = pd.Series(0.0, index=pop0.index, name='enn_interpolated_rr') #create series initialized at  0
log_relative_risk[is_male & ~is_tmrel] = (
    interpolators['Male', 2](gestational_age[is_male & ~is_tmrel],
                                            birth_weight[is_male & ~is_tmrel], grid=False)
)
log_relative_risk[~is_male & ~is_tmrel] = (
    interpolators['Female', 2](gestational_age[~is_male & ~is_tmrel],
                                              birth_weight[~is_male & ~is_tmrel], grid=False)
)
enn_rrs = np.exp(log_relative_risk)
enn_rrs

0       4.861793
1       1.000728
2       1.696371
3       1.000000
          ...   
9996    1.000000
9997    1.108534
9998    1.000000
9999    1.140750
Name: enn_interpolated_rr, Length: 10000, dtype: float64

In [70]:
log_relative_risk = pd.Series(0.0, index=pop0.index, name='lnn_interpolated_rr') #create series initialized at  0
log_relative_risk[is_male & ~is_tmrel] = (
    interpolators['Male', 3](gestational_age[is_male & ~is_tmrel],
                                            birth_weight[is_male & ~is_tmrel], grid=False)
)
log_relative_risk[~is_male & ~is_tmrel] = (
    interpolators['Female', 3](gestational_age[~is_male & ~is_tmrel],
                                              birth_weight[~is_male & ~is_tmrel], grid=False)
)
lnn_rrs = np.exp(log_relative_risk)

In [72]:
#data = pd.concat([pop0.loc[:, ['age','sex',
#                              'effect_of_low_birth_weight_and_short_gestation_on_late_neonatal_diarrheal_diseases_relative_risk',
#                              'effect_of_low_birth_weight_and_short_gestation_on_early_neonatal_diarrheal_diseases_relative_risk']],
#                 sim.get_value('low_birth_weight_and_short_gestation.exposure')(pop0.index),
#                 sim.get_value('low_birth_weight.exposure')(pop0.index),
#                 sim.get_value('effect_of_low_birth_weight_and_short_gestation_on_diarrheal_diseases.relative_risk')(pop0.index),
#                 sim.get_value('short_gestation.exposure')(pop0.index)], 
#                 axis=1).rename(columns={0:'relative_risk'})
#data = pd.concat([data,enn_rrs,lnn_rrs],axis=1)
#assert np.all(test.enn_interpolated_rr==test.effect_of_low_birth_weight_and_short_gestation_on_early_neonatal_affected_unmodeled_relative_risk)
#assert np.all(test.lnn_interpolated_rr==test.effect_of_low_birth_weight_and_short_gestation_on_late_neonatal_affected_unmodeled_relative_risk)
data

Unnamed: 0,age,sex,effect_of_low_birth_weight_and_short_gestation_on_late_neonatal_diarrheal_diseases_relative_risk,effect_of_low_birth_weight_and_short_gestation_on_early_neonatal_diarrheal_diseases_relative_risk,risk_factor.low_birth_weight_and_short_gestation_exposure,low_birth_weight.exposure,relative_risk,short_gestation.exposure,enn_interpolated_rr,lnn_interpolated_rr,timestep
0,0.044994,Male,4.244653,4.861793,cat40,2580.395737,4.244653,36.164988,4.861793,4.244653,0
1,0.046464,Male,1.019543,1.000728,cat52,3448.290620,1.019543,38.995680,1.000728,1.019543,0
2,0.052629,Female,2.288050,1.696371,cat42,2926.178834,2.288050,37.375198,1.696371,2.288050,0
3,0.075327,Female,1.000000,1.000000,cat54,3681.270909,1.000000,39.985003,1.000000,1.000000,0
...,...,...,...,...,...,...,...,...,...,...,...
9996,0.060705,Female,1.000000,1.000000,cat54,3657.393446,1.000000,38.801412,1.000000,1.000000,0
9997,0.017234,Female,1.452202,1.108534,cat48,3396.859007,1.108534,37.890959,1.108534,1.452202,0
9998,0.068694,Female,1.367767,1.000000,cat52,3286.124118,1.367767,39.102706,1.000000,1.367767,0
9999,0.066979,Male,1.432534,1.140750,cat48,3231.580577,1.432534,37.984494,1.140750,1.432534,0


In [38]:
#data_t = data.copy()
#data['timestep'] = 0 
#all_data = data.copy()
for x in list(range(1,15)):
    # take time step and initialize pop tables
    sim.step()
    data_t_minus_1 = data_t.copy()
    pop_t = sim.get_population()
    
    # interpolate rrs
    is_tmrel = (sim.get_value('low_birth_weight_and_short_gestation.exposure')(pop_t.index)
                .isin(data_keys.LBWSG.TMREL_CATEGORIES))
    is_male = pop_t['sex']=='Male'
    is_enn = pop_t['age']<7/365
    gestational_age = sim.get_value('short_gestation.exposure')(pop_t.index)
    birth_weight = sim.get_value('low_birth_weight.exposure')(pop_t.index)
    log_relative_risk = pd.Series(0.0, index=pop_t.index, name='enn_interpolated_rr') 
    log_relative_risk[is_male & ~is_tmrel] = (
        interpolators['Male', 2](gestational_age[is_male & ~is_tmrel],
                                                birth_weight[is_male & ~is_tmrel], grid=False)
    )
    log_relative_risk[~is_male & ~is_tmrel] = (
        interpolators['Female', 2](gestational_age[~is_male & ~is_tmrel],
                                                  birth_weight[~is_male & ~is_tmrel], grid=False)
    )
    enn_rrs = np.exp(log_relative_risk)
    log_relative_risk = pd.Series(0.0, index=pop_t.index, name='lnn_interpolated_rr')
    log_relative_risk[is_male & ~is_tmrel] = (
        interpolators['Male', 3](gestational_age[is_male & ~is_tmrel],
                                                birth_weight[is_male & ~is_tmrel], grid=False)
    )
    log_relative_risk[~is_male & ~is_tmrel] = (
        interpolators['Female', 3](gestational_age[~is_male & ~is_tmrel],
                                                  birth_weight[~is_male & ~is_tmrel], grid=False)
    )
    lnn_rrs = np.exp(log_relative_risk)
    
    data_t = pd.concat([pop_t.loc[:, ['age','sex', 'alive', 'tracked', 'entrance_time', 'exit_time',
                              'effect_of_low_birth_weight_and_short_gestation_on_late_neonatal_diarrheal_diseases_relative_risk',
                              'effect_of_low_birth_weight_and_short_gestation_on_early_neonatal_diarrheal_diseases_relative_risk']],
                 sim.get_value('low_birth_weight_and_short_gestation.exposure')(pop_t.index),
                 sim.get_value('low_birth_weight.exposure')(pop_t.index),
                 sim.get_value('effect_of_low_birth_weight_and_short_gestation_on_diarrheal_diseases.relative_risk')(pop_t.index),
                 sim.get_value('short_gestation.exposure')(pop_t.index)], 
                 axis=1).rename(columns={0:'relative_risk'})
    data_t = pd.concat([data_t,enn_rrs,lnn_rrs],axis=1)
    
    data_t['timestep'] = x
    all_data = pd.concat([all_data,data_t])


2022-01-18 09:54:58.638 | DEBUG    | vivarium.framework.engine:step:142 - 2022-01-08 00:00:00
  column = self.clip_to_seconds(column.astype(np.int64))
2022-01-18 09:55:02.080 | DEBUG    | vivarium.framework.engine:step:142 - 2022-01-08 12:00:00
2022-01-18 09:55:05.502 | DEBUG    | vivarium.framework.engine:step:142 - 2022-01-09 00:00:00
2022-01-18 09:55:08.933 | DEBUG    | vivarium.framework.engine:step:142 - 2022-01-09 12:00:00
2022-01-18 09:55:12.505 | DEBUG    | vivarium.framework.engine:step:142 - 2022-01-10 00:00:00
2022-01-18 09:55:15.927 | DEBUG    | vivarium.framework.engine:step:142 - 2022-01-10 12:00:00
2022-01-18 09:55:19.184 | DEBUG    | vivarium.framework.engine:step:142 - 2022-01-11 00:00:00
2022-01-18 09:55:22.653 | DEBUG    | vivarium.framework.engine:step:142 - 2022-01-11 12:00:00
2022-01-18 09:55:26.886 | DEBUG    | vivarium.framework.engine:step:142 - 2022-01-12 00:00:00
2022-01-18 09:55:30.072 | DEBUG    | vivarium.framework.engine:step:142 - 2022-01-12 12:00:00
202

In [39]:
all_data

Unnamed: 0,age,sex,effect_of_low_birth_weight_and_short_gestation_on_late_neonatal_diarrheal_diseases_relative_risk,effect_of_low_birth_weight_and_short_gestation_on_early_neonatal_diarrheal_diseases_relative_risk,risk_factor.low_birth_weight_and_short_gestation_exposure,low_birth_weight.exposure,relative_risk,short_gestation.exposure,enn_interpolated_rr,lnn_interpolated_rr,timestep,alive,tracked,entrance_time,exit_time
0,0.044994,Male,4.244653,4.861793,cat40,2580.395737,4.244653,36.164988,4.861793,4.244653,0,,,NaT,NaT
1,0.046464,Male,1.019543,1.000728,cat52,3448.290620,1.019543,38.995680,1.000728,1.019543,0,,,NaT,NaT
2,0.052629,Female,2.288050,1.696371,cat42,2926.178834,2.288050,37.375198,1.696371,2.288050,0,,,NaT,NaT
3,0.075327,Female,1.000000,1.000000,cat54,3681.270909,1.000000,39.985003,1.000000,1.000000,0,,,NaT,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15057,0.000387,Female,1.850213,1.167391,cat52,3006.503460,1.167391,39.506639,1.167391,1.850213,14,alive,True,2022-01-14 12:00:00,NaT
15058,0.000914,Male,1.519061,1.161336,cat51,3153.268700,1.161336,41.583316,1.161336,1.519061,14,alive,True,2022-01-14 12:00:00,NaT
15059,0.000515,Female,5.574996,4.187688,cat36,2450.263469,4.187688,36.626378,4.187688,5.574996,14,alive,True,2022-01-14 12:00:00,NaT
15060,0.001030,Male,2.634015,1.888940,cat44,2735.789303,1.888940,38.981567,1.888940,2.634015,14,alive,True,2022-01-14 12:00:00,NaT


In [40]:
all_data.loc[(all_data.age<7/365)&
            (all_data.relative_risk!=all_data.enn_interpolated_rr)]

Unnamed: 0,age,sex,effect_of_low_birth_weight_and_short_gestation_on_late_neonatal_diarrheal_diseases_relative_risk,effect_of_low_birth_weight_and_short_gestation_on_early_neonatal_diarrheal_diseases_relative_risk,risk_factor.low_birth_weight_and_short_gestation_exposure,low_birth_weight.exposure,relative_risk,short_gestation.exposure,enn_interpolated_rr,lnn_interpolated_rr,timestep,alive,tracked,entrance_time,exit_time


In [41]:
all_data.loc[(all_data.age>7/365)&
            (all_data.relative_risk!=all_data.lnn_interpolated_rr)]

Unnamed: 0,age,sex,effect_of_low_birth_weight_and_short_gestation_on_late_neonatal_diarrheal_diseases_relative_risk,effect_of_low_birth_weight_and_short_gestation_on_early_neonatal_diarrheal_diseases_relative_risk,risk_factor.low_birth_weight_and_short_gestation_exposure,low_birth_weight.exposure,relative_risk,short_gestation.exposure,enn_interpolated_rr,lnn_interpolated_rr,timestep,alive,tracked,entrance_time,exit_time
118,0.019310,Female,1.414896,1.332400,cat48,3224.761362,1.414896,37.409889,1.203324,1.681791,1,alive,True,2021-12-31 12:00:00,NaT
255,0.019772,Male,3.750499,13.817846,cat42,2719.160985,3.750499,37.078773,2.509580,3.105795,1,alive,True,2021-12-31 12:00:00,NaT
329,0.019553,Male,1.269349,2.143247,cat51,3477.025667,1.269349,40.297039,1.000000,1.103268,1,alive,True,2021-12-31 12:00:00,NaT
396,0.020389,Female,1.305593,1.058833,cat53,4474.464719,1.305593,38.446207,1.000000,1.000000,1,alive,True,2021-12-31 12:00:00,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12520,0.020424,Female,3.482163,2.596194,cat42,2719.042852,3.482163,37.602564,2.098419,2.817657,14,alive,True,2022-01-07 12:00:00,NaT
12522,0.019989,Male,3.587184,2.975257,cat45,3105.229928,3.587184,36.071767,3.102668,2.509207,14,alive,True,2022-01-07 12:00:00,NaT
12524,0.019535,Male,15.949349,35.542693,cat27,1564.620416,15.949349,31.466664,47.362382,31.743178,14,alive,True,2022-01-07 12:00:00,NaT
12529,0.019495,Female,177.856176,171.553906,cat14,752.544417,177.856176,30.691692,135.736546,151.270806,14,alive,True,2022-01-07 12:00:00,NaT


In [42]:
all_data.loc[(all_data.age<7/365)&
            (all_data.effect_of_low_birth_weight_and_short_gestation_on_early_neonatal_diarrheal_diseases_relative_risk
             !=all_data.enn_interpolated_rr)]

Unnamed: 0,age,sex,effect_of_low_birth_weight_and_short_gestation_on_late_neonatal_diarrheal_diseases_relative_risk,effect_of_low_birth_weight_and_short_gestation_on_early_neonatal_diarrheal_diseases_relative_risk,risk_factor.low_birth_weight_and_short_gestation_exposure,low_birth_weight.exposure,relative_risk,short_gestation.exposure,enn_interpolated_rr,lnn_interpolated_rr,timestep,alive,tracked,entrance_time,exit_time


In [43]:
mismatch_interp = all_data.loc[#(all_data.age>7/365)&
            (all_data.effect_of_low_birth_weight_and_short_gestation_on_late_neonatal_diarrheal_diseases_relative_risk
             !=all_data.lnn_interpolated_rr)]
mismatch_interp.age.describe()

count    16548.000000
mean         0.031983
std          0.009169
min          0.019178
25%          0.024226
50%          0.030348
75%          0.038357
max          0.057491
Name: age, dtype: float64

In [84]:
# sims born into sim have IDs between 10,000 and 12,533

mismatched = []
matched = []

for i in list(range(10001,15060)):
    if all_data.loc[i]['age'].max() > 7/365:
        if len(all_data.loc[i]['low_birth_weight.exposure'].unique())>1:
            mismatched.append(i)
        else:
            matched.append(i)

In [85]:
# sims born into sim have IDs between 10,000 and 12,533

ga_mismatched = []
ga_matched = []

for i in list(range(10001,15060)):
    if all_data.loc[i]['age'].max() > 7/365:
        if len(all_data.loc[i]['short_gestation.exposure'].unique())>1:
            ga_mismatched.append(i)
        else:
            ga_matched.append(i)

In [47]:
len(matched)

1969

In [48]:
len(mismatched)

520

In [86]:
len(ga_matched)

1988

In [87]:
len(ga_mismatched)

501

In [75]:
all_data.loc[mismatched]

Unnamed: 0,age,sex,effect_of_low_birth_weight_and_short_gestation_on_late_neonatal_diarrheal_diseases_relative_risk,effect_of_low_birth_weight_and_short_gestation_on_early_neonatal_diarrheal_diseases_relative_risk,risk_factor.low_birth_weight_and_short_gestation_exposure,low_birth_weight.exposure,relative_risk,short_gestation.exposure,enn_interpolated_rr,lnn_interpolated_rr,timestep,alive,tracked,entrance_time,exit_time
10002,0.001268,Female,1.475793,1.160717,cat48,3466.780442,1.160717,37.541801,1.160717,1.475793,1,alive,True,2022-01-01 00:00:00,NaT
10002,0.002637,Female,1.475793,1.160717,cat48,3466.780442,1.160717,37.541801,1.160717,1.475793,2,alive,True,2022-01-01 00:00:00,NaT
10002,0.004006,Female,1.475793,1.160717,cat48,3466.780442,1.160717,37.541801,1.160717,1.475793,3,alive,True,2022-01-01 00:00:00,NaT
10002,0.005375,Female,1.475793,1.160717,cat48,3466.780442,1.160717,37.541801,1.160717,1.475793,4,alive,True,2022-01-01 00:00:00,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12524,0.015429,Male,15.949349,35.542693,cat26,2064.620416,35.542693,31.466664,35.542693,15.949349,11,alive,True,2022-01-07 12:00:00,NaT
12524,0.016797,Male,15.949349,35.542693,cat26,2064.620416,35.542693,31.466664,35.542693,15.949349,12,alive,True,2022-01-07 12:00:00,NaT
12524,0.018166,Male,15.949349,35.542693,cat26,2064.620416,35.542693,31.466664,35.542693,15.949349,13,alive,True,2022-01-07 12:00:00,NaT
12524,0.019535,Male,15.949349,35.542693,cat27,1564.620416,15.949349,31.466664,47.362382,31.743178,14,alive,True,2022-01-07 12:00:00,NaT


In [76]:
all_data.loc[10002]

Unnamed: 0,age,sex,effect_of_low_birth_weight_and_short_gestation_on_late_neonatal_diarrheal_diseases_relative_risk,effect_of_low_birth_weight_and_short_gestation_on_early_neonatal_diarrheal_diseases_relative_risk,risk_factor.low_birth_weight_and_short_gestation_exposure,low_birth_weight.exposure,relative_risk,short_gestation.exposure,enn_interpolated_rr,lnn_interpolated_rr,timestep,alive,tracked,entrance_time,exit_time
10002,0.001268,Female,1.475793,1.160717,cat48,3466.780442,1.160717,37.541801,1.160717,1.475793,1,alive,True,2022-01-01,NaT
10002,0.002637,Female,1.475793,1.160717,cat48,3466.780442,1.160717,37.541801,1.160717,1.475793,2,alive,True,2022-01-01,NaT
10002,0.004006,Female,1.475793,1.160717,cat48,3466.780442,1.160717,37.541801,1.160717,1.475793,3,alive,True,2022-01-01,NaT
10002,0.005375,Female,1.475793,1.160717,cat48,3466.780442,1.160717,37.541801,1.160717,1.475793,4,alive,True,2022-01-01,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10002,0.034122,Female,1.475793,1.160717,cat47,3966.780442,1.475793,36.541801,3.280661,1.901827,11,alive,True,2022-01-01,NaT
10002,0.035491,Female,1.475793,1.160717,cat47,3966.780442,1.475793,36.541801,3.280661,1.901827,12,alive,True,2022-01-01,NaT
10002,0.036860,Female,1.475793,1.160717,cat47,3966.780442,1.475793,36.541801,3.280661,1.901827,13,alive,True,2022-01-01,NaT
10002,0.038229,Female,1.475793,1.160717,cat47,3966.780442,1.475793,36.541801,3.280661,1.901827,14,alive,True,2022-01-01,NaT


In [81]:
all_data.loc[12524]

Unnamed: 0,age,sex,effect_of_low_birth_weight_and_short_gestation_on_late_neonatal_diarrheal_diseases_relative_risk,effect_of_low_birth_weight_and_short_gestation_on_early_neonatal_diarrheal_diseases_relative_risk,risk_factor.low_birth_weight_and_short_gestation_exposure,low_birth_weight.exposure,relative_risk,short_gestation.exposure,enn_interpolated_rr,lnn_interpolated_rr,timestep,alive,tracked,entrance_time,exit_time
12524,0.000370,Male,15.949349,35.542693,cat26,2064.620416,35.542693,31.466664,35.542693,15.949349,14,alive,True,2022-01-07 12:00:00,NaT
12524,0.001739,Male,15.949349,35.542693,cat26,2064.620416,35.542693,31.466664,35.542693,15.949349,1,alive,True,2022-01-07 12:00:00,NaT
12524,0.003108,Male,15.949349,35.542693,cat26,2064.620416,35.542693,31.466664,35.542693,15.949349,2,alive,True,2022-01-07 12:00:00,NaT
12524,0.004477,Male,15.949349,35.542693,cat26,2064.620416,35.542693,31.466664,35.542693,15.949349,3,alive,True,2022-01-07 12:00:00,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12524,0.015429,Male,15.949349,35.542693,cat26,2064.620416,35.542693,31.466664,35.542693,15.949349,11,alive,True,2022-01-07 12:00:00,NaT
12524,0.016797,Male,15.949349,35.542693,cat26,2064.620416,35.542693,31.466664,35.542693,15.949349,12,alive,True,2022-01-07 12:00:00,NaT
12524,0.018166,Male,15.949349,35.542693,cat26,2064.620416,35.542693,31.466664,35.542693,15.949349,13,alive,True,2022-01-07 12:00:00,NaT
12524,0.019535,Male,15.949349,35.542693,cat27,1564.620416,15.949349,31.466664,47.362382,31.743178,14,alive,True,2022-01-07 12:00:00,NaT


In [82]:
all_data.loc[mismatched[10]]

Unnamed: 0,age,sex,effect_of_low_birth_weight_and_short_gestation_on_late_neonatal_diarrheal_diseases_relative_risk,effect_of_low_birth_weight_and_short_gestation_on_early_neonatal_diarrheal_diseases_relative_risk,risk_factor.low_birth_weight_and_short_gestation_exposure,low_birth_weight.exposure,relative_risk,short_gestation.exposure,enn_interpolated_rr,lnn_interpolated_rr,timestep,alive,tracked,entrance_time,exit_time
10037,0.000687,Female,1.672086,1.07985,cat52,3100.290422,1.079850,39.139636,1.07985,1.672086,1,alive,True,2022-01-01,NaT
10037,0.002056,Female,1.672086,1.07985,cat52,3100.290422,1.079850,39.139636,1.07985,1.672086,2,alive,True,2022-01-01,NaT
10037,0.003425,Female,1.672086,1.07985,cat52,3100.290422,1.079850,39.139636,1.07985,1.672086,3,alive,True,2022-01-01,NaT
10037,0.004794,Female,1.672086,1.07985,cat52,3100.290422,1.079850,39.139636,1.07985,1.672086,4,alive,True,2022-01-01,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10037,0.033542,Female,1.672086,1.07985,cat53,4100.290422,1.672086,39.139636,1.00000,1.000000,11,alive,True,2022-01-01,NaT
10037,0.034910,Female,1.672086,1.07985,cat53,4100.290422,1.672086,39.139636,1.00000,1.000000,12,alive,True,2022-01-01,NaT
10037,0.036279,Female,1.672086,1.07985,cat53,4100.290422,1.672086,39.139636,1.00000,1.000000,13,alive,True,2022-01-01,NaT
10037,0.037648,Female,1.672086,1.07985,cat53,4100.290422,1.672086,39.139636,1.00000,1.000000,14,alive,True,2022-01-01,NaT


In [89]:
test = []
for i in mismatched:
    if len(all_data.loc[i]['risk_factor.low_birth_weight_and_short_gestation_exposure'].unique())!=2:
        test.append(i)

In [90]:
test

[]

In [61]:
all_data.loc[matched]['low_birth_weight.exposure'].describe()

count    11204.000000
mean      3353.703189
std        753.806355
min       1076.871398
25%       2870.942632
50%       3368.429868
75%       4009.426227
max       4499.744863
Name: low_birth_weight.exposure, dtype: float64

In [51]:
all_data.loc[mismatched]['low_birth_weight.exposure'].describe()

count    11204.000000
mean      3353.703189
std        753.806355
min       1076.871398
25%       2870.942632
50%       3368.429868
75%       4009.426227
max       4499.744863
Name: low_birth_weight.exposure, dtype: float64

In [63]:
all_data.loc[mismatched]['enn_interpolated_rr'].describe()

count    11204.000000
mean         7.555473
std         14.745351
min          1.000000
25%          1.000000
50%          1.519681
75%          6.008915
max        106.330732
Name: enn_interpolated_rr, dtype: float64

In [64]:
all_data.loc[matched]['enn_interpolated_rr'].describe()

count    42186.000000
mean         3.824029
std         13.565274
min          1.000000
25%          1.000000
50%          1.091879
75%          1.760753
max        484.337849
Name: enn_interpolated_rr, dtype: float64

In [65]:
all_data.loc[mismatched]['lnn_interpolated_rr'].describe()

count    11204.000000
mean         4.547918
std          8.572915
min          1.000000
25%          1.000000
50%          1.647384
75%          3.939780
max         82.443945
Name: lnn_interpolated_rr, dtype: float64

In [66]:
all_data.loc[matched]['lnn_interpolated_rr'].describe()

count    42186.000000
mean         3.333596
std         11.481474
min          1.000000
25%          1.000000
50%          1.357269
75%          2.418279
max        403.238926
Name: lnn_interpolated_rr, dtype: float64

In [78]:
all_data.loc[matched]['alive'].value_counts()

alive    42176
dead        10
Name: alive, dtype: int64