The goal of this notebook is to obtain the data required for the survival analysis of stroke, MI, death, and dementia events in the Kaiser population. The analysis itself is performed in R in a separate notebook. Almost all of the code in this notebook has been incorporated into the microsim.validation.py script.

In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt

from microsim.outcome import OutcomeType
from microsim.population_factory import PopulationFactory

# Locations of the microsim source tree and of the directory that receives the CSV exports.
# Both may be overridden through environment variables (MICROSIM_DIR, MICROSIM_DATA_DIR) so the
# notebook can run on other machines; when they are unset the original paths are used unchanged.
microsimDir = os.environ.get(
    "MICROSIM_DIR",
    "/Users/deligkaris.1/OneDrive - The Ohio State University Wexner Medical Center/MICROSIM/CODE/microsim",
)
# NOTE(review): the working directory is switched to the microsim tree, presumably because
# microsim resolves its data files relative to it — confirm.
os.chdir(microsimDir)
dataDir = os.environ.get(
    "MICROSIM_DATA_DIR",
    "/Users/deligkaris.1/OneDrive - The Ohio State University Wexner Medical Center/MICROSIM/NOTEBOOKS/DATA",
)

In [2]:
# Enable the pandas "future.no_silent_downcasting" option for this session
# (see the pandas options documentation for its exact effect).
# NOTE(review): presumably set to avoid downcasting FutureWarnings raised inside microsim — confirm.
pd.set_option('future.no_silent_downcasting', True) 

In [3]:
%%time
# Number of persons requested from the Kaiser population factory.
popSize = 500_000
# Build the population with no person filters and with wmhSpecific enabled.
pop = PopulationFactory.get_kaiser_population(
    n=popSize,
    personFilters=None,
    wmhSpecific=True,
)

CPU times: user 20.9 s, sys: 5.85 s, total: 26.8 s
Wall time: 18.1 s


In [None]:
%%time
# Advance the population built above; 11 is passed positionally and nWorkers=5 is requested.
# NOTE(review): presumably 11 is the number of yearly waves to simulate and nWorkers the number
# of parallel workers — confirm against the advance() signature.
# NOTE(review): the survival CSVs written later in this notebook are named "...13YrTimes" —
# confirm the intended follow-up length.
pop.advance(11, nWorkers=5)

In [None]:
# Stroke survival data (compare with figure 1 in Kent2021).
# In addition to the survival info for OutcomeType.STROKE, two per-person values are
# requested: get_scd_group() and get_wmh_severity_by_modality_group().
strokePersonFunctions = [
    lambda person: person.get_scd_group(),
    lambda person: person.get_wmh_severity_by_modality_group(),
]
strokeInfo = pop.get_outcome_survival_info(
    outcomesTypeList=[OutcomeType.STROKE],
    personFunctionsList=strokePersonFunctions,
)
strokeColumns = ["time", "event", "sbiwmhGroup", "severityGroup"]
df = pd.DataFrame(strokeInfo, columns=strokeColumns)
# Save the table to the data directory, then preview the first rows.
df.to_csv(dataDir + "/kaiserStrokeValidation13YrTimes.csv", index=False)
df.head()

In [None]:
# MI survival data, using the same two per-person values as the stroke export:
# get_scd_group() and get_wmh_severity_by_modality_group().
miPersonFunctions = [
    lambda person: person.get_scd_group(),
    lambda person: person.get_wmh_severity_by_modality_group(),
]
miInfo = pop.get_outcome_survival_info(
    outcomesTypeList=[OutcomeType.MI],
    personFunctionsList=miPersonFunctions,
)
miColumns = ["time", "event", "sbiwmhGroup", "severityGroup"]
df = pd.DataFrame(miInfo, columns=miColumns)
# Save the table to the data directory, then preview the first rows.
df.to_csv(dataDir + "/kaiserMiValidation13YrTimes.csv", index=False)
df.head()

In [None]:
# Dementia survival data (compare with figure 2 in Kent2023).
# Per-person values: the WMH severity-by-modality group, plus the first "sbi" and "wmh"
# items of the WMH outcome, each converted to int.
dementiaPersonFunctions = [
    lambda person: person.get_wmh_severity_by_modality_group(),
    lambda person: int(person.get_outcome_item_first(OutcomeType.WMH, "sbi")),
    lambda person: int(person.get_outcome_item_first(OutcomeType.WMH, "wmh")),
]
dementiaInfo = pop.get_outcome_survival_info(
    outcomesTypeList=[OutcomeType.DEMENTIA],
    personFunctionsList=dementiaPersonFunctions,
)
dementiaColumns = ["time", "event", "severityGroup", "sbi", "wmh"]
df = pd.DataFrame(dementiaInfo, columns=dementiaColumns)
# Save the table to the data directory, then preview the first rows.
df.to_csv(dataDir + "/kaiserDementiaValidation13YrTimes.csv", index=False)
df.head()

In [None]:
# Death survival data.
# Per-person values: the WMH severity-by-modality group, plus the first "sbi" and "wmh"
# items of the WMH outcome, each converted to int.
deathPersonFunctions = [
    lambda person: person.get_wmh_severity_by_modality_group(),
    lambda person: int(person.get_outcome_item_first(OutcomeType.WMH, "sbi")),
    lambda person: int(person.get_outcome_item_first(OutcomeType.WMH, "wmh")),
]
deathInfo = pop.get_outcome_survival_info(
    outcomesTypeList=[OutcomeType.DEATH],
    personFunctionsList=deathPersonFunctions,
)
deathColumns = ["time", "event", "severityGroup", "sbi", "wmh"]
df = pd.DataFrame(deathInfo, columns=deathColumns)
# Save the table to the data directory, then preview the first rows.
df.to_csv(dataDir + "/kaiserDeathValidation13YrTimes.csv", index=False)
df.head()

In [None]:
# Reference stroke rates and their confidence-interval bounds, keyed by group code.
# NOTE(review): presumably taken from the Kaiser publications cited in this notebook —
# confirm the source and the units.
strokeRatesRef = {
    1: 36.6, 2: 28.5, 3: 47.4, 0: 8.2,
    5: 31.2, 6: 13.0, 7: 34.5, 4: 4.8,
}
strokeMinCiRef = {
    1: 34.9, 2: 27.7, 3: 44.5, 0: 8.0,
    5: 28.0, 6: 12.4, 7: 30.6, 4: 4.5,
}
strokeMaxCiRef = {
    1: 38.4, 2: 29.3, 3: 50.5, 0: 8.4,
    5: 34.6, 6: 13.6, 7: 38.7, 4: 5.2,
}
# Display label for every group code; the other rate cells of this notebook read this dict too.
groupStrings = {
    1: "CT SBI", 2: "CT WMD", 3: "CT BOTH", 0: "CT NONE",
    5: "MRI SBI", 6: "MRI WMD", 7: "MRI BOTH", 4: "MRI NONE",
}

# Simulated stroke incidence rates per group at the end of wave 3.
strokeRates = pop.get_outcome_incidence_rates_by_scd_and_modality_at_end_of_wave(
    outcomesTypeList=[OutcomeType.STROKE], wave=3
)

strokeRatesList = []
print("     Group                  Reference     Simulation")
for group, refRate in strokeRatesRef.items():
    groupLabel = groupStrings[group]
    # The confidence-interval text is shared by the CSV cell and the printed row.
    ciText = f"({strokeMinCiRef[group]:>5.1f} - {strokeMaxCiRef[group]:>4.1f} ) "
    simRate = strokeRates[group]
    # NOTE(review): the CSV cells use narrower/left-aligned format specs than the printed row
    # (and than the dementia/death cells); kept exactly as in the original.
    strokeRatesList.append(
        [f"{groupLabel:10} ", f"{refRate:>4.1f} " + ciText, f"{simRate:<4.1f}"]
    )
    print(f"{groupLabel:>10} " + f"{refRate:>10.1f} " + ciText + f"{simRate:>14.1f}")

# Save the reference-vs-simulation table.
strokeRatesPd = pd.DataFrame(strokeRatesList, columns=["Group", "Reference", "Simulation"])
strokeRatesPd.to_csv(dataDir + "/kaiserStrokeValidation4YrRates.csv", index=False)

In [None]:
# Reference dementia rates and their confidence-interval bounds, keyed by group code.
# NOTE(review): presumably taken from the Kaiser publications cited in this notebook —
# confirm the source and the units.
dementiaRatesRef = {
    1: 32.8, 2: 37.7, 3: 51.6, 0: 6.7,
    5: 16.6, 6: 9.6, 7: 19.1, 4: 2.9,
}
dementiaMinCiRef = {
    1: 31.0, 2: 36.7, 3: 48.3, 0: 6.5,
    5: 14.2, 6: 9.1, 7: 16.2, 4: 2.7,
}
dementiaMaxCiRef = {
    1: 34.6, 2: 38.7, 3: 55.1, 0: 6.9,
    5: 19.3, 6: 10.1, 7: 22.4, 4: 3.3,
}

# Simulated dementia incidence rates per group at the end of wave 3.
dementiaRates = pop.get_outcome_incidence_rates_by_scd_and_modality_at_end_of_wave(
    outcomesTypeList=[OutcomeType.DEMENTIA], wave=3
)

dementiaRatesList = []
print("     Group                  Reference     Simulation")
for group, refRate in dementiaRatesRef.items():
    # groupStrings (group code -> label) is defined in the stroke rates cell.
    rowCells = [
        f"{groupStrings[group]:>10} ",
        f"{refRate:>10.1f} ({dementiaMinCiRef[group]:>5.1f} - {dementiaMaxCiRef[group]:>4.1f} ) ",
        f"{dementiaRates[group]:>14.1f}",
    ]
    dementiaRatesList.append(rowCells)
    # The printed row is the three CSV cells placed back to back.
    print("".join(rowCells))

# Save the reference-vs-simulation table.
dementiaRatesPd = pd.DataFrame(dementiaRatesList, columns=["Group", "Reference", "Simulation"])
dementiaRatesPd.to_csv(dataDir + "/kaiserDementiaValidation4YrRates.csv", index=False)

In [None]:
# Simulated MI incidence rates per group at the end of wave 3.
# No reference rates are listed for MI in this notebook, so only the simulation is tabulated.
miRates = pop.get_outcome_incidence_rates_by_scd_and_modality_at_end_of_wave(
    outcomesTypeList=[OutcomeType.MI], wave=3
)
print("     Group     Simulation")
miRatesList = []
# groupStrings (group code -> label) is defined in the stroke rates cell.
for group, groupLabel in groupStrings.items():
    rowCells = [f"{groupLabel:>10} ", f"{miRates[group]:>14.1f}"]
    miRatesList.append(rowCells)
    # The printed row is the two CSV cells placed back to back.
    print("".join(rowCells))

# Save the simulation-only table.
miRatesPd = pd.DataFrame(miRatesList, columns=["Group", "Simulation"])
miRatesPd.to_csv(dataDir + "/kaiserMiValidation4YrRates.csv", index=False)

In [None]:
# Reference death rates and their confidence-interval bounds, keyed by group code.
# NOTE(review): presumably taken from the Kaiser publications cited in this notebook —
# confirm the source and the units.
deathRatesRef = {
    1: 61.5, 2: 63.8, 3: 84.9, 0: 18.2,
    5: 49.2, 6: 28.5, 7: 53.7, 4: 14.0,
}
deathMinCiRef = {
    1: 59.1, 2: 62.6, 3: 80.9, 0: 17.8,
    5: 45.1, 6: 27.6, 7: 48.8, 4: 13.4,
}
deathMaxCiRef = {
    1: 63.9, 2: 65.1, 3: 89.2, 0: 18.5,
    5: 53.6, 6: 29.4, 7: 59.0, 4: 14.6,
}

# Simulated death incidence rates per group at the end of wave 3.
deathRates = pop.get_outcome_incidence_rates_by_scd_and_modality_at_end_of_wave(
    outcomesTypeList=[OutcomeType.DEATH], wave=3
)

deathRatesList = []
print("     Group                  Reference     Simulation")
for group, refRate in deathRatesRef.items():
    # groupStrings (group code -> label) is defined in the stroke rates cell.
    rowCells = [
        f"{groupStrings[group]:>10} ",
        f"{refRate:>10.1f} ({deathMinCiRef[group]:>5.1f} - {deathMaxCiRef[group]:>4.1f} ) ",
        f"{deathRates[group]:>14.1f}",
    ]
    deathRatesList.append(rowCells)
    # The printed row is the three CSV cells placed back to back.
    print("".join(rowCells))

# Save the reference-vs-simulation table.
deathRatesPd = pd.DataFrame(deathRatesList, columns=["Group", "Reference", "Simulation"])
deathRatesPd.to_csv(dataDir + "/kaiserDeathValidation4YrRates.csv", index=False)

In [None]:
# Incidence rates at the end of wave 3 from get_outcome_incidence_rates_at_end_of_wave,
# displayed as a tuple in the order: stroke, dementia, death, MI.
tuple(
    pop.get_outcome_incidence_rates_at_end_of_wave(outcomesTypeList=[outcomeType], wave=3)
    for outcomeType in (
        OutcomeType.STROKE,
        OutcomeType.DEMENTIA,
        OutcomeType.DEATH,
        OutcomeType.MI,
    )
)