The goal of this notebook is to generate the data required for the survival analysis of stroke and dementia events in the Kaiser population. The survival analysis itself is performed in R, in a separate notebook.

In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt

from microsim.outcome import OutcomeType
from microsim.population_factory import PopulationFactory

# Machine-specific locations. Set the MICROSIM_DIR / MICROSIM_DATA_DIR environment
# variables to run this notebook on another machine; when they are unset the
# original hard-coded paths are used, so existing setups keep working unchanged.
microsimDir = os.environ.get(
    "MICROSIM_DIR",
    "/Users/deligkaris.1/OneDrive - The Ohio State University Wexner Medical Center/MICROSIM/CODE/microsim",
)
# the working directory is switched to microsimDir before the population is built
os.chdir(microsimDir)
# directory that receives the CSV files written by the last two cells
dataDir = os.environ.get(
    "MICROSIM_DATA_DIR",
    "/Users/deligkaris.1/OneDrive - The Ohio State University Wexner Medical Center/MICROSIM/NOTEBOOKS/DATA",
)

In [2]:
pd.set_option('future.no_silent_downcasting', True) 

In [3]:
%%time
# size requested from the population factory (half a million persons)
popSize = 500_000
# build the Kaiser population that every later cell works on
pop = PopulationFactory.get_kaiser_population(n=popSize)

  df = pd.concat([df,dfForGroup])


CPU times: user 2min 2s, sys: 14.8 s, total: 2min 17s
Wall time: 2min 8s


In [4]:
%%time
# first positional argument of advance; presumably the number of waves to simulate
# (the censored times in the tables below equal this value) -- TODO confirm
nWaves = 13
# value forwarded to the nWorkers argument of advance
nWorkers = 5
pop.advance(nWaves, nWorkers=nWorkers)

  return bound(*args, **kwds)


CPU times: user 2min 46s, sys: 7min 37s, total: 10min 24s
Wall time: 44min 49s


In [5]:
#define functions used to obtain the information needed
def get_stroke_scd_group(sbi, wmh):
    """Map the sbi and wmh indicators to the group code used in the stroke analysis.

    Group codes:
        0 -- sbi == 0 and wmh == 0
        1 -- sbi == 1 and wmh == 0
        2 -- sbi == 0 and wmh == 1
        3 -- sbi == 1 and wmh == 1

    Args:
        sbi: 0/1 (or bool) indicator.
        wmh: 0/1 (or bool) indicator.

    Returns:
        int: the group code, 0 through 3.

    Raises:
        RuntimeError: if either indicator is not 0 or 1. Previously such input
            fell through every branch and silently returned None, which would
            end up as a missing group in the exported data.
    """
    if sbi not in (0, 1) or wmh not in (0, 1):
        raise RuntimeError("unknown sbi or wmh indicator")
    # sbi contributes 1 and wmh contributes 2, which reproduces the 0-3 coding above
    return int(sbi) + 2 * int(wmh)
    
def get_stroke_km_info(person):
    """Collect the survival-analysis record of one person for stroke.

    Args:
        person: object exposing get_min_wave_of_first_outcomes_or_last_wave,
            _stroke and _outcomes, as read below.

    Returns:
        tuple: (time, stroke, scdGroup) where time is the wave returned by
        get_min_wave_of_first_outcomes_or_last_wave for STROKE plus one,
        stroke is int(person._stroke) and scdGroup comes from
        get_stroke_scd_group.
    """
    eventOrLastWave = person.get_min_wave_of_first_outcomes_or_last_wave([OutcomeType.STROKE])
    hadStroke = int(person._stroke)
    # element [1] of the first WMH entry carries the sbi / wmh attributes
    wmhOutcome = person._outcomes[OutcomeType.WMH][0][1]
    group = get_stroke_scd_group(int(wmhOutcome.sbi), int(wmhOutcome.wmh))
    return (eventOrLastWave + 1, hadStroke, group)

def get_dementia_scd_group(modality, severityUnknown, severity, wmh):
    """Map modality and WMH findings to the group code used in the dementia analysis.

    Each modality owns five consecutive codes ('ct' -> 0-4, 'mr' -> 5-9):
        base + 0 -- no wmh
        base + 1 -- wmh, severity unknown
        base + 2 -- wmh, severity 'mild'
        base + 3 -- wmh, severity 'moderate'
        base + 4 -- wmh, severity 'severe'

    Args:
        modality: 'ct' or 'mr'.
        severityUnknown: truthy when wmh is present but its severity is unknown.
        severity: 'mild', 'moderate' or 'severe'; only read when wmh is truthy
            and severityUnknown is falsy.
        wmh: truthy when wmh is present.

    Returns:
        int: the group code, 0 through 9.

    Raises:
        RuntimeError: "unknown modality" or "unknown severity".
    """
    # the modality is validated first, before any of the other arguments is looked at
    if modality == 'ct':
        base = 0
    elif modality == 'mr':
        base = 5
    else:
        raise RuntimeError("unknown modality")

    if not wmh:
        return base
    if severityUnknown:
        return base + 1

    gradedSeverities = ('mild', 'moderate', 'severe')
    if severity not in gradedSeverities:
        raise RuntimeError("unknown severity")
    # position in the tuple gives the offset after the two non-graded codes
    return base + 2 + gradedSeverities.index(severity)

def get_dementia_km_info(person):
    """Collect the survival-analysis record of one person for dementia.

    Args:
        person: object exposing get_min_wave_of_first_outcomes_or_last_wave,
            _dementia, _modality and _outcomes, as read below.

    Returns:
        tuple: (time, dementia, scdGroup) where time is the wave returned by
        get_min_wave_of_first_outcomes_or_last_wave for DEMENTIA plus one,
        dementia is int(person._dementia) and scdGroup comes from
        get_dementia_scd_group.
    """
    eventOrLastWave = person.get_min_wave_of_first_outcomes_or_last_wave([OutcomeType.DEMENTIA])
    hadDementia = int(person._dementia)
    imagingModality = person._modality
    # element [1] of the first WMH entry carries the wmh attributes
    wmhOutcome = person._outcomes[OutcomeType.WMH][0][1]
    unknownFlag = int(wmhOutcome.wmhSeverityUnknown)
    wmhFlag = int(wmhOutcome.wmh)
    # wmhSeverity may be None; otherwise its .value is what the group function compares
    rawSeverity = wmhOutcome.wmhSeverity
    severityLabel = None if rawSeverity is None else rawSeverity.value
    group = get_dementia_scd_group(imagingModality, unknownFlag, severityLabel, wmhFlag)
    return (eventOrLastWave + 1, hadDementia, group)

In [6]:
#build one (time, event, scdGroup) row per person for the stroke survival analysis
#(see figure 1 in Kent2021) and write the table to dataDir as a CSV file
strokeInfo = [get_stroke_km_info(person) for person in pop._people]
df = pd.DataFrame(strokeInfo, columns=["time","event", "scdGroup"])
df.to_csv(f"{dataDir}/kaiserStrokeValidation.csv", index=False)
df.head()

Unnamed: 0,time,event,scdGroup
0,13,0,0
1,13,0,2
2,13,0,0
3,13,0,0
4,13,0,0


In [7]:
#build one (time, event, scdGroup) row per person for the dementia survival analysis
#(see figure 2 in Kent2023) and write the table to dataDir as a CSV file
dementiaInfo = [get_dementia_km_info(person) for person in pop._people]
df = pd.DataFrame(dementiaInfo, columns=["time","event", "scdGroup"])
df.to_csv(f"{dataDir}/kaiserDementiaValidation.csv", index=False)
df.head()

Unnamed: 0,time,event,scdGroup
0,13,0,0
1,13,0,6
2,13,0,0
3,13,0,0
4,13,0,5
