The goal of this notebook is to perform some preliminary simulations with the Kaiser WMH population. These results will be used for an ISC meeting presentation.

In [1]:
import os

# Directory of the local microsim checkout; the notebook changes into it before importing microsim.
# Set the MICROSIM_DIR environment variable to run this notebook on another machine;
# the original author's path is kept as the default so existing setups keep working.
microsimDir = os.environ.get(
    "MICROSIM_DIR",
    "/Users/deligkaris.1/OneDrive - The Ohio State University Wexner Medical Center/MICROSIM/CODE/microsim",
)
os.chdir(microsimDir)

import pandas as pd

from microsim.person_filter import PersonFilter
from microsim.trials.trial_description import KaiserTrialDescription
from microsim.trials.trial_type import TrialType
from microsim.trials.trial import Trial
from microsim.trials.trial_outcome_assessor_factory import TrialOutcomeAssessorFactory
from microsim.treatment import TreatmentStrategiesType, DefaultTreatmentsType
from microsim.treatment_strategy_repository import TreatmentStrategyRepository
from microsim.bp_treatment_strategies import SprintForSbpRiskThreshold
from microsim.statin_treatment_strategies import StatinTreatmentStrategy

In [2]:
# Opt in to pandas' 'future.no_silent_downcasting' option up front, so results here do not
# depend on the silent downcasting that the option name says will go away.
pd.set_option('future.no_silent_downcasting', True) #make the calculation here future-proof

### Part 1: determine the overall risk in the kaiser CCM population, without specifically attending to CCM as a risk factor

  - initialize a kaiser population
  - use the intercept only risk model that does not include CCM covariates
  - run a 4 year trial with "intensive treatment" defined as: statin guidelines (use statin for all with risk > 7.5%) + risk-based SPRINT guideline (implement SPRINT for all individuals over 7.5% who meet SPRINT bp criteria)
  - compare treatment in usual care vs. intensive treatment by baseline risk quintiles. (for each quintile define the range of risks, and then look at bp meds added and statins added)
  - compare all outcomes — cognitive, dementia, cardiovascular, quality of life in treated vs. usual care.

In [3]:
# Part 1 uses the intercept-only models everywhere, so the flag is named once and reused below.
useWmhSpecificModels = False

# "df"-type filter named noStatin; the predicate is true when the default-treatment statin value is 0.
pf = PersonFilter()
pf.add_filter("df", "noStatin", lambda x: x[DefaultTreatmentsType.STATIN.value]==0)
# Disabled person-level filter, kept for reference only:
#pf.add_filter("person",
#              "lowCvLimit",
#              lambda x: (CVModelRepository().select_outcome_model_for_person(x).get_risk_for_person(x, years=10)<0.075))

# Intensive-treatment arm: risk-threshold SPRINT strategy for BP plus statins with a 7.5% CV risk cutoff.
ts = TreatmentStrategyRepository()
ts._repository[TreatmentStrategiesType.BP.value] = SprintForSbpRiskThreshold(wmhSpecific=useWmhSpecificModels)
ts._repository[TreatmentStrategiesType.STATIN.value] = StatinTreatmentStrategy(cvRiskCutoff=0.075)

# Trial size and length (duration is 4 years per the Part 1 plan above).
sampleSize = 300000
duration = 4

td = KaiserTrialDescription(
    trialType=TrialType.COMPLETELY_RANDOMIZED,
    blockFactors=[],
    sampleSize=sampleSize,
    duration=duration,
    treatmentStrategies=ts,
    nWorkers=5,
    personFilters=pf,
    wmhSpecific=useWmhSpecificModels,
)

# Build the trial and print the baseline covariate distributions of both arms.
tr = Trial(td)
tr.print_covariate_distributions()

                          Printing covariate information for people still alive...
                          self=treated, alive people count=  300000               other=control, alive people count=  300000  
                          self=treated, unique alive people count=  143260        other=control, unique alive people count=  143262  
                          self                                                    other
                          -----------------------------------------------------   -----------------------------------------------------
                          min      0.25    med     0.75     max    mean     sd     min     0.25    med     0.75     max    mean     sd
                          -----------------------------------------------------   -----------------------------------------------------
                    age    45.0    57.0    63.0    70.0   112.0    63.9     9.3    45.0    57.0    63.0    70.0   109.0    63.9     9.3
                    sbp  

In [4]:
%%time
# Run the Part 1 trial; this is the expensive step, hence the %%time cell magic on the first line.
tr.run()

# Analyze the completed trial with the outcome assessor provided by the factory.
toa = TrialOutcomeAssessorFactory.get_trial_outcome_assessor()
tr.analyze(toa)

# Bare expression so the notebook displays the trial (its description and analysis results).
tr

  return bound(*args, **kwds)


Trial is completed.
CPU times: user 5min 11s, sys: 8min 26s, total: 13min 38s
Wall time: 25min 34s


Trial Description
	Trial type: 1
	Block factors: []
	Sample size: 300000
	Duration: 4
	Treatment strategies: ['bp', 'statin']
	Number of workers: 5
	Person filters: 
	 Person Filters:
	    filter type   filter name    
	             df   noStatin       

	Population type: PopulationType.KAISER
Trial
	Trial completed: True
Trial results:
	Analysis: linear
	                                         Z      Intercept           Z SE         pValue
	                    qalys:            0.02           3.70           0.00           0.00
	                  meanGCP:            0.14          52.74           0.02           0.00
	                  lastGCP:            0.16          51.14           0.02           0.00
	Analysis: logistic
	                                         Z      Intercept           Z SE         pValue
	                    death:           -0.08          -2.42           0.01           0.00
	                 anyEvent:           -0.04           0.80           0.01           0.00


In [5]:
# Print the end-of-trial proportions of the treatment strategy variables (bpMedsAdded, statinsAdded) for the Part 1 trial.
tr.print_treatment_strategy_variables_information()

                          Printing treatment strategy variable information at the end of the trial...
                          self=treated, alive people count=  277276  
                          self=treated & alive, unique people count=  138456  
                          self
                          -----------------------------------------------------
                          proportions
                          -----------
            bpMedsAdded
                      0   0.57
                      1   0.14
                      2   0.14
                      3   0.12
                      4   0.04
           statinsAdded
                      0   0.40
                      1   0.60
                          -----------------------------------------------------
                          proportions in each quintile
                          -----------------------------------------------------
                          bpMedsAdded
       CV risk quintile      0       1      

### Part 3: determine the overall risk in the kaiser CCM population, while attending to CCM as a risk factor

 - initialize a kaiser population
 - use the CCM risk model that does include CCM covariates
 - run a 4 year trial with "intensive treatment" defined as: statin guidelines (use statin for all with risk > 7.5%) + risk-based SPRINT guideline (implement SPRINT for all individuals over 7.5% who meet SPRINT bp criteria)
 - compare treatment in usual care vs. intensive treatment by baseline risk quintiles. (for each quintile define the range of risks, and then look at bp meds added and statins added)
 - compare all outcomes — cognitive, dementia, cardiovascular, quality of life in intensive vs. usual care.

In [6]:
# Part 3 uses the WMH-specific models everywhere, so the flag is named once and reused below.
useWmhSpecificModels = True

# "df"-type filter named noStatin; the predicate is true when the default-treatment statin value is 0.
pf = PersonFilter()
pf.add_filter("df", "noStatin", lambda x: x[DefaultTreatmentsType.STATIN.value]==0)
# Disabled person-level filter, kept for reference only:
#pf.add_filter("person",
#              "lowCvLimit",
#              lambda x: (CVModelRepository().select_outcome_model_for_person(x).get_risk_for_person(x, years=10)<0.075))

# Intensive-treatment arm: risk-threshold SPRINT strategy for BP plus statins with a 7.5% CV risk cutoff.
ts = TreatmentStrategyRepository()
ts._repository[TreatmentStrategiesType.BP.value] = SprintForSbpRiskThreshold(wmhSpecific=useWmhSpecificModels)
ts._repository[TreatmentStrategiesType.STATIN.value] = StatinTreatmentStrategy(cvRiskCutoff=0.075)

# Trial size and length (duration is 4 years per the Part 3 plan above).
sampleSize = 300000
duration = 4

td = KaiserTrialDescription(
    trialType=TrialType.COMPLETELY_RANDOMIZED,
    blockFactors=[],
    sampleSize=sampleSize,
    duration=duration,
    treatmentStrategies=ts,
    nWorkers=5,
    personFilters=pf,
    wmhSpecific=useWmhSpecificModels,
)

# Build the trial and print the baseline covariate distributions of both arms.
tr = Trial(td)
tr.print_covariate_distributions()

                          Printing covariate information for people still alive...
                          self=treated, alive people count=  300000               other=control, alive people count=  300000  
                          self=treated, unique alive people count=  143601        other=control, unique alive people count=  143430  
                          self                                                    other
                          -----------------------------------------------------   -----------------------------------------------------
                          min      0.25    med     0.75     max    mean     sd     min     0.25    med     0.75     max    mean     sd
                          -----------------------------------------------------   -----------------------------------------------------
                    age    45.0    57.0    63.0    70.0   112.0    63.9     9.3    45.0    57.0    63.0    70.0   114.0    63.9     9.3
                    sbp  

In [7]:
%%time
# Run the Part 3 (WMH-specific) trial; this is the expensive step, hence the %%time cell magic on the first line.
tr.run()

# Analyze the completed trial with the outcome assessor provided by the factory.
toa = TrialOutcomeAssessorFactory.get_trial_outcome_assessor()
tr.analyze(toa)

# Bare expression so the notebook displays the trial (its description and analysis results).
tr

  return bound(*args, **kwds)


Trial is completed.
CPU times: user 6min 22s, sys: 23min 50s, total: 30min 13s
Wall time: 1h 19s


Trial Description
	Trial type: 1
	Block factors: []
	Sample size: 300000
	Duration: 4
	Treatment strategies: ['bp', 'statin']
	Number of workers: 5
	Person filters: 
	 Person Filters:
	    filter type   filter name    
	             df   noStatin       

	Population type: PopulationType.KAISER
Trial
	Trial completed: True
Trial results:
	Analysis: linear
	                                         Z      Intercept           Z SE         pValue
	                    qalys:            0.02           3.70           0.00           0.00
	                  meanGCP:            0.12          52.76           0.02           0.00
	                  lastGCP:            0.12          51.16           0.02           0.00
	Analysis: logistic
	                                         Z      Intercept           Z SE         pValue
	                    death:           -0.10          -2.40           0.01           0.00
	                 anyEvent:           -0.04           0.80           0.01           0.00


In [8]:
# Print the end-of-trial proportions of the treatment strategy variables (bpMedsAdded, statinsAdded) for the Part 3 trial.
tr.print_treatment_strategy_variables_information()

                          Printing treatment strategy variable information at the end of the trial...
                          self=treated, alive people count=  277297  
                          self=treated & alive, unique people count=  138812  
                          self
                          -----------------------------------------------------
                          proportions
                          -----------
            bpMedsAdded
                      0   0.63
                      1   0.12
                      2   0.13
                      3   0.11
                      4   0.03
           statinsAdded
                      0   0.40
                      1   0.60
                          -----------------------------------------------------
                          proportions in each quintile
                          -----------------------------------------------------
                          bpMedsAdded
       CV risk quintile      0       1      

### Part 2: compare how treatment varies in the kaiser CCM population, depending on whether we use an intercept only model or a model that considers CCM risk

 - initialize a kaiser population. clone it.
 - compare the assigned risk quintile on both models
 - in one cloned population, use the intercept only risk model and run the sim for one year with the intensive treatment arm.
 - in the other cloned population, use the CCM risk model and run the sim for one year with the intensive treatment arm.
 - match individuals between the two cloned populations.
 - compare treatment between the two cloned populations (e.g. how many patients added/subtracted bp meds, statins)
 - compare treatment between the cloned populations stratified by baseline risk (e.g. rows are risk quintiles from the intercept-only model, columns are risk quintiles from the CCM model, and cell values are delta bp meds). repeat the same table with delta statins as the cell values.

In [9]:
from microsim.population_factory import PopulationFactory

In [10]:
#pop1 is using wmh information
# NOTE(review): this relies on get_kaiser_population defaulting to the WMH-specific models — confirm.
popSize = 100000
pop1 = PopulationFactory.get_kaiser_population(n=popSize)
# pop2 starts as a copy of pop1 so the same people can be simulated under a second set of models.
# NOTE(review): the comparisons below assume copy() gives pop2 its own person objects — confirm.
pop2 = pop1.copy()
# Display the copied model repository mapping (it is replaced in the next cell).
pop2._modelRepository

{'dynamicRiskFactors': <microsim.cohort_risk_model_repository.CohortDynamicRiskFactorModelRepository at 0x63d745040>,
 'defaultTreatments': <microsim.cohort_risk_model_repository.CohortDefaultTreatmentModelRepository at 0x3f44bba90>,
 'outcomes': <microsim.outcome_model_repository.OutcomeModelRepository at 0x404002e50>,
 'staticRiskFactors': <microsim.cohort_risk_model_repository.CohortDynamicRiskFactorModelRepository at 0x63d745040>}

In [11]:
#by creation pop2 is similar to pop1 but we are now using a repository with wmhSpecific=False
# Build a Kaiser model repository with wmhSpecific=False and install its underlying mapping on pop2.
pop2ModelRepository = PopulationFactory.get_kaiser_population_model_repo(wmhSpecific=False)
pop2._modelRepository = pop2ModelRepository._repository
# Display the mapping so the swap can be checked against the previous cell's output.
pop2._modelRepository

{'dynamicRiskFactors': <microsim.cohort_risk_model_repository.CohortDynamicRiskFactorModelRepository at 0x63db90af0>,
 'defaultTreatments': <microsim.cohort_risk_model_repository.CohortDefaultTreatmentModelRepository at 0x32d50b730>,
 'outcomes': <microsim.outcome_model_repository.OutcomeModelRepository at 0x4589854f0>,
 'staticRiskFactors': <microsim.cohort_risk_model_repository.CohortStaticRiskFactorModelRepository at 0x4489d08b0>}

In [12]:
#find the CV risks at baseline, before advancing at all, using the appropriate model
from microsim.cv_model_repository import CVModelRepository
import numpy as np

# pop1: 10-year CV risk from the WMH-specific CV models.
# NOTE(review): only people passing the is_alive filter are included, while later cells index all of
# pop._people; the lists line up only if nobody is filtered out at baseline — confirm.
cvModelRepository = CVModelRepository(wmhSpecific=True)
popAlive = filter(lambda x: x.is_alive, pop1._people)
cvRiskList1 = [cvModelRepository.select_outcome_model_for_person(person).get_risk_for_person(person, years=10)
               for person in popAlive]
# Six boundaries (0%, 20%, ..., 100%) delimit the five quintiles.
cvRiskBoundaries1 = np.quantile(cvRiskList1, np.linspace(0, 1, 6))
cvRiskQuintiles1 = np.digitize(cvRiskList1, cvRiskBoundaries1, right=False)
# With right=False a risk equal to the top boundary lands in bin 6; fold it back into quintile 5.
atTopBoundary1 = np.asarray(cvRiskList1) == cvRiskBoundaries1[-1]
cvRiskQuintiles1[atTopBoundary1] = 5

# pop2: same computation with the intercept-only (wmhSpecific=False) CV models.
cvModelRepository = CVModelRepository(wmhSpecific=False)
popAlive = filter(lambda x: x.is_alive, pop2._people)
cvRiskList2 = [cvModelRepository.select_outcome_model_for_person(person).get_risk_for_person(person, years=10)
               for person in popAlive]
cvRiskBoundaries2 = np.quantile(cvRiskList2, np.linspace(0, 1, 6))
cvRiskQuintiles2 = np.digitize(cvRiskList2, cvRiskBoundaries2, right=False)
atTopBoundary2 = np.asarray(cvRiskList2) == cvRiskBoundaries2[-1]
cvRiskQuintiles2[atTopBoundary2] = 5

In [13]:
# Cross-tabulate the two quintile assignments: axis 0 follows cvRiskQuintiles1 (WMH-specific),
# axis 1 follows cvRiskQuintiles2 (intercept-only). Edges at 0.5, 1.5, ..., 5.5 give one bin per quintile 1..5.
binEdges = np.arange(0.5, 6.5, 1)
proportionsInQuintiles = np.histogram2d(cvRiskQuintiles1,cvRiskQuintiles2, bins=[binEdges, binEdges])[0]/popSize

leftPad = " "*25
rule = "-"*53
print(leftPad, rule)
print(leftPad, "proportion of people in CV risk quintile bins")
print(leftPad, rule)
risks = ["1", "2", "3", "4", "5"]
print(leftPad, "CV risk intercept-only")
print(" "*3 + "CV risk WMH-specific      " + "       ".join(risks))

# Rows are printed from quintile 5 down to 1, so flip axis 0; dividing by the total makes the table sum to 1.
flippedProportions = np.flip(proportionsInQuintiles, axis=0)/proportionsInQuintiles.sum()
for rowLabel, rowValues in zip(reversed(risks), flippedProportions):
    print(f"{rowLabel:>23} " + "".join(f"{value:> 7.2f} " for value in rowValues))

                          -----------------------------------------------------
                          proportion of people in CV risk quintile bins
                          -----------------------------------------------------
                          CV risk intercept-only
   CV risk WMH-specific      1       2       3       4       5
                      5    0.00    0.01    0.02    0.06    0.11 
                      4    0.00    0.03    0.03    0.06    0.09 
                      3    0.01    0.01    0.09    0.09    0.00 
                      2    0.01    0.13    0.06    0.00    0.00 
                      1    0.17    0.03    0.00    0.00    0.00 


In [14]:
#set up the appropriate treatment strategies for the two populations
def _intensive_treatment_strategies(wmhSpecific):
    """Return a TreatmentStrategyRepository holding the risk-threshold SPRINT BP strategy
    (built with the given wmhSpecific flag) and the statin strategy with a 7.5% CV risk cutoff."""
    repository = TreatmentStrategyRepository()
    repository._repository[TreatmentStrategiesType.BP.value] = SprintForSbpRiskThreshold(wmhSpecific=wmhSpecific)
    repository._repository[TreatmentStrategiesType.STATIN.value] = StatinTreatmentStrategy(cvRiskCutoff=0.075)
    return repository

ts1 = _intensive_treatment_strategies(wmhSpecific=True)   # used with pop1 (WMH-specific)
ts2 = _intensive_treatment_strategies(wmhSpecific=False)  # used with pop2 (intercept-only)

In [15]:
#make the predictions
# Advance each population by one year under its own treatment strategies (ts1 with pop1, ts2 with pop2).
# NOTE(review): advance presumably updates the populations in place, so re-running this cell would
# advance them a second year — confirm before re-executing.
pop1.advance(years=1, treatmentStrategies=ts1, nWorkers=5)
pop2.advance(years=1, treatmentStrategies=ts2, nWorkers=5)

  return bound(*args, **kwds)
  return bound(*args, **kwds)


In [16]:
#print the proportions for bpMedsAdded differences
from collections import Counter

ts = TreatmentStrategiesType.BP.value
tsv = "bpMedsAdded"

# All people are used here (no is_alive filter), presumably so the two lists stay aligned person by person.
popAlive = pop1._people
bpMedsAddedList1 = [person._treatmentStrategies[ts][tsv] for person in popAlive]

popAlive = pop2._people
bpMedsAddedList2 = [person._treatmentStrategies[ts][tsv] for person in popAlive]

# Per-person difference: pop2 (intercept-only) minus pop1 (WMH-specific).
bpMedsAddedDiffList = [added2-added1 for added1, added2 in zip(bpMedsAddedList1, bpMedsAddedList2)]

# Print each distinct difference with the share of people showing it.
tsvList = bpMedsAddedDiffList
tsvValueCounts = Counter(tsvList)
for key in sorted(tsvValueCounts):
    print(f"{key:>23} {tsvValueCounts[key]/len(tsvList): 6.4f}")

                     -3  0.0005
                     -2  0.0011
                     -1  0.0019
                      0  0.9490
                      1  0.0222
                      2  0.0164
                      3  0.0088


In [17]:
#import matplotlib.pyplot as plt
#plt.hist(bpMedsAddedDiffList)
#plt.show()

In [18]:
#print the proportions for the statinsAdded differences
from collections import Counter

ts = TreatmentStrategiesType.STATIN.value
tsv = "statinsAdded"

# All people are used here (no is_alive filter), presumably so the two lists stay aligned person by person.
popAlive = pop1._people
statinsAddedList1 = [person._treatmentStrategies[ts][tsv] for person in popAlive]

popAlive = pop2._people
statinsAddedList2 = [person._treatmentStrategies[ts][tsv] for person in popAlive]

# Per-person difference: pop2 (intercept-only) minus pop1 (WMH-specific).
saDiffList = [added2-added1 for added1, added2 in zip(statinsAddedList1, statinsAddedList2)]

# Print each distinct difference with the share of people showing it.
tsvList = saDiffList
tsvValueCounts = Counter(tsvList)
for key in sorted(tsvValueCounts):
    print(f"{key:>23} {tsvValueCounts[key]/len(tsvList): 6.4f}")

                      0  0.9996
                      1  0.0004


In [19]:
# Mean bpMedsAdded difference per (intercept-only quintile, WMH-specific quintile) cell.
# Row index = cvRiskQuintiles2 - 1 (intercept-only), column index = cvRiskQuintiles1 - 1 (WMH-specific).
# Cells with no people keep the initial 0.0.

# Single pass: collect the differences of each quintile pair, preserving person order.
bmaDiffsByCell = {}
for quintile2, quintile1, diff in zip(cvRiskQuintiles2, cvRiskQuintiles1, bpMedsAddedDiffList):
    bmaDiffsByCell.setdefault((int(quintile2), int(quintile1)), []).append(diff)

bmaDiffArray = np.zeros([5,5])
for (quintile2, quintile1), cellDiffs in bmaDiffsByCell.items():
    # Only quintile labels 1..5 have a cell in the table.
    if 1 <= quintile2 <= 5 and 1 <= quintile1 <= 5:
        bmaDiffArray[quintile2-1, quintile1-1] = np.mean(cellDiffs)

In [20]:
#rows are based on risk quintiles from the intercept only models (row label k = quintile k+1)
#columns are based on risk quintiles from the wmh specific models (column label k = quintile k+1)
#cells are the mean difference in bpMedsAdded (intercept-only population minus wmh-specific population)
#for all people in that cell; cells with no people show 0.0
pd.DataFrame(bmaDiffArray)

Unnamed: 0,0,1,2,3,4
0,0.07316,-0.073657,-0.342351,0.0,-0.298734
1,0.638371,0.403316,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0


In [21]:
# Mean statinsAdded difference per (intercept-only quintile, WMH-specific quintile) cell.
# Row index = cvRiskQuintiles2 - 1 (intercept-only), column index = cvRiskQuintiles1 - 1 (WMH-specific).
# Cells with no people keep the initial 0.0.

# Single pass: collect the differences of each quintile pair, preserving person order.
saDiffsByCell = {}
for quintile2, quintile1, diff in zip(cvRiskQuintiles2, cvRiskQuintiles1, saDiffList):
    saDiffsByCell.setdefault((int(quintile2), int(quintile1)), []).append(diff)

saDiffArray = np.zeros([5,5])
for (quintile2, quintile1), cellDiffs in saDiffsByCell.items():
    # Only quintile labels 1..5 have a cell in the table.
    if 1 <= quintile2 <= 5 and 1 <= quintile1 <= 5:
        saDiffArray[quintile2-1, quintile1-1] = np.mean(cellDiffs)

In [22]:
#rows are based on risk quintiles from the intercept only models (row label k = quintile k+1)
#columns are based on risk quintiles from the wmh specific models (column label k = quintile k+1)
#cells are the mean difference in statinsAdded (intercept-only population minus wmh-specific population)
#for all people in that cell; cells with no people show 0.0
pd.DataFrame(saDiffArray)

Unnamed: 0,0,1,2,3,4
0,0.0,0.02513,0.01306,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0
