### Effect size on BP loweing from older data

In [1]:
# BP lowering effect from Turnbull, F., Blood Pressure Lowering Treatment Trialists' Collaboration. (2003). Effects of different blood-pressure-lowering regimens on major cardiovascular events: results of prospectively-designed overviews of randomised trials. Lancet, 362(9395), 1527–1535. http://doi.org/10.1016/S0140-6736(03)14739-3
# SBP is a weighted average of the 3 groups = 
weightedSBPLowering = (18229 * 5 + 7482 * 8 + 20888 * 4) / (18229 + 7482 + 20888)
weightedDBPLowering = (18229 * 2 + 7482 * 4 + 20888 * 3) / (18229 + 7482 + 20888)
print(f"average BP lowering: {weightedSBPLowering:.2f}/{weightedDBPLowering:.2f}")

### Summary of effect sizes on BP lowering and their clinical impact from slightly newer data

In [2]:
# Trialists' Collaboration, T. B. P. L. T. (2014). Blood pressure-lowering treatment based on cardiovascular risk: a meta-analysis of individual patient data. The Lancet, 384(9943), 591–598. http://doi.org/10.1016/S0140-6736(14)61212-5
weightedSBPLowering = (25236 * 4.4 + 12256 * 6.1 + 8674 * 7.5 + 5751*6.1) / (25236 + 12256 + 8674 + 5751)
weightedDBPLowering = (25236 * 3.0 + 12256 * 3.1 + 8674 * 3.5 + 5751*2.6) / (25236 + 12256 + 8674 + 5751)
print(f"average BP lowering: {weightedSBPLowering:.2f}/{weightedDBPLowering:.2f}")

In [3]:
weightedStrokeRR = (25236 * 0.75 + 12256 * 0.83 + 8674 * 0.84 + 5751*0.84) / (25236 + 12256 + 8674 + 5751)
print(f"weighted stroke RR: {weightedStrokeRR:.2f}")

weightedCHDRR = (24755*0.85 + 12596*0.94 + 8817*0.85 + 5876*0.87)/(24755+12596+8817+5876)
print(f"weighted CHD RR: {weightedCHDRR:.2f}")

In [4]:
import os
import numpy as np
import numpy.random as npRand
import pandas as pd
import copy
from lifelines import CoxPHFitter

os.chdir("/Users/burke/Documents/research/bpCog/microsim")
from microsim.outcome import OutcomeType
from microsim.population import NHANESDirectSamplePopulation
from microsim.smoking_status import SmokingStatus
from microsim.race_ethnicity import NHANESRaceEthnicity
from microsim.outcome_model_type import OutcomeModelType

### Utility methods to filter the population adn add a BP med

In [5]:
# crudely filter for possible allhat candidates
def ageBPFilter(person):
    return person._age[0] > 55 and person._sbp[0] > 140 and person._sbp[0] < 180 and person._dbp[0] > 90 and person._dbp[0] < 110

# add a BP medication to a person using the BP med effect
def addABPMed(person):
    return {'_antiHypertensiveCount' : 1, '_bpMedsAdded' : 1}, {'_sbp': - 1* weightedSBPLowering, '_dbp' : -1 * weightedDBPLowering}, {OutcomeType.STROKE : 0.79, OutcomeType.MI : 0.87}

# add a BP medication to a person using the BP med effect
#def addABPMedFromADistribution(person):
#    return {'_antiHypertensiveCount' : 1}, {'_sbp': - 1* np.random.normal(weightedSBPLowering, 3.5), '_dbp' : -1 * np.random.normal(weightedDBPLowering, 2)}

### Setup a baseline population and one where <b>everybody</b> gets an additional BP med

In [6]:
def loadAndAdvancePopulation(popSize, numberOfClones, years):
    basePop99 =  NHANESDirectSamplePopulation(n=popSize, year=1999, filter=ageBPFilter)
    basePop01 =  NHANESDirectSamplePopulation(n=popSize, year=2001, filter=ageBPFilter)
    basePop03 = NHANESDirectSamplePopulation(n=popSize, year=2003, filter=ageBPFilter)
    basePop99._people = basePop99._people[basePop99._people.notnull()]
    basePop01._people = basePop01._people[basePop01._people.notnull()]
    basePop03._people = basePop03._people[basePop03._people.notnull()]
    
    basePop = basePop99
    basePop._people = basePop._people.append([basePop01._people,basePop03._people])
    
    baselinePop = NHANESDirectSamplePopulation(n=popSize, year=2001, filter=ageBPFilter) # start the sim in 2001
    popExtraBpMed = NHANESDirectSamplePopulation(n=popSize, year=2001, filter=ageBPFilter) # start the sim in 2001
    
    clonedPeople = []
    for i,person in basePop._people.iteritems():
        for copyCount in range (1,numberOfClones):
            clonedPeople.append(person.slightly_randomly_modify_baseline_risk_factors(basePop._risk_model_repository))
    baselinePop._people = pd.Series(clonedPeople)

    clonedPeople2 = []
    for i,person in basePop._people.iteritems():
        for copyCount in range (1,numberOfClones):
            clonedPeople2.append(person.slightly_randomly_modify_baseline_risk_factors(basePop._risk_model_repository))
    popExtraBpMed._people = pd.Series(clonedPeople2)
    
    baselinePop._people = baselinePop._people[baselinePop._people.notnull()]
    baselinePop._people = baselinePop._people.loc[[person.allhat_candidate(0) ==True for person in baselinePop._people]]
    popExtraBpMed._people = popExtraBpMed._people[popExtraBpMed._people.notnull()]
    popExtraBpMed._people = popExtraBpMed._people.loc[[person.allhat_candidate(0) ==True for person in popExtraBpMed._people]]
    
    popExtraBpMed.set_bp_treatment_strategy(addABPMed)
    baselinePop.advance_multi_process(years)
    popExtraBpMed.advance_multi_process(years)
    
    return (baselinePop, popExtraBpMed)

In [7]:
def loadAndAdvanceUnselectedPopulation(popSize, years):
    baselinePop = NHANESDirectSamplePopulation(n=popSize, year=2001) # start the sim in 2001
    #popExtraBpMed = NHANESDirectSamplePopulation(n=popSize, year=2001) # start the sim in 2001
    #popExtraBpMed.set_bp_treatment_strategy(addABPMed)

    baselinePop.advance_multi_process(years)
    #popExtraBpMed.advance_multi_process(years)
    
    #return (baselinePop, popExtraBpMed)
    return (baselinePop, None)

In [8]:
def summarizePopulation(pop, description):
    print(f"Baseline BP in {description} population: {pd.Series([x._sbp[0] for i, x in  pop._people.iteritems()]).mean():.2f}")
    print(f"BP in first wave in {description} population: {pd.Series([x._sbp[1] for i, x in  pop._people.iteritems()]).mean():.2f}")
    print(f"Last BP in {description} population: {pd.Series([x._sbp[-1] for i, x in  pop._people.iteritems()]).mean():.2f}")
    print(f"BP meds at baseline in {description} population: {pd.Series([x._antiHypertensiveCount[0] for i, x in  pop._people.iteritems()]).mean():.2f}")
    print(f"BP meds in first wave {description} population: {pd.Series([x._antiHypertensiveCount[1] for i, x in  pop._people.iteritems()]).mean():.2f}")
    print(f"BP meds in last wave {description} population: {pd.Series([x._antiHypertensiveCount[-1] for i, x in  pop._people.iteritems()]).mean():.2f}")
    print(f"# dead in {description} {pd.Series([x.is_dead() for i, x in  pop._people.iteritems()]).sum()}")


In [9]:
def getDFForPopulation(pop, numYears):
    data = {}
    for year in range(1,numYears+1):
        #popAtStartOfWave = pop.get_people_alive_at_the_start_of_wave(year)
        data['mi' + str(year)] = [x.has_mi_during_wave(year) for _,x in  pop._people.iteritems()]
        data['stroke' + str(year)] = [x.has_stroke_during_wave(year) for _,x in  pop._people.iteritems()]
        data['dead' + str(year)] = [x.is_dead() and len(x._age)==1 for i,x in  pop._people.iteritems()]
    
    data['age'] = [x._age[0] for i,x in  pop._people.iteritems()]
    data['allhat'] = [x.allhat_candidate(0) for i,x in  pop._people.iteritems()]
    
    return pd.DataFrame(data)

def getSimpleDFForPopulation(pop):
    return pd.DataFrame({'mi': [x.has_mi_during_simulation() for i,x in  pop._people.iteritems()],
                         'stroke' : [x.has_stroke_during_simulation() for i,x in  pop._people.iteritems()],
                         'yearsOfObservation' : [x.years_in_simulation() for i, x in pop._people.iteritems()]})


def getReshapedLongDF(untreatedEvents, treatedEvents, untreated_overallEvents, treated_overallEvents):
    untreatedEvents['treatment'] = 0
    treatedEvents['treatment'] = 1
    allEvents = pd.concat([untreatedEvents, treatedEvents], ignore_index=True)
    allEvents['id'] = allEvents.index
    
    reshapedLong = pd.wide_to_long(allEvents,stubnames=['mi', 'stroke', 'dead'], i='id', j='wave')
    reshapedLong = reshapedLong.sort_index()
    reshapedLong['waveAsColumn'] = reshapedLong.index.get_level_values('wave')
    reshapedLong.loc[reshapedLong.dead, 'diedInWaveTemp'] = reshapedLong.waveAsColumn
    reshapedLong['diedInWave'] = reshapedLong.groupby('id')['diedInWaveTemp'].max()
    reshapedLong['diedInWave'] = reshapedLong.groupby(['id'])['diedInWaveTemp'].transform(max)
    reshapedLong.loc[reshapedLong.diedInWave.isna(), 'diedInWave'] = 10000
    reshapedLong = reshapedLong.loc[reshapedLong.waveAsColumn <= reshapedLong.diedInWave]
    reshapedLong.drop(['diedInWaveTemp', 'diedInWave'], axis='columns', inplace=True)
    return reshapedLong
    
def getHazardRatios(untreatedEvents, treatedEvents, untreated_overallEvents, treated_overallEvents):

    reshapedLong = getReshapedLongDF(untreatedEvents, treatedEvents, untreated_overallEvents, treated_overallEvents)
    
    cph = CoxPHFitter()
    cph.fit(reshapedLong[['stroke', 'waveAsColumn', 'treatment']], duration_col='waveAsColumn', event_col='stroke', show_progress=False)
    strokeHR = np.exp(cph.params_[0])

    cph.fit(reshapedLong[['mi', 'waveAsColumn', 'treatment']], duration_col='waveAsColumn', event_col='mi', show_progress=False)
    miHR = np.exp(cph.params_[0])
    
    miRR = (treated_overallEvents['mi'].sum()/treated_overallEvents['yearsOfObservation'].sum())/(untreated_overallEvents['mi'].sum()/untreated_overallEvents['yearsOfObservation'].sum())
    strokeRR = (treated_overallEvents['stroke'].sum()/treated_overallEvents['yearsOfObservation'].sum())/(untreated_overallEvents['stroke'].sum()/untreated_overallEvents['yearsOfObservation'].sum())

    
    return (strokeHR, miHR, strokeRR, miRR)

### Repeat simulation and summarize responses with recalibrated BP data

In [12]:
strokeHRs = []
miHRs = []
strokeRRs = []
miRRs = []
treatedEventsList = []
overallTreatedEventsList = []
untreatedEventsList = []
overallUntreatedEventsList = []
longDFs = []

numIterations = 15
numYears = 5


for i in range(1,numIterations+1):
    print(f"\niteration: {i}")
    baselinePop = NHANESDirectSamplePopulation(n=100000, year=2001)
    baselinePop.advance_multi_process(numYears)
    summarizePopulation(baselinePop, "untreated")
    untreatedEvents = getDFForPopulation(baselinePop, numYears)
    untreatedEventsList.append(untreatedEvents)
    overallUntreatedEvents = getSimpleDFForPopulation(baselinePop)
    overallUntreatedEventsList.append(overallUntreatedEvents)
    print(f"untreated events: {untreatedEventsList[i-1].stroke1.sum()}\n")

    baselinePop.reset_to_baseline()
    baselinePop.set_bp_treatment_strategy(addABPMed)
    baselinePop.advance_multi_process(numYears)
    summarizePopulation(baselinePop, "treated")
    treatedEvents = getDFForPopulation(baselinePop, numYears)
    treatedEventsList.append(treatedEvents)
    overallTreatedEvents = getSimpleDFForPopulation(baselinePop)
    overallTreatedEventsList.append(overallTreatedEvents)

    longDFs.append(getReshapedLongDF(untreatedEvents, treatedEvents, overallUntreatedEvents, overallTreatedEvents))
    strokeHR, miHR, strokeRR, miRR = getHazardRatios(untreatedEvents, treatedEvents, overallUntreatedEvents, overallTreatedEvents)
    strokeHRs.append(strokeHR)
    miHRs.append(miHR)
    strokeRRs.append(strokeRR)
    miRRs.append(miRR)



iteration: 1


Process ForkPoolWorker-32:
Process ForkPoolWorker-31:
Process ForkPoolWorker-30:
ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

Process ForkPoolWorker-37:
Process ForkPoolWorker-36:
Process ForkPoolWorker-35:
Process ForkPoolWorker-34:
Traceback (most recent call last):
Traceback (most recent call last):


In [36]:
pd.Series(strokeRRs).describe()

count    15.000000
mean      0.797526
std       0.032867
min       0.732929
25%       0.777909
50%       0.798264
75%       0.825812
max       0.847677
dtype: float64

In [37]:
pd.Series(strokeHRs).describe()

count    15.000000
mean      0.787780
std       0.030984
min       0.721667
25%       0.768213
50%       0.787165
75%       0.809126
max       0.841470
dtype: float64

In [38]:
pd.Series(miHRs).describe()

count    15.000000
mean      0.877046
std       0.023630
min       0.843356
25%       0.860178
50%       0.871512
75%       0.886364
max       0.921590
dtype: float64

In [40]:
pd.Series(miRRs).describe()

count    15.000000
mean      0.883742
std       0.023951
min       0.851471
25%       0.859612
50%       0.882608
75%       0.898686
max       0.924226
dtype: float64

### So, after considerable work, it seems that the recalibration is working very nicely and holding up over time
Target stroke RR 0.79, actual (RR 0.80, HR 0.79). Target MI RR 0.87, actual (RR 0.88, HR 0.88)

#### Check that the RRs and HRs are consistent over time

In [41]:
numIterations = 15
numYears=5

for year in range(1,numYears+1):
    strokeRR=0
    miRR=0
    deathRR=0
    for eventsNum in range(0, numIterations):
        strokeRR += treatedEventsList[eventsNum]['stroke' + str(year)].mean() / untreatedEventsList[eventsNum]['stroke' + str(year)].mean()
        miRR += treatedEventsList[eventsNum]['mi' + str(year)].mean() / untreatedEventsList[eventsNum]['mi' + str(year)].mean()
        deathRR += treatedEventsList[eventsNum]['dead' + str(year)].mean() / untreatedEventsList[eventsNum]['dead' + str(year)].mean()
    strokeRR = strokeRR/numIterations
    miRR = miRR/numIterations
    deathRR = deathRR/numIterations


    print(f"year: {year}, stroke: {strokeRR}, mi: {miRR}, death: {deathRR}")

year: 1, stroke: 0.79221671485854, mi: 0.8735829852523217, death: 0.981262946412519
year: 2, stroke: 0.7983494890566597, mi: 0.9023905286661105, death: 0.981262946412519
year: 3, stroke: 0.797920581779661, mi: 0.8786809972109446, death: 0.981262946412519
year: 4, stroke: 0.7854989332828539, mi: 0.8780390481503678, death: 0.981262946412519
year: 5, stroke: 0.7792048437637534, mi: 0.8635951173406141, death: 0.981262946412519


#### Looks like they hold up nicely over time

### Prior to testing recalibration, we explored whether it was possible to change the baseline simulation to get comarable RRs for the mean degree of BP lowering. After following this logic below, we concluded that it wasn't and went on to work on recalibration

### The problem is that we appear to be under-estimating the relative risks compared to trials

Expected Stroke RR from trials: <b>0.79</b> vs. simulation measured stroke RR: <b>0.95</b>
<p>Expected CHD RR from trials:  <b>0.87</b> vs. simulation measured MI RR: <b>0.97</b>

Theories:
<ul>
    <li><b> Random Error</b> - Certainly possible as a contributor as the trial-based estimates have considerable uncertaintly, but the magnitude of the differnce is too great to entirely put on random error.</li>
    <li><b> Sample Selection</b> — The main difference comparing our synthetic trial popuation to the actual trials is that the BPs are slightly higher in our sample (by about 3/3 points). Given that ASCVD includes a sbp-sbp quadratic term and a sbp-age interaction, its possible that this is driving part of the story.
        <p>In response, I tried 3 things:
        <p><ol>
            <li>Lowering the BPs for all patients in the sample by 3 points and re-running. This had a tiny effect on the relative risks (<i>both increasing to 0.72</i>). So no big effect.</li>
            <li>Turning off the quadratic and interaction effects. Maybe the problem isn't the mean BPs but, the presence of extreme high BPs in our sample compared to the trials (which is hard to assess, becasue the trials only report means).<i>Turning off the quadratic and interaction effects increaes the effect sizes to 0.73></i></li>
            <li>The other thought here woudl be about <b>race.</b> 16% of the simulated sample is Black. I'm not sure what that looks like in teh pooled analysis. But, given that race features heavily in ASCVD, its a conceivable problem. Turning off all race effects led to an effect of <i>0.74</i> for stroke and no change for MI, <i>0.71</i></li><p>
        <li>Turning off all of those effects change the RR to <i>0.73</i> for stroke and no change for MI, <i>0.71</i> - so race seems like its probably the most important seleectkon factor and may be drivin the other factors...but, on teh whole selection factors (that aren't of the form "Peopel in trials have better outcomes") dont' seem to explain much of the gap</li>

        </ol>  
    </li>
    <li><b> Measurement Error</b> — While always a condern with BPs, I have a hard time seeing how it would drive our findings here. First, the measurements from trials are (presumably) decent. Second, there is a fair bit of data supporting the BP lowering effect — those estimates should be fairly precise and both random error and non-differential measurement error shoudl balance out do to sample size. Third, if anything, the trial-measured BP lowering effect seems pretty small — and yet we're still over-estimating the BP lowering effect.</li>
    <li><b> Heterogeneity of BP lowering? </b> — If it were that patients with very high blood pressures had a larger effect to a single BP med, then we might miss the boat by giving everybody a fixed 5/3 BP reduction. However, without having anything to parameterize this off...its hard to do anything more than just to explore it.
    <ul>
    <li> Didn't build a model in whom BP lowering occurs (don't think we know...), but drew BP lowering from a distribution - possibly a very small effect: stroke RR <i>0.72</i>, MI: <i>0.71</i></li></ul>
    </li>
    <li><b>Confounding in ASCVD</b> — This seems most plausible to me. It seems pretty believale that patients with very high BPs also have other factors that drive their long-term risk that aren't well measured. So, the idea that ASCVD over-estimates the effect of BP lowring at high BPs is plausible. Unless we could account for that confouder (any papers on ASCVD and SES?), I thikn this means that we have to put in a recalibration factor to try and get our estimates to line up.</li>
    <li><b>Inaccurate trial estimation of treatment effect</b> — Well, if that's the case, we're screwed. We need the trials for treatment effect. I guess its possible — the trial population may be healthier than the ASCVD population, for example, and perhaps there is some sort of ceiling effect. But, that seems unlikely given that the predicted risk in the trials lines up almost perfectly with ASCVD</li>

<li><b>Period effects</b> — Doesn’t make a ton of sense though…those are probably leading to lower RRs over time and the trials, based on older data (right?) have less BP effect than the cohorts.</li>

</ul>

### code below is for summarizing and stratifying effects across age and BP deciles...it was an early check, but not the current goal of the notebook

### look at relative risk across age deciles

In [None]:
ageBpPop = pd.DataFrame({'mi': [x.has_mi_during_simulation() for i,x in  baselinePop._people.iteritems()],
                         'stroke' : [x.has_stroke_during_simulation() for i,x in  baselinePop._people.iteritems()],
                         'age' : [x._age[0] for i,x in  baselinePop._people.iteritems()],
                         'sbp' : [x._sbp[0] for i,x in  baselinePop._people.iteritems()],
                         'dbp' : [x._dbp[0] for i,x in  baselinePop._people.iteritems()],
                         'priorStrokeMI' : [x.has_stroke_prior_to_simulation() or x.has_mi_prior_to_simulation() for i,x in  baselinePop._people.iteritems()],
                         'currentSmoker' : [x._smokingStatus==SmokingStatus.CURRENT for i,x in  baselinePop._people.iteritems()],
                         'hdl' : [x._hdl[0] for i,x in  baselinePop._people.iteritems()],
                         'a1c' : [x._a1c[0] for i,x in  baselinePop._people.iteritems()],
                         'dead' : [x.is_dead() for i,x in  baselinePop._people.iteritems()],
                         'allhat' : [x.allhat_candidate(0) for i,x in  baselinePop._people.iteritems()],
                         'yearsOfObservation' : [x.years_in_simulation() for i, x in baselinePop._people.iteritems()]})

ageBpPop['ageDeciles'] = pd.qcut(ageBpPop.age, 5, labels=range(1,6))
ageBpPop['sbpDeciles'] = pd.qcut(ageBpPop.sbp, 5, labels=range(1,6))


ageBpTreatedPop = pd.DataFrame({'mi': [x.has_mi_during_simulation() for i,x in  popExtraBpMed._people.iteritems()],
                         'stroke' : [x.has_stroke_during_simulation() for i,x in  popExtraBpMed._people.iteritems()],
                         'age' : [x._age[0] for i,x in  popExtraBpMed._people.iteritems()],
                         'sbp' : [x._sbp[0] for i,x in  popExtraBpMed._people.iteritems()],
                         'dbp' : [x._dbp[0] for i,x in  popExtraBpMed._people.iteritems()],
                         'priorStrokeMI' : [x.has_stroke_prior_to_simulation() or x.has_mi_prior_to_simulation() for i,x in  popExtraBpMed._people.iteritems()],
                         'currentSmoker' : [x._smokingStatus==SmokingStatus.CURRENT for i,x in  popExtraBpMed._people.iteritems()],
                         'hdl' : [x._hdl[0] for i,x in  popExtraBpMed._people.iteritems()],
                         'a1c' : [x._a1c[0] for i,x in  popExtraBpMed._people.iteritems()],
                         'dead' : [x.is_dead() for i,x in  popExtraBpMed._people.iteritems()],
                         'allhat' : [x.allhat_candidate(0) for i,x in  popExtraBpMed._people.iteritems()],
                         'yearsOfObservation' : [x.years_in_simulation() for i, x in popExtraBpMed._people.iteritems()]})


ageBpTreatedPop['ageDeciles'] = pd.qcut(ageBpTreatedPop.age, 5, labels=range(1,6))
ageBpTreatedPop['sbpDeciles'] = pd.qcut(ageBpTreatedPop.sbp, 5, labels=range(1,6))

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

plt.bar(np.arange(0.85, 5.85, 1), ageBpPop.groupby('ageDeciles')['mi'].mean()*100, width=0.25, label = "Baseline")
plt.bar(np.arange(1.15, 6.15, 1), ageBpTreatedPop.groupby('ageDeciles')['mi'].mean()*100, width=0.25, label="Add BP Med")
plt.title("Mi Risk, by age decile")
plt.legend()

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

plt.bar(np.arange(0.85, 5.85, 1), ageBpPop.groupby('sbpDeciles')['mi'].mean()*100, width=0.25, label = "Baseline")
plt.bar(np.arange(1.15, 6.15, 1), ageBpTreatedPop.groupby('sbpDeciles')['mi'].mean()*100, width=0.25, label="Add BP Med")
plt.title("MI risk by SBP decile")
plt.legend()

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

plt.bar(np.arange(0.85, 5.85, 1), ageBpPop.groupby('ageDeciles')['stroke'].mean()*100, width=0.25, label = "Baseline")
plt.bar(np.arange(1.15, 6.15, 1), ageBpTreatedPop.groupby('ageDeciles')['stroke'].mean()*100, width=0.25, label="Add BP Med")
plt.legend()
plt.title("Stroke Risk, by age decile")

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

plt.bar(np.arange(0.85, 5.85, 1), ageBpPop.groupby('sbpDeciles')['stroke'].mean()*100, width=0.25, label = "Baseline")
plt.bar(np.arange(1.15, 6.15, 1), ageBpTreatedPop.groupby('sbpDeciles')['stroke'].mean()*100, width=0.25, label="Add BP Med")
plt.legend()
plt.title("Stroke Risk, by SBP decile")

In [None]:
print ("Relative MI risk reduction by age decile")
print((ageBpPop.groupby('ageDeciles')['mi'].mean() -ageBpTreatedPop.groupby('ageDeciles')['mi'].mean())/ageBpPop.groupby('ageDeciles')['mi'].mean())

print ("Relative stroke risk reduction by age decile")
print((ageBpPop.groupby('ageDeciles')['stroke'].mean() -ageBpTreatedPop.groupby('ageDeciles')['stroke'].mean())/ageBpPop.groupby('ageDeciles')['stroke'].mean())

In [None]:
ageBpPop.mean()

In [None]:
ageBpTreatedPop.mean()

In [None]:
plt.bar(1, ageBpPop['mi'].mean()*100, width=0.25, label = "Baseline")
plt.bar(2, ageBpTreatedPop['mi'].mean()*100, width=0.25, label="Add BP Med")
plt.legend()
plt.title("MI Risk in allhat-type population")

In [None]:
plt.bar(1, ageBpPop['stroke'].mean()*100, width=0.25, label = "Baseline")
plt.bar(2, ageBpTreatedPop['stroke'].mean()*100, width=0.25, label="Add BP Med")
plt.legend()
plt.title("Stroke Risk in allhat-type population")

In [None]:
print (f"Relative MI risk for one BP med in ALLHAT: {(ageBpTreatedPop['mi'].sum()/ageBpTreatedPop['yearsOfObservation'].sum())/(ageBpPop['mi'].sum()/ageBpTreatedPop['yearsOfObservation'].sum()):.2f}")
print (f"Relative stroke risk for one BP med in ALLHAT: {(ageBpTreatedPop['stroke'].sum()/ageBpTreatedPop['yearsOfObservation'].sum())/(ageBpPop['stroke'].sum()/ageBpTreatedPop['yearsOfObservation'].sum()):.2f}")

### Across 4 runs of the simulation, we got:
<ol>
    <li>Relative <b>MI</b> risk for one BP med in ALLHAT: 0.72
        <p>Relative <b>stroke</b> risk for one BP med in ALLHAT: 0.64
    </li>

<p>
<li>Relative <b>MI</b> risk for one BP med in ALLHAT: 0.73
    <p>Relative <b>stroke</b> risk for one BP med in ALLHAT: 0.74
</li>

<p><li>Relative <b>MI</b> risk for one BP med in ALLHAT: 0.66
    <p>Relative <b>stroke</b> risk for one BP med in ALLHAT: 0.67
</li>

<P><li>
    Relative <b>MI</b> risk for one BP med in ALLHAT: 0.75
    <p>Relative <b>stroke</b> risk for one BP med in ALLHAT: 0.70
</li>
</ol>

1.  for stroke, the relative risk should be around 0.62-0.77 — so we're over estimating risk by a little bit...
2. for MI, the relative risk shoudl be around 0.78-0.85...so, we're over estimating risk by quite a bit....

### conclusions
1. We're probably estimating the overall risk reasonably accurately, although perhpaps a little aggressively (we're around 0.72 for MI and 0.67 for sroke)
2. Our models aren't pickig up the role of blood pressure lowering for stroke >> MI

### questions
1. What is the right setup for a comparison? Duration? 
2. What is the right treamtent comparison, "add one med in year one...and then some people non-adhere over 5 years is the current model"
3. Could our divergence just be that our population is a bit differnet than ALLHAT? (In spite of the same inclusion criteria, we got a somewhat differnet sample...)
4. Coudl the divergence be for failing to account for mortality? Should I estimate HR censoring on death? Actually...its quite clear that I shoudl...let me do that...
5. Is the place to focus on the event partitioning model? Its basically just an age model (more strokes amongst the old, more MIs amongst the young...). Should we include BP treatment into the model (i.e. more treamtent = fewer strokes?)
6. How importance is getting this issue "right" for the BP cog results?

In [None]:
untreatedEvents = pd.DataFrame({'mi1': [x.has_mi_during_wave(0) for i,x in  baselinePop._people.iteritems()],
                         'mi2': [x.has_mi_during_wave(1) for i,x in  baselinePop._people.iteritems()],
                         'mi3': [x.has_mi_during_wave(2) for i,x in  baselinePop._people.iteritems()],
                         'mi4': [x.has_mi_during_wave(3) for i,x in  baselinePop._people.iteritems()],
                         'mi5': [x.has_mi_during_wave(4) for i,x in  baselinePop._people.iteritems()],
                         'stroke1': [x.has_stroke_during_wave(0) for i,x in  baselinePop._people.iteritems()],
                         'stroke2': [x.has_stroke_during_wave(1) for i,x in  baselinePop._people.iteritems()],
                         'stroke3': [x.has_stroke_during_wave(2) for i,x in  baselinePop._people.iteritems()],
                         'stroke4': [x.has_stroke_during_wave(3) for i,x in  baselinePop._people.iteritems()],
                         'stroke5': [x.has_stroke_during_wave(4) for i,x in  baselinePop._people.iteritems()],                         'age' : [x._age[0] for i,x in  baselinePop._people.iteritems()],
                         'dead1': [x.is_dead() and len(x._age)==1 for i,x in  baselinePop._people.iteritems()],
                         'dead2': [x.is_dead() and len(x._age)==2 for i,x in  baselinePop._people.iteritems()],
                         'dead3': [x.is_dead() and len(x._age)==3 for i,x in  baselinePop._people.iteritems()],
                         'dead4': [x.is_dead() and len(x._age)==4 for i,x in  baselinePop._people.iteritems()],
                         'dead5': [x.is_dead() and len(x._age)==5 for i,x in  baselinePop._people.iteritems()],                         
                         'age' : [x._age[0] for i,x in  baselinePop._people.iteritems()],
                         'allhat' : [x.allhat_candidate(0) for i,x in  baselinePop._people.iteritems()]})

treatedEvents = pd.DataFrame({'mi1': [x.has_mi_during_wave(0) for i,x in  popExtraBpMed._people.iteritems()],
                         'mi2': [x.has_mi_during_wave(1) for i,x in  popExtraBpMed._people.iteritems()],
                         'mi3': [x.has_mi_during_wave(2) for i,x in  popExtraBpMed._people.iteritems()],
                         'mi4': [x.has_mi_during_wave(3) for i,x in  popExtraBpMed._people.iteritems()],
                         'mi5': [x.has_mi_during_wave(4) for i,x in  popExtraBpMed._people.iteritems()],
                         'stroke1': [x.has_stroke_during_wave(0) for i,x in  popExtraBpMed._people.iteritems()],
                         'stroke2': [x.has_stroke_during_wave(1) for i,x in  popExtraBpMed._people.iteritems()],
                         'stroke3': [x.has_stroke_during_wave(2) for i,x in  popExtraBpMed._people.iteritems()],
                         'stroke4': [x.has_stroke_during_wave(3) for i,x in  popExtraBpMed._people.iteritems()],
                         'stroke5': [x.has_stroke_during_wave(4) for i,x in  popExtraBpMed._people.iteritems()],                         
                         'age' : [x._age[0] for i,x in  popExtraBpMed._people.iteritems()],
                         'dead1': [x.is_dead() and len(x._age)==1 for i,x in  popExtraBpMed._people.iteritems()],
                         'dead2': [x.is_dead() and len(x._age)==2 for i,x in  popExtraBpMed._people.iteritems()],
                         'dead3': [x.is_dead() and len(x._age)==3 for i,x in  popExtraBpMed._people.iteritems()],
                         'dead4': [x.is_dead() and len(x._age)==4 for i,x in  popExtraBpMed._people.iteritems()],
                         'dead5': [x.is_dead() and len(x._age)==5 for i,x in  popExtraBpMed._people.iteritems()],                         
                         'allhat' : [x.allhat_candidate(0) for i,x in  popExtraBpMed._people.iteritems()]})

In [None]:
untreatedEvents['treatment'] = 0
treatedEvents['treatment'] = 1
allEvents = pd.concat([untreatedEvents, treatedEvents], ignore_index=True)
allEvents['id'] = allEvents.index

In [None]:
reshapedLong = pd.wide_to_long(allEvents,stubnames=['mi', 'stroke', 'dead'], i='id', j='wave')
reshapedLong = reshapedLong.sort_index()
reshapedLong['waveAsColumn'] = reshapedLong.index.get_level_values('wave')
reshapedLong.loc[reshapedLong.dead, 'diedInWaveTemp'] = reshapedLong.waveAsColumn
reshapedLong['diedInWave'] = reshapedLong.groupby('id')['diedInWaveTemp'].max()
reshapedLong['diedInWave'] = reshapedLong.groupby(['id'])['diedInWaveTemp'].transform(max)
reshapedLong = reshapedLong.loc[reshapedLong.waveAsColumn <= reshapedLong.diedInWave]
reshapedLong.drop(['diedInWaveTemp', 'diedInWave'], axis='columns', inplace=True)

In [None]:
from lifelines import CoxPHFitter

cph = CoxPHFitter()
cph.fit(reshapedLong[['stroke', 'waveAsColumn', 'treatment']], duration_col='waveAsColumn', event_col='stroke', show_progress=False)
print(f"\nHR of treatment on stroke: {np.exp(cph.params_[0]):.2f}")

cph.fit(reshapedLong[['mi', 'waveAsColumn', 'treatment']], duration_col='waveAsColumn', event_col='mi', show_progress=False)
print(f"\nHR of treatment on MI: {np.exp(cph.params_[0]):.2f}")


#cph.print_summary()  # access the results using cph.summary

### possible theories: 
<ol><li> applying a fixed point blood pressure lowering effect is unrealistic...we need to sample from distributions, although i doubt that changes things much...</li>
<li> is there a duaraiton effect here? the estimates are based off of multiple eyars...looking at one year might miss the boat because the highest risk peopel are jumping first...</li>
<li> population parameters don't line up with ALLHAT (our population is older, BPs are quite a bit higher here — so maybe we haven't gotten the population right. although, i'm not sure about the direction of the relative effcts, this may be workign in our favor</li>
<li> observational BP lowering estimates are fundamentally diconnected from trial based estimates (i.e. ASCVD is not calibrated with the trials - could directly test this with the model...</li>
<li> what does the specification of a causal effect mean here? does a BP med cause a 4 point constant reduction in your BP? does it lower your BP one and then you return to baseline?</li>
</ol>

In [None]:
from mcm.outcome import OutcomeType

eventsByAgeUntreated = pd.DataFrame({'stroke0' : [x.has_stroke_during_wave(0) for i,x in  baselinePop._people.iteritems()],
                         'stroke1' : [x.has_stroke_during_wave(1) for i,x in  baselinePop._people.iteritems()],
                         'stroke2' : [x.has_stroke_during_wave(2) for i,x in  baselinePop._people.iteritems()],
                         'stroke3' : [x.has_stroke_during_wave(3) for i,x in  baselinePop._people.iteritems()],
                         'stroke4' : [x.has_stroke_during_wave(4) for i,x in  baselinePop._people.iteritems()],
                         'mi0' : [x.has_mi_during_wave(0) for i,x in  baselinePop._people.iteritems()],
                         'mi1' : [x.has_mi_during_wave(1) for i,x in  baselinePop._people.iteritems()],
                         'mi2' : [x.has_mi_during_wave(2) for i,x in  baselinePop._people.iteritems()],
                         'mi3' : [x.has_mi_during_wave(3) for i,x in  baselinePop._people.iteritems()],
                         'mi4' : [x.has_mi_during_wave(4) for i,x in  baselinePop._people.iteritems()]})

eventsByAgeTreated = pd.DataFrame({'stroke0' : [x.has_stroke_during_wave(0) for i,x in  popExtraBpMed._people.iteritems()],
                         'stroke1' : [x.has_stroke_during_wave(1) for i,x in  popExtraBpMed._people.iteritems()],
                         'stroke2' : [x.has_stroke_during_wave(2) for i,x in  popExtraBpMed._people.iteritems()],
                         'stroke3' : [x.has_stroke_during_wave(3) for i,x in  popExtraBpMed._people.iteritems()],
                         'stroke4' : [x.has_stroke_during_wave(4) for i,x in  popExtraBpMed._people.iteritems()],
                         'mi0' : [x.has_mi_during_wave(0) for i,x in  popExtraBpMed._people.iteritems()],
                         'mi1' : [x.has_mi_during_wave(1) for i,x in  popExtraBpMed._people.iteritems()],
                         'mi2' : [x.has_mi_during_wave(2) for i,x in  popExtraBpMed._people.iteritems()],
                         'mi3' : [x.has_mi_during_wave(3) for i,x in  popExtraBpMed._people.iteritems()],
                         'mi4' : [x.has_mi_during_wave(4) for i,x in  popExtraBpMed._people.iteritems()]})


In [None]:
strokeUntreated = [eventsByAgeUntreated.stroke0.mean(),
                        eventsByAgeUntreated.stroke1.mean(),
                        eventsByAgeUntreated.stroke2.mean(),
                        eventsByAgeUntreated.stroke3.mean(),
                        eventsByAgeUntreated.stroke4.mean()]
strokeUntreatedError = np.array([eventsByAgeUntreated.stroke0.sem(),
                        eventsByAgeUntreated.stroke1.sem(),
                        eventsByAgeUntreated.stroke2.sem(),
                        eventsByAgeUntreated.stroke3.sem(),
                        eventsByAgeUntreated.stroke4.sem()])*1.96
strokeTreated = [eventsByAgeTreated.stroke0.mean(),
                        eventsByAgeTreated.stroke1.mean(),
                        eventsByAgeTreated.stroke2.mean(),
                        eventsByAgeTreated.stroke3.mean(),
                        eventsByAgeTreated.stroke4.mean()]
strokeTreatedError = np.array([eventsByAgeTreated.stroke0.sem(),
                        eventsByAgeTreated.stroke1.sem(),
                        eventsByAgeTreated.stroke2.sem(),
                        eventsByAgeTreated.stroke3.sem(),
                        eventsByAgeTreated.stroke4.sem()])*1.96

plt.bar(np.arange(0,5),strokeUntreated ,yerr=strokeUntreatedError, width=0.4, capsize=3,label="Untreated")
plt.bar(np.arange(0.4,5.4, 1), strokeTreated, yerr=strokeTreatedError, width=0.4, capsize=3, label="Treated")
plt.legend(loc=2)
plt.title("Stroke Rates by temporal wave with treatment")

In [None]:
pd.Series(strokeTreated)/pd.Series(strokeUntreated)

In [None]:
miUntreated = [eventsByAgeUntreated.mi0.mean(),
                        eventsByAgeUntreated.mi1.mean(),
                        eventsByAgeUntreated.mi2.mean(),
                        eventsByAgeUntreated.mi3.mean(),
                        eventsByAgeUntreated.mi4.mean()]

miUntreatedError = np.array([eventsByAgeUntreated.mi0.sem(),
                        eventsByAgeUntreated.mi1.sem(),
                        eventsByAgeUntreated.mi2.sem(),
                        eventsByAgeUntreated.mi3.sem(),
                        eventsByAgeUntreated.mi4.sem()])*1.96
miTreated = [eventsByAgeTreated.mi0.mean(),
                        eventsByAgeTreated.mi1.mean(),
                        eventsByAgeTreated.mi2.mean(),
                        eventsByAgeTreated.mi3.mean(),
                        eventsByAgeTreated.mi4.mean()]
miTreatedError = np.array([eventsByAgeTreated.mi0.sem(),
                        eventsByAgeTreated.mi1.sem(),
                        eventsByAgeTreated.mi2.sem(),
                        eventsByAgeTreated.mi3.sem(),
                        eventsByAgeTreated.mi4.sem()])*1.96

plt.bar(np.arange(0,5), miUntreated, yerr=miUntreatedError, width=0.4, capsize=3, label="Untreated")
plt.bar(np.arange(0.4,5.4, 1), miTreated, yerr=miTreatedError, width=0.4, capsize=3, label="Treated")
plt.legend(loc=2)
plt.title("MI Rates by temporal wave with treatment")

In [None]:
pd.Series(miTreated)/pd.Series(miUntreated)