In [1]:
import pandas as pd
import numpy as np

Here is the idea — we'll set up baseline risks (higher in secondary than in primary prevention) and then apply relative risk reductions (RRRs) across groups to estimate the total absolute risk reductions (ARRs).

We'll assume that treatment compliance is perfect — we will estimate the "untreated" outcome simply as a counterfactual comparator.

Assume that everybody starts event-free and everybody is treated. We'll look at 3 time epochs: t1 (year 0 -> 1), t2 (year 1 -> 2) and t3 (year 2 -> 3).

### Setup a baseline risk distribution

In [7]:
n = 1000000
baselineRisk = 0.10
secondaryRiskMultiplier = 2.0
rrrPrimaryPrevention = 0.20
rrrSecondaryPrevention = 0.20
# For this hypothetical, the increase in risk is purely a risk marker; there is no causal increase in risk.

# Seed the legacy global NumPy RNG (the one np.random.normal / np.random.uniform draw from)
# so that Restart Kernel -> Run All regenerates the same simulated cohort.
randomSeed = 42
np.random.seed(randomSeed)

# One row per simulated person. Per-epoch event risk is drawn from a normal distribution
# centred on baselineRisk (SD 0.04) and floored at 0, since a probability cannot be negative.
timeSeries = pd.DataFrame(data={'baselineRisk' : np.random.normal(loc=baselineRisk, scale=(0.04), size=n)})
# Clip only the risk column; a row-wide mask assignment would zero every column of those rows.
timeSeries['baselineRisk'] = timeSeries['baselineRisk'].clip(lower=0)
# Risk used for people flagged as secondary prevention: a fixed multiple of their own baseline risk.
timeSeries['secondaryRisk'] = timeSeries['baselineRisk'] * secondaryRiskMultiplier

### Assign events for the first epoch

In [8]:
# there is probably a way to do this with a logical operator that only returns true for 1 and 1, but this is clearer
def applyTreatment(x, rrr=None):
    """Decide whether a counterfactual (untreated) event still occurs under treatment.

    Parameters
    ----------
    x : int
        Untreated outcome for one person in one epoch: 1 if an event would
        occur without treatment, 0 otherwise.
    rrr : float, optional
        Relative risk reduction of the treatment, i.e. the probability that
        an otherwise-occurring event is prevented. When omitted, the
        notebook-level ``rrrPrimaryPrevention`` is looked up at call time,
        which matches the previous behaviour.

    Returns
    -------
    int
        1 if the event occurs despite treatment, 0 if there was no event to
        begin with or treatment prevented it.

    Raises
    ------
    ValueError
        If ``x`` is neither 0 nor 1 (previously this silently returned None).
    """
    if x == 0:
        # Nothing to prevent; no random draw is consumed.
        return 0
    if x == 1:
        if rrr is None:
            rrr = rrrPrimaryPrevention
        # Draw a scalar (not a 1-element array) so the result is a plain int;
        # converting 1-element arrays to scalars is deprecated in newer NumPy.
        return int(np.random.uniform(low=0, high=1) > rrr)
    raise ValueError("applyTreatment expects an outcome of 0 or 1, got %r" % (x,))

# Risk of stroke without treatment between time 0 and time 1. This is an unobserved
# counterfactual: in this story, everybody is going to be treated.

# Untreated outcome: an event occurs when the person's baseline risk exceeds a uniform draw.
timeSeries['noTreatmentT1'] = timeSeries['baselineRisk'].gt(np.random.uniform(low=0, high=1, size=n)).astype(int)
# Treated outcome: each untreated event is passed through applyTreatment.
timeSeries['treatmentT1'] = timeSeries['noTreatmentT1'].apply(applyTreatment).astype(int)

# Events prevented by treatment in epoch 1 (1 = prevented, 0 = no difference).
timeSeries['treatmentEffectT1'] = timeSeries['noTreatmentT1'].sub(timeSeries['treatmentT1'])
# Anyone with an event under treatment is flagged as secondary prevention from here on.
timeSeries['anySecondaryPrevention'] = timeSeries['treatmentT1']

### Assign events for the second epoch

In [9]:
def assignEventAcrossPrimaryAndSecondaryPrevention(x):
    """Draw one untreated event for a person, using the risk that matches their status.

    Parameters
    ----------
    x : row-like
        Object exposing ``secondaryRisk``, ``baselineRisk`` and
        ``anySecondaryPrevention`` attributes (e.g. a DataFrame row passed by
        ``DataFrame.apply(..., axis=1)``).

    Returns
    -------
    bool
        True if a uniform draw on [0, 1) falls below the applicable risk:
        ``secondaryRisk`` when ``anySecondaryPrevention`` is truthy, otherwise
        ``baselineRisk``.
    """
    riskForComparison = x.secondaryRisk if x.anySecondaryPrevention else x.baselineRisk
    # Scalar draw and scalar bool: returning a 1-element array made the caller's
    # .astype(int) depend on array-to-scalar conversion, deprecated in newer NumPy.
    return bool(np.random.uniform(low=0, high=1) < riskForComparison)

# Untreated (counterfactual) events for epoch 2: each row is compared against secondaryRisk
# if the person is already flagged as secondary prevention, otherwise against baselineRisk.
timeSeries['noTreatmentT2'] = (timeSeries.apply(assignEventAcrossPrimaryAndSecondaryPrevention, axis=1)).astype(int)
# Treated events: each untreated event is passed through applyTreatment.
timeSeries['treatmentT2'] = (timeSeries['noTreatmentT2'].apply(applyTreatment)).astype(int)

# Events prevented by treatment in epoch 2.
timeSeries['treatmentEffectT2'] = timeSeries['noTreatmentT2'] - timeSeries['treatmentT2']
# Carry the secondary-prevention flag forward: set once the person has had any treated event.
# A row-wise max over the two columns avoids building a 2 x n frame with one column per person.
timeSeries['anySecondaryPrevention'] = timeSeries[['anySecondaryPrevention', 'treatmentT2']].max(axis=1)

### 3rd Epoch

In [10]:
# Untreated (counterfactual) events for epoch 3, using the secondary-prevention flag as it
# stands after the earlier epochs.
timeSeries['noTreatmentT3'] = (timeSeries.apply(assignEventAcrossPrimaryAndSecondaryPrevention, axis=1)).astype(int)
# Treated events: each untreated event is passed through applyTreatment.
timeSeries['treatmentT3'] = (timeSeries['noTreatmentT3'].apply(applyTreatment)).astype(int)

# Events prevented by treatment in epoch 3.
timeSeries['treatmentEffectT3'] = timeSeries['noTreatmentT3'] - timeSeries['treatmentT3']
# Update the flag with epoch-3 events. A row-wise max over the two columns avoids building
# a 2 x n frame with one column per person.
timeSeries['anySecondaryPrevention'] = timeSeries[['anySecondaryPrevention', 'treatmentT3']].max(axis=1)

# Label each person with the epoch of their first event under treatment. np.select takes
# the first condition that matches, so an earlier epoch wins over a later one.
conditions = [
    (timeSeries['treatmentT1'] == 1),
    (timeSeries['treatmentT2'] == 1),
    (timeSeries['treatmentT3'] == 1)]
choices = [1, 2, 3]
# Integer default (0 = no event in any epoch) so it shares a dtype with the integer choices;
# a string default either turns the whole column into strings or fails dtype promotion,
# depending on the NumPy version.
timeSeries['strokeWave'] = np.select(conditions, choices, default=0)

# Average number of events prevented per epoch across the three epochs.
timeSeries['totalTreatmentEffect'] = (timeSeries.treatmentEffectT1 + timeSeries.treatmentEffectT2 + timeSeries.treatmentEffectT3) / 3

In [13]:
# Share of people whose epoch-1 event was prevented by treatment (1) versus not (0).
timeSeries['treatmentEffectT1'].value_counts(normalize=True)

0    0.979914
1    0.020086
Name: treatmentEffectT1, dtype: float64

In [11]:
# Mean treatment effect per epoch, and averaged over epochs, grouped by strokeWave.
(
    timeSeries
    .groupby('strokeWave')
    [['treatmentEffectT1', 'treatmentEffectT2', 'treatmentEffectT3', 'totalTreatmentEffect']]
    .mean()
)

Unnamed: 0_level_0,treatmentEffectT1,treatmentEffectT2,treatmentEffectT3,totalTreatmentEffect
strokeWave,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0.021157,0.020847,0.021173,0.021059
1,0.0,0.045613,0.0461,0.030571
2,0.026121,0.0,0.045263,0.023794
3,0.02517,0.02411,0.0,0.016427


In [14]:
# Expected per-epoch absolute risk reduction if everyone stayed at their baseline
# (primary-prevention) risk: baseline risk times the primary-prevention RRR.
timeSeries['treatmentEffectPrimaryOnly'] = timeSeries['baselineRisk'].mul(rrrPrimaryPrevention)

In [15]:
# Same summary by strokeWave, with the primary-prevention-only expected effect alongside.
(
    timeSeries
    .groupby('strokeWave')
    [['treatmentEffectT1', 'treatmentEffectT2', 'treatmentEffectT3',
      'totalTreatmentEffect', 'treatmentEffectPrimaryOnly']]
    .mean()
)

Unnamed: 0_level_0,treatmentEffectT1,treatmentEffectT2,treatmentEffectT3,totalTreatmentEffect,treatmentEffectPrimaryOnly
strokeWave,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,0.021157,0.020847,0.021173,0.021059,0.019199
1,0.0,0.045613,0.0461,0.030571,0.023177
2,0.026121,0.0,0.045263,0.023794,0.022957
3,0.02517,0.02411,0.0,0.016427,0.022695
