In [1]:
import numpy as np
import pandas as pd
import os
import random

from microsim.treatment_strategy_repository import TreatmentStrategyRepository
from microsim.treatment import TreatmentStrategiesType, TreatmentStrategyStatus
from microsim.bp_treatment_strategies import AddNBPMedsTreatmentStrategy
from microsim.population_factory import PopulationFactory, PopulationType
from microsim.population import Population

microsimDir = "/Users/deligkaris.1/OneDrive - The Ohio State University Wexner Medical Center/MICROSIM/CODE/microsim"
os.chdir(microsimDir)

In [2]:
from microsim.risk_factor import StaticRiskFactorsType, DynamicRiskFactorsType
bFactor = StaticRiskFactorsType.GENDER.value

In [3]:
from enum import Enum, IntEnum

class CategoricalRiskFactorsType(Enum):
    RACE_ETHNICITY = "raceEthnicity" 
    EDUCATION = "education"          
    GENDER = "gender"                
    SMOKING_STATUS = "smokingStatus" 
    PVD = "pvd"
    ALCOHOL_PER_WEEK = "alcoholPerWeek"
    AFIB = "afib"
    ANY_PHYSICAL_ACTIVITY = "anyPhysicalActivity" 
    
class ContinuousRiskFactorsType(Enum):
    AGE = "age"   # int
    SBP = "sbp"   # int
    DBP = "dbp"   # int
    A1C = "a1c"   # float
    HDL = "hdl"   # int
    LDL = "ldl"   # int
    TRIG = "trig"  # int
    TOT_CHOL = "totChol"   # int
    BMI = "bmi"   #float
    WAIST = "waist"  # int, waist circumference in cm
    CREATININE = "creatinine" # float
    
class TrialType(IntEnum):
    COMPLETELY_RANDOMIZED = 1
    COMPLETELY_RANDOMIZED_IN_BLOCKS = 2
    BERNOULLI_RANDOMIZED = 3
    BERNOULLI_RANDOMIZED_IN_BLOCKS = 4
    NON_RANDOMIZED = 5
    POTENTIAL_OUTCOMES = 6
    

In [4]:
class TrialDescription:
    def __init__(self, 
                 trialType=TrialType.COMPLETELY_RANDOMIZED, 
                 blockFactors=list(),
                 sampleSize=100, 
                 duration=5, 
                 treatmentStrategies=TreatmentStrategyRepository(), 
                 nWorkers=1, 
                 inclusionFilters=None):
        self.trialType = trialType
        self.blockFactors = blockFactors           
        self.sampleSize = sampleSize
        self.duration = duration
        self.treatmentStrategies = treatmentStrategies
        self.nWorkers = nWorkers
        self.inclusionFilters = inclusionFilters
        self._rng = np.random.default_rng() 
        self.popType = None
        self.is_valid_trial()
        
    def is_valid_trial(self):
        self.assess_trial_type_and_block_factors()
        self.assess_sample_size()
        self.assess_duration()
        self.assess_number_of_workers()   
        
    def is_block_randomized(self):
        return ((self.trialType==TrialType.COMPLETELY_RANDOMIZED_IN_BLOCKS)|
                (self.trialType==TrialType.BERNOULLI_RANDOMIZED_IN_BLOCKS))
    
    def is_completely_randomized(self):
         return ((self.trialType==TrialType.COMPLETELY_RANDOMIZED_IN_BLOCKS)|
                (self.trialType==TrialType.COMPLETELY_RANDOMIZED))
        
    def is_bernoulli_randomized(self):
         return ((self.trialType==TrialType.BERNOULLI_RANDOMIZED_IN_BLOCKS)|
                (self.trialType==TrialType.BERNOULLI_RANDOMIZED))

    def assess_trial_type_and_block_factors(self):
        if (self.is_block_randomized()) & (len(self.blockFactors)==0):
            raise RuntimeError("Trial is setup to use blocks but no block factors were provided.")
        elif (not self.is_block_randomized()) & (len(self.blockFactors)>0):
            raise RuntimeError("Trial is not setup to use blocks but block factors were provided.")

    def assess_sample_size(self):
        if (self.sampleSize<=0):
            raise RuntimeError("Sample size cannot be less than or equal to 0.")
        elif  (self.sampleSize>10000000):
            raise RuntimeError("Sample size exceeds the maximum bound.")

    def assess_duration(self):
        if (self.duration<=0):
            raise RuntimeError("Duration cannot be less than or equal to 0.")
        elif  (self.duration>200):
            raise RuntimeError("Duration exceeds the maximum bound.")
            
    def assess_number_of_workers(self):
        if (self.nWorkers<=0):
            raise RuntimeError("Number of workers cannot be less than or equal to 0.")
        elif  (self.nWorkers>100):
            raise RuntimeError("Number of workers exceeds the maximum bound.")
            
class NhanesTrialDescription(TrialDescription):
    def __init__(self, 
                 trialType=TrialType.COMPLETELY_RANDOMIZED, 
                 blockFactors=list(),
                 sampleSize=100, 
                 duration=5, 
                 treatmentStrategies=TreatmentStrategyRepository(), 
                 nWorkers=1, 
                 inclusionFilters=None,
                 year=1999, 
                 nhanesWeights=False, 
                 distributions=False):
        super().__init__(trialType, blockFactors, sampleSize, duration, treatmentStrategies, nWorkers=nWorkers, inclusionFilters=inclusionFilters)
        self.year = year
        self.nhanesWeights=nhanesWeights
        self.distributions=distributions
        self.popArgs = {"n":self.sampleSize,
                        "year":self.year,
                        "dfFilter":self.inclusionFilters,
                        "nhanesWeights":self.nhanesWeights,
                        "distributions":self.distributions}
        self.popType = PopulationType.NHANES

In [16]:
ts = TreatmentStrategyRepository()
ts._repository[TreatmentStrategiesType.BP.value] = AddNBPMedsTreatmentStrategy(1)
td = NhanesTrialDescription(trialType = TrialType.COMPLETELY_RANDOMIZED_IN_BLOCKS, 
                            #trialType = TrialType.NON_RANDOMIZED,
                            blockFactors = [StaticRiskFactorsType.GENDER.value], 
                            #blockFactors = [DynamicRiskFactorsType.AGE.value],
                            #blockFactors=list(),
                            sampleSize = 2000, 
                            duration = 5, 
                            treatmentStrategies = ts, 
                            nWorkers = 4, 
                            inclusionFilters=None, 
                            year=1999, nhanesWeights=True, distributions=False)

In [17]:
#from microsim.population import NHANESDirectSamplePopulation

class Trial:
    
    def __init__(self, trialDescription): 
        if trialDescription.popType is None:
            raise RuntimeError(f"popType in trialDescription must belong in the set({[pt for pt in PopulationType]})")
        else:
            self.trialDescription = trialDescription
        self.treatedPop, self.controlPop = self.get_trial_populations()
        self.results = {}
    
    def get_trial_populations(self):
        treatedPeople, controlPeople = self.get_trial_people()
        return (Population(treatedPeople, PopulationFactory.get_population_model_repo(self.trialDescription.popType)),
                Population(controlPeople, PopulationFactory.get_population_model_repo(self.trialDescription.popType)))
            
    def get_trial_people(self):
        if self.trialDescription.trialType == TrialType.POTENTIAL_OUTCOMES:
            return self.get_trial_people_identical()
        else: 
            treatedPeople, controlPeople = self.get_trial_people_non_randomized()
            if self.trialDescription.trialType==TrialType.NON_RANDOMIZED:
                return treatedPeople, controlPeople
            elif ((self.trialDescription.trialType == TrialType.COMPLETELY_RANDOMIZED) | 
                  (self.trialDescription.trialType == TrialType.BERNOULLI_RANDOMIZED)):
                people = pd.concat([treatedPeople, controlPeople])
                return self.randomize_trial_people(people)
            elif ((self.trialDescription.trialType == TrialType.COMPLETELY_RANDOMIZED_IN_BLOCKS) | 
                  (self.trialDescription.trialType == TrialType.BERNOULLI_RANDOMIZED_IN_BLOCKS)):
                people = pd.concat([treatedPeople, controlPeople])
                return self.randomize_trial_people_in_blocks(people)
            else:
                raise RuntimeError("Unknown TrialType in Trial.get_trial_people function.")
    
    def get_trial_people_non_randomized(self):
        treatedPeople = PopulationFactory.get_people(self.trialDescription.popType, **self.trialDescription.popArgs)
        controlPeople = PopulationFactory.get_people(self.trialDescription.popType, **self.trialDescription.popArgs)
        PopulationFactory.set_index_in_people(controlPeople, start=treatedPeople.shape[0])
        return treatedPeople, controlPeople
    
    def get_trial_people_identical(self):
        controlPeople = PopulationFactory.get_people(self.trialDescription.popType, **self.trialDescription.popArgs)
        treatedPeople = Population.get_people_copy(controlPeople)
        PopulationFactory.set_index_in_people(controlPeople, start=treatedPeople.shape[0])
        return treatedPeople, controlPeople
    
    def print_covariate_distributions(self):
        if not self.trialDescription.is_block_randomized():
            print(" "*25, "self=treated", " "*42,  "other=control")
            self.treatedPop.print_lastyear_summary_comparison(self.controlPop)
        else:
            blockFactor = self.blockFactors[0]
            treatedPeopleBlocks = Population.get_people_blocks(self.treatedPop._people, blockFactor, nBlocks=10)
            controlPeopleBlocks = Population.get_people_blocks(self.controlPop._people, blockFactor, nBlocks=10)
            for cat in treatedPeopleBlocks.keys():
                
            categories = set( [treatedPeopleBlocks.keys()] + 
    
    def randomize_trial_people(self, people):
        nDraws = people.shape[0]
        if self.trialDescription.is_bernoulli_randomized():
            draws = self.trialDescription._rng.uniform(size=nDraws) 
        elif self.trialDescription.is_completely_randomized():
            draws = [0]*(nDraws//2) + [1]*(nDraws//2) if nDraws%2==0 else [0]*(nDraws//2) + [1]*((nDraws//2)+1)
            draws = random.sample(draws, len(draws))
        else:
            raise RuntimeError("Unknown TrialType in Trial randomize_people function.")
        controlPeople = pd.Series([p for i,p in enumerate(people) if draws[i]<0.5])
        treatedPeople = pd.Series([p for i,p in enumerate(people) if draws[i]>=0.5])
        return treatedPeople, controlPeople
    
    def randomize_trial_people_in_blocks(self, people):
        blockFactor = self.trialDescription.blockFactors[0]
        blocks = Population.get_people_blocks(people, blockFactor, nBlocks=10)
        categories = blocks.keys()
        treatedPeople = pd.Series(dtype=object)
        controlPeople = pd.Series(dtype=object)
        for cat in categories:
            treatedPeopleBlock, controlPeopleBlock = self.randomize_trial_people(blocks[cat])
            treatedPeople = pd.concat([treatedPeople, treatedPeopleBlock])
            controlPeople = pd.concat([controlPeople, controlPeopleBlock])
        return treatedPeople, controlPeople
            
        
        #if blockFactor in [x.value for x in CategoricalRiskFactorsType]:
        #    categories = set(list(map(lambda x: getattr(x, "_"+blockFactor), treatedPeople))).union(
        #                 set(list(map(lambda x: getattr(x, "_"+blockFactor), controlPeople))))
        #    blocks = dict()
        #    for cat in categories:
        #        blocks[cat] = {"treatedPeople": pd.Series(list(filter(lambda x: getattr(x, "_"+blockFactor)==cat, treatedPeople))),
        #                       "controlPeople": pd.Series(list(filter(lambda x: getattr(x, "_"+blockFactor)==cat, controlPeople)))}
        #        treatedPeopleBlock, controlPeopleBlock = self.randomize_trial_people(blocks[cat]["treatedPeople"], blocks[cat]["controlPeople"])
        #        blocks[cat] = {"treatedPeople": treatedPeopleBlock,
        #                       "controlPeople": controlPeopleBlock}
        #    treatedPeople = pd.Series(dtype=object)
        #    controlPeople = pd.Series(dtype=object)
        #    for cat in categories:
        #        treatedPeople = pd.concat([treatedPeople, blocks[cat]["treatedPeople"]])
        #        controlPeople = pd.concat([controlPeople, blocks[cat]["controlPeople"]])
        #    return treatedPeople, controlPeople
        #elif blockFactor in [x.value for x in ContinuousRiskFactorsType]:
        #    nCategories = 10
        #    categories = list(range(nCategories))
        #    blockFactorMin, blockFactorMax = list(map(lambda x: (min(x), max(x)), 
        #                                              [list(map(lambda x: getattr(x, "_"+blockFactor)[-1], treatedPeople))]+
        #                                               [list(map(lambda x: getattr(x, "_"+blockFactor)[-1], controlPeople))]))[0]
        #    categoryBounds = np.linspace(blockFactorMin, blockFactorMax, nCategories+1)
        #    blocks = dict()
        #    for cat in categories:
        #        blocks[cat] = {"treatedPeople": pd.Series(list(filter(lambda x: (getattr(x,"_"+blockFactor)[-1]>categoryBounds[cat]) &
        #                                                      (getattr(x,"_"+blockFactor)[-1]<=categoryBounds[cat+1]), treatedPeople))),
        #                       "controlPeople": pd.Series(list(filter(lambda x: (getattr(x,"_"+blockFactor)[-1]>categoryBounds[cat]) &
        #                                                      (getattr(x,"_"+blockFactor)[-1]<=categoryBounds[cat+1]), controlPeople)))}
        #        treatedPeopleBlock, controlPeopleBlock = self.randomize_trial_people(blocks[cat]["treatedPeople"], blocks[cat]["controlPeople"])
        #        blocks[cat] = {"treatedPeople": treatedPeopleBlock,
        #                       "controlPeople": controlPeopleBlock}
        #    treatedPeople = pd.Series(dtype=object)
        #    controlPeople = pd.Series(dtype=object)
        #    for cat in categories:
        #        treatedPeople = pd.concat([treatedPeople, blocks[cat]["treatedPeople"]])
        #        controlPeople = pd.concat([controlPeople, blocks[cat]["controlPeople"]])
        #    return treatedPeople, controlPeople
        #else:
        #    raise RuntimeError("Unrecognized block factor type in Trial function randomize_people_block") 
        
    def run(self):
        self.controlPop.advance(self.trialDescription.duration, 
                                treatmentStrategies=None, 
                                nWorkers=self.trialDescription.nWorkers)
        self.treatedPop.advance(1, 
                                treatmentStrategies = self.trialDescription.treatmentStrategies,
                                nWorkers=self.trialDescription.nWorkers)
        
        for key in TreatmentStrategiesType:
            if self.trialDescription.treatmentStrategies._repository[key.value] is not None:
                self.trialDescription.treatmentStrategies._repository[key.value].status = TreatmentStrategyStatus.MAINTAIN

        self.treatedPop.advance(self.trialDescription.duration-1, 
                                treatmentStrategies = self.trialDescription.treatmentStrategies,
                                nWorkers=self.trialDescription.nWorkers)
        
    def get_outcome_relative_risk(self, outcomeType):
        controlRisk = self.controlPop.get_outcome_risk(outcomeType)
        treatedRisk = self.treatedPop.get_outcome_risk(outcomeType)
        outcomeRelativeRisk = treatedRisk/controlRisk
        return outcomeRelativeRisk

In [18]:
#tr = NhanesTrial(td)
tr = Trial(td)

In [19]:
tr.print_covariate_distributions()

In [20]:
%%time
tr.run()

CPU times: user 923 ms, sys: 163 ms, total: 1.09 s
Wall time: 10.4 s


In [21]:
from microsim.outcome import OutcomeType

In [22]:
tr.get_outcome_relative_risk(OutcomeType.STROKE), tr.get_outcome_relative_risk(OutcomeType.MI)

(0.279720139930035, 0.679320339830085)

In [23]:
tr.treatedPop._people

0       Person(name = 46189 index = 2  raceEthnicity=4...
1       Person(name = 45369 index = 3  raceEthnicity=3...
2       Person(name = 42015 index = 6  raceEthnicity=3...
3       Person(name = 46248 index = 7  raceEthnicity=3...
4       Person(name = 44148 index = 9  raceEthnicity=3...
                              ...                        
1028    Person(name = 44659 index = 3983  raceEthnicit...
1029    Person(name = 42063 index = 3986  raceEthnicit...
1030    Person(name = 43397 index = 3987  raceEthnicit...
1031    Person(name = 44597 index = 3989  raceEthnicit...
1032    Person(name = 44379 index = 3996  raceEthnicit...
Length: 2001, dtype: object

In [24]:
tr.controlPop._people

0       Person(name = 45419 index = 8  raceEthnicity=3...
1       Person(name = 46313 index = 11  raceEthnicity=...
2       Person(name = 44880 index = 12  raceEthnicity=...
3       Person(name = 45381 index = 15  raceEthnicity=...
4       Person(name = 46114 index = 17  raceEthnicity=...
                              ...                        
1027    Person(name = 44308 index = 3988  raceEthnicit...
1028    Person(name = 46884 index = 3990  raceEthnicit...
1029    Person(name = 42880 index = 3993  raceEthnicit...
1030    Person(name = 43742 index = 3998  raceEthnicit...
1031    Person(name = 44841 index = 3999  raceEthnicit...
Length: 1999, dtype: object

In [None]:
tr.treatedPop._people.iloc[0]._gender.value

In [None]:
people = PopulationFactory.get_nhanes_people(year=1999)

In [None]:
people.shape

In [None]:
build_people_using_nhanes_for_sampling_gen(nhanes, n, filter=None, random_seed=None, weights=None):
    """Creates a Pandas Series collection of Person instances."""

    if weights is None:
        weights = nhanes.WTINT2YR
    repeated_sample = nhanes.sample(n, weights=weights, random_state=random_seed, replace=True)
    initializationModelRepository = {DynamicRiskFactorsType.AFIB: AFibPrevalenceModel(), 
                                     DynamicRiskFactorsType.PVD: PVDPrevalenceModel()}
    people = pd.DataFrame.apply(repeated_sample,
                                build_person, initializationModelRepository=initializationModelRepository, axis="columns")

    #sets the unique identifier for each Person instance
    list(map(lambda person, i: setattr(person, "_index", i), people, range(n))) 

    return people

In [None]:
from enum import Enum

class CategoricalRiskFactorsType(Enum):
    RACE_ETHNICITY = "raceEthnicity" 
    EDUCATION = "education"          
    GENDER = "gender"                
    SMOKING_STATUS = "smokingStatus" 
    PVD = "pvd"
    ALCOHOL_PER_WEEK = "alcoholPerWeek"
    AFIB = "afib"
    ANY_PHYSICAL_ACTIVITY = "anyPhysicalActivity" 
    
class ContinuousRiskFactorsType(Enum):
    AGE = "age"   # int
    SBP = "sbp"   # int
    DBP = "dbp"   # int
    A1C = "a1c"   # float
    HDL = "hdl"   # int
    LDL = "ldl"   # int
    TRIG = "trig"  # int
    TOT_CHOL = "totChol"   # int
    BMI = "bmi"   #float
    WAIST = "waist"  # int, waist circumference in cm
    CREATININE = "creatinine" # float