In [1]:
import numpy as np
import pandas as pd
import os
import random

from microsim.treatment_strategy_repository import TreatmentStrategyRepository
from microsim.treatment import TreatmentStrategiesType, TreatmentStrategyStatus
from microsim.bp_treatment_strategies import AddNBPMedsTreatmentStrategy
from microsim.population_factory import PopulationFactory, PopulationType
from microsim.population import Population
from microsim.trials.trial_type import TrialType
from microsim.trials.trial_description import NhanesTrialDescription
from microsim.risk_factor import CategoricalRiskFactorsType, ContinuousRiskFactorsType
from microsim.risk_factor import StaticRiskFactorsType, DynamicRiskFactorsType
from microsim.person_filter import PersonFilter
from microsim.trials.relative_risk_analysis import RelativeRiskAnalysis
from microsim.trials.cox_regression_analysis import CoxRegressionAnalysis
from microsim.trials.linear_regression_analysis import LinearRegressionAnalysis
from microsim.trials.logistic_regression_analysis import LogisticRegressionAnalysis

# Root of the local microsim checkout. The notebook changes into it before any
# microsim object is built. Set the MICROSIM_DIR environment variable to point
# at your own checkout; the original author's path is kept as the default.
microsimDir = os.environ.get(
    "MICROSIM_DIR",
    "/Users/deligkaris.1/OneDrive - The Ohio State University Wexner Medical Center/MICROSIM/CODE/microsim",
)
os.chdir(microsimDir)

In [2]:
# Person filters handed to the trial description below (personFilters=pf).
pf = PersonFilter()
#pf

In [3]:
# Trial description used by every cell below: an NHANES-based trial that is
# completely randomized within blocks defined by gender. The commented-out
# lines are alternative settings kept for quick switching.
#ts = TreatmentStrategyRepository()
#ts._repository[TreatmentStrategiesType.BP.value] = AddNBPMedsTreatmentStrategy(1)

td = NhanesTrialDescription(trialType = TrialType.COMPLETELY_RANDOMIZED_IN_BLOCKS, 
                            #trialType = TrialType.COMPLETELY_RANDOMIZED,
                            #trialType = TrialType.NON_RANDOMIZED,
                            blockFactors = [StaticRiskFactorsType.GENDER.value], 
                            #blockFactors = [DynamicRiskFactorsType.AGE.value],
                            #blockFactors=list(),
                            sampleSize = 1000, 
                            duration = 5, 
                            # the strategy is given by name here; the explicit repository (ts) is the commented alternative
                            treatmentStrategies = "1bpMedsAdded", #ts, 
                            nWorkers = 4, 
                            personFilters=pf, 
                            year=1999, nhanesWeights=True, distributions=False)
#td

In [4]:
#from microsim.trials.linear_regression_analysis import LinearRegressionAnalysis
#from microsim.trials.logistic_regression_analysis import LogisticRegressionAnalysis
#from microsim.trials.cox_regression_analysis import CoxRegressionAnalysis
from microsim.outcome import OutcomeType

class OutcomeAssessor:
    """Registry of named outcome assessments and the analysis used for each.

    Every assessment is stored under its name as a dict with two entries:
    "assessmentFunctionDict" (the callables that extract the outcome, and for
    cox assessments also the time) and "assessmentAnalysis" (a key of
    self._analysis: "linear", "logistic", "cox" or "relRisk").
    """

    def __init__(self, addCommonAssessments=True):
        """Create the registry; when addCommonAssessments is true, pre-register the common set."""
        self._assessments = {}
        self._analysis = dict(linear=LinearRegressionAnalysis(),
                              logistic=LogisticRegressionAnalysis(),
                              cox=CoxRegressionAnalysis(),
                              relRisk=RelativeRiskAnalysis())
        if addCommonAssessments:
            self.add_common_assessments()

    def add_outcome_assessment(self, assessmentName, assessmentFunctionDict, assessmentAnalysis):
        """Register an assessment, or print why it was rejected.

        Rejections (nothing is stored, nothing is raised):
          * assessmentAnalysis is not a known analysis key,
          * assessmentName is already registered,
          * assessmentFunctionDict has the wrong number of entries
            (2 for "cox", 1 for every other analysis).
        """
        if assessmentAnalysis not in self._analysis:
            print(f"Cannot add outcome assessment with analysis {assessmentAnalysis} because this analysis does not exist.")
            print(f"Available assessment analysis are: {list(self._analysis)}")
            return
        if assessmentName in self._assessments:
            print(f"Cannot add outcome assessment {assessmentName} because this assessment name already exists.")
            return
        # cox needs two functions, everything else exactly one
        requiredLength = 2 if assessmentAnalysis == "cox" else 1
        if len(assessmentFunctionDict) != requiredLength:
            print(f"Cannot add outcome assessment {assessmentName} because of incorrect assessmentFunctionDict length.")
            return
        self._assessments[assessmentName] = {"assessmentFunctionDict": assessmentFunctionDict,
                                             "assessmentAnalysis": assessmentAnalysis}

    def rm_outcome_assessment(self, assessmentName):
        """Remove one assessment by name; print a message if the name is unknown."""
        if assessmentName not in self._assessments:
            print(f"Cannot remove outcome assessment with name {assessmentName} because this assessment name does not exist.")
            return
        del self._assessments[assessmentName]

    def rm_outcome_assessments(self, assessmentNameList):
        """Remove several assessments, one name at a time."""
        for name in assessmentNameList:
            self.rm_outcome_assessment(name)

    def add_common_assessments(self):
        """Register the standard assessments, in the order listed below."""
        commonAssessments = (
            ("death",
             {"outcome": lambda x: x.has_outcome(OutcomeType.DEATH)},
             "logistic"),
            ("anyEvent",
             {"outcome": lambda x: x.has_any_outcome([OutcomeType.DEATH, OutcomeType.MI, OutcomeType.STROKE,
                                                      OutcomeType.DEMENTIA, OutcomeType.CI])},
             "logistic"),
            ("vascularEventOrDeath",
             {"outcome": lambda x: x.has_any_outcome([OutcomeType.DEATH, OutcomeType.MI, OutcomeType.STROKE])},
             "logistic"),
            ("vascularEvent",
             {"outcome": lambda x: x.has_any_outcome([OutcomeType.MI, OutcomeType.STROKE])},
             "logistic"),
            ("qalys",
             {"outcome": lambda x: x.get_outcome_item_sum(OutcomeType.QUALITYADJUSTED_LIFE_YEARS, "qaly")},
             "linear"),
            ("meanGCP",
             {"outcome": lambda x: x.get_outcome_item_mean(OutcomeType.COGNITION, "gcp")},
             "linear"),
            ("lastGCP",
             {"outcome": lambda x: x.get_outcome_item_last(OutcomeType.COGNITION, "gcp")},
             "linear"),
            ("cogEvent",
             {"outcome": lambda x: x.has_any_outcome([OutcomeType.CI, OutcomeType.DEMENTIA])},
             "logistic"),
            ("deathCox",
             {"outcome": lambda x: x.has_outcome(OutcomeType.DEATH),
              "time": lambda x: x.get_min_wave_of_first_outcomes_or_last_wave([OutcomeType.DEATH])},
             "cox"),
            ("cogEventCox",
             {"outcome": lambda x: x.has_any_outcome([OutcomeType.CI, OutcomeType.DEMENTIA]),
              "time": lambda x: x.get_min_wave_of_first_outcomes_or_last_wave([OutcomeType.CI, OutcomeType.DEMENTIA])},
             "cox"),
            ("vascularEventOrDeathCox",
             {"outcome": lambda x: x.has_any_outcome([OutcomeType.DEATH, OutcomeType.MI, OutcomeType.STROKE]),
              "time": lambda x: x.get_min_wave_of_first_outcomes_or_last_wave([OutcomeType.DEATH, OutcomeType.MI, OutcomeType.STROKE])},
             "cox"),
            ("strokeRR",
             {"outcome": lambda x: x.get_outcome_risk(OutcomeType.STROKE)},
             "relRisk"),
        )
        for name, functionDict, analysis in commonAssessments:
            self.add_outcome_assessment(name, functionDict, analysis)

    def __str__(self):
        """One line per registered assessment: its name and its analysis key."""
        pieces = [f"Outcome Assessor\n\tAssessments:\n"]
        for name, assessment in self._assessments.items():
            pieces.append(f"\t\tName: {name:<25}")
            pieces.append(f"Analysis: {assessment['assessmentAnalysis']:<15}\n")
        return "".join(pieces)

    def __repr__(self):
        return self.__str__()

In [5]:
# Assessor with the common assessments pre-registered (addCommonAssessments defaults to True).
oa = OutcomeAssessor()

In [6]:
# Display the registered assessments and the analysis assigned to each.
oa

Outcome Assessor
	Assessments:
		Name: death                    Analysis: logistic       
		Name: anyEvent                 Analysis: logistic       
		Name: vascularEventOrDeath     Analysis: logistic       
		Name: vascularEvent            Analysis: logistic       
		Name: qalys                    Analysis: linear         
		Name: meanGCP                  Analysis: linear         
		Name: lastGCP                  Analysis: linear         
		Name: cogEvent                 Analysis: logistic       
		Name: deathCox                 Analysis: cox            
		Name: cogEventCox              Analysis: cox            
		Name: vascularEventOrDeathCox  Analysis: cox            
		Name: strokeRR                 Analysis: relRisk        

In [7]:
#from microsim.population import NHANESDirectSamplePopulation

class Trial:
    """A two-arm trial (treated vs control) built from a trial description.

    Attributes:
        trialDescription: the description the trial was built from.
        treatedPop, controlPop: the two Population objects.
        completed: True once run() has finished.
        results: dict mapping assessment name -> whatever the analysis returned.
    """

    def __init__(self, trialDescription):
        """Build both populations from trialDescription.

        Raises:
            RuntimeError: if trialDescription.popType is None.
        """
        if trialDescription.popType is None:
            raise RuntimeError(f"popType in trialDescription must belong in the set({[pt for pt in PopulationType]})")
        self.trialDescription = trialDescription
        self.treatedPop, self.controlPop = self.get_trial_populations()
        self.completed = False
        self.results = dict()

    def get_trial_populations(self):
        """Return (treated Population, control Population), each with its own model repository."""
        treatedPeople, controlPeople = self.get_trial_people()
        popType = self.trialDescription.popType
        return (Population(treatedPeople, PopulationFactory.get_population_model_repo(popType)),
                Population(controlPeople, PopulationFactory.get_population_model_repo(popType)))

    def get_trial_people(self):
        """Return (treatedPeople, controlPeople) according to the trial type.

        Raises:
            RuntimeError: if the description matches none of the known randomization schemes.
        """
        description = self.trialDescription
        if description.trialType == TrialType.POTENTIAL_OUTCOMES:
            return self.get_trial_people_identical()
        treatedPeople, controlPeople = self.get_trial_people_non_randomized()
        if description.is_not_randomized():
            return treatedPeople, controlPeople
        elif description.is_not_block_randomized():
            return self.randomize_trial_people(pd.concat([treatedPeople, controlPeople]))
        elif description.is_block_randomized():
            return self.randomize_trial_people_in_blocks(pd.concat([treatedPeople, controlPeople]))
        raise RuntimeError("Unknown TrialType in Trial.get_trial_people function.")

    def get_trial_people_non_randomized(self):
        """Draw two independent sets of people; control indices start after the treated ones."""
        treatedPeople = PopulationFactory.get_people(self.trialDescription.popType, **self.trialDescription.popArgs)
        controlPeople = PopulationFactory.get_people(self.trialDescription.popType, **self.trialDescription.popArgs)
        PopulationFactory.set_index_in_people(controlPeople, start=treatedPeople.shape[0])
        return treatedPeople, controlPeople

    def get_trial_people_identical(self):
        """Draw one set of people and use a copy of it as the treated arm.

        The control people are re-indexed afterwards so the two arms do not share indices.
        """
        controlPeople = PopulationFactory.get_people(self.trialDescription.popType, **self.trialDescription.popArgs)
        treatedPeople = Population.get_people_copy(controlPeople)
        PopulationFactory.set_index_in_people(controlPeople, start=treatedPeople.shape[0])
        return treatedPeople, controlPeople

    def randomize_trial_people(self, people):
        """Split people into (treatedPeople, controlPeople).

        Bernoulli randomization assigns each person from a uniform draw (>= 0.5 is
        treated). Complete randomization shuffles a fixed list of 0/1 labels; with
        an odd number of people the treated arm receives the extra person.

        Raises:
            RuntimeError: if the description is neither Bernoulli nor completely randomized.
        """
        nDraws = people.shape[0]
        if self.trialDescription.is_bernoulli_randomized():
            draws = self.trialDescription._rng.uniform(size=nDraws)
        elif self.trialDescription.is_completely_randomized():
            draws = [0]*(nDraws//2) + [1]*(nDraws - nDraws//2)
            # NOTE(review): this shuffle uses the module-level `random` generator, not
            # trialDescription._rng, so it is presumably not controlled by the
            # description's RNG - confirm whether that is intended.
            draws = random.sample(draws, len(draws))
        else:
            raise RuntimeError("Unknown TrialType in Trial randomize_people function.")
        # dtype=object keeps an empty arm well-defined (people are arbitrary objects)
        controlPeople = pd.Series([p for p, draw in zip(people, draws) if draw < 0.5], dtype=object)
        treatedPeople = pd.Series([p for p, draw in zip(people, draws) if draw >= 0.5], dtype=object)
        return treatedPeople, controlPeople

    def randomize_trial_people_in_blocks(self, people):
        """Randomize within each block of the first block factor and pool the arms.

        Only blockFactors[0] is used. Returns (treatedPeople, controlPeople).
        """
        blockFactor = self.trialDescription.blockFactors[0]
        blocks = Population.get_people_blocks(people, blockFactor, nBlocks=10)
        treatedBlocks, controlBlocks = [], []
        for cat in blocks.keys():
            treatedPeopleBlock, controlPeopleBlock = self.randomize_trial_people(blocks[cat])
            treatedBlocks.append(treatedPeopleBlock)
            controlBlocks.append(controlPeopleBlock)
        # one concat per arm instead of re-concatenating inside the loop
        return self._concat_people(treatedBlocks), self._concat_people(controlBlocks)

    @staticmethod
    def _concat_people(peopleBlocks):
        """Concatenate a list of people Series; an empty list gives an empty object Series."""
        if len(peopleBlocks) == 0:
            return pd.Series(dtype=object)
        return pd.concat(peopleBlocks)

    def run(self):
        """Advance both arms for the trial duration (once only).

        The control arm is advanced without treatment. The treated arm is advanced
        one step, every present treatment strategy is switched to MAINTAIN, and the
        arm is then advanced for the remaining duration-1 steps.
        """
        if self.completed:
            print("Cannot run a trial that has already been completed.")
            return
        description = self.trialDescription
        self.controlPop.advance(description.duration,
                                treatmentStrategies=None,
                                nWorkers=description.nWorkers)
        self.treatedPop.advance(1,
                                treatmentStrategies=description.treatmentStrategies,
                                nWorkers=description.nWorkers)

        # A description without treatment strategies has nothing to switch.
        if description.treatmentStrategies is not None:
            # NOTE(review): the status change is made on the description's own
            # strategy objects and is not undone afterwards - confirm that reusing
            # the same description for another Trial is not affected.
            for key in TreatmentStrategiesType:
                strategy = description.treatmentStrategies._repository[key.value]
                if strategy is not None:
                    strategy.status = TreatmentStrategyStatus.MAINTAIN

        self.treatedPop.advance(description.duration-1,
                                treatmentStrategies=description.treatmentStrategies,
                                nWorkers=description.nWorkers)
        self.completed = True
        print("Trial is completed.")

    def analyze(self, outcomeAssessor):
        """Run every assessment registered in outcomeAssessor and store it in self.results."""
        for assessmentName, assessment in outcomeAssessor._assessments.items():
            assessmentAnalysis = assessment["assessmentAnalysis"]
            assessmentAnalysisFunction = outcomeAssessor._analysis[assessmentAnalysis]
            assessmentFunctionDict = assessment["assessmentFunctionDict"]
            self.results[assessmentName] = assessmentAnalysisFunction.analyze(self, assessmentFunctionDict, assessmentAnalysis)

    def run_analyze(self, outcomeAssessor):
        """Convenience: run() followed by analyze(outcomeAssessor)."""
        self.run()
        self.analyze(outcomeAssessor)

    def print_covariate_distributions(self):
        """Print a treated-vs-control covariate comparison, per block when block randomized."""
        if not self.trialDescription.is_block_randomized():
            print(" "*25,
                      "self=treated, unique people count=",  Population.get_unique_people_count(self.treatedPop._people),
                      " "*15,
                      "other=control, unique people count=",  Population.get_unique_people_count(self.controlPop._people))
            self.treatedPop.print_lastyear_summary_comparison(self.controlPop)
        else:
            blockFactor = self.trialDescription.blockFactors[0]
            people = pd.concat([self.treatedPop._people, self.controlPop._people])
            peopleBlocks = Population.get_people_blocks(people, blockFactor, nBlocks=10)
            # Build the arm membership sets once instead of rebuilding a list for
            # every person; assumes _index values are hashable - TODO confirm.
            treatedIndices = {person._index for person in self.treatedPop._people}
            controlIndices = {person._index for person in self.controlPop._people}
            for key in peopleBlocks.keys():
                treatedPeopleBlock = pd.Series([person for person in peopleBlocks[key]
                                                if person._index in treatedIndices], dtype=object)
                controlPeopleBlock = pd.Series([person for person in peopleBlocks[key]
                                                if person._index in controlIndices], dtype=object)
                print(" "*25, "-"*109)
                print(" "*25, f"block:{blockFactor}={key}")
                print(" "*25,
                      "self=treated, unique people count=",  Population.get_unique_people_count(treatedPeopleBlock),
                      " "*15,
                      "other=control, unique people count=",  Population.get_unique_people_count(controlPeopleBlock))
                Population.print_people_summary_at_index_comparison(treatedPeopleBlock, controlPeopleBlock, -1)

    def __str__(self):
        """Trial description, completion flag and, once completed, the results table."""
        rep = str(self.trialDescription)
        rep += f"\nTrial\n"
        rep += f"\tTrial completed: {self.completed}\n"
        if self.completed:
            rep += f"Trial results:\n"
            rep += "\t" +" "*25 + " "*8 + "Z/relRisk" + " "*6 + "Intercept" + " "*11 + "Z SE" + " "*9 + "pValue\n"
            for key in self.results.keys():
                rep += f"\t{key:>25}: "
                for result in self.results[key]:
                    if result is None:
                        # missing entries leave an empty column
                        rep += " "*15
                    elif result == float('inf'):
                        # compare by value: an identity test against float('inf') never matches
                        rep += f"{'inf':>15}"
                    else:
                        rep += f"{result:>15.2f}"
                rep += "\n"
        return rep

    def __string__(self):
        """Backward-compatible alias of __str__ (the original method name)."""
        return self.__str__()

    def __repr__(self):
        return self.__str__()

In [8]:
# Build the trial from the description; the constructor draws both populations.
#tr = NhanesTrial(td)
tr = Trial(td)

In [9]:
#tr

In [10]:
#tr.print_covariate_distributions()

In [11]:
%%time
# Advance the control and treated populations; prints a message when finished.
tr.run()

Trial is completed.
CPU times: user 475 ms, sys: 91 ms, total: 566 ms
Wall time: 6.85 s


In [12]:
# Run every assessment registered in oa; results are stored in tr.results by assessment name.
tr.analyze(oa)

In [13]:
# Display the trial description followed by the results table.
tr

Trial Description
	Trial type: 2
	Block factors: ['gender']
	Sample size: 1000
	Duration: 5
	Treatment strategies: <microsim.treatment_strategy_repository.TreatmentStrategyRepository object at 0x1074ce6a0>
	Number of workers: 4
	Person filters: 
	 Person Filters:
	    filter type   filter name    
	             df   lowSBPLimit    
	             df   lowDBPLimit    
	             df   highAntiHypertensivesLimit
	         person   highDemAndCVLimit

	Year: 1999
	NHANES weights: True
	Distributions: False
	Population type: PopulationType.NHANES
Trial
	Trial completed: True
Trial results:
	                                 Z/relRisk      Intercept           Z SE         pValue
	                    death:           -0.16          -5.00           0.40           0.69
	                 anyEvent:           -0.15           0.59           0.10           0.11
	     vascularEventOrDeath:            0.10          -4.46           0.31           0.76
	            vascularEvent:            0.29        

In [None]:
# Trial does not define get_outcome_relative_risk, so compute the treated/control
# risk ratio directly from the two populations.
# NOTE(review): assumes Population.get_outcome_risk(outcomeType) exists, as the
# "strokeRR" assessment suggests - confirm. A zero control risk is not guarded here.
tr.treatedPop.get_outcome_risk(OutcomeType.DEATH) / tr.controlPop.get_outcome_risk(OutcomeType.DEATH)

In [None]:
# Number of people in the treated arm.
tr.treatedPop._people.shape[0]

In [None]:
# Number of people in the control arm.
tr.controlPop._people.shape[0]

In [None]:
# Count treated people whose first _sbp entry is below 125.
len(list(filter(lambda x: x._sbp[0]<125, tr.treatedPop._people)))

In [None]:
# Count control people whose first _sbp entry is below 126.
# NOTE(review): the treated-arm cell uses 125 - confirm the two thresholds are meant to differ.
len(list(filter(lambda x: x._sbp[0]<126, tr.controlPop._people)))

In [None]:
# Count treated people whose first _dbp entry is below 85.
len(list(filter(lambda x: x._dbp[0]<85, tr.treatedPop._people)))

In [None]:
def build_people_using_nhanes_for_sampling_gen(nhanes, n, filter=None, random_seed=None, weights=None):
    """Creates a Pandas Series collection of Person instances.

    Args:
        nhanes: DataFrame to sample from; its WTINT2YR column supplies the
            default sampling weights.
        n: number of rows to draw (with replacement).
        filter: accepted but not used by this function.
        random_seed: passed to the sampling call as random_state.
        weights: sampling weights; defaults to nhanes.WTINT2YR when None.

    Returns:
        The result of applying build_person to every sampled row, with each
        element's `_index` attribute set to its position (0..n-1).

    NOTE(review): build_person, AFibPrevalenceModel and PVDPrevalenceModel are
    not imported anywhere in this notebook; they must be in scope before this
    function is called.
    """

    if weights is None:
        weights = nhanes.WTINT2YR
    repeated_sample = nhanes.sample(n, weights=weights, random_state=random_seed, replace=True)
    initializationModelRepository = {DynamicRiskFactorsType.AFIB: AFibPrevalenceModel(),
                                     DynamicRiskFactorsType.PVD: PVDPrevalenceModel()}
    people = pd.DataFrame.apply(repeated_sample,
                                build_person, initializationModelRepository=initializationModelRepository, axis="columns")

    # sets the unique identifier for each Person instance
    for person, i in zip(people, range(n)):
        person._index = i

    return people

In [None]:
from enum import Enum

class CategoricalRiskFactorsType(Enum):
    """Categorical risk factors; each member's value is the risk factor's name as a string.

    NOTE(review): the first cell of this notebook already imports
    CategoricalRiskFactorsType from microsim.risk_factor; running this cell
    rebinds the name to this local definition - confirm that is intended.
    """
    RACE_ETHNICITY = "raceEthnicity" 
    EDUCATION = "education"          
    GENDER = "gender"                
    SMOKING_STATUS = "smokingStatus" 
    PVD = "pvd"
    ALCOHOL_PER_WEEK = "alcoholPerWeek"
    AFIB = "afib"
    ANY_PHYSICAL_ACTIVITY = "anyPhysicalActivity" 
    
class ContinuousRiskFactorsType(Enum):
    AGE = "age"   # int
    SBP = "sbp"   # int
    DBP = "dbp"   # int
    A1C = "a1c"   # float
    HDL = "hdl"   # int
    LDL = "ldl"   # int
    TRIG = "trig"  # int
    TOT_CHOL = "totChol"   # int
    BMI = "bmi"   #float
    WAIST = "waist"  # int, waist circumference in cm
    CREATININE = "creatinine" # float