In [1]:
import numpy as np
import pandas as pd
import os
import random

from microsim.treatment_strategy_repository import TreatmentStrategyRepository
from microsim.treatment import TreatmentStrategiesType, TreatmentStrategyStatus
from microsim.bp_treatment_strategies import AddNBPMedsTreatmentStrategy
from microsim.population_factory import PopulationFactory, PopulationType
from microsim.population import Population
from microsim.trials.trial_type import TrialType
from microsim.trials.trial_description import NhanesTrialDescription
from microsim.risk_factor import CategoricalRiskFactorsType, ContinuousRiskFactorsType
from microsim.risk_factor import StaticRiskFactorsType, DynamicRiskFactorsType

# NOTE(review): machine-specific absolute path. The working directory is switched to the
# microsim checkout below; adjust this (or make it configurable) before running elsewhere.
microsimDir = "/Users/deligkaris.1/OneDrive - The Ohio State University Wexner Medical Center/MICROSIM/CODE/microsim"
os.chdir(microsimDir)

In [2]:
# Alternative (disabled): build the treatment strategy repository by hand.
#ts = TreatmentStrategyRepository()
#ts._repository[TreatmentStrategiesType.BP.value] = AddNBPMedsTreatmentStrategy(1)

# Describe the trial: a completely randomized design without block factors, 1000 people
# sampled for NHANES year 1999, advanced for a duration of 5 using 4 workers.
# The commented-out keyword lines are the other trial types / block factors that were tried.
# NOTE(review): treatmentStrategies is passed as a string here, while Trial.run reads
# trialDescription.treatmentStrategies._repository; presumably NhanesTrialDescription
# resolves the string to a TreatmentStrategyRepository - confirm.
td = NhanesTrialDescription(#trialType = TrialType.COMPLETELY_RANDOMIZED_IN_BLOCKS, 
                            trialType = TrialType.COMPLETELY_RANDOMIZED,
                            #trialType = TrialType.NON_RANDOMIZED,
                            #blockFactors = [StaticRiskFactorsType.GENDER.value], 
                            #blockFactors = [DynamicRiskFactorsType.AGE.value],
                            blockFactors=list(),
                            sampleSize = 1000, 
                            duration = 5, 
                            treatmentStrategies = "1bpMedsAdded", #ts, 
                            nWorkers = 4, 
                            inclusionFilters=None, 
                            year=1999, nhanesWeights=True, distributions=False)

In [3]:
td

Trial Description
	Trial type: 1
	Block factors: []
	Sample size: 1000
	Duration: 5
	Treatment strategies: <microsim.treatment_strategy_repository.TreatmentStrategyRepository object at 0x1193b7dc0>
	Number of workers: 4
	Inclusion filters: None
	Year: 1999
	NHANES weights: True
	Distributions: False
	Population type: PopulationType.NHANES

In [4]:
import statsmodels.formula.api as smf

class LogisticRegressionAnalysis:
    """Logit regression of an outcome column on the treatment indicator."""

    def __init__(self):
        """Nothing to configure; the analysis is stateless."""

    def analyze(self, df, outcomeName, blockFactors):
        """Fit ``outcomeName ~ treatment + <block factors>`` with a logit model.

        Args:
            df: data frame holding the outcome, ``treatment`` and block factor columns.
            outcomeName: name of the outcome column (left-hand side of the formula).
            blockFactors: iterable of column names added as extra right-hand-side terms.

        Returns:
            Tuple ``(treatment coefficient, intercept, treatment standard error,
            treatment p-value)`` read from the fitted model.
        """
        rhsTerms = ["treatment"]
        rhsTerms.extend(f"{factor}" for factor in blockFactors)
        modelFormula = f"{outcomeName} ~ " + " + ".join(rhsTerms)
        fitted = smf.logit(modelFormula, df).fit(disp=False)
        coefficients = fitted.params
        return (coefficients['treatment'],
                coefficients['Intercept'],
                fitted.bse['treatment'],
                fitted.pvalues['treatment'])
    
class LinearRegressionAnalysis:
    """Ordinary least squares regression of an outcome column on the treatment indicator."""

    def __init__(self):
        """Nothing to configure; the analysis is stateless."""

    def analyze(self, df, outcomeName, blockFactors):
        """Fit ``outcomeName ~ treatment + <block factors>`` with OLS.

        Args:
            df: data frame holding the outcome, ``treatment`` and block factor columns.
            outcomeName: name of the outcome column (left-hand side of the formula).
            blockFactors: iterable of column names added as extra right-hand-side terms.

        Returns:
            Tuple ``(treatment coefficient, intercept, treatment standard error,
            treatment p-value)`` read from the fitted model.
        """
        rhsTerms = ["treatment"]
        rhsTerms.extend(f"{factor}" for factor in blockFactors)
        modelFormula = f"{outcomeName} ~ " + " + ".join(rhsTerms)
        # disp=False is carried over from the original call.
        # NOTE(review): it probably has no effect on an OLS fit - confirm and drop if so.
        fitted = smf.ols(modelFormula, df).fit(disp=False)
        coefficients = fitted.params
        return (coefficients['treatment'],
                coefficients['Intercept'],
                fitted.bse['treatment'],
                fitted.pvalues['treatment'])

from lifelines import CoxPHFitter
from numpy.linalg import LinAlgError
from lifelines.exceptions import ConvergenceError    

class CoxRegressionAnalysis:
    """Cox proportional hazards regression of an event column on the treatment indicator."""

    def __init__(self):
        pass

    def analyze(self, df, outcomeName, blockFactors):
        """Fit a Cox model with ``treatment`` and the block factors as covariates.

        The fitter treats every column other than the duration and event columns as a
        covariate, so the frame is restricted to the columns the model is meant to use.
        Previously a formula string was built but never passed on, which left every other
        column of ``df`` (for example other outcome columns) in the model.

        Args:
            df: data frame with a ``time`` column, the event column ``outcomeName``,
                a ``treatment`` column and one column per block factor.
            outcomeName: name of the event column.
            blockFactors: iterable of column names used as additional covariates.

        Returns:
            Tuple ``(treatment coefficient, None, treatment standard error,
            treatment p-value)``; the second slot mirrors the intercept slot of the other
            analyses and stays ``None`` because no intercept is reported here.
            If the fit fails with ``LinAlgError`` or ``ConvergenceError`` a tuple of
            four ``np.nan`` is returned, so both paths yield the same number of values.
        """
        covariates = ["treatment"] + list(blockFactors)
        modelColumns = ["time", outcomeName] + covariates
        cph = CoxPHFitter()
        try:
            cph.fit(df[modelColumns], duration_col='time', event_col=outcomeName)
            return (cph.params_['treatment'],
                    None,
                    cph.standard_errors_['treatment'],
                    cph.summary.loc['treatment', 'p'])
        except (LinAlgError, ConvergenceError):
            # Same arity as the success path (the original returned five values here).
            return np.nan, np.nan, np.nan, np.nan

In [29]:
#from microsim.trials.linear_regression_analysis import LinearRegressionAnalysis
#from microsim.trials.logistic_regression_analysis import LogisticRegressionAnalysis
#from microsim.trials.cox_regression_analysis import CoxRegressionAnalysis
from microsim.outcome import OutcomeType

class OutcomeAssessor:
    """Registry of named outcome assessments and the analysis used for each of them.

    ``_assessments`` maps an assessment name to a dict with the keys
    ``"assessmentFunction"`` and ``"assessmentAnalysis"``; ``_analysis`` maps the
    analysis names ``"linear"``, ``"logistic"`` and ``"cox"`` to analysis instances.
    """

    def __init__(self, addCommonAssessments=True):
        """Create the analysis table and, unless disabled, register the common assessments."""
        self._assessments = {}
        self._analysis = {
            "linear": LinearRegressionAnalysis(),
            "logistic": LogisticRegressionAnalysis(),
            "cox": CoxRegressionAnalysis(),
        }
        if addCommonAssessments:
            self.add_common_assessments()

    def add_outcome_assessment(self, assessmentName, assessmentFunction, assessmentAnalysis):
        """Register an assessment; print a message and change nothing if it is rejected.

        An assessment is rejected when its analysis name is unknown or when its own
        name is already registered.
        """
        if assessmentAnalysis not in self._analysis:
            print(f"Cannot add outcome assessment with analysis {assessmentAnalysis} because this analysis does not exist.")
            print(f"Available assessment analysis are: {[analysis for analysis in self._analysis.keys()]}")
            return
        if assessmentName in self._assessments:
            print(f"Cannot add outcome assessment with name {assessmentName} because this assessment name already exists.")
            return
        entry = {"assessmentFunction": assessmentFunction,
                 "assessmentAnalysis": assessmentAnalysis}
        self._assessments[assessmentName] = entry

    def rm_outcome_assessment(self, assessmentName):
        """Remove a registered assessment; print a message if the name is unknown."""
        if assessmentName not in self._assessments:
            print(f"Cannot remove outcome assessment with name {assessmentName} because this assessment name does not exist.")
            return
        del self._assessments[assessmentName]

    def add_common_assessments(self):
        """Register the death / MI assessments, all analysed with the logistic analysis."""
        commonAssessments = (
            ("death", lambda x: x.has_outcome(OutcomeType.DEATH)),
            ("mi", lambda x: x.has_outcome(OutcomeType.MI)),
            ("deathAndMi", lambda x: x.has_all_outcomes([OutcomeType.DEATH, OutcomeType.MI])),
            ("deathOrMi", lambda x: x.has_any_outcome([OutcomeType.DEATH, OutcomeType.MI])),
        )
        for name, function in commonAssessments:
            self.add_outcome_assessment(name, function, "logistic")

    def __str__(self):
        """Return one line per assessment showing its name and analysis."""
        pieces = ["Outcome Assessor\n\tAssessments:\n"]
        for name, entry in self._assessments.items():
            analysisName = entry["assessmentAnalysis"]
            pieces.append(f"\t\tName: {name:<15}")
            pieces.append(f"Analysis: {analysisName:<15}\n")
        return "".join(pieces)

    def __repr__(self):
        """Same text as ``__str__`` so the notebook display is readable."""
        return self.__str__()

In [30]:
# Build an assessor with the common assessments registered (addCommonAssessments defaults to True).
oa = OutcomeAssessor()

In [31]:
oa

Outcome Assessor
	Assessments:
		Name: death          Analysis: logistic       
		Name: mi             Analysis: logistic       
		Name: deathAndMi     Analysis: logistic       
		Name: deathOrMi      Analysis: logistic       

In [None]:
#from microsim.population import NHANESDirectSamplePopulation

class Trial:
    """A two-arm trial built from a trial description.

    On construction the treated and control populations are created according to the
    trial type of the description. ``run`` advances both populations, ``analyze`` runs
    the assessments of an outcome assessor and stores their results in ``self.results``.
    """

    def __init__(self, trialDescription):
        """Store the description and build the two populations.

        Raises:
            RuntimeError: if ``trialDescription.popType`` is ``None``.
        """
        if trialDescription.popType is None:
            raise RuntimeError(f"popType in trialDescription must belong in the set({[pt for pt in PopulationType]})")
        self.trialDescription = trialDescription
        self.treatedPop, self.controlPop = self.get_trial_populations()
        self.completed = False
        self.results = dict()

    def get_trial_populations(self):
        """Return ``(treated population, control population)`` built from the trial people."""
        treatedPeople, controlPeople = self.get_trial_people()
        return (Population(treatedPeople, PopulationFactory.get_population_model_repo(self.trialDescription.popType)),
                Population(controlPeople, PopulationFactory.get_population_model_repo(self.trialDescription.popType)))

    def get_trial_people(self):
        """Return ``(treated people, control people)`` according to the trial type.

        Raises:
            RuntimeError: if the description matches none of the known trial types.
        """
        if self.trialDescription.trialType == TrialType.POTENTIAL_OUTCOMES:
            return self.get_trial_people_identical()
        treatedPeople, controlPeople = self.get_trial_people_non_randomized()
        if self.trialDescription.is_not_randomized():
            return treatedPeople, controlPeople
        elif self.trialDescription.is_not_block_randomized():
            people = pd.concat([treatedPeople, controlPeople])
            return self.randomize_trial_people(people)
        elif self.trialDescription.is_block_randomized():
            people = pd.concat([treatedPeople, controlPeople])
            return self.randomize_trial_people_in_blocks(people)
        else:
            raise RuntimeError("Unknown TrialType in Trial.get_trial_people function.")

    def get_trial_people_non_randomized(self):
        """Obtain two separate groups of people from the factory, one per arm.

        The control group's indices are set to start after the treated group's size.
        """
        treatedPeople = PopulationFactory.get_people(self.trialDescription.popType, **self.trialDescription.popArgs)
        controlPeople = PopulationFactory.get_people(self.trialDescription.popType, **self.trialDescription.popArgs)
        PopulationFactory.set_index_in_people(controlPeople, start=treatedPeople.shape[0])
        return treatedPeople, controlPeople

    def get_trial_people_identical(self):
        """Obtain one group of people and use a copy of it as the treated arm.

        The control group's indices are set to start after the treated group's size.
        """
        controlPeople = PopulationFactory.get_people(self.trialDescription.popType, **self.trialDescription.popArgs)
        treatedPeople = Population.get_people_copy(controlPeople)
        PopulationFactory.set_index_in_people(controlPeople, start=treatedPeople.shape[0])
        return treatedPeople, controlPeople

    def randomize_trial_people(self, people):
        """Split ``people`` into ``(treated, control)`` Series.

        Bernoulli randomization draws one uniform number per person from the
        description's ``_rng``; complete randomization shuffles a list holding
        ``n // 2`` zeros and ``n - n // 2`` ones. A draw below 0.5 means control.

        Raises:
            RuntimeError: if the description is neither Bernoulli nor completely randomized.
        """
        nDraws = people.shape[0]
        if self.trialDescription.is_bernoulli_randomized():
            draws = self.trialDescription._rng.uniform(size=nDraws)
        elif self.trialDescription.is_completely_randomized():
            nControl = nDraws // 2
            draws = [0]*nControl + [1]*(nDraws - nControl)
            # NOTE(review): this uses the global `random` module rather than the
            # description's _rng, so it is not governed by that generator's seed.
            draws = random.sample(draws, len(draws))
        else:
            raise RuntimeError("Unknown TrialType in Trial randomize_people function.")
        controlPeople = pd.Series([p for i, p in enumerate(people) if draws[i] < 0.5])
        treatedPeople = pd.Series([p for i, p in enumerate(people) if draws[i] >= 0.5])
        return treatedPeople, controlPeople

    def randomize_trial_people_in_blocks(self, people):
        """Randomize within each block of the first block factor and pool the arms."""
        blockFactor = self.trialDescription.blockFactors[0]
        blocks = Population.get_people_blocks(people, blockFactor, nBlocks=10)
        # Start from an empty object Series so the result is a Series even with no blocks.
        treatedParts = [pd.Series(dtype=object)]
        controlParts = [pd.Series(dtype=object)]
        for cat in blocks.keys():
            treatedPeopleBlock, controlPeopleBlock = self.randomize_trial_people(blocks[cat])
            treatedParts.append(treatedPeopleBlock)
            controlParts.append(controlPeopleBlock)
        # Concatenate once instead of growing the Series inside the loop.
        return pd.concat(treatedParts), pd.concat(controlParts)

    def run(self):
        """Advance both populations for the trial duration, once per trial.

        The control population is advanced without treatment strategies. The treated
        population is advanced for one step, then every non-None strategy in the
        repository has its status set to MAINTAIN, then the remaining steps are run.
        Note that the status change is made on the description's strategy objects.
        """
        if self.completed:
            print("Cannot run a trial that has already been completed.")
        else:
            self.controlPop.advance(self.trialDescription.duration,
                                    treatmentStrategies=None,
                                    nWorkers=self.trialDescription.nWorkers)
            self.treatedPop.advance(1,
                                    treatmentStrategies = self.trialDescription.treatmentStrategies,
                                    nWorkers=self.trialDescription.nWorkers)

            for key in TreatmentStrategiesType:
                if self.trialDescription.treatmentStrategies._repository[key.value] is not None:
                    self.trialDescription.treatmentStrategies._repository[key.value].status = TreatmentStrategyStatus.MAINTAIN

            self.treatedPop.advance(self.trialDescription.duration-1,
                                    treatmentStrategies = self.trialDescription.treatmentStrategies,
                                    nWorkers=self.trialDescription.nWorkers)
            self.completed = True
            print("Trial is completed.")

    def analyze(self, outcomeAssessor):
        """Run every assessment of ``outcomeAssessor`` and store the results by name."""
        df = self.get_trial_outcome_df(outcomeAssessor)
        for assessmentName in outcomeAssessor._assessments.keys():
            assessmentAnalysis = outcomeAssessor._analysis[outcomeAssessor._assessments[assessmentName]["assessmentAnalysis"]]
            assessmentResults = assessmentAnalysis.analyze(df, assessmentName, self.trialDescription.blockFactors)
            self.results[assessmentName] = assessmentResults

    def run_analyze(self, outcomeAssessor):
        """Run the trial and then analyze it."""
        self.run()
        self.analyze(outcomeAssessor)

    def get_outcome_relative_risk(self, outcomeType):
        """Return treated risk divided by control risk for ``outcomeType``."""
        controlRisk = self.controlPop.get_outcome_risk(outcomeType)
        treatedRisk = self.treatedPop.get_outcome_risk(outcomeType)
        outcomeRelativeRisk = treatedRisk/controlRisk
        return outcomeRelativeRisk

    def get_trial_outcome_df(self, outcomeAssessor):
        """Build the analysis data frame, treated rows first and control rows after.

        Columns: ``treatment`` (1 for treated, 0 for control), one column per
        assessment (cast to int for assessments analysed with "logistic"), and one
        column per block factor. All block factors are included because ``analyze``
        passes the full block factor list to the analyses; previously only the first
        one was added.
        """
        dfDict = dict()
        dfDict["treatment"] = [1]*self.treatedPop._n + [0]*self.controlPop._n
        for assessmentName in outcomeAssessor._assessments.keys():
            assessmentAnalysis = outcomeAssessor._assessments[assessmentName]["assessmentAnalysis"]
            assessmentFunction = outcomeAssessor._assessments[assessmentName]["assessmentFunction"]
            # The assessment function is applied to a whole population and its two
            # results are concatenated with "+".
            dfDict[assessmentName] = assessmentFunction(self.treatedPop) + assessmentFunction(self.controlPop)
            if assessmentAnalysis == "logistic":
                dfDict[assessmentName] = [int(x) for x in dfDict[assessmentName]]
        for blockFactor in self.trialDescription.blockFactors:
            dfDict[blockFactor] = self.treatedPop.get_attr(blockFactor) + self.controlPop.get_attr(blockFactor)
        return pd.DataFrame(dfDict)

    def print_covariate_distributions(self):
        """Print treated-versus-control summaries, per block when block randomized."""
        if not self.trialDescription.is_block_randomized():
            print(" "*25,
                      "self=treated, unique people count=",  Population.get_unique_people_count(self.treatedPop._people),
                      " "*15,
                      "other=control, unique people count=",  Population.get_unique_people_count(self.controlPop._people))
            self.treatedPop.print_lastyear_summary_comparison(self.controlPop)
        else:
            blockFactor = self.trialDescription.blockFactors[0]
            people = pd.concat([self.treatedPop._people, self.controlPop._people])
            peopleBlocks = Population.get_people_blocks(people, blockFactor, nBlocks=10)
            # Collect each arm's indices once; the original rebuilt these lists for
            # every person in every block.
            treatedIndices = {person._index for person in self.treatedPop._people}
            controlIndices = {person._index for person in self.controlPop._people}
            for key in peopleBlocks.keys():
                treatedPeopleBlock = pd.Series([p for p in peopleBlocks[key] if p._index in treatedIndices])
                controlPeopleBlock = pd.Series([p for p in peopleBlocks[key] if p._index in controlIndices])
                print(" "*25, "-"*109)
                print(" "*25, f"block:{blockFactor}={key}")
                print(" "*25,
                      "self=treated, unique people count=",  Population.get_unique_people_count(treatedPeopleBlock),
                      " "*15,
                      "other=control, unique people count=",  Population.get_unique_people_count(controlPeopleBlock))
                Population.print_people_summary_at_index_comparison(treatedPeopleBlock, controlPeopleBlock, -1)

    def __str__(self):
        """Return the description text followed by the trial status and any results.

        A result value of ``None`` is shown as ``nan`` so that analyses that report
        no value in a slot do not break the numeric formatting.
        """
        rep = self.trialDescription.__str__()
        rep += f"\nTrial\n"
        rep += f"\tTrial completed: {self.completed}\n"
        if self.completed:
            rep += f"Trial results:\n"
            rep += "\t" +" "*15 + " "*16 + "Z" + " "*6 + "Intercept" + " "*11 + "Z SE" + " "*8 + "pValues\n"
            for key in self.results.keys():
                rep += f"\t{key:>15}: "
                for result in self.results[key]:
                    value = float("nan") if result is None else result
                    rep += f"{value:>15.2f}"
                rep += "\n"
        return rep

    def __string__(self):
        """Backward-compatible alias for ``__str__`` (the method's original name)."""
        return self.__str__()

    def __repr__(self):
        return self.__str__()

In [None]:
#tr = NhanesTrial(td)
# Build the trial from the description; the constructor creates the treated and control populations.
tr = Trial(td)

In [None]:
#tr

In [None]:
#tr.print_covariate_distributions()

In [None]:
%%time
# Advance both trial populations for the configured duration (%%time must stay on the first line).
tr.run()

In [None]:
# Run every assessment registered in oa and store the per-assessment results in tr.results.
tr.analyze(oa)

In [None]:
tr

In [None]:
# Tabulate the stored results, one column per assessment name.
pd.DataFrame(tr.results)

In [None]:
def build_people_using_nhanes_for_sampling_gen(nhanes, n, filter=None, random_seed=None, weights=None):
    """Creates a Pandas Series collection of Person instances.

    Rows are sampled from ``nhanes`` with replacement and each sampled row is passed
    to ``build_person``.

    Args:
        nhanes: data frame to sample from; its ``WTINT2YR`` column supplies the
            sampling weights when ``weights`` is None.
        n: number of rows to sample, and therefore the number of people built.
        filter: accepted for interface compatibility; not used in this function.
        random_seed: forwarded to the sampler as ``random_state``.
        weights: sampling weights; defaults to ``nhanes.WTINT2YR``.

    Returns:
        The result of applying ``build_person`` row-wise to the sample, with each
        element's ``_index`` attribute set to its position (0 .. n-1).

    NOTE(review): ``build_person``, ``AFibPrevalenceModel`` and ``PVDPrevalenceModel``
    are not imported anywhere in the visible notebook - confirm they are in scope.
    """
    if weights is None:
        weights = nhanes.WTINT2YR
    repeated_sample = nhanes.sample(n, weights=weights, random_state=random_seed, replace=True)
    initializationModelRepository = {DynamicRiskFactorsType.AFIB: AFibPrevalenceModel(),
                                     DynamicRiskFactorsType.PVD: PVDPrevalenceModel()}
    people = repeated_sample.apply(build_person,
                                   initializationModelRepository=initializationModelRepository,
                                   axis="columns")

    # sets the unique identifier for each Person instance
    for person, i in zip(people, range(n)):
        setattr(person, "_index", i)

    return people

In [None]:
from enum import Enum

class CategoricalRiskFactorsType(Enum):
    """Categorical risk factors, each identified by a camelCase string value.

    NOTE(review): the values are presumably the matching person attribute names - confirm.
    NOTE(review): a class with this name is also imported from microsim.risk_factor in
    the first cell; running this cell rebinds the name in the notebook namespace.
    """
    RACE_ETHNICITY = "raceEthnicity" 
    EDUCATION = "education"          
    GENDER = "gender"                
    SMOKING_STATUS = "smokingStatus" 
    PVD = "pvd"
    ALCOHOL_PER_WEEK = "alcoholPerWeek"
    AFIB = "afib"
    ANY_PHYSICAL_ACTIVITY = "anyPhysicalActivity" 
    
class ContinuousRiskFactorsType(Enum):
    AGE = "age"   # int
    SBP = "sbp"   # int
    DBP = "dbp"   # int
    A1C = "a1c"   # float
    HDL = "hdl"   # int
    LDL = "ldl"   # int
    TRIG = "trig"  # int
    TOT_CHOL = "totChol"   # int
    BMI = "bmi"   #float
    WAIST = "waist"  # int, waist circumference in cm
    CREATININE = "creatinine" # float