In [1]:
import os
import copy
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
from pandarallel import pandarallel
import numpy as np
import matplotlib.pyplot as plt
import importlib.util

In [2]:
from typing import Callable

import scipy.special as scipySpecial

from microsim.population import Population, NHANESDirectSamplePopulation, build_people_using_nhanes_for_sampling
from microsim.sim_settings import simSettings
from microsim.outcome_model_repository import OutcomeModelRepository
from microsim.qaly_assignment_strategy import QALYAssignmentStrategy
from microsim.cohort_risk_model_repository import CohortRiskModelRepository
from microsim.person import Person
from microsim.education import Education
from microsim.gender import NHANESGender
from microsim.race_ethnicity import NHANESRaceEthnicity
from microsim.smoking_status import SmokingStatus
from microsim.alcohol_category import AlcoholCategory
from microsim.outcome import Outcome
from microsim.statsmodel_linear_risk_factor_model import StatsModelLinearRiskFactorModel
from microsim.stroke_details import StrokeNihssModel, StrokeSubtypeModelRepository, StrokeTypeModel
from microsim.stroke_outcome import StrokeOutcome

In [3]:
#I will need to work on rearranging/removing these settings and replace them with options for how to run a population
#serially or in parallel
#pandarallel.initialize(verbose=1) #microsim by default now does not initialize pandarallel
#simSettings.pandarallelFlag = True #with this flag all new population instances will be set to use pandarallel

#if every person object will have their own rng stream, then perhaps this can be pushed to be initialized in every population instance
#instead of the current approach, so every population instance can have its own random number generator
#care must be taken if the population is to advance using multiprocessing or not though
#in a similar way that is currently done with trial sets
#no seed is given, so the generator is seeded from fresh OS entropy every time this cell runs
#pass an explicit integer to SeedSequence if a reproducible run is needed
seedSequence = np.random.SeedSequence()
rngStream = np.random.default_rng(seed=seedSequence)

#the notebook changes its working directory to the microsim checkout
#set the MICROSIM_DIR environment variable to override the machine-specific default below
microsimDir = os.environ.get(
    "MICROSIM_DIR",
    "/Users/deligkaris.1/OneDrive - The Ohio State University Wexner Medical Center/MICROSIM/CODE/microsim",
)
os.chdir(microsimDir)

In [None]:
#I think monkey patching is a better way of doing tests
#rather than including logic/code in methods that modify the function behavior so that tests can be performed

In [16]:
#principles: modularity/clear division of labor, consistency, understandable, easily modifiable, minimize probability for errors

## RISK FACTORS

In [4]:
#age is a risk factor so it will be included with the other risk factors and it needs a model 
class AgeModel:
    """Risk factor model for age: each advanced year adds exactly one year."""

    def estimate_next_risk(self, person, rng=None):
        """Return the age the person will have in the next wave.

        person: object whose ``_age`` list holds the age history (last item is the current age)
        rng: accepted so the signature matches the other risk factor models, not used here
        """
        currentAge = person._age[-1]
        return currentAge + 1
    
#cohortModule = importlib.import_module("microsim.cohort_risk_model_repository")

class myCohortRiskModelRepository(CohortRiskModelRepository):
    """CohortRiskModelRepository that additionally holds a model for age."""

    def __init__(self):
        super().__init__()
        #age is advanced like the other risk factors, so it needs its own entry in the repository
        ageModel = AgeModel()
        self._repository["age"] = ageModel
        
#setattr(cohortModule, "CohortRiskModelRepository", myCohortRiskModelRepository)

#nature includes rules for person objects for 2 things: for risk factors and outcomes
#a population instance needs to know the rules for these 2 things
#is the CohortRiskModelRepository the class that will hold the rules for the risk factors? or just one of the options?
#maybe we should make a RiskFactorModelRepository similar to OutcomeModelRepository and each RiskFactor is connected to a model 
#(for outcomes every outcome is connected to an outcome model repository because the logic is more complex for outcomes)
#I prefer RiskFactorModelRepository, the term risk is also used in outcomes as a probability....
#also, check for the attribute of the RiskFactorModelRepository and OutcomeModelRepository, the first one
#is self._repository and the other one is I think self._models...keep it consistent

## OUTCOMES

In [6]:
#person objects have outcomes, keeping a list in the class here
#when the person object instance is advancing outcomes, it will use these ones

#but a list of the same thing (outcomes) in the person object is not a good idea, unless they are different things
#if the plan is for these to always be consistent/the same we should find a way to keep only one

#but then we may also need to make a class of risk factors in the same way we do for OutcomeType
#maybe this is the way to go

#but also, not all outcomes are equal...some outcomes depend on other outcomes
#so maybe define 2 outcome levels, base/fundamental and outcome functions that are at a higher level
#and also, different outcomes have different data structures/types, others are time dependent numbers (gcp, qalys)
#others are discrete events (mi, stroke, death)
#for now I will not work on making a taxonomy of outcomes, but simply organizing in a simple way the outcomes of interest 
#for person objects
#based on this, the sequence of these outcometypes is important

from enum import Enum

class OutcomeType(Enum):
    """Kinds of outcomes tracked for person objects; each member's value is a short string label."""
    #cardiovascular event: the stroke and mi partition models in this notebook check for it
    #before they create their own outcome
    CARDIOVASCULAR = "cv"
    STROKE = "stroke"
    MI = "mi"
    DEMENTIA = "dementia"
    GLOBAL_COGNITIVE_PERFORMANCE = "gcp"
    DEATH = "death"
    #quality adjusted life years
    QUALITYADJUSTED_LIFE_YEARS = "qalys"

In [None]:
#I do not think I need this, not even sure this is a good idea

#class NullModelRepository:
#    def __init__(self):
#        pass
    
#    def select_outcome_model_for_person(self, person):
#        pass    

In [7]:
from microsim.outcome_model_repository import OutcomeModelRepository

#this class will hold the rules for the outcomes
#this is the only thing needed to specify all rules for all outcomes

class myOutcomeModelRepository(OutcomeModelRepository):
    """Holds the rules for all outcomes: one outcome-specific model repository per OutcomeType.

    Each value exposes select_outcome_model_for_person(person), which returns the model
    used to obtain that person's next outcome of the given type.
    """
    def __init__(self):
        #NOTE(review): OutcomeModelRepository.__init__ is not called here (unchanged from the original),
        #confirm that skipping the parent initialization is intended
        #
        #insertion order is kept by the dict: cv comes before stroke and mi because both partition
        #models look for a cv outcome at the current age, and stroke comes before mi because the
        #mi partition model assumes that stroke has already been decided
        self._outcomes = {OutcomeType.DEMENTIA: dementiaModelRepository(),
                          OutcomeType.GLOBAL_COGNITIVE_PERFORMANCE: gcpModelRepository(),
                          OutcomeType.QUALITYADJUSTED_LIFE_YEARS: qalyModelRepository(),
                          OutcomeType.CARDIOVASCULAR: cvModelRepository(),
                          #each outcome type now points to its own repository (the two were swapped)
                          OutcomeType.STROKE: strokePartitionModelRepository(),
                          OutcomeType.MI: miPartitionModelRepository(),
                          OutcomeType.DEATH: deathModelRepository()}
        
#thoughts again:
#the outcome model repository will initialize either a specific outcome class (eg stroke class) or a 
# specific outcome model repository (eg stroke repository class)
# the difference: if all person objects use the same outcome class, then no need for the specific outcome repo
# if there will be a lot of logic things happening in order to get the risk and outcome then use the specific outcome repo
#every specific outcome class will have get_next_outcome function and will return either an Outcome or None
#every specific outcome repo will have a select_outcome_model_for_person function
#CHANGED MY MIND, for simplicity the outcome model repository will first access the specific outcome model repository
#and that class will select the appropriate outcome model for the person, even if there is only one

#every model class that is selected by the model repositories above will share some functions:
# 1) get_next_outcome that person objects will use in the population 
# 2) generate_next_outcome that will be used by the get_next_outcome if an outcome needs to be generated

In [14]:
#cv is not an outcome type that we are directly interested in...but it is an outcome type that we need to use
#with some model implementations (stroke and mi)
#so make it have the same structure as the rest of the outcomes
#there is a cv model repository that selects the cv model for person 
#and there are two cv models, one for male and one for female but they share the same functions
#so create a base cv model for both

from microsim.ascvd_outcome_model import ASCVDOutcomeModel
from microsim.gender import NHANESGender
from microsim.regression_model import RegressionModel
from microsim.data_loader import load_model_spec

class cvModelRepository:
    """Selects the cardiovascular (cv) outcome model that matches a person's gender."""

    def __init__(self):
        maleModel = cvModelMale()
        femaleModel = cvModelFemale()
        self._models = {"male": maleModel, "female": femaleModel}

    def select_outcome_model_for_person(self, person):
        """Return the male model for NHANESGender.MALE persons and the female model for everyone else."""
        if person._gender == NHANESGender.MALE:
            return self._models["male"]
        return self._models["female"]

class cvModelBase(ASCVDOutcomeModel):
    """Shared implementation of the male and female cardiovascular (cv) outcome models.

    coefficients: regression coefficients keyed by covariate name, passed to RegressionModel
                  with all standard errors and the residual mean/standard deviation set to 0
    tot_chol_hdl_ratio, black_race_x_tot_chol_hdl_ratio: forwarded unchanged to ASCVDOutcomeModel
    """
    def __init__(self, coefficients, tot_chol_hdl_ratio, black_race_x_tot_chol_hdl_ratio):
        self._secondary_prevention_multiplier = 1.0
        #the case fatalities below are stored but not read by any method of this class
        self._mi_case_fatality = 0.13
        self._secondary_mi_case_fatality = 0.13
        self._stroke_case_fatality = 0.15
        self._secondary_stroke_case_fatality = 0.15
        super().__init__(RegressionModel(
                            coefficients=coefficients,
                            coefficient_standard_errors={key: 0 for key in coefficients},
                            residual_mean=0,
                            residual_standard_deviation=0,),
                         tot_chol_hdl_ratio=tot_chol_hdl_ratio,
                         black_race_x_tot_chol_hdl_ratio=black_race_x_tot_chol_hdl_ratio,)

    def get_risk_for_person(self, person):
        """Return the 1 year cv risk, scaled by the secondary prevention multiplier after a prior mi or stroke."""
        cvRisk = super().get_risk_for_person(person, person._rng, years=1)
        #logical or instead of bitwise |, these are truth values and not bit masks
        if person._mi or person._stroke:
            cvRisk = cvRisk * self._secondary_prevention_multiplier
        return cvRisk

    def generate_next_outcome(self, person):
        """Return a new, non fatal, cardiovascular Outcome."""
        #for now assume it is not a fatal event, not great though...
        #if in the future we choose different stroke/mi models that do not update cv outcome fatality
        #then cv fatality will need to be decided here
        fatal = False
        return Outcome(OutcomeType.CARDIOVASCULAR, fatal)

    def get_next_outcome(self, person):
        """Draw from the person's rng and return a cv Outcome, or None when no event occurs."""
        #scalar draw: uniform(size=1) returned a 1-element array that was then used as an if condition
        if person._rng.uniform() < self.get_risk_for_person(person):
            return self.generate_next_outcome(person)
        return None
        
        
class cvModelMale(cvModelBase):
    """Cardiovascular outcome model with the coefficient set that cvModelRepository uses for male persons."""

    def __init__(self):
        #regression coefficients keyed by covariate name
        coefficients = {
            "lagAge": 0.064200,
            "black": 0.482835,
            "lagSbp#lagSbp": -0.000061,
            "lagSbp": 0.038950,
            "current_bp_treatment": 2.055533,
            "current_diabetes": 0.842209,
            "current_smoker": 0.895589,
            "lagAge#black": 0,
            "lagSbp#current_bp_treatment": -0.014207,
            "lagSbp#black": 0.011609,
            "black#current_bp_treatment": -0.119460,
            "lagAge#lagSbp": 0.000025,
            "black#current_diabetes": -0.077214,
            "black#current_smoker": -0.226771,
            "lagSbp#black#current_bp_treatment": 0.004190,
            "lagAge#lagSbp#black": -0.000199,
            "Intercept": -11.679980,
        }
        super().__init__(
            coefficients,
            tot_chol_hdl_ratio=0.193307,
            black_race_x_tot_chol_hdl_ratio=-0.117749,
        )


class cvModelFemale(cvModelBase):
    """Cardiovascular outcome model with the coefficient set that cvModelRepository uses for non-male persons."""

    def __init__(self):
        #regression coefficients keyed by covariate name
        coefficients = {
            "lagAge": 0.106501,
            "black": 0.432440,
            "lagSbp#lagSbp": 0.000056,
            "lagSbp": 0.017666,
            "current_bp_treatment": 0.731678,
            "current_diabetes": 0.943970,
            "current_smoker": 1.009790,
            "lagAge#black": -0.008580,
            "lagSbp#current_bp_treatment": -0.003647,
            "lagSbp#black": 0.006208,
            "black#current_bp_treatment": 0.152968,
            "lagAge#lagSbp": -0.000153,
            "black#current_diabetes": 0.115232,
            "black#current_smoker": -0.092231,
            "lagSbp#black#current_bp_treatment": -0.000173,
            "lagAge#lagSbp#black": -0.000094,
            "Intercept": -12.823110,
        }
        super().__init__(
            coefficients,
            tot_chol_hdl_ratio=0.151318,
            black_race_x_tot_chol_hdl_ratio=0.070498,
        )
        

In [None]:
#maybe call it strokePartitionModelRepository
class strokePartitionModelRepository:
    """Repository with a single stroke partition model that is returned for every person."""

    def __init__(self):
        self._model = strokePartitionModel()

    def select_outcome_model_for_person(self, person):
        """Return the stored model; the person does not influence the choice."""
        selectedModel = self._model
        return selectedModel

#there are 2 approaches that can be taken here
#1: this model asks the cv model whether there was a cv outcome for this person
#2: the cv model is asked before the stroke model is called and the person object stores the potential cv outcome,
#   so the stroke model only checks the person object for a cv outcome in the current year
class strokePartitionModel(StatsModelLinearRiskFactorModel):
    """Decides whether a cv outcome at the current age is a stroke and, if so, builds the stroke outcome.

    The regression model is loaded from the "StrokeMIPartitionModel" spec. All random draws use the
    person's own rng stream (person._rng).
    """
    def __init__(self):
        model_spec = load_model_spec("StrokeMIPartitionModel")
        super().__init__(RegressionModel(**model_spec))
        #self._cvModelRepo = cvModelRepository()
        self._stroke_case_fatality = 0.15
        self._stroke_secondary_case_fatality = 0.15

    def will_have_fatal_stroke(self, person):
        """Return True or False: whether the stroke is fatal (secondary case fatality if the person had a prior stroke)."""
        fatalProb = self._stroke_secondary_case_fatality if person._stroke else self._stroke_case_fatality
        #scalar draw wrapped in bool() so that a plain bool, not a 1-element array, ends up in the outcome
        return bool(person._rng.uniform() < fatalProb)

    def get_next_stroke_probability(self, person):
        """Return expit of the regression estimate for the person."""
        return scipySpecial.expit( super().estimate_next_risk(person) )

    def generate_next_outcome(self, person):
        """Build the StrokeOutcome and store the gcp stroke random effects on the person."""
        #the original referenced an undefined name rng, use the person's own stream
        rng = person._rng
        fatal = self.will_have_fatal_stroke(person)
        ### call other models that are for generating stroke phenotype here.
        nihss = StrokeNihssModel(rng=rng).estimate_next_risk(person)
        strokeSubtype = StrokeSubtypeModelRepository(rng=rng).get_stroke_subtype(person)
        strokeType = StrokeTypeModel(rng=rng).get_stroke_type(person)
        #localization = Localization.LEFT_HEMISPHERE
        #disability = 3
        #I think these are better moved to the gcp stroke model
        gcpStrokeRandomEffect = rng.normal(0., 3.90)
        gcpStrokeSlopeRandomEffect = rng.normal(0., 0.264)
        person._randomEffects["gcpStroke"] = gcpStrokeRandomEffect
        person._randomEffects["gcpStrokeSlope"] = gcpStrokeSlopeRandomEffect
        #return StrokeOutcome(fatal, nihss, strokeType, strokeSubtype, localization, disability)
        return StrokeOutcome(fatal, nihss, strokeType, strokeSubtype)

    def update_cv_outcome(self, person, fatal):
        """Copy the fatality of the stroke to the person's most recent cv outcome."""
        #need to double check this
        #NOTE(review): assumes the last cv entry is an object with a fatal attribute, confirm how outcomes are stored
        person._outcomes[OutcomeType.CARDIOVASCULAR][-1].fatal = fatal

    def get_next_outcome(self, person):
        """Return a StrokeOutcome, or None when there is no cv outcome at the current age or it is not a stroke."""
        #if self._cvModelRepo.select_outcome_model_for_person(person).get_next_outcome(person):
        if not person.has_outcome_at_current_age(OutcomeType.CARDIOVASCULAR):
            return None
        if person._rng.uniform() < self.get_next_stroke_probability(person):
            strokeOutcome = self.generate_next_outcome(person)
            self.update_cv_outcome(person, strokeOutcome.fatal)
            return strokeOutcome
        return None

In [None]:

class miPartitionModelRepository:
    """Repository with a single mi partition model that is returned for every person."""

    def __init__(self):
        self._model = miPartitionModel()

    def select_outcome_model_for_person(self, person):
        """Return the stored model; the person does not influence the choice."""
        selectedModel = self._model
        return selectedModel

class miPartitionModel:
    """Turns a cv outcome at the current age into an mi outcome whenever it was not a stroke."""

    def __init__(self):
        self._mi_case_fatality = 0.13
        self._mi_secondary_case_fatality = 0.13

    def update_cv_outcome(self, person, fatal):
        """Copy the fatality of the mi to the person's most recent cv outcome."""
        #need to double check this
        #NOTE(review): assumes the last cv entry is an object with a fatal attribute, confirm how outcomes are stored
        person._outcomes[OutcomeType.CARDIOVASCULAR][-1].fatal = fatal

    def will_have_fatal_mi(self, person):
        """Return True or False: whether the mi is fatal (secondary case fatality if the person had a prior mi)."""
        fatalProb = self._mi_secondary_case_fatality if person._mi else self._mi_case_fatality
        #scalar draw wrapped in bool() so that a plain bool, not a 1-element array, ends up in the outcome
        return bool(person._rng.uniform() < fatalProb)

    def generate_next_outcome(self, person):
        """Return a new mi Outcome whose fatality is drawn from the person's rng."""
        fatal = self.will_have_fatal_mi(person)
        #was outcomeType.MI, a typo that raised NameError
        return Outcome(OutcomeType.MI, fatal)

    def get_next_outcome(self, person):
        """Return an mi Outcome, or None when there is no cv outcome at the current age or it was a stroke."""
        if not person.has_outcome_at_current_age(OutcomeType.CARDIOVASCULAR):
            return None
        #assumes that stroke has been decided by now
        if person.has_outcome_at_current_age(OutcomeType.STROKE):
            return None
        miOutcome = self.generate_next_outcome(person)
        self.update_cv_outcome(person, miOutcome.fatal)
        return miOutcome

In [None]:
class deathModelRepository:
    """Repository with a single death model that is returned for every person."""

    def __init__(self):
        self._model = deathModel()

    def select_outcome_model_for_person(self, person):
        """Return the stored model; the person does not influence the choice."""
        selectedModel = self._model
        return selectedModel
    
class deathModel:
    """Placeholder for the death outcome model; the rule for death has not been written yet."""

    def __init__(self):
        #nothing to initialize yet; the empty bodies of the original were a SyntaxError for the whole cell
        pass

    def get_next_outcome(self, person):
        """Not implemented yet.

        Raises NotImplementedError instead of returning None, because None would silently
        mean that nobody ever dies in the simulation.
        """
        raise NotImplementedError("deathModel.get_next_outcome has not been implemented yet")

In [None]:
#I think I prefer to move this code to the strokeAndMIModel

#from microsim.statsmodel_linear_risk_factor_model import StatsModelLinearRiskFactorModel
#import scipy.special as scipySpecial

#class strokePartitionModel(StatsModelLinearRiskFactorModel):
#    def __init__(self):
#        model_spec = load_model_spec("StrokeMIPartitionModel")
#        super().__init__(RegressionModel(**model_spec))

#    def get_next_outcome(self, person):
        

In [None]:
#from microsim.cv_outcome_determination import CVOutcomeDetermination
#from microsim.statsmodel_linear_risk_factor_model import StatsModelLinearRiskFactorModel
#import scipy.special as scipySpecial

#class strokeAndMIModelRepository(CVOutcomeDetermination):
#class strokeAndMIModelRepository():
#    def __init__(self):
        #super().__init__()
        #with either key you access the same model
        #self._models = {"mi": strokeAndMIModel(),
        #                "stroke": strokeAndMIModel()}
#        pass
    
#    def select_outcome_model_for_person(self, person):
        #return self._models["cv"]
#        return strokeAndMIModel()
    
#from microsim.cv_outcome_determination import CVOutcomeDetermination

#class strokeandMIModel(CVOutcomeDetermination):
#class strokeAndMIModel(StatsModelLinearRiskFactorModel):
#    def __init__(self):
        #super().__init__()
#        model_spec = load_model_spec("StrokeMIPartitionModel")
#        super().__init__(RegressionModel(**model_spec))
#        self._cvModelRepo = cvModelRepository()
#        #self._strokePartitionModel = strokePartitionModel()
   
#   def get_next_stroke_probability(self, person):
#        strokeProbability = scipySpecial.expit( self.estimate_next_risk(person) )
#        return strokeProbability

#    def get_next_outcome(self, person):
#        if self._cvModelRepo.select_outcome_model_for_person(person).get_next_outcome(person):
#            if person._rng.uniform(size=1) < self.get_next_stroke_probability(person):
#                self.generate_next_
#            else: 

In [None]:
from microsim.dementia_model import DementiaModel

class myDementiaModel(DementiaModel):
    """DementiaModel with the get_next_outcome interface shared by the outcome models.

    The original class had no base class, so super().__init__() did nothing useful and
    self.linear_predictor did not exist; it now inherits from the imported DementiaModel.
    """
    def __init__(self):
        super().__init__()

    #perhaps move the logic to the specific outcome (dementia repo)
    #def get_risk_for_person(self, person):
    #    return self.linear_predictor(person)

    def get_next_outcome(self, person):
        """Return a non fatal dementia Outcome, or None, using a draw from the person's own rng."""
        #NOTE(review): the draw is compared against linear_predictor as in the original, confirm that this
        #value is a probability (the commented out alternative used get_risk_for_person)
        #the original referenced an undefined name rng, use the person's own stream
        if person._rng.uniform() < self.linear_predictor(person):
            return Outcome(OutcomeType.DEMENTIA, False)
        return None

class dementiaModelRepository:
    """Repository with a single dementia model that is returned for every person."""

    def __init__(self):
        #Q: maybe call it "default" instead of dementia?
        dementiaModel = myDementiaModel()
        self._models = {"dementia": dementiaModel}

    def select_outcome_model_for_person(self, person):
        """Return the model stored under "dementia"; the person does not influence the choice."""
        selectedModel = self._models["dementia"]
        return selectedModel
        

In [None]:
from microsim.gcp_model import GCPModel
from microsim.gcp_stroke_model import GCPStrokeModel

class gcpModelRepository:
    """Chooses between the GCPModel and the GCPStrokeModel for a person."""

    def __init__(self):
        #Q: why does the GCPModel initialize an outcome model repository?
        baselineModel = GCPModel()
        strokeModel = GCPStrokeModel()
        self._models = {"gcp": baselineModel, "gcpStroke": strokeModel}

    def select_outcome_model_for_person(self, person):
        """Return the stroke model once the person has had a stroke during the simulation, else the baseline model."""
        if person.has_outcome_during_simulation(OutcomeType.STROKE):
            return self._models["gcpStroke"]
        return self._models["gcp"]

In [None]:
from microsim.qaly_assignment_strategy import QALYAssignmentStrategy

class myQALYModel(QALYAssignmentStrategy):
    """QALYAssignmentStrategy with the get_next_outcome interface shared by the outcome models."""

    #no __init__ override is needed, the inherited initializer is used unchanged

    def get_next_outcome(self, person):
        """Return what get_next_qaly returns for the person."""
        nextQaly = self.get_next_qaly(person)
        return nextQaly
    
class qalyModelRepository:
    """Repository with a single QALY model that is returned for every person."""

    def __init__(self):
        qalyModel = myQALYModel()
        self._models = {"qalys": qalyModel}

    def select_outcome_model_for_person(self, person):
        """Return the model stored under "qalys"; the person does not influence the choice."""
        selectedModel = self._models["qalys"]
        return selectedModel

In [None]:
#these are lists that do not change over the course of a population and are the same no matter the population
#treatment strategies
#I could argue that these might be better suited under the Person class, these are just the attributes all
#person objects will have no matter the population
#the rules that dictate how advancement is done will vary from population to population so the population needs to be
#able to dictate them to person objects
#but since the person advance methods need these rules in order to work, person objects should have at least a default
#set of rules (now they do not)
#when person objects get these rules from a population class, memory pointers will be used (I think)
#but it is unclear to me how this would work with multiprocessing 
#if this can work with multiprocessing, an approach could be: each person object has default set of rules,
#and those are modified by the population, with each person object keeping a reference to the population rules
#as a person attribute
    
#also, I am dividing the person attributes in groups....so that each group will have its own
#requirements (eg a dynamic risk factor need to be associated with an estimate next risk method)
#and the set of all groups should provide ideally all, but at least most of, a person object's attributes

#after meeting on 1/22
#person objects are just a data structure, store information
#rules by which future of person objects are predicted are stored at the population level
#and initialized there
#I wonder if I can think of a population as a description of nature...but then that would mean
# that a population is person objects plus the rules for risk factors and outcomes (ie no treatment)
#but that would make sense, as trials would then be a comparison between something we can change
#that is the treatment, trials would be our way of analyzing population to make inferences
#but then the default treatment should be set at the trial level, not the population level...

#I would also like to include the units for all attributes here in the source

#A person object is essentially: 
#.     1) its state which consists of both past and present, 
#.     2) rules for aging that state,  (that may be stored at the population level see note above)
#.     3) tools for analyzing/reporting the state
#A population object is a collection of person objects therefore is: 
#.     1) its state, which is really the state of its people, 
#      2) rules for aging that state, which are rules for aging person objects, (how nature works, see note above)
#.     3) tools for analyzing/reporting that state
#A trial is a collection of populations therefore is: 
#.     1) its state is the state of its populations
#.     2) rules for advancing those populations (only what we control, the treatment)
#.     3) tools for analyzing/reporting that state

#I imagine that trial inclusion/exclusion criteria can be pushed to the population class, as filters in the pandas nhanes df
#as soon as that is read
#currently, imagine the worst case scenario with an extremely picky trial, we will need to create a very large population
#with a large memory cost, to get a small trial population

#also, the best case scenario for inferences would be to compare identical populations, so
#instead of creating different populations as part of trials, create just one and subject the two copies
#to different rules of advancing...

#module object for microsim.person; the setattr call at the end of this cell rebinds its Person attribute to myPerson
personModule = importlib.import_module("microsim.person")

class myPerson(Person):
    """Person that advances itself wave by wave using rules supplied by the caller.

    The class attributes below name the groups of person attributes. The advance_* methods loop
    over these names and read/write the matching "_"+name history list on the instance.
    """

    #I would like to have something that includes all attributes of the object
    #riskFactors include dynamic and static risk factors
    stateDynamic = ["riskFactors","treatments", "outcomes", "qalys"]
    stateStatic = ["selfReportedData", "randomEffects", "rng"]
    #grouped form; the flat alternative (stateDynamic + stateStatic) was assigned first and then
    #immediately overwritten by this dict, so only the dict is kept
    state = {"static": ["selfReportedData", "randomEffects", "rng"],
             "dynamic": ["riskFactors","treatments", "outcomes", "qalys"]}

    #I assume there will be dynamic and static risk factors
    riskFactorsDynamic = [
            "age",
            "sbp",
            "dbp",
            "a1c",
            "hdl",
            "ldl",
            "trig",
            "totChol",
            "bmi",
            "anyPhysicalActivity",
            "afib",
            "waist",
            "alcoholPerWeek",
            "creatinine",
            "pvd",
        ]
        # not sure why this was in the past perhaps included as a risk factor
        # , 'otherLipidLoweringMedicationCount']

    #I can see education, smokingStatus becoming in the future dynamic risk factors
    riskFactorsStatic = ["raceEthnicity",
                         "education",
                         "gender",
                         "smokingStatus"]

    #grouped form; the flat alternative (riskFactorsStatic+riskFactorsDynamic) was overwritten by this dict
    riskFactors = {"static": ["raceEthnicity", "education", "gender", "smokingStatus"],
                   "dynamic": ["age", "sbp", "dbp", "a1c", "hdl", "ldl", "trig", "totChol", "bmi", "anyPhysicalActivity",
                               "afib", "waist", "alcoholPerWeek", "creatinine", "pvd"]}

    #should treatments be defined using their disease (eg hypertension),
    #their effect (eg antihypertension), their drug class (eg statin), or what risk factors they affect (eg bp)?
    #treatment class, can affect outcome or risk factors
    #well, also , now after the meeting, if treatments are going to be set at the trial level for inferences,
    #maybe these treatment-related attributes will need to be assigned to the person objects after the person objects
    #have been initialized, during the trial initialization
    treatments = ["antiHypertensiveCount", "statin"]
    #treatments = ["antiHypertensive", "statin", "otherLipidLoweringMedicationCount"]

    #I am not sure this is needed now...
    #list concatenation builds a new list, so riskFactorsDynamic and treatments are left untouched
    timeVaryingCovariates = riskFactorsDynamic + treatments

    #outcomes need to be ordered from non fatal first, to potentially fatal at the end
    #gcp is a list, dementia is boolean, mi/stroke/death are discrete events
    #outcomes are more difficult to include structure
    outcomesNonFatal = ["gcp", "dementia"]
    outcomesFatal = ["mi", "stroke", "death"]
    #but I see that dementia is like an event so the grouped form is used;
    #the flat alternative (outcomesNonFatal + outcomesFatal) was overwritten by this dict
    outcomes = {"events": ["dementia", "mi", "stroke", "death"],
                "dynamic": ["gcp", "qalys"]}

    selfReportedData = ["selfReportStrokeAge", "selfReportMIAge"]

    def __init__(
        self,
        age: int,
        gender: NHANESGender,
        raceEthnicity: NHANESRaceEthnicity,
        sbp: int,
        dbp: int,
        a1c: float,
        hdl: int,
        totChol: int,
        bmi: float,
        ldl: int,
        trig: int,
        waist: int,  # Waist circumference in cm
        anyPhysicalActivity: int,
        education: Education,
        smokingStatus: SmokingStatus,
        alcohol: AlcoholCategory,
        antiHypertensiveCount: int,
        statin: int,
        otherLipidLoweringMedicationCount: int,
        creatinine: float,
        initializeAfib: Callable,
        initializationRepository=None, #do we need this?
        selfReportStrokeAge=None,
        selfReportMIAge=None,
        randomEffects=None,
        rng=None,
        **kwargs,
    ) -> None:
        """Initialize the parent Person with the same arguments, then add the per person advancement state.

        rng is forwarded to Person.__init__, but self._rng is afterwards replaced by a new generator
        that is seeded from fresh entropy, so every person draws from their own stream.
        """
        super().__init__(age,
            gender,
            raceEthnicity,
            sbp,
            dbp,
            a1c,
            hdl,
            totChol,
            bmi,
            ldl,
            trig,
            waist,  # Waist circumference in cm
            anyPhysicalActivity,
            education,
            smokingStatus,
            alcohol,
            antiHypertensiveCount,
            statin,
            otherLipidLoweringMedicationCount,
            creatinine,
            initializeAfib,
            initializationRepository,
            selfReportStrokeAge,
            selfReportMIAge,
            randomEffects,
            rng,
            **kwargs)

        #each person will advance on their own so keep track of this here
        self._currentWave = 0
        #need to double check that each person needs indeed their own stream
        seedSequence = np.random.SeedSequence()
        self._rng = np.random.default_rng(seed=seedSequence)

    #before I start defining functions, some global conventions....
    #get: fetches a result
    #update: modifies in place
    #assign: appends in a way
    def is_alive(self):
        """Return the last entry of the _alive history."""
        return self._alive[-1]

    def has_outcome_at_current_age(self, outcome):
        """Return True when the person's last outcome of this type happened at the current age."""
        ageAtLastOutcome = self.get_age_at_last_outcome(outcome)
        #logical operators instead of bitwise |, the comparison is skipped when there is no such outcome
        return bool(ageAtLastOutcome is not None and self._age[-1] == ageAtLastOutcome)

    @property
    def _current_age(self):
        """The last entry of the _age history."""
        return self._age[-1]

    def get_next_treatment(self, treatment, treatmentRepository, rng=None):
        """Return the next value of a treatment as estimated by its model in the treatment repository."""
        model = treatmentRepository.get_model(treatment)
        return model.estimate_next_risk(self, rng=rng)

    def advance(self, years, treatmentStrategies, repositories):
        """Advance the person by up to ``years`` waves, stopping as soon as the person is no longer alive.

        treatmentStrategies: mapping from treatment name to a strategy object or None (may itself be None)
        repositories: mapping with the keys "riskFactorsDynamic", "treatments", "outcomes" and "qalys"
        """
        for _ in range(years):
            if not self.is_alive():
                break
            self._currentWave += 1
            self.advance_risk_factors(repositories["riskFactorsDynamic"])
            #the original called advance_treatments_and_update_risk_factors, a name this class does not define;
            #advance_treatments is the method that applies the treatments and lets the strategies update the risk factors
            self.advance_treatments(repositories["treatments"], treatmentStrategies)
            self.advance_outcomes(repositories["outcomes"])
            #NOTE(review): advance_qalys is not defined in this class, confirm that Person provides it
            self.advance_qalys(repositories["qalys"])

    #may need to fix alcohol because it needs to convert the risk to a category I think
    #may also need to implement the apply bounds functionality that is present in the current advance risk factors method
    #the rng=self._rng will eventually not be needed when estimate_next_risk functions utilize the person's own rng stream
    def advance_risk_factors(self, rfdRepository):
        """Append the next value of every dynamic risk factor to its "_"+name history list."""
        for rf in self.riskFactorsDynamic:
            attributeName = "_"+rf
            history = getattr(self, attributeName)
            #a new list is assigned (history is not modified in place), as in the original
            setattr(self, attributeName, history+[self.get_next_risk_factor(rf, rfdRepository, rng=self._rng)])

    def advance_treatments(self, treatmentRepository, treatmentStrategies):
        """Append the next default value of every treatment, then let the treatment strategies modify the person.

        A treatment without an entry in treatmentStrategies, or treatmentStrategies being None,
        means that no strategy is applied for that treatment.
        """
        for treatment in self.treatments:
            #applies the default treatments
            #it is not clear to me why treatment strategies affect the person attributes directly
            #whereas treatments affect the person attributes indirectly through the attribute regression models
            #will it always be like that? keep in mind that the regression models were designed to be 1 year based predictions
            #the assumption is that the effect of the treatment strategies is instantaneous but
            #there is nothing preventing us from using a regression model as the effect of a treatment strategy
            #also, notice that dynamic risk factors and treatments are lists that get their next quantity in the same way
            attributeName = "_"+treatment
            history = getattr(self, attributeName)
            setattr(self, attributeName, history+[self.get_next_treatment(treatment, treatmentRepository,
                                                                          rng=self._rng)])
            #choice of words: get_next implies that it returns the final/next wave quantity, update implies that it modifies
            #that quantity in place
            #the vectorized bp treatment strategies are modifying the rows in place whereas the changes/absolute values are
            #returned for person objects, the code is much more simple if the person is modified in place with treatment
            #strategies so do that for person objects
            #these two functions will need to be defined
            strategy = treatmentStrategies.get(treatment) if treatmentStrategies is not None else None
            if strategy is not None:
                strategy.update_next_treatment(self)
                #I want to make it explicit and more obvious that treatments update the risk factors
                strategy.update_next_risk_factors(self)

    def advance_outcomes(self, outcomeRepository):
        """Placeholder: loops over the outcome groups but does not assign any outcome yet."""
        for outcome in self.outcomes:
            #self.get_next_outcome(outcome, outcomeRepository) or self.assign_next_outcome if it needs to be done in place
            pass
    
#monkey patch: rebind microsim.person.Person to myPerson so that later lookups of personModule.Person get the subclass
#NOTE(review): names already bound with "from microsim.person import Person" (eg in this notebook's import cell)
#keep pointing at the original class, confirm that every consumer resolves Person after this line runs
setattr(personModule, "Person", myPerson)

In [None]:
#module object for microsim.population, obtained the same way as personModule
#NOTE(review): presumably meant for monkey patching the population classes, its use is not visible in this part of the notebook
popModule = importlib.import_module("microsim.population")

class myPopulation:
    """Prototype population: a collection of person objects plus the rules used to advance them.

    The rules are kept in two dictionaries, self._repositories and self._treatmentStrategies,
    and are handed to every person each time the population advances.
    """

    def __init__(self, people):
        """Store the people and set every advancement rule to None.

        people: iterable of person objects; each one needs is_alive() and
                advance(years, treatmentStrategies=..., repositories=...).
        """
        self._people = people

        #the number alive is deliberately not cached as an attribute, so that the advancement
        #can be parallelized later; call get_numberAlive() instead

        #luciana tag: discuss with luciana... the goal is to keep track of the sim wave that is currently
        #running and also of the total number of years advanced; this needs a design that is safe,
        #because keeping several counters is risky if one of them is not updated
        #(self._ageStandards, self._currentWave and self.num_of_processes are not set for now)
        self._totalWavesAdvanced = 0

        #treatment strategies and the repositories are the rules by which person objects advance to the future
        #they differ between populations, so person objects need to obtain them from the population
        #every repository will need a model for each corresponding person attribute,
        #eg the riskFactorsDynamic repo needs a model for each item in the Person.riskFactorsDynamic list
        #the "treatments" entry holds the default treatments in a population
        repositoryNames = ("riskFactorsDynamic", "treatments", "outcomes", "qalys")
        self._repositories = dict.fromkeys(repositoryNames)

        #every item in the Person.treatments list can in principle have a treatment strategy,
        #so the keys here need to be the same as the treatments list
        #design idea: merge all of these rules in a single structure (eg pop._advancementRules or a
        #populationAdvancementRules class) and pass that to person objects as a single argument;
        #then eg NHANESDirectSamplePop would initialize 2 things, a population and its advancement rules
        treatmentNames = ("antiHypertensiveCount", "statin")
        self._treatmentStrategies = dict.fromkeys(treatmentNames)

        #any repository needed by a population method should exist in the population class itself,
        #perhaps as a default that changes nothing
        #currently subclasses are the ones that define the repositories the population class actually needs

    def get_numberAlive(self):
        """Return how many people report is_alive() as true (recomputed on every call)."""
        return sum(int(person.is_alive()) for person in self._people)

    def advance(self, years, rng=None):
        """Advance every person by `years` and add `years` to the wave counter.

        rng is accepted but not used in this method.
        """
        logging.info(f"processing years: {self._totalWavesAdvanced}-{self._totalWavesAdvanced+years}")
        for person in self._people:
            person.advance(years,
                           treatmentStrategies=self._treatmentStrategies,
                           repositories=self._repositories)
        #sampling from NHANES is wave 0
        self._totalWavesAdvanced += years
        
#monkey patch microsim.population: rebind its "Population" and "Person" names to the notebook prototypes
#only lookups made through the module after this point see the new classes; names that were already
#imported with "from microsim.population import ..." keep pointing at the original objects
setattr(popModule, "Population", myPopulation)
setattr(popModule, "Person", myPerson)
#setattr(popModule, "CohortRiskModelRepository", myCohortRiskModelRepository)

In [None]:
class NHANESDirectSamplePopulation(myPopulation):
    """Simple population class that samples with replacement from one year of NHANES.

    The imputed NHANES dataset is read from a path relative to the current working
    directory, restricted to the rows whose `year` column equals the requested year,
    and handed to build_people_using_nhanes_for_sampling to create the people.
    """

    def __init__(
        self,
        n,
        year,
        filter=None,
        generate_new_people=True,
        model_reposistory_type="cohort",
        random_seed=None,
        weights=None,
        rng=None,
    ):
        """Build the population.

        n: forwarded to build_people_using_nhanes_for_sampling (presumably the number of
           people to sample - confirm against that function).
        year: value matched against the `year` column of the NHANES dataset.
        filter, random_seed, weights, rng: forwarded unchanged to
           build_people_using_nhanes_for_sampling.
        generate_new_people: when False the dataset is not read and no people are sampled;
           the population starts with an empty pandas Series of people. copy() relies on
           this to avoid sampling people that would be thrown away immediately.
        model_reposistory_type: "cohort" or "nhanes"; the (misspelled) name is kept because
           callers may pass it by keyword.

        Raises ValueError for any other model_reposistory_type.
        """
        self._outcome_model_repository = OutcomeModelRepository()
        if generate_new_people:
            nhanes = pd.read_stata("microsim/data/fullyImputedDataset.dta")
            nhanes = nhanes.loc[nhanes.year == year]
            #rng = np.random.default_rng(rng)
            people = build_people_using_nhanes_for_sampling(
                nhanes,
                n,
                self._outcome_model_repository,
                filter=filter,
                random_seed=random_seed,
                weights=weights,
                rng=rng,
            )
        else:
            #placeholder that the caller is expected to replace (see copy)
            people = pd.Series([], dtype=object)
        super().__init__(people)
        self._qaly_assignment_strategy = QALYAssignmentStrategy()
        self.n = n
        self.year = year
        self._initialize_risk_models(model_reposistory_type)

    def copy(self):
        """Return a new population of the same n and year holding a deep copy of the people.

        NOTE(review): apart from n and year the new population is built with the constructor
        defaults, so a non-default risk model repository type and anything assigned to the
        original after construction are not carried over.
        """
        #generate_new_people must be passed by keyword: the third positional parameter is `filter`,
        #so a positional False used to be interpreted as the filter
        newPop = NHANESDirectSamplePopulation(self.n, self.year, generate_new_people=False)
        newPop._people = copy.deepcopy(self._people)
        return newPop

    def _initialize_risk_models(self, model_repository_type):
        """Set self._risk_model_repository according to model_repository_type.

        Raises ValueError when the type is neither "cohort" nor "nhanes".
        """
        if model_repository_type == "cohort":
            self._risk_model_repository = myCohortRiskModelRepository()
        elif model_repository_type == "nhanes":
            #NOTE(review): NHANESRiskModelRepository is not among the imports at the top of the
            #notebook - confirm it is in scope before using this branch
            self._risk_model_repository = NHANESRiskModelRepository()
        else:
            #f-string with !r so that a non-string argument is reported instead of raising a TypeError
            raise ValueError(f"unknown risk model repository type: {model_repository_type!r}")

In [None]:
#from microsim.population import NHANESDirectSamplePopulation
#from popModule import NHANESDirectSamplePopulation
#build a small population for interactive testing; rngStream is the generator created in the setup cell
#the second argument is the NHANES year used to select rows
popSize = 10
pop = NHANESDirectSamplePopulation(popSize, 2017, rng=rngStream)

In [None]:
#advance the risk factors of the first person only, using the risk model repository of the population
#NOTE(review): this presumably modifies the person in place, so re-running the cell advances again - confirm
pop._people.iloc[0].advance_risk_factors(pop._risk_model_repository)

In [None]:
#display the _sbp attribute of the first person (presumably the systolic blood pressure values so far - confirm)
pop._people.iloc[0]._sbp

In [None]:
#display the _alcoholPerWeek attribute of the first person
pop._people.iloc[0]._alcoholPerWeek

In [None]:
#display the _gcp attribute of the first person
pop._people.iloc[0]._gcp

In [None]:
#display the _dementia attribute of the first person
pop._people.iloc[0]._dementia

In [None]:
#dir(pop._people.iloc[0])

In [None]:
#advance the treatments of the first person only; the risk model repository is passed as the treatment
#repository and the treatment strategies come from the population (all None unless they were changed)
pop._people.iloc[0].advance_treatments(pop._risk_model_repository, pop._treatmentStrategies)

In [None]:
#display the _antiHypertensiveCount attribute of the first person, to check the effect of advancing treatments
pop._people.iloc[0]._antiHypertensiveCount

In [None]:
#display the _statin attribute of the first person, to check the effect of advancing treatments
pop._people.iloc[0]._statin

In [None]:
#display the riskFactors attribute defined on the myPerson class itself (not on an instance)
myPerson.riskFactors

In [None]:
#display the risk model repository object that was set by _initialize_risk_models
pop._risk_model_repository

In [None]:
#set the "age" entry of the repository's internal _repository mapping to a new AgeModel instance
#this modifies pop in place, so cells that run afterwards see the new entry
#NOTE(review): AgeModel is not among the imports at the top of the notebook - confirm it is defined earlier
pop._risk_model_repository._repository["age"] = AgeModel()

In [None]:
#display the internal _repository mapping, eg to verify the "age" entry assigned in the previous cell
pop._risk_model_repository._repository

In [None]:
#display the repositories dictionary of the population
#NOTE(review): NHANESDirectSamplePopulation as defined above does not assign any of its entries
pop._repositories

In [None]:
#NOTE(review): this import sits in the middle of the notebook; consider moving it to the import cell at the top
from microsim.outcome import OutcomeType
#display the entry stored under the STROKE key of the first person's _outcomes
pop._people.iloc[0]._outcomes[OutcomeType.STROKE]

In [None]:
#dir(pop._people.iloc[0])

In [None]:
#ask the first person whether they have a STROKE outcome during the simulation
#relies on OutcomeType, which is imported in an earlier cell
pop._people.iloc[0].has_outcome_during_simulation(OutcomeType.STROKE)