This notebook develops the code that implements the stroke phenotype models (stroke type, stroke subtype, stroke NIHSS, stroke disability, stroke location). The `TestCaseOne` person is built from the data that were obtained for the validation of the GCP stroke model (see `test_gcp_stroke_model`).

In [1]:
import os
import json
import pandas as pd

from microsim.regression_model import RegressionModel
from microsim.statsmodel_linear_risk_factor_model import StatsModelLinearRiskFactorModel
from microsim.statsmodel_logistic_risk_factor_model import StatsModelLogisticRiskFactorModel
from microsim.stroke_outcome import StrokeType, StrokeSubtype, Localization

from microsim.person import Person
from microsim.population import Population
from microsim.education import Education
from microsim.gender import NHANESGender
from microsim.smoking_status import SmokingStatus
from microsim.alcohol_category import AlcoholCategory
from microsim.race_ethnicity import NHANESRaceEthnicity
from microsim.outcome import Outcome, OutcomeType

In [2]:
class TestCaseOne(Person):
    """Hand-built person with a fixed 7-entry risk-factor history and one stroke.

    The numeric inputs come from the validation of the GCP stroke model
    (see test_gcp_stroke_model). The person is used in this notebook as the
    single test subject for the stroke phenotype models.
    """

    def __init__(self):
        """Build the history lists, create the person from their first entries, then install the full history."""

        #make the lists that will be used to define the person's path in the simulation
        ageAtStroke = 74.6 + 0.389117043*10 
        #position of the stroke entry in the age list; lists that distinguish pre-/post-stroke values
        #hold the pre-stroke value in their first indexStroke+1 entries and the post-stroke value in the remaining 4
        indexStroke = 2
        #6 ages one year apart that end at ageAtStroke+3.17, plus ageAtStroke inserted at indexStroke -> 7 entries
        ageList = [ageAtStroke + 3.17 + x for x in range(-5,1)]
        ageList.insert(indexStroke, ageAtStroke)
        sbpMeanPrestroke = 140.4 -1.9*10
        sbpMean = -1.*10 +134.9
        sbpList = [sbpMeanPrestroke] * (indexStroke+1) + [sbpMean]*4
        dbpList = [80]*7
        #the source data are fasting glucose values, so they are converted to a1c
        a1cMeanPrestroke = Person.convert_fasting_glucose_to_a1c(112.8 + 0.5*10)
        a1cMean = Person.convert_fasting_glucose_to_a1c(108.1 + 2.2*10)
        a1cList = [a1cMeanPrestroke] * (indexStroke+1) + [a1cMean]*4
        hdlList = [50]*7
        ldlMeanPrestroke = 126.4 + 3.8*10
        ldlMean = 2.6*10 + 94.1
        ldlList = [ldlMeanPrestroke] * (indexStroke+1) + [ldlMean]*4
        trigList = [150]*7
        #NOTE(review): the comment next to totChol= below says hdl+ldl+0.2*trig, but this line adds the full trig value
        #-- confirm which one is intended (it affects every model that has a totChol coefficient)
        totCholList = [hdlList[i]+ldlList[i]+trigList[i] for i in range(len(ldlList))]
        bmiMedianPrestroke = 27.2 + 1.467401286
        #post-stroke entries are the pre-stroke value plus 10, 20, 30, 40
        bmiList = [bmiMedianPrestroke] * (indexStroke+1) + [bmiMedianPrestroke+x for x in range(10,50,10)]
        waistMedianPrestroke = 97.5 - 1.618*10
        #post-stroke entries are the pre-stroke value plus 10, 20, 30, 40
        waistList = [waistMedianPrestroke] * (indexStroke+1) + [waistMedianPrestroke+x for x in range(10,50,10)]
        antiHypertensiveCountList = [1]*7
        statinList = [1]*7
        otherLipidLoweringMedicationCountList = [0]*7
        creatinineList = [1]*7
        anyPhysicalActivityList=[1]*7
        medianGcpPrestroke = 6.104780222+52.7
        gcpList = [medianGcpPrestroke]*7
    
        #create the person
        #(the trailing comments name the corresponding variable of the source model/data)
        super().__init__(
            age= ageList[0],                                          #agemed10
            gender=NHANESGender.FEMALE,                               #female0
            raceEthnicity=NHANESRaceEthnicity.NON_HISPANIC_WHITE,     #black
            sbp= sbpList[0],                                          #bs_sbpstkcog
            dbp= dbpList[0],                                          #same as TestGCPModel      
            a1c=a1cList[0],                                           #bs_glucosefmed10
            hdl= hdlList[0],                                          #same as TestGCPModel
            totChol=totCholList[0],                                   #?? hdl+ldl+0.2*trig
            ldl= ldlList[0],                                          #bs_cholldlmed10
            trig=trigList[0],                                         #same as TestGCPModel
            bmi= bmiList[0],                                          #bs_bmimed
            waist= waistList[0],                                      #bs_waistcmmed10
            anyPhysicalActivity=anyPhysicalActivityList[0],           #physact
            education=Education.SOMEHIGHSCHOOL,                       #educ2,educ3,educ4
            smokingStatus=SmokingStatus.FORMER,                        #currsmoker
            alcohol=AlcoholCategory.NONE,                             #alcperwk
            antiHypertensiveCount=antiHypertensiveCountList[0],       #htntx
            statin=statinList[0],                                     #choltx
            otherLipidLoweringMedicationCount=otherLipidLoweringMedicationCountList[0],#same as TestGCPModel
            creatinine=creatinineList[0],                             #same as TestGCPModel
            initializeAfib=None,                                      #hxafib
           )
        
        #assign history to person
        #the age history is first truncated at the stroke entry, the stroke outcome is added, and only then
        #is the full age history installed -- presumably so that add_outcome_event records the stroke at
        #ageAtStroke (TODO confirm against Person.add_outcome_event); do not reorder these three statements
        #NOTE(review): the second Outcome argument presumably marks the stroke as non-fatal -- confirm against Outcome
        self._age = ageList[0:indexStroke+1]                          #when age reaches ageAtStroke, then 
        self.add_outcome_event(Outcome(OutcomeType.STROKE, False))    #add the stroke outcome
        self._age = ageList
        self._sbp = sbpList
        self._dbp = dbpList
        self._a1c = a1cList
        self._hdl = hdlList
        self._totChol = totCholList
        self._ldl = ldlList
        self._trig = trigList
        self._bmi = bmiList
        self._waist = waistList
        self._anyPhysicalActivity = anyPhysicalActivityList
        self._antiHypertensiveCount = antiHypertensiveCountList
        self._statin = statinList
        self._otherLipidLoweringMedicationCount = otherLipidLoweringMedicationCountList
        self._creatinine = creatinineList
        self._gcp = gcpList

        #expected results based on model
        #NOTE(review): these _expected* values belong to the GCP stroke model validation; no cell visible
        #in this part of the notebook reads them
        self._expectedGcp = 58.45580379
        self._expectedYhat = 57.4753
        self._expectedResidual = 0.9805041
        self._expectedLinearPredictor = (self._expectedYhat +
                                         0.05502 * 0 -          #the alcperwk contribution in microsim
                                         0.05502 * 0. +         #the alcperwk contribution on the model
                                         (238.*(-5.2897)+332.*(-3.7359)+101.*(-2.8168)) / (238.+332.+101.+311.) -  #weighted group average
                                         0. -                   #group
                                         0.4819 -               #income
                                         0. -                   #diabetes treatment
                                         0.*(-0.03788) -        #diabetes treatment * t_gcp_stk
                                         1.989499 -             #reffect2
                                         0.0925494*3.170431211) #reffect1*t_gcp_stk
    
    @property
    def _gfr(self):
        """Return a fixed GFR value in place of the calculated one."""
        return 80.61366 #override the gfr calculation and use the actual measurement 

#make the person and the dataframe (with a single row) for testing
#test_case_one is passed to the per-person estimate methods in the cells below,
#test_case_one_df.iloc[0] (the single dataframe row) is passed to the *_vectorized methods
test_case_one=TestCaseOne()
test_case_one_pop = Population(pd.Series(test_case_one))
test_case_one_df = test_case_one_pop.get_people_current_state_and_summary_as_dataframe()
#uncomment to inspect the values of the single dataframe row
#test_case_one_df.iloc[0].tolist()

In [3]:
#I first defined the models as the plain dictionaries in the next cells; these dictionaries could be used to export the models to json files (if we decide to use json files)

In [4]:
#stroke type model as a plain dictionary; the four top-level keys are the same ones
#that the model classes further down pass to RegressionModel(**...)
#each trailing comment names the source-model term the value was derived from
strokeType = dict(
    coefficients={
        "Intercept": 0.97240658,             #_cons + aric + glucose_sim * (-46.7)
        "age": 0.012586,                     #stroke_age
        "gender[T.2]": -0.0174109,           #female0_sim
        "gender[T.1]": 0,
        "education[T.1]": 0.0211369,         #educ0_sim x 1
        "education[T.2]": 0.0211369,         #educ0_sim x 1
        "education[T.3]": 0.0422738,         #educ0_sim x 2
        "education[T.4]": 0.0634107,         #educ0_sim x 3
        "education[T.5]": 0.0845476,         #educ0_sim x 4
        "smokingStatus[T.2]": 0.0658974,     #currsmoker_sim
        "anyPhysicalActivity": 0.0501468,    #physact_sim (boolean)
        "afib": -0.0615242,                  #hxafib_sim (boolean)
        "current_bp_treatment": 0.4574039,   #htntx_sim (boolean)
        "statin": -0.1934335,                #choltx_sim (boolean)
        "sbp": -0.0045951,                   #sbpstkcog_sim
        "dbp": 0.0039562,                    #dbpstkcog_sim
        "hdl": -0.0425458,                   #cholhdl_sim
        "totChol": 0.0380974,                #choltot_sim
        "bmi": -0.0067788,                   #bmi_sim
        "alcoholPerWeek": -0.07774356,       #alcperwk_sim [1/oz alcohol] x 0.6 [oz alcohol/drink]
        "ldl": -0.0386822,                   #cholldl_sim
        "trig": -0.0070134,                  #trig_sim
        "a1c": 0.10999562,                   #glucose_sim x 28.7
        "waist": 0.0038266,                  #waistcm_sim
        "creatinine": -0.0641744,            #creatin_sim
    },
    coefficient_standard_errors={},
    residual_mean={},
    residual_standard_deviation={},
)

In [5]:
#cardioembolic stroke subtype model as a plain dictionary
#each trailing comment names the source-model term the value was derived from
strokeSubtypeCE = dict(
    coefficients={
        "Intercept": -3.13182975,            #_cons + aric + glucose_sim * (-46.7)
        "age": 0.0499947,                    #stroke_age
        "gender[T.2]": -0.3813221,           #female0_sim
        "gender[T.1]": 0.,
        "education[T.1]": -0.0143641,        #educ0_sim x 1 (not a HS graduate)
        "education[T.2]": -0.0143641,        #educ0_sim x 1 (not a HS graduate)
        "education[T.3]": -0.0287282,        #educ0_sim x 2 (HS graduate)
        "education[T.4]": -0.0430923,        #educ0_sim x 3 (some college)
        "education[T.5]": -0.0574564,        #educ0_sim x 4 (college graduate)
        "smokingStatus[T.2]": 0.0464248,     #currsmoker_sim
        "anyPhysicalActivity": 0.171529,     #physact_sim
        "afib": 1.858444,                    #hxafib_sim
        "current_bp_treatment": 0.1267211,   #htntx_sim
        "statin": 0.0405856,                 #choltx_sim
        "sbp": 0.0044229,                    #sbpstkcog_sim
        "dbp": -0.0211307,                   #dbpstkcog_sim
        "hdl": 0.0076896,                    #cholhdl_sim
        "totChol": -0.0033463,               #choltot_sim
        "bmi": 0.0421033,                    #bmi_sim
        "alcoholPerWeek": -0.00259902,       #alcperwk_sim x 0.6
        "ldl": -0.0031348,                   #cholldl_sim
        "trig": -0.0000179,                  #trig_sim
        "a1c": -0.11501525,                  #glucose_sim x 28.7
        "waist": 0.0011406,                  #waistcm_sim
        "creatinine": -0.1197203,            #creatin_sim
    },
    coefficient_standard_errors={},
    residual_mean={},
    residual_standard_deviation={},
)

#LA_atherosclerosis (large vessel) stroke subtype model as a plain dictionary
#each trailing comment names the source-model term the value was derived from
strokeSubtypeLV = dict(
    coefficients={
        "Intercept": -2.14462214,            #_cons + aric + glucose_sim * (-46.7)
        "age": 0.00203,                      #stroke_age
        "gender[T.2]": -0.3026703,           #female0_sim
        "gender[T.1]": 0.,
        "education[T.1]": 0.0204964,         #educ0_sim x 1 (not a HS graduate)
        "education[T.2]": 0.0204964,         #educ0_sim x 1 (not a HS graduate)
        "education[T.3]": 0.0409928,         #educ0_sim x 2 (HS graduate)
        "education[T.4]": 0.0614892,         #educ0_sim x 3 (some college)
        "education[T.5]": 0.0819856,         #educ0_sim x 4 (college graduate)
        "smokingStatus[T.2]": -0.4087069,    #currsmoker_sim
        "anyPhysicalActivity": 0.3283082,    #physact_sim
        "afib": 0.7040827,                   #hxafib_sim
        "current_bp_treatment": 0.1878509,   #htntx_sim
        "statin": 0.1681594,                 #choltx_sim
        "sbp": 0.0105225,                    #sbpstkcog_sim
        "dbp": -0.0154197,                   #dbpstkcog_sim
        "hdl": -0.0253099,                   #cholhdl_sim
        "totChol": 0.01298,                  #choltot_sim
        "bmi": -0.0042248,                   #bmi_sim
        "alcoholPerWeek": 0.03367986,        #alcperwk_sim x 0.6
        "ldl": -0.009429,                    #cholldl_sim
        "trig": -0.0005325,                  #trig_sim
        "a1c": -0.04051866,                  #glucose_sim x 28.7
        "waist": 0.0017584,                  #waistcm_sim
        "creatinine": -0.6562215,            #creatin_sim
    },
    coefficient_standard_errors={},
    residual_mean={},
    residual_standard_deviation={},
)

#SV_occlusion (small vessel) stroke subtype model as a plain dictionary
#each trailing comment names the source-model term the value was derived from
strokeSubtypeSV = dict(
    coefficients={
        "Intercept": 0.17084136,             #_cons + aric + glucose_sim * (-46.7)
        "age": -0.008983,                    #stroke_age
        "gender[T.2]": 0.1107677,            #female0_sim
        "gender[T.1]": 0.,
        "education[T.1]": -0.0357296,        #educ0_sim x 1 (not a HS graduate)
        "education[T.2]": -0.0357296,        #educ0_sim x 1 (not a HS graduate)
        "education[T.3]": -0.0714592,        #educ0_sim x 2 (HS graduate)
        "education[T.4]": -0.1071888,        #educ0_sim x 3 (some college)
        "education[T.5]": -0.1429184,        #educ0_sim x 4 (college graduate)
        "smokingStatus[T.2]": 0.0207556,     #currsmoker_sim
        "anyPhysicalActivity": 0.1915707,    #physact_sim
        "afib": -0.0698982,                  #hxafib_sim
        "current_bp_treatment": -0.0168805,  #htntx_sim
        "statin": -0.4778607,                #choltx_sim
        "sbp": 0.0101797,                    #sbpstkcog_sim
        "dbp": -0.0110669,                   #dbpstkcog_sim
        "hdl": -0.032421,                    #cholhdl_sim
        "totChol": 0.0257892,                #choltot_sim
        "bmi": -0.0029924,                   #bmi_sim
        "alcoholPerWeek": 0.0877935,         #alcperwk_sim x 0.6
        "ldl": -0.0302675,                   #cholldl_sim
        "trig": -0.0058004,                  #trig_sim
        "a1c": -0.00062566,                  #glucose_sim x 28.7
        "waist": 0.0118192,                  #waistcm_sim
        "creatinine": -0.5225816,            #creatin_sim
    },
    coefficient_standard_errors={},
    residual_mean={},
    residual_standard_deviation={},
)


In [6]:
#NIHSS model as a plain dictionary
#each trailing comment names the source-model term the value was derived from
strokeNihss = dict(
    coefficients={
        "Intercept": -2.6063356,             #_cons + aric + glucose_sim * (-46.7)
        "age": 0.0771289,                    #stroke_age
        "gender[T.2]": 0.0512374,            #female0_sim
        "gender[T.1]": 0.,
        "education[T.1]": 0.1658821,         #educ0_sim x 1 (not a HS graduate)
        "education[T.2]": 0.1658821,         #educ0_sim x 1 (not a HS graduate)
        "education[T.3]": 0.3317642,         #educ0_sim x 2 (HS graduate)
        "education[T.4]": 0.4976463,         #educ0_sim x 3 (some college)
        "education[T.5]": 0.6635284,         #educ0_sim x 4 (college graduate)
        "smokingStatus[T.2]": -0.1257199,    #currsmoker_sim
        "anyPhysicalActivity": -0.3476088,   #physact_sim
        "afib": 1.794008,                    #hxafib_sim
        "current_bp_treatment": 0.6804093,   #htntx_sim
        "statin": -0.1660485,                #choltx_sim
        "sbp": 0.0131255,                    #sbpstkcog_sim
        "dbp": -0.0144218,                   #dbpstkcog_sim
        "hdl": -0.2642685,                   #cholhdl_sim
        "totChol": 0.2577162,                #choltot_sim
        "bmi": -0.0197889,                   #bmi_sim
        "alcoholPerWeek": -0.26697522,       #alcperwk_sim x 0.6
        "ldl": -0.2564912,                   #cholldl_sim
        "trig": -0.0556979,                  #trig_sim
        "a1c": 0.2374925,                    #glucose_sim x 28.7
        "waist": -0.002543,                  #waistcm_sim
        "creatinine": 0.2782118,             #creatin_sim
    },
    coefficient_standard_errors={},
    residual_mean={},
    residual_standard_deviation={},
)

In [7]:
#with open("strokeType.json", "w") as outfile:
#    json.dump(strokeType, outfile)

In [8]:
#classes that end in -Model implement the models, one model at a time
#classes that end in -Repository implement the logic that determines what stroke type, subtype, nihss, location, disability is obtained for each person

In [9]:
#class strokeTypeIschemicModel:
class strokeTypeIschemicModel(StatsModelLogisticRiskFactorModel):
    """Logistic model for the stroke type, with hard-coded coefficients.

    strokeTypeModelRepository treats an estimated risk above 0.5 as an ischemic stroke.
    The coefficients could later be loaded from a json file instead
    (e.g. with load_regression_model("strokeTypeModel")).
    """

    def __init__(self):
        """Assemble the regression model from the coefficients and hand it to the logistic base class."""
        #each trailing comment names the source-model term the value was derived from
        coefficients = {
            "Intercept": 0.97240658,             #_cons + aric + glucose_sim * (-46.7)
            "age": 0.012586,                     #stroke_age
            "gender[T.2]": -0.0174109,           #female0_sim
            "gender[T.1]": 0,
            "education[T.1]": 0.0211369,         #educ0_sim x 1
            "education[T.2]": 0.0211369,         #educ0_sim x 1
            "education[T.3]": 0.0422738,         #educ0_sim x 2
            "education[T.4]": 0.0634107,         #educ0_sim x 3
            "education[T.5]": 0.0845476,         #educ0_sim x 4
            "smokingStatus[T.2]": 0.0658974,     #currsmoker_sim
            "anyPhysicalActivity": 0.0501468,    #physact_sim (boolean in sim)
            "afib": -0.0615242,                  #hxafib_sim (boolean in sim)
            "current_bp_treatment": 0.4574039,   #htntx_sim (boolean in sim)
            "statin": -0.1934335,                #choltx_sim (boolean in sim)
            "sbp": -0.0045951,                   #sbpstkcog_sim
            "dbp": 0.0039562,                    #dbpstkcog_sim
            "hdl": -0.0425458,                   #cholhdl_sim
            "totChol": 0.0380974,                #choltot_sim
            "bmi": -0.0067788,                   #bmi_sim
            "alcoholPerWeek": -0.07774356,       #alcperwk_sim [1/oz alcohol] x 0.6 [oz alcohol/drink]
            "ldl": -0.0386822,                   #cholldl_sim
            "trig": -0.0070134,                  #trig_sim
            "a1c": 0.10999562,                   #glucose_sim x 28.7
            "waist": 0.0038266,                  #waistcm_sim
            "creatinine": -0.0641744,            #creatin_sim
        }
        #no standard errors or residual information are provided for this model
        self._model = {
            "coefficients": coefficients,
            "coefficient_standard_errors": {},
            "residual_mean": {},
            "residual_standard_deviation": {},
        }
        self._regressionModel = RegressionModel(**self._model)
        super().__init__(self._regressionModel)

    def estimate_next_risk(self, person):
        """Return the base-class risk estimate for a single person object."""
        return super().estimate_next_risk(person)

    def estimate_next_risk_vectorized(self, person):
        """Return the base-class risk estimate for a person given as a dataframe row."""
        return super().estimate_next_risk_vectorized(person)

In [10]:
class strokeTypeModelRepository(strokeTypeIschemicModel):
    """Decides the stroke type of a person from the risk estimated by strokeTypeIschemicModel."""

    def __init__(self):
        super().__init__()

    def get_stroke_type(self, person):
        """Return StrokeType.ISCHEMIC when the estimated risk exceeds 0.5, StrokeType.ICH otherwise."""
        ischemicRisk = super().estimate_next_risk(person)
        if ischemicRisk > 0.5:
            return StrokeType.ISCHEMIC
        return StrokeType.ICH

    def get_stroke_type_vectorized(self, person):
        """Same decision as get_stroke_type, for a person given as a dataframe row."""
        ischemicRisk = super().estimate_next_risk_vectorized(person)
        if ischemicRisk > 0.5:
            return StrokeType.ISCHEMIC
        return StrokeType.ICH

In [11]:
#stroke type of the test person from the per-person and from the vectorized code path, displayed as a tuple
(strokeTypeModelRepository().get_stroke_type(test_case_one),
strokeTypeModelRepository().get_stroke_type_vectorized(test_case_one_df.iloc[0]))

(<StrokeType.ISCHEMIC: 'ischemic'>, <StrokeType.ISCHEMIC: 'ischemic'>)

In [12]:
class strokeSubtypeCEModel(StatsModelLogisticRiskFactorModel):
    """Logistic model for the cardioembolic stroke subtype, with hard-coded coefficients.

    The coefficients could later be loaded from a json file instead
    (e.g. with load_regression_model("strokeSubtypeCEModel")).
    """

    def __init__(self):
        """Assemble the regression model from the coefficients and hand it to the logistic base class."""
        #each trailing comment names the source-model term the value was derived from
        coefficients = {
            "Intercept": -3.13182975,            #_cons + aric + glucose_sim * (-46.7)
            "age": 0.0499947,                    #stroke_age
            "gender[T.2]": -0.3813221,           #female0_sim
            "gender[T.1]": 0.,
            "education[T.1]": -0.0143641,        #educ0_sim x 1 (not a HS graduate)
            "education[T.2]": -0.0143641,        #educ0_sim x 1 (not a HS graduate)
            "education[T.3]": -0.0287282,        #educ0_sim x 2 (HS graduate)
            "education[T.4]": -0.0430923,        #educ0_sim x 3 (some college)
            "education[T.5]": -0.0574564,        #educ0_sim x 4 (college graduate)
            "smokingStatus[T.2]": 0.0464248,     #currsmoker_sim
            "anyPhysicalActivity": 0.171529,     #physact_sim
            "afib": 1.858444,                    #hxafib_sim
            "current_bp_treatment": 0.1267211,   #htntx_sim
            "statin": 0.0405856,                 #choltx_sim
            "sbp": 0.0044229,                    #sbpstkcog_sim
            "dbp": -0.0211307,                   #dbpstkcog_sim
            "hdl": 0.0076896,                    #cholhdl_sim
            "totChol": -0.0033463,               #choltot_sim
            "bmi": 0.0421033,                    #bmi_sim
            "alcoholPerWeek": -0.00259902,       #alcperwk_sim x 0.6
            "ldl": -0.0031348,                   #cholldl_sim
            "trig": -0.0000179,                  #trig_sim
            "a1c": -0.11501525,                  #glucose_sim x 28.7
            "waist": 0.0011406,                  #waistcm_sim
            "creatinine": -0.1197203,            #creatin_sim
        }
        #no standard errors or residual information are provided for this model
        self._model = {
            "coefficients": coefficients,
            "coefficient_standard_errors": {},
            "residual_mean": {},
            "residual_standard_deviation": {},
        }
        self._regressionModel = RegressionModel(**self._model)
        super().__init__(self._regressionModel)

    def estimate_next_risk(self, person):
        """Return the base-class risk estimate for a single person object."""
        return super().estimate_next_risk(person)

    def estimate_next_risk_vectorized(self, person):
        """Return the base-class risk estimate for a person given as a dataframe row."""
        return super().estimate_next_risk_vectorized(person)

In [13]:
class strokeSubtypeLVModel(StatsModelLogisticRiskFactorModel):
    """Logistic model for the LA_atherosclerosis (large vessel) stroke subtype, with hard-coded coefficients.

    The coefficients could later be loaded from a json file instead
    (e.g. with load_regression_model("strokeSubtypeLVModel")).
    """

    def __init__(self):
        """Assemble the regression model from the coefficients and hand it to the logistic base class."""
        #each trailing comment names the source-model term the value was derived from
        coefficients = {
            "Intercept": -2.14462214,            #_cons + aric + glucose_sim * (-46.7)
            "age": 0.00203,                      #stroke_age
            "gender[T.2]": -0.3026703,           #female0_sim
            "gender[T.1]": 0.,
            "education[T.1]": 0.0204964,         #educ0_sim x 1 (not a HS graduate)
            "education[T.2]": 0.0204964,         #educ0_sim x 1 (not a HS graduate)
            "education[T.3]": 0.0409928,         #educ0_sim x 2 (HS graduate)
            "education[T.4]": 0.0614892,         #educ0_sim x 3 (some college)
            "education[T.5]": 0.0819856,         #educ0_sim x 4 (college graduate)
            "smokingStatus[T.2]": -0.4087069,    #currsmoker_sim
            "anyPhysicalActivity": 0.3283082,    #physact_sim
            "afib": 0.7040827,                   #hxafib_sim
            "current_bp_treatment": 0.1878509,   #htntx_sim
            "statin": 0.1681594,                 #choltx_sim
            "sbp": 0.0105225,                    #sbpstkcog_sim
            "dbp": -0.0154197,                   #dbpstkcog_sim
            "hdl": -0.0253099,                   #cholhdl_sim
            "totChol": 0.01298,                  #choltot_sim
            "bmi": -0.0042248,                   #bmi_sim
            "alcoholPerWeek": 0.03367986,        #alcperwk_sim x 0.6
            "ldl": -0.009429,                    #cholldl_sim
            "trig": -0.0005325,                  #trig_sim
            "a1c": -0.04051866,                  #glucose_sim x 28.7
            "waist": 0.0017584,                  #waistcm_sim
            "creatinine": -0.6562215,            #creatin_sim
        }
        #no standard errors or residual information are provided for this model
        self._model = {
            "coefficients": coefficients,
            "coefficient_standard_errors": {},
            "residual_mean": {},
            "residual_standard_deviation": {},
        }
        self._regressionModel = RegressionModel(**self._model)
        super().__init__(self._regressionModel)

    def estimate_next_risk(self, person):
        """Return the base-class risk estimate for a single person object."""
        return super().estimate_next_risk(person)

    def estimate_next_risk_vectorized(self, person):
        """Return the base-class risk estimate for a person given as a dataframe row."""
        return super().estimate_next_risk_vectorized(person)

In [14]:
class strokeSubtypeSVModel(StatsModelLogisticRiskFactorModel):
    """Logistic model for the SV_occlusion (small vessel) stroke subtype, with hard-coded coefficients.

    The coefficients could later be loaded from a json file instead
    (e.g. with load_regression_model("strokeSubtypeSVModel")).
    """

    def __init__(self):
        """Assemble the regression model from the coefficients and hand it to the logistic base class."""
        #each trailing comment names the source-model term the value was derived from
        coefficients = {
            "Intercept": 0.17084136,             #_cons + aric + glucose_sim * (-46.7)
            "age": -0.008983,                    #stroke_age
            "gender[T.2]": 0.1107677,            #female0_sim
            "gender[T.1]": 0.,
            "education[T.1]": -0.0357296,        #educ0_sim x 1 (not a HS graduate)
            "education[T.2]": -0.0357296,        #educ0_sim x 1 (not a HS graduate)
            "education[T.3]": -0.0714592,        #educ0_sim x 2 (HS graduate)
            "education[T.4]": -0.1071888,        #educ0_sim x 3 (some college)
            "education[T.5]": -0.1429184,        #educ0_sim x 4 (college graduate)
            "smokingStatus[T.2]": 0.0207556,     #currsmoker_sim
            "anyPhysicalActivity": 0.1915707,    #physact_sim
            "afib": -0.0698982,                  #hxafib_sim
            "current_bp_treatment": -0.0168805,  #htntx_sim
            "statin": -0.4778607,                #choltx_sim
            "sbp": 0.0101797,                    #sbpstkcog_sim
            "dbp": -0.0110669,                   #dbpstkcog_sim
            "hdl": -0.032421,                    #cholhdl_sim
            "totChol": 0.0257892,                #choltot_sim
            "bmi": -0.0029924,                   #bmi_sim
            "alcoholPerWeek": 0.0877935,         #alcperwk_sim x 0.6
            "ldl": -0.0302675,                   #cholldl_sim
            "trig": -0.0058004,                  #trig_sim
            "a1c": -0.00062566,                  #glucose_sim x 28.7
            "waist": 0.0118192,                  #waistcm_sim
            "creatinine": -0.5225816,            #creatin_sim
        }
        #no standard errors or residual information are provided for this model
        self._model = {
            "coefficients": coefficients,
            "coefficient_standard_errors": {},
            "residual_mean": {},
            "residual_standard_deviation": {},
        }
        self._regressionModel = RegressionModel(**self._model)
        super().__init__(self._regressionModel)

    def estimate_next_risk(self, person):
        """Return the base-class risk estimate for a single person object."""
        return super().estimate_next_risk(person)

    def estimate_next_risk_vectorized(self, person):
        """Return the base-class risk estimate for a person given as a dataframe row."""
        return super().estimate_next_risk_vectorized(person)

In [15]:
class strokeSubtypeModelRepository:
    """Decides the stroke subtype of a person from the three subtype risk models.

    The cardioembolic, large vessel and small vessel models are built once, when the
    repository is created, and reused for every person (they used to be rebuilt on
    every call). The subtype whose model gives the highest risk is returned.
    """

    def __init__(self):
        #insertion order matters: it is the evaluation order and the tie-break order
        self._subtypeModels = {
            StrokeSubtype.CARDIOEMBOLIC: strokeSubtypeCEModel(),
            StrokeSubtype.LARGE_VESSEL: strokeSubtypeLVModel(),
            StrokeSubtype.SMALL_VESSEL: strokeSubtypeSVModel(),
        }

    @staticmethod
    def _highest_risk_subtype(subtypeRisks):
        """Return the key with the highest risk; on a tie the first key in insertion order wins."""
        return max(subtypeRisks, key=subtypeRisks.get)

    def get_stroke_subtype(self, person):
        """Return the stroke subtype with the highest estimated risk for a single person object.

        person: the object passed on to estimate_next_risk of every subtype model.
        """
        subtypeRisks = {subtype: model.estimate_next_risk(person)
                        for subtype, model in self._subtypeModels.items()}
        return self._highest_risk_subtype(subtypeRisks)

    def get_stroke_subtype_vectorized(self, person):
        """Return the stroke subtype with the highest estimated risk for a person given as a dataframe row.

        person: the row passed on to estimate_next_risk_vectorized of every subtype model.
        """
        subtypeRisks = {subtype: model.estimate_next_risk_vectorized(person)
                        for subtype, model in self._subtypeModels.items()}
        return self._highest_risk_subtype(subtypeRisks)

In [16]:
#stroke subtype of the test person from the per-person and from the vectorized code path, displayed as a tuple
(strokeSubtypeModelRepository().get_stroke_subtype(test_case_one),
strokeSubtypeModelRepository().get_stroke_subtype_vectorized(test_case_one_df.iloc[0]))

(<StrokeSubtype.SMALL_VESSEL: 'smallVessel'>,
 <StrokeSubtype.SMALL_VESSEL: 'smallVessel'>)

In [17]:
#NIHSS
class strokeNihssModel(StatsModelLinearRiskFactorModel):
    """Linear model for the stroke NIHSS, with hard-coded coefficients.

    The estimate methods keep the names inherited from the base class; the value they
    return is the estimate of this linear model for the given person.
    The coefficients could later be loaded from a json file instead of being hard-coded.
    """

    def __init__(self):
        """Assemble the regression model from the coefficients and hand it to the linear base class."""
        #each trailing comment names the source-model term the value was derived from
        coefficients = {
            "Intercept": -2.6063356,             #_cons + aric + glucose_sim * (-46.7)
            "age": 0.0771289,                    #stroke_age
            "gender[T.2]": 0.0512374,            #female0_sim
            "gender[T.1]": 0.,
            "education[T.1]": 0.1658821,         #educ0_sim x 1 (not a HS graduate)
            "education[T.2]": 0.1658821,         #educ0_sim x 1 (not a HS graduate)
            "education[T.3]": 0.3317642,         #educ0_sim x 2 (HS graduate)
            "education[T.4]": 0.4976463,         #educ0_sim x 3 (some college)
            "education[T.5]": 0.6635284,         #educ0_sim x 4 (college graduate)
            "smokingStatus[T.2]": -0.1257199,    #currsmoker_sim
            "anyPhysicalActivity": -0.3476088,   #physact_sim
            "afib": 1.794008,                    #hxafib_sim
            "current_bp_treatment": 0.6804093,   #htntx_sim
            "statin": -0.1660485,                #choltx_sim
            "sbp": 0.0131255,                    #sbpstkcog_sim
            "dbp": -0.0144218,                   #dbpstkcog_sim
            "hdl": -0.2642685,                   #cholhdl_sim
            "totChol": 0.2577162,                #choltot_sim
            "bmi": -0.0197889,                   #bmi_sim
            "alcoholPerWeek": -0.26697522,       #alcperwk_sim x 0.6
            "ldl": -0.2564912,                   #cholldl_sim
            "trig": -0.0556979,                  #trig_sim
            "a1c": 0.2374925,                    #glucose_sim x 28.7
            "waist": -0.002543,                  #waistcm_sim
            "creatinine": 0.2782118,             #creatin_sim
        }
        #no standard errors or residual information are provided for this model
        self._model = {
            "coefficients": coefficients,
            "coefficient_standard_errors": {},
            "residual_mean": {},
            "residual_standard_deviation": {},
        }
        self._regressionModel = RegressionModel(**self._model)
        super().__init__(self._regressionModel)

    def estimate_next_risk(self, person):
        """Return the base-class estimate for a single person object."""
        return super().estimate_next_risk(person)

    def estimate_next_risk_vectorized(self, person):
        """Return the base-class estimate for a person given as a dataframe row."""
        return super().estimate_next_risk_vectorized(person)

In [18]:
#NIHSS model estimate for the test person from the per-person and from the vectorized code path, displayed as a tuple
(strokeNihssModel().estimate_next_risk(test_case_one),
 strokeNihssModel().estimate_next_risk_vectorized(test_case_one_df.iloc[0]))

(34.75771830066993, 34.75771830066993)

In [19]:
#in the next class I started implementing all models in a single class, but I found that this made the class
#too complex, so I decided to split things into separate classes (see above)

In [20]:
class strokeSubtypeModel:
    """Holds three stroke-subtype models side by side: CE, LV and SV.

    For each subtype the constructor stores
      * ``_model<XX>``              - the keyword dict passed to ``RegressionModel``
      * ``_regressionModel<XX>``    - ``RegressionModel(**_model<XX>)``
      * ``_logRegressionModel<XX>`` - ``StatsModelLogisticRiskFactorModel`` built on it
    where ``<XX>`` is ``CE`` (cardioembolic), ``LV`` (LA_atherosclerosis) or
    ``SV`` (SV_occlusion).

    The class only builds the models; it defines no estimate methods.

    The trailing comment on each coefficient names the source variable it was
    taken from and any scaling that was applied to it.
    """

    @staticmethod
    def _wrapCoefficients(coefficients):
        """Put a coefficient table into the keyword layout given to RegressionModel.

        The three remaining entries are fresh empty dicts on every call.
        """
        return {
            "coefficients": coefficients,
            "coefficient_standard_errors": {},
            "residual_mean": {},
            "residual_standard_deviation": {},
        }

    def __init__(self):
        """Define the three coefficient tables and build every model object from them."""

        # ---- cardioembolic ------------------------------------------------
        cardioembolic = {
            "Intercept": -3.13182975,           # _cons + aric + glucose_sim * (-46.7)
            "age": 0.0499947,                   # stroke_age
            "gender[T.2]": -0.3813221,          # female0_sim
            "gender[T.1]": 0.0,
            "education[T.1]": -0.0143641,       # educ0_sim * 1, 1=not a HS graduate
            "education[T.2]": -0.0143641,       # educ0_sim * 1, 1=not a HS graduate
            "education[T.3]": -0.0287282,       # educ0_sim * 2, 2=HS graduate
            "education[T.4]": -0.0430923,       # educ0_sim * 3, 3=some college
            "education[T.5]": -0.0574564,       # educ0_sim * 4, 4=college graduate
            "smokingStatus[T.2]": 0.0464248,    # currsmoker_sim
            "anyPhysicalActivity": 0.171529,    # physact_sim
            "afib": 1.858444,                   # hxafib_sim
            "current_bp_treatment": 0.1267211,  # htntx_sim
            "statin": 0.0405856,                # choltx_sim
            "sbp": 0.0044229,                   # sbpstkcog_sim
            "dbp": -0.0211307,                  # dbpstkcog_sim
            "hdl": 0.0076896,                   # cholhdl_sim
            "totChol": -0.0033463,              # choltot_sim
            "bmi": 0.0421033,                   # bmi_sim
            "alcoholPerWeek": -0.00259902,      # alcperwk_sim * 0.6
            "ldl": -0.0031348,                  # cholldl_sim
            "trig": -0.0000179,                 # trig_sim
            "a1c": -0.11501525,                 # glucose_sim * 28.7
            "waist": 0.0011406,                 # waistcm_sim
            "creatinine": -0.1197203,           # creatin_sim
        }

        # ---- LA_atherosclerosis -------------------------------------------
        laAtherosclerosis = {
            "Intercept": -2.14462214,           # _cons + aric + glucose_sim * (-46.7)
            "age": 0.00203,                     # stroke_age
            "gender[T.2]": -0.3026703,          # female0_sim
            "gender[T.1]": 0.0,
            "education[T.1]": 0.0204964,        # educ0_sim * 1, 1=not a HS graduate
            "education[T.2]": 0.0204964,        # educ0_sim * 1, 1=not a HS graduate
            "education[T.3]": 0.0409928,        # educ0_sim * 2, 2=HS graduate
            "education[T.4]": 0.0614892,        # educ0_sim * 3, 3=some college
            "education[T.5]": 0.0819856,        # educ0_sim * 4, 4=college graduate
            "smokingStatus[T.2]": -0.4087069,   # currsmoker_sim
            "anyPhysicalActivity": 0.3283082,   # physact_sim
            "afib": 0.7040827,                  # hxafib_sim
            "current_bp_treatment": 0.1878509,  # htntx_sim
            "statin": 0.1681594,                # choltx_sim
            "sbp": 0.0105225,                   # sbpstkcog_sim
            "dbp": -0.0154197,                  # dbpstkcog_sim
            "hdl": -0.0253099,                  # cholhdl_sim
            "totChol": 0.01298,                 # choltot_sim
            "bmi": -0.0042248,                  # bmi_sim
            "alcoholPerWeek": 0.03367986,       # alcperwk_sim * 0.6
            "ldl": -0.009429,                   # cholldl_sim
            "trig": -0.0005325,                 # trig_sim
            "a1c": -0.04051866,                 # glucose_sim * 28.7
            "waist": 0.0017584,                 # waistcm_sim
            "creatinine": -0.6562215,           # creatin_sim
        }

        # ---- SV_occlusion -------------------------------------------------
        svOcclusion = {
            "Intercept": 0.17084136,            # _cons + aric + glucose_sim * (-46.7)
            "age": -0.008983,                   # stroke_age
            "gender[T.2]": 0.1107677,           # female0_sim
            "gender[T.1]": 0.0,
            "education[T.1]": -0.0357296,       # educ0_sim*1, 1=not a HS graduate
            "education[T.2]": -0.0357296,       # educ0_sim*1, 1=not a HS graduate
            "education[T.3]": -0.0714592,       # educ0_sim*2, 2=HS graduate
            "education[T.4]": -0.1071888,       # educ0_sim*3, 3=some college
            "education[T.5]": -0.1429184,       # educ0_sim*4, 4=college graduate
            "smokingStatus[T.2]": 0.0207556,    # currsmoker_sim
            "anyPhysicalActivity": 0.1915707,   # physact_sim
            "afib": -0.0698982,                 # hxafib_sim
            "current_bp_treatment": -0.0168805, # htntx_sim
            "statin": -0.4778607,               # choltx_sim
            "sbp": 0.0101797,                   # sbpstkcog_sim
            "dbp": -0.0110669,                  # dbpstkcog_sim
            "hdl": -0.032421,                   # cholhdl_sim
            "totChol": 0.0257892,               # choltot_sim
            "bmi": -0.0029924,                  # bmi_sim
            "alcoholPerWeek": 0.0877935,        # alcperwk_sim * 0.6
            "ldl": -0.0302675,                  # cholldl_sim
            "trig": -0.0058004,                 # trig_sim
            "a1c": -0.00062566,                 # glucose_sim * 28.7
            "waist": 0.0118192,                 # waistcm_sim
            "creatinine": -0.5225816,           # creatin_sim
        }

        # keyword dicts in the layout handed to RegressionModel
        self._modelCE = self._wrapCoefficients(cardioembolic)
        self._modelLV = self._wrapCoefficients(laAtherosclerosis)
        self._modelSV = self._wrapCoefficients(svOcclusion)

        # regression models first, in CE / LV / SV order ...
        self._regressionModelCE = RegressionModel(**self._modelCE)
        self._regressionModelLV = RegressionModel(**self._modelLV)
        self._regressionModelSV = RegressionModel(**self._modelSV)

        # alternative kept for reference (not active):
        #self._regressionModelCE = load_regression_model("strokeSubtypeCEModel")
        #self._regressionModelLV = load_regression_model("strokeSubtypeLVModel")
        #self._regressionModelSV = load_regression_model("strokeSubtypeSVModel")

        # ... then one logistic risk-factor model on top of each of them
        self._logRegressionModelCE = StatsModelLogisticRiskFactorModel(self._regressionModelCE)
        self._logRegressionModelLV = StatsModelLogisticRiskFactorModel(self._regressionModelLV)
        self._logRegressionModelSV = StatsModelLogisticRiskFactorModel(self._regressionModelSV)

In [21]:
#various tests that helped me understand how to work with the superclasses...

In [22]:
# Build a RegressionModel from the strokeType specification.
# NOTE(review): strokeType is not defined in this part of the notebook - presumably an earlier cell.
strokeTypeRM = RegressionModel(**strokeType)

In [23]:
# Linear risk-factor model built on strokeTypeRM.
strokeTypeRFM = StatsModelLinearRiskFactorModel(strokeTypeRM)

In [24]:
# Logistic risk-factor model built on the same strokeTypeRM, for comparison with the linear one.
strokeTypeLogRFM = StatsModelLogisticRiskFactorModel(strokeTypeRM)

In [25]:
# Estimate from the linear risk-factor model for test_case_one.
strokeTypeRFM.estimate_next_risk(test_case_one)

7.042740613194445

In [26]:
#male - female

In [27]:
# 7.042740613194445 is the linear estimate printed above for test_case_one.
# NOTE(review): 7.025329713194445 was pasted by hand; the run that produced it is not shown.
7.042740613194445 - 7.025329713194445

0.017410899999999785

In [28]:
#male - female

In [29]:
# Same kind of comparison with a second pair of hand-pasted estimates.
# NOTE(review): the second operand has one digit fewer than the 7.042740613194445 printed earlier.
7.060151513194445 -   7.04274061319444

0.017410900000004226

In [30]:
# Print every non-intercept coefficient of the linear model as "name value", one per line.
for coeff_name, coeff_val in strokeTypeRFM.non_intercept_params.items():
    print(coeff_name, coeff_val)

age 0.012586
gender[T.2] -0.0174109
gender[T.1] 0
education[T.1] 0.0211369
education[T.2] 0.0211369
education[T.3] 0.0422738
education[T.4] 0.0634107
education[T.5] 0.0845476
smokingStatus[T.2] 0.0658974
anyPhysicalActivity 0.0501468
afib -0.0615242
current_bp_treatment 0.4574039
statin -0.1934335
sbp -0.0045951
dbp 0.0039562
hdl -0.0425458
totChol 0.0380974
bmi -0.0067788
alcoholPerWeek -0.07774356
ldl -0.0386822
trig -0.0070134
a1c 0.10999562
waist 0.0038266
creatinine -0.0641744


In [31]:
# Show what get_model_argument_for_coeff_name returns for the "afib" coefficient and this person.
strokeTypeRFM.get_model_argument_for_coeff_name("afib", test_case_one)

False

In [32]:
# Same lookup for a categorical-style coefficient name, "gender[T.1]".
strokeTypeRFM.get_model_argument_for_coeff_name("gender[T.1]", test_case_one)

0

In [33]:
# Read the person's private _current_bp_treatment attribute directly.
test_case_one._current_bp_treatment

True

In [34]:
# Fetch the "current_bp_treatment" model argument through the model and keep it in
# testBoolean so the following cell can do arithmetic with it.
testBoolean = strokeTypeRFM.get_model_argument_for_coeff_name("current_bp_treatment", test_case_one)
testBoolean

True

In [35]:
# Check how a boolean model argument behaves under multiplication: True acts as 1.
testBoolean *5

5

In [36]:
# Counterpart check: False acts as 0 under multiplication.
False * 5

0

In [37]:
# Estimate from the logistic risk-factor model for the same person.
# NOTE(review): the displayed value is consistent with 1 / (1 + exp(-x)) for the linear
# estimate x = 7.0427... shown earlier in the notebook.
strokeTypeLogRFM.estimate_next_risk(test_case_one)

0.9991270340099478