In [None]:
# widely used imports
import pandas as pd
import numpy as np
import os
import numpy.random as random
import time
import functools
import datetime

# working directory
os.chdir("/Users/burke/Documents/research/scrooge")

# import key NHATS fields
nhanesDF = pd.read_stata("nhanesForScrooge.dta")

# simple weighting schema using the NHATS weights and turnign into simple probability weights
nhanesDF['probWeight'] = nhanesDF.WTINT2YR / np.sum(nhanesDF.WTINT2YR) 
nhanesDF.patientID.astype("int64")

# load the file mapping screening services to patients
screeningRules = pd.read_excel("simplifiedPreventiveServices.xlsx")
screeningRules['screeningIndex'] = np.arange(0, len(screeningRules))
screeningRules['timeDelta'] = [datetime.timedelta(days=(int(365*float(value[1:])))) for value in screeningRules['Frequency']]

In [2]:
class Person:
    def __init__(self, patientID, gender, age, race, bmi, dm, htn, hl, smoking):
        self.patientID = patientID
        self.gender = gender
        self.age = age
        self.bmi = bmi
        self.dm = dm
        self.htn = htn
        self.hl = hl
        self.smoking = smoking

In [3]:
class ScreeningElement:
    def elementAppliesToRule(self, screeningRule, visit, priorScreenings):
        return true

class AgeScreeningElement(ScreeningElement):
    def elementAppliesToRule(self, screeningRule, visit, priorScreenings):
        if (pd.notnull(screeningRule['minAge']) and visit.age < screeningRule['minAge']):
            return False
        if (pd.notnull(screeningRule['maxAge']) and visit.age > screeningRule['maxAge']):
            return False
        return True

class GenderScreeningElement(ScreeningElement):
    def elementAppliesToRule(self, screeningRule, visit, priorScreenings):    
        if (pd.notnull(screeningRule['Gender']) and visit.gender != screeningRule['Gender']):
            return False
        return True

class SmokingScreeningElement(ScreeningElement):
    def elementAppliesToRule(self, screeningRule, visit, priorScreenings):    
        if (pd.notnull(screeningRule['Smoking'])):
            if (screeningRule['Smoking'] == 'Current' and visit.smokingStatus != 1):
                return False
            if (screeningRule['Smoking'] == 'Former' and visit.smokingStatus == 0):
                return False
            if (screeningRule['Smoking']  == 'Never' and visit.smokingStatus != 0):
                return False
        return True

class VascularRiskFactorScreeningElement(ScreeningElement):
    def elementAppliesToRule(self, screeningRule, visit, priorScreenings):    
        if (pd.notnull(screeningRule['VascularRiskFactor']) != None):
            if (screeningRule['VascularRiskFactor'] == 1 and (visit.selfReportHtn == 'No' or visit.selfReportHtn is None) 
                and (visit.selfReportHyperlipidemia == 'No' or visit.selfReportHyperlipidemia is None)
                and visit.selfReportDiabetes == 'No' or visit.selfReportDiabetes is None):
                return False
            if (screeningRule['VascularRiskFactor'] == 0 and (visit.selfReportHtn == 'Yes' or visit.selfReportHyperlipidemia == 'Yes' or visit.selfReportDiabetes == 'Yes')):
                return False
        return True
    
class BMIScreeningElement(ScreeningElement):
    def elementAppliesToRule(self, screeningRule, visit, priorScreenings):    
        if ((pd.notnull(screeningRule['bmiOver']) and visit.bmi <= screeningRule['bmiOver']) or 
           (pd.notnull(screeningRule['bmiOver'])  and visit.bmi is None)):
            return False
        return True

class RiskProbabilityScreeningElement(ScreeningElement):
    def elementAppliesToRule(self, screeningRule, visit, priorScreenings):    
        # this will set the same random seed for each patietn/screening rule cobination. so, a patient will have an 
        # independt "risk" for each separate rule, but when a separate visit is evaluated it'll be teh same risk as for the 
        # last visit
        random.seed(visit.patientID * screeningRule.screeningIndex)
        if (pd.notnull(screeningRule['proportionOfPopulationAtRisk']) and random.random() > (screeningRule['proportionOfPopulationAtRisk'])):
            return False
        return True

class TimingScreeningElement(ScreeningElement):
    def elementAppliesToRule(self, screeningRule, visit, priorScreenings):    
        # next step is to check to see if the rule has been implemented since the last rule interval..        
        hasPriorScreeningsForService = (priorScreenings['Service'] == screeningRule['Service']).any()
        # if the screening rule is a one time rule and its ever been implemented, then don't do it again


        # ugly — there is really complicated branching logic here that i dont' like...it does ok on tests, so i'm 
        # not terrifed of it...but this is a good place to think about refactoring.
        # q-1 = perform one time and then never again.
        if (screeningRule['Frequency'] == "q-1" and hasPriorScreeningsForService):
            return False
        # if the screening rule is repeated...check whether its been repeated since a prior visit
        elif (hasPriorScreeningsForService):  
            priorScreeningsForThisService = priorScreenings.loc[(priorScreenings['Service'] == screeningRule['Service'])].sort_values("visitDate", ascending=False)

            if (visit.visitDate - screeningRule['timeDelta'] < priorScreeningsForThisService.iloc[0]['visitDate']):
                return False
            else:
                return True
        else:
            return True

In [4]:
parameter_annualPanelAttritionRate = 0.30
parameter_proportionOfAllVisitsToPCP = 0.51

# tuple indexed dictionaries. first tuple element is the lower bounds of an age group and second element is the upper bounds
# the mtached value is the # of visits per 100 members of the population within a given age/gender band
parameter_maleVisitRates = {(18,24) : 119.6, (25,44) : 127.3, (45,64) : 312.1, (65,74) : 559.5, (75,80): 799.2}
parameter_femaleVisitRates = {(18,24) : 235.3, (25,44) : 302.7, (45,64) : 417.3, (65,74) : 606.7, (75,80): 736.2}

visitColumns = ['visitDate', 'patientID', 'year', 'age', 'gender', 'raceEth', 'bmi',
                'smokingStatus', 'selfReportHtn', 'selfReportHyperlipidemia', 'selfReportDiabetes']

screeningColumns = ['patientID', 'Service', 'timeSpent',  'visitDate' 'screeningIndex']

# the major design decision is whether to build python objects for each of the conceptual steps —
# patient, visit, screening service...or whether to keep them all as data frames at the provider level.
# there isn't going to be a ton of logic at any of those levels, so its feasiable to just have a provider level 
# object. from a performace perspective, i'm sure that operations on dataframes are going to be better on memory
# and i suspect they'll probably also be a lot quicker.

class Provider:
    def __init__(self, panelSize, panelSource=nhanesDF):
        self.panelSize = panelSize
        self.panelSource = panelSource
        self.initPanel()
        self.visits = pd.DataFrame(data=None, columns=visitColumns)
        self.visits.patientID.astype("int")
    
        self.screeningServices = pd.DataFrame(data=None, columns=screeningColumns)
        self.startYear = 2018
        self.setYear(self.startYear)
        # this dict will keep a separate set of dataframes for the screening services for a given patient
        self.screeningsForPatientID = {}
        self.screeningElements = [AgeScreeningElement(), GenderScreeningElement(), VascularRiskFactorScreeningElement(),
                                  SmokingScreeningElement(), BMIScreeningElement(), RiskProbabilityScreeningElement(), TimingScreeningElement()]                                
    
    def setYear(self, newYear):
        self.year = newYear
        self.startOfYearTime = datetime.datetime.strptime("1/1/" +str(self.year) , "%m/%d/%Y")
        self.endOfYearTime = datetime.datetime.strptime("12/31/" +str(self.year) , "%m/%d/%Y")
                  
    def initPanel(self):
        rowIndices = random.choice(self.panelSource.index.values, size = self.panelSize, replace=True, p=self.panelSource.probWeight)
        self.panel = self.panelSource.iloc[rowIndices]
        self.panel = self.panel.reset_index(drop=True)
        # not sure that we'll need this...but, lifetime panel is going to keep track of every patient that was ever part of a panel — including those that fall out
        self.lifetimePanel = self.panel.copy()
    
    def addScreeningForPatient(self, patientID, screening):
        if patientID in self.screeningsForPatientID:
            self.screeningsForPatientID[patientID] = self.screeningsForPatientID[patientID].append(screening, ignore_index=True)
        else:
            self.screeningsForPatientID[patientID] = pd.DataFrame(data=screening, index=[0])

    def getScreeningsForPatient(self, patientID):
        if patientID in self.screeningsForPatientID:
            return self.screeningsForPatientID[patientID]
        else:
            return pd.DataFrame(data=None, columns=screeningColumns)
    
    def advancePanelByYear(self, years):
        for i in range(0, years):
            self.losePatientsToAttrition(parameter_annualPanelAttritionRate)
            self.addNewPatients(parameter_annualPanelAttritionRate)
            self.generateVisitHistoryForPanel()
            self.setYear(self.year + 1)
        
    def losePatientsToAttrition(self, attritionRate):
        self.panel = self.panel.drop(random.choice(self.panel.index.values, size=int(attritionRate * self.panelSize), replace=False)) 
    
    def addNewPatients(self, attritionRate):
        newRowIndices = random.choice(self.panelSource.index.values, size = int(attritionRate * self.panelSize), replace=True, p=self.panelSource.probWeight)
        self.panel = self.panel.append(self.panelSource.iloc[newRowIndices])
        self.lifetimePanel = self.lifetimePanel.append(self.panelSource.iloc[newRowIndices])
        self.panel = self.panel.reset_index(drop=True)
    
    def generateVisitHistoryForPanel(self):
        men = self.panel.loc[self.panel['gender'] == 'Male']
        women = self.panel.loc[self.panel['gender'] == 'Female']
        
        self.generateVisitsForGender(men, parameter_maleVisitRates)
        self.generateVisitsForGender(women, parameter_femaleVisitRates)
        self.applyScreeningsToVisits()              

    def applyScreeningRulesToVisit(self, visit):
        data = []
        for blank, screeningRule in screeningRules.iterrows():
            newScreening = self.applyScreeningRuleToVisit(visit, screeningRule)
            if (newScreening is not None):
                data.append(newScreening)
        return data
    
    def applyScreeningsToVisits(self):
        data = []
        for blank, visit in self.visits.iterrows():
            data.extend(self.applyScreeningRulesToVisit(visit))
        screeningDF = pd.DataFrame(data) 
        self.screeningServices = pd.concat([self.screeningServices, screeningDF])  
        
    def applyScreeningRuleToVisit(self, visit, screeningRule):
        applies = True
        priorScreenings = self.getScreeningsForPatient(visit.patientID)
        
        firstElementToFail = None
        for element in self.screeningElements:
            applies = element.elementAppliesToRule(screeningRule, visit, priorScreenings)
            '''
            # for troubleshooting
            if (applies is False and firstElementToFail is None):
                firstElementToFail = element
            '''
            # as soon as one element fails...then you don't have to check the rest
            if (applies is False):
                break            
        '''        
        # for troubleshootin
        if (firstElementToFail is not None):
            print "Failed at: " + str(firstElementToFail.__class__.__name__)
        
        '''
        if (applies):
            newScreeningService = {'patientID' : visit.patientID, 'Service' : screeningRule['Service'],
                                   'timeSpent' : screeningRule['Time'] if applies else 0,
                                   'visitDate' : visit.visitDate, 'screeningIndex' : screeningRule['screeningIndex']}
            self.addScreeningForPatient(visit.patientID, newScreeningService)
            return newScreeningService
    
    def generateVisitsForGender(self, patients, visitRatesByAge):
        for ageRange in visitRatesByAge.keys():
            patientsWithinAgeRange = patients.loc[(patients['age'] >= ageRange[0]) & (patients['age'] <= ageRange[1])]
            totalVisits = int(visitRatesByAge[ageRange] * len(patientsWithinAgeRange) * parameter_proportionOfAllVisitsToPCP/ 100)
            patientsForVisits = self.panel.loc[random.choice(self.panel.index.values, size=totalVisits, replace=True)]
            timesForVisits = [self.startOfYearTime + datetime.timedelta(days=(random.rand() * (self.endOfYearTime-self.startOfYearTime).total_seconds() / 86400)) for i in range(0,len(patientsForVisits))  ]
            newVisits = pd.DataFrame(data={'visitDate' : timesForVisits, 'patientID' : patientsForVisits.patientID.values, 
                                           'year' : [self.year] * len(patientsForVisits), 'age' : patientsForVisits.age.values,
                                           'gender' : patientsForVisits.gender.values, 'raceEth' : patientsForVisits.raceEth.values, 'bmi' : patientsForVisits.bmi.values,
                                           'smokingStatus' : patientsForVisits.smokingStatus.values, 'selfReportHtn' : patientsForVisits.selfReportHtn.values,
                                           'selfReportHyperlipidemia': patientsForVisits.selfReportHyperlipidemia.values,
                                           'selfReportDiabetes' : patientsForVisits.selfReportDiabetes.values})
            self.visits = pd.concat([self.visits, newVisits])


In [5]:
provider = Provider(panelSize=2000)
provider.advancePanelByYear(1)

In [7]:
print len(provider.screeningServices.loc[(provider.screeningServices.screeningIndex==14)])
print len(provider.visits['patientID'].unique())
print len(provider.visits)

467
1266
3541


In [None]:
print provider.panel.loc[provider.panel.patientID ==provider.screeningsForPatientID.keys()[0]]
df=pd.DataFrame(provider.screeningsForPatientID.values()[0])
df.head(5)

## Unit Tests of the Logic of applying screens to visits

In [None]:
import unittest

def generateScreeningRule(name, grade, freq, minAge, maxAge, gender, vrf, bmi, time, smoking, propPop, index):
    return pd.Series({"Service" : name, "Grade" : grade, "Frequency" : freq, "minAge" : minAge, "maxAge" : maxAge, 
                 "Gender" : gender, "VascularRiskFactor" : vrf, "bmiOver" : bmi, "Time" : time, "Smoking" : smoking,
                 "proportionOfPopulationAtRisk" : propPop, "screeningIndex" : index, "timeDelta" : datetime.timedelta(days=(int(365*float(freq[1:]))))})

ageScreeningRule = generateScreeningRule("AgeFilter","A","q1",18,40,"Male",None, None, 2, None, None,  42)
genderScreeningRule = generateScreeningRule("GenderFilter","A","q-1",None,None,"Male",None, None, 2, None, None, 42)
currentSmokerScreeningRule = generateScreeningRule("GenderFilter","A", "q1",None, None,  None,  None, None, 2, "Current",  None, 42)
formerSmokerScreeningRule = generateScreeningRule("formerSmokerFilter","A","q1",None,None,None,None,None,2,"Former",None,42)
neverSmokerScreeningRule = generateScreeningRule("neverSmoker","A","q1",None, None, None, None, None, 2, "Never", None, 42)
hasVFScreeningRule = generateScreeningRule("vfFil","A","q1",None, None,None,1,None,2, None,None,42)
hasNoVFScreeningRule = generateScreeningRule("noVF","A","q1",None,None,None, 0, None, 2, None, None, 42)
bmiScreeningRule = generateScreeningRule("bmi","A","q1",None,None,None,0,30,2,None, None,42)
completeRiskScreeningRule = generateScreeningRule("compRF","A","q1",None,None,None,None, None, 2,None, 1.0, 42)
zeroRiskScreeningRule = generateScreeningRule("zeroRF","A","q1",None,None,None,None, None, 2, None,0.0, 42)
universalOnceScreeningRule = generateScreeningRule("uni","A","q-1", None, None,None,None,None,2,None, None, 42)

baseVisitDate = datetime.datetime.strptime("1/1/2019"  , "%m/%d/%Y")
visit15YearOldMale = pd.Series({'visitDate' : baseVisitDate, 'patientID' : 123, 'year' : 2015, 'age' : 15, 'gender' : 'Male', 
                      'raceEth' : 1, 'bmi' : None, 'smokingStatus' : 1, 'selfReportHtn' : None ,
                      'selfReportHyperlipidemia' : "No", 'selfReportDiabetes' : "No"})

visit19YearOldMale = visit15YearOldMale.copy()
visit19YearOldMale['age'] = 19 
visit39YearOldMale = visit15YearOldMale.copy()
visit39YearOldMale['age'] = 39
visit41YearOldMale = visit15YearOldMale.copy()
visit41YearOldMale['age'] =  41
visit39YearOldFemale = visit39YearOldMale.copy()
visit39YearOldFemale['gender'] = 'Female'
visit15YearOldMaleFormerSmoker= visit15YearOldMale.copy()
visit15YearOldMaleFormerSmoker['smokingStatus'] = 2 # former
visit15YearOldMaleNeverSmoker= visit15YearOldMale.copy()
visit15YearOldMaleNeverSmoker['smokingStatus'] = 0 # never
visit15YearOldMaleHypertension= visit15YearOldMale.copy()
visit15YearOldMaleHypertension['selfReportHtn'] = "Yes"
visit15YearOldMaleHyperlipidemia= visit15YearOldMale.copy()
visit15YearOldMaleHyperlipidemia['selfReportHyperlipidemia'] = "Yes"
visit15YearOldMaleDiabetes= visit15YearOldMale.copy()
visit15YearOldMaleDiabetes['selfReportDiabetes'] = "Yes"
visit15YearOldMaleAllRiskFactors= visit15YearOldMale.copy()
visit15YearOldMaleAllRiskFactors['selfReportDiabetes'] = "Yes"
visit15YearOldMaleAllRiskFactors['selfReportHyperlipidemia'] = "Yes"
visit15YearOldMaleAllRiskFactors['selfReportHtn'] = "Yes"
visit15YearOldMaleLowBMI = visit15YearOldMale.copy()
visit15YearOldMaleLowBMI['bmi'] = 18
visit15YearOldMaleHighBMI = visit15YearOldMale.copy()
visit15YearOldMaleHighBMI['bmi'] = 35

dummyProvider = Provider(0)



class TestScreeningRules(unittest.TestCase):   
    def testTimingInterval(self):
        # apply the rule so, that there is a screening visit on 1/1/19
        self.assertIsNotNone(dummyProvider.applyScreeningRuleToVisit(visit19YearOldMale, ageScreeningRule))
        
        visit19YearOldMaleAdvance6Months = visit19YearOldMale.copy()
        visit19YearOldMaleAdvance6Months['visitDate'] = datetime.datetime.strptime("7/1/2019" , "%m/%d/%Y")
        self.assertIsNone( dummyProvider.applyScreeningRuleToVisit(visit19YearOldMaleAdvance6Months, ageScreeningRule))
        
        visit19YearOldMaleAdvance11Months = visit19YearOldMale.copy()
        visit19YearOldMaleAdvance11Months['visitDate'] = datetime.datetime.strptime("12/31/2019" , "%m/%d/%Y")
        self.assertIsNone( dummyProvider.applyScreeningRuleToVisit(visit19YearOldMaleAdvance11Months, ageScreeningRule))
        
        visit19YearOldMaleAdvance13Months = visit19YearOldMale.copy()
        visit19YearOldMaleAdvance13Months['visitDate'] = datetime.datetime.strptime("1/2/2020" , "%m/%d/%Y")

        self.assertIsNotNone( dummyProvider.applyScreeningRuleToVisit(visit19YearOldMaleAdvance13Months, ageScreeningRule))

        visit19YearOldMaleAdvance24Months = visit19YearOldMale.copy()
        visit19YearOldMaleAdvance24Months['visitDate'] = datetime.datetime.strptime("1/2/2021" , "%m/%d/%Y")
        self.assertIsNotNone( dummyProvider.applyScreeningRuleToVisit(visit19YearOldMaleAdvance24Months, ageScreeningRule))
    
    def testTimingOnce(self):
        # apply the rule the first time, and it shoudl go through...
        self.assertIsNotNone(dummyProvider.applyScreeningRuleToVisit(visit19YearOldMale, universalOnceScreeningRule))
        # apply it subsequent and it should fail
        self.assertIsNone(dummyProvider.applyScreeningRuleToVisit(visit19YearOldMale, universalOnceScreeningRule))

    # hard to do this test determinisstically, will just test the extreme probabilities
    def testRiskScreening(self):
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit19YearOldMale, completeRiskScreeningRule))
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMaleAllRiskFactors, completeRiskScreeningRule))
        self.assertIsNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMaleAllRiskFactors, zeroRiskScreeningRule))
        self.assertIsNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMaleAllRiskFactors, zeroRiskScreeningRule))
    
    def testBMI(self):
        self.assertIsNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMale, bmiScreeningRule))
        self.assertIsNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMaleLowBMI, bmiScreeningRule))
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMaleHighBMI, bmiScreeningRule))
    
    def testVascularRiskFactorFilter(self):
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMale, hasNoVFScreeningRule))
        self.assertIsNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMale, hasVFScreeningRule))

        self.assertIsNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMaleHypertension, hasNoVFScreeningRule))
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMaleHypertension, hasVFScreeningRule))

        self.assertIsNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMaleHyperlipidemia, hasNoVFScreeningRule))
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMaleHyperlipidemia, hasVFScreeningRule))

        self.assertIsNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMaleDiabetes, hasNoVFScreeningRule))
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMaleDiabetes, hasVFScreeningRule))

        self.assertIsNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMaleAllRiskFactors, hasNoVFScreeningRule))
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMaleAllRiskFactors, hasVFScreeningRule))
    
    def testSmokingFilter(self):
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMale, currentSmokerScreeningRule))
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMale, formerSmokerScreeningRule))
        self.assertIsNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMale, neverSmokerScreeningRule))

        self.assertIsNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMaleFormerSmoker, currentSmokerScreeningRule))
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMaleFormerSmoker, formerSmokerScreeningRule))
        self.assertIsNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMaleFormerSmoker, neverSmokerScreeningRule))
        
        self.assertIsNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMaleNeverSmoker, currentSmokerScreeningRule))
        self.assertIsNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMaleNeverSmoker, formerSmokerScreeningRule))
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMaleNeverSmoker, neverSmokerScreeningRule))
    
    def testGenderFilter(self):
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMale, genderScreeningRule))
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit19YearOldMale, genderScreeningRule))
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit39YearOldMale, genderScreeningRule))
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit39YearOldMale, genderScreeningRule))
        self.assertIsNone(Provider(0).applyScreeningRuleToVisit(visit39YearOldFemale, genderScreeningRule))
    
    def testAgeFilter(self):
        self.assertIsNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMale, ageScreeningRule))
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit19YearOldMale, ageScreeningRule))
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit39YearOldMale, ageScreeningRule))
        self.assertIsNone(Provider(0).applyScreeningRuleToVisit(visit41YearOldMale, ageScreeningRule))
        
    # on visual inspection — this didn't seem to be picking up and it shoudl have picked up in almost everybody...
    # it caught that my test cases used "None" while the loaded data used "NaN"
    def testIntimatePartnerVioloence(self):
        #print "**intimate partner"
        intimateParterRule = screeningRules.loc[20,]
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMale, intimateParterRule))
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit39YearOldFemale, intimateParterRule))    
        
class TestVisitGeneration(unittest.TestCase):    
    # this test is probabilistic...it shoudl fail about 5% of the time by chance.
    def testGenerateVisitsForHomogeneousPopulation(self):
        pop = [visit39YearOldFemale.copy() for x in range(0,100) ]
        df = pd.DataFrame(data=pop)
        df['probWeight'] = float(1.0 / len(df))
        dummyProvider = Provider(100, df)
        dummyProvider.advancePanelByYear(1)
        
        # expect to have 302.7/100 [rate of visits in age/gender band] * 0.51 [propotion to PCP] * 100 (pop size) = 154
        # poisson 95% CI = 130-180...
        self.assertGreater(len(dummyProvider.visits),130)
        self.assertLess(len(dummyProvider.visits),  180)

   
suite = unittest.TestLoader().loadTestsFromTestCase(TestScreeningRules)
suite.addTest(unittest.makeSuite(TestVisitGeneration))
unittest.TextTestRunner(verbosity=2).run(suite)      