In [174]:
# widely used imports
import pandas as pd
import numpy as np
import os
import numpy.random as random
import time
import functools
import datetime

# working directory
os.chdir("/Users/burke/Documents/research/scrooge")

# data files (NHANES, screening rules as global objects)ea
# import key NHANES fields
nhanesDF = pd.read_stata("nhanesForScrooge.dta")

# simple weighting schema using the NHATS weights and turnign into simple probability weights
nhanesDF['probWeight'] = nhanesDF.WTINT2YR / np.sum(nhanesDF.WTINT2YR) 
nhanesDF.patientID.astype("int64")

# load the file mapping screening services to patients
screeningRules = pd.read_excel("simplifiedPreventiveServices.xlsx")
screeningRules['screeningIndex'] = np.arange(0, len(screeningRules))
screeningRules['timeDelta'] = [datetime.timedelta(days=(int(365*float(value[1:])))) for value in screeningRules['Frequency']]

In [175]:
class Person:
    def __init__(self, patientID, gender, age, race, bmi, dm, htn, hl, smoking):
        self.patientID = patientID
        self.gender = gender
        self.age = age
        self.bmi = bmi
        self.dm = dm
        self.htn = htn
        self.hl = hl
        self.smoking = smoking

In [176]:
class ScreeningElement:
    def elementAppliesToRule(self, screeningRule, visit, priorScreenings):
        return true

class AgeScreeningElement(ScreeningElement):
    def elementAppliesToRule(self, screeningRule, visit, priorScreenings):
        if (pd.notnull(screeningRule['minAge']) and visit.age < screeningRule['minAge']):
            return False
        if (pd.notnull(screeningRule['maxAge']) and visit.age > screeningRule['maxAge']):
            return False
        return True

class GenderScreeningElement(ScreeningElement):
    def elementAppliesToRule(self, screeningRule, visit, priorScreenings):    
        if (pd.notnull(screeningRule['Gender']) and visit.gender != screeningRule['Gender']):
            return False
        return True

class SmokingScreeningElement(ScreeningElement):
    def elementAppliesToRule(self, screeningRule, visit, priorScreenings):    
        if (pd.notnull(screeningRule['Smoking'])):
            if (screeningRule['Smoking'] == 'Current' and visit.smokingStatus != 1):
                return False
            if (screeningRule['Smoking'] == 'Former' and visit.smokingStatus == 0):
                return False
            if (screeningRule['Smoking']  == 'Never' and visit.smokingStatus != 0):
                return False
        return True

class VascularRiskFactorScreeningElement(ScreeningElement):
    def elementAppliesToRule(self, screeningRule, visit, priorScreenings):    
        if (pd.notnull(screeningRule['VascularRiskFactor'])):
            if (screeningRule['VascularRiskFactor'] == 1 and (visit.selfReportHtn == 'No' or visit.selfReportHtn is None) 
                and (visit.selfReportHyperlipidemia == 'No' or visit.selfReportHyperlipidemia is None)
                and visit.selfReportDiabetes == 'No' or visit.selfReportDiabetes is None):
                return False
            if (screeningRule['VascularRiskFactor'] == 0 and (visit.selfReportHtn == 'Yes' or visit.selfReportHyperlipidemia == 'Yes' or visit.selfReportDiabetes == 'Yes')):
                return False
        return True
    
class BMIScreeningElement(ScreeningElement):
    def elementAppliesToRule(self, screeningRule, visit, priorScreenings):    
        if ((pd.notnull(screeningRule['bmiOver']) and visit.bmi <= screeningRule['bmiOver']) or 
           (pd.notnull(screeningRule['bmiOver'])  and visit.bmi is None)):
            return False
        return True

class RiskProbabilityScreeningElement(ScreeningElement):
    def elementAppliesToRule(self, screeningRule, visit, priorScreenings):    
        # this will set the same random seed for each patietn/screening rule cobination. so, a patient will have an 
        # independt "risk" for each separate rule, but when a separate visit is evaluated it'll be teh same risk as for the 
        # last visit
        random.seed(visit.localPatientID * screeningRule.screeningIndex)
        if (pd.notnull(screeningRule['proportionOfPopulationAtRisk']) and random.random() > (screeningRule['proportionOfPopulationAtRisk'])):
            return False
        return True

class TimingScreeningElement(ScreeningElement):
    def elementAppliesToRule(self, screeningRule, visit, priorScreenings):    
        # next step is to check to see if the rule has been implemented since the last rule interval..        
        hasPriorScreeningsForService = (priorScreenings['Service'] == screeningRule['Service']).any()
        # if the screening rule is a one time rule and its ever been implemented, then don't do it again


        # ugly — there is really complicated branching logic here that i dont' like...it does ok on tests, so i'm 
        # not terrifed of it...but this is a good place to think about refactoring.
        # q-1 = perform one time and then never again.
        if (hasPriorScreeningsForService and screeningRule['Frequency'] == "q-1"):
            return False
        # if the screening rule is repeated...check whether its been repeated since a prior visit
        elif (hasPriorScreeningsForService):  
            # visits should return in sorted order with teh most recent visits last
            priorScreeningsForThisService = priorScreenings.loc[(priorScreenings['Service'] == screeningRule['Service'])]

            if (visit.visitDate - screeningRule['timeDelta'] < priorScreeningsForThisService.iloc[-1]['visitDate']):
                return False
        return True

In [198]:
class Parameters:
    annualPanelAttritionRate = 0.30
    proportionOfAllVisitsToPCP = 0.51

    averageHoursPerYear = 2541
    seHoursPerYear = 450

    meanProportionOfClinicalFaceTime = 0.27
    seProportionOfClinicalFaceTime = 0.007

    meanPanelSize = 2000
    sdPanelSize = 400
    
    meanVisitDuration = 22.2
    sdVisitDuration = 7

    # tuple indexed dictionaries. first tuple element is the lower bounds of an age group and second element is the upper bounds
    # the mtached value is the # of visits per 100 members of the population within a given age/gender band
    maleVisitRates = {(18,24) : 119.6, (25,44) : 127.3, (45,64) : 312.1, (65,74) : 559.5, (75,80): 799.2}
    sdMaleVisitRates = {(18,24) : 13.8, (25,44) : 10.4, (45,64) : 24.8, (65,74) : 41.9, (75,80): 81.1}

    femaleVisitRates = {(18,24) : 235.3, (25,44) : 302.7, (45,64) : 417.3, (65,74) : 606.7, (75,80): 736.2}
    sdFemaleVisitRates = {(18,24) : 29.5, (25,44) : 26.0, (45,64) : 30.1, (65,74) : 42.4, (75,80): 65.8}


# the major design decision is whether to build python objects for each of the conceptual steps —
# patient, visit, screening service...or whether to keep them all as data frames at the provider level.
# there isn't going to be a ton of logic at any of those levels, so its feasiable to just have a provider level 
# object. from a performace perspective, i'm sure that operations on dataframes are going to be better on memory
# and i suspect they'll probably also be a lot quicker.

class Provider:
    visitColumns = ['visitDate', 'localPatientID', 'year', 'age', 'gender', 'raceEth', 'bmi',
                'smokingStatus', 'selfReportHtn', 'selfReportHyperlipidemia', 'selfReportDiabetes',
                'visitDuration']

    screeningColumns = ['localPatientID', 'Service', 'timeSpent',  'visitDate', 'screeningIndex', 'grade']
    
    def __init__(self, panelSize=None, panelSource=nhanesDF):
        if (panelSize is None):
            self.panelSize = int(np.random.normal(Parameters.meanPanelSize, Parameters.sdPanelSize, 1))
        else:
            self.panelSize = panelSize
        self.panelSource = panelSource
        self.initPanel()
        self.visits = pd.DataFrame(data=None, columns=self.visitColumns)
        self.visits.localPatientID.astype("int")
        
        self.hoursWorkedPerYear = np.random.normal(Parameters.averageHoursPerYear, Parameters.seHoursPerYear)
        self.proportionClinicalFaceTime = np.random.normal(Parameters.meanProportionOfClinicalFaceTime, 
                                                           Parameters.seProportionOfClinicalFaceTime)

    
        self.screeningServices = pd.DataFrame(data=None, columns=self.screeningColumns)
        self.startYear = 2018
        self.setYear(self.startYear)
        # this dict will keep a separate set of dataframes for the screening services for a given patient
        self.screeningsForPatientID = {}
        self.screeningElements = [AgeScreeningElement(), GenderScreeningElement(), VascularRiskFactorScreeningElement(),
                                  SmokingScreeningElement(), BMIScreeningElement(), RiskProbabilityScreeningElement(), TimingScreeningElement()]          
    
    def setYear(self, newYear):
        self.year = newYear
        self.startOfYearTime = datetime.datetime.strptime("1/1/" +str(self.year) , "%m/%d/%Y")
        self.endOfYearTime = datetime.datetime.strptime("12/31/" +str(self.year) , "%m/%d/%Y")
                  
    def initPanel(self):
        rowIndices = random.choice(self.panelSource.index.values, size = self.panelSize, replace=True, p=self.panelSource.probWeight)
        self.panel = self.panelSource.iloc[rowIndices]
        self.panel.index = np.arange(1, self.panelSize + 1)
        self.panel.localPatientID = self.panel.index
        self.panel.rename({"patientID" : "nhanesPatientID"})
        #self.panel = self.panel.reset_index(drop=True)
        # not sure that we'll need this...but, lifetime panel is going to keep track of every patient that was ever part of a panel — including those that fall out
        self.lifetimePanel = self.panel.copy()
    
    def addScreeningForPatient(self, patientID, screening):
        if patientID in self.screeningsForPatientID:
            self.screeningsForPatientID[patientID] = self.screeningsForPatientID[patientID].append(screening, ignore_index=True)
        else:
            self.screeningsForPatientID[patientID] = pd.DataFrame(data=screening, index=[0])

    def getScreeningsForPatient(self, patientID):
        if patientID in self.screeningsForPatientID:
            return self.screeningsForPatientID[patientID]
        else:
            return pd.DataFrame(data=None, columns=self.screeningColumns)
    
    def advancePanelByYear(self, years):
        for i in range(0, years):
            self.panel['age'] += 1
            self.losePatientsToAttrition(Parameters.annualPanelAttritionRate)
            self.addNewPatients(Parameters.annualPanelAttritionRate)
            self.generateVisitHistoryForPanel()
            self.setYear(self.year + 1)
            #print "advanced " + str(i+1) + " years"
        
    def losePatientsToAttrition(self, attritionRate):
        self.panel = self.panel.drop(random.choice(self.panel.index.values, size=int(attritionRate * self.panelSize), replace=False)) 
        
    def addNewPatients(self, attritionRate):
        newRowIndices = random.choice(self.panelSource.index.values, size = int(attritionRate * self.panelSize), replace=True, p=self.panelSource.probWeight)
        newPatients = self.panelSource.iloc[newRowIndices]
        newStartingIndex = self.panelSize * (self.year-self.startYear+1)
        newPatients.index = np.arange(newStartingIndex, newStartingIndex + len(newPatients))
        self.panel = pd.concat([self.panel, newPatients])        
        self.lifetimePanel = self.lifetimePanel.append(newPatients)
        #self.panel = self.panel.reset_index(drop=True)
    
    def generateVisitHistoryForPanel(self):
        men = self.panel.loc[self.panel['gender'] == 'Male']
        women = self.panel.loc[self.panel['gender'] == 'Female']
        
        maleVisits = self.generateVisitsForGender(men, Parameters.maleVisitRates, Parameters.sdMaleVisitRates)
        femaleVisits = self.generateVisitsForGender(women, Parameters.femaleVisitRates, Parameters.sdFemaleVisitRates)
        self.applyScreeningsToVisits(pd.concat([maleVisits, femaleVisits]))              

    def applyScreeningRulesToVisit(self, visit):
        data = []
        for blank, screeningRule in screeningRules.iterrows():
            newScreening = self.applyScreeningRuleToVisit(visit, screeningRule)
            if (newScreening is not None):
                data.append(newScreening)
        return data
    
    def applyScreeningsToVisits(self, newVisits):
        data = []
        for blank, visit in newVisits.iterrows():
            data.extend(self.applyScreeningRulesToVisit(visit))
        screeningDF = pd.DataFrame(data) 
        self.screeningServices = pd.concat([self.screeningServices, screeningDF])  
        
    def applyScreeningRuleToVisit(self, visit, screeningRule):
        applies = True
        priorScreenings = self.getScreeningsForPatient(visit.localPatientID)
        
        firstElementToFail = None
        for element in self.screeningElements:
            applies = element.elementAppliesToRule(screeningRule, visit, priorScreenings)
            '''
            # for troubleshooting
            if (applies is False and firstElementToFail is None):
                firstElementToFail = element
            '''
            # as soon as one element fails...then you don't have to check the rest
            if (applies is False):
                break            
        '''        
        # for troubleshootin
        if (firstElementToFail is not None):
            print "Failed at: " + str(firstElementToFail.__class__.__name__)
        
        '''
        if (applies):
            newScreeningService = {'localPatientID' : visit.localPatientID, 'Service' : screeningRule['Service'],
                                   'timeSpent' : screeningRule['Time'] if applies else 0,
                                   'visitDate' : visit.visitDate, 'screeningIndex' : screeningRule['screeningIndex'],
                                    'grade' : screeningRule['Grade'] }
            self.addScreeningForPatient(visit.localPatientID, newScreeningService)
            return newScreeningService
    
    def generateVisitsForGender(self, patients, visitRatesByAge, visitRateSEByAge):
        for ageRange in visitRatesByAge.keys():
            patientsWithinAgeRange = patients.loc[(patients['age'] >= ageRange[0]) & (patients['age'] <= ageRange[1])]
            visitRateForAgeGroup = np.random.normal(visitRatesByAge[ageRange], visitRateSEByAge[ageRange],1)
            totalVisits = int(visitRateForAgeGroup * len(patientsWithinAgeRange) * Parameters.proportionOfAllVisitsToPCP/ 100)
            patientsForVisits = self.panel.loc[random.choice(self.panel.index.values, size=totalVisits, replace=True)]
            # by building the visits in sorted order and then iterating through them in that order
            # we can ensure that when filtering - most recent visits will be first...
            timesForVisits = sorted([self.startOfYearTime + datetime.timedelta(days=(random.rand() * (self.endOfYearTime-self.startOfYearTime).total_seconds() / 86400)) for i in range(0,len(patientsForVisits))  ])
            newVisits = pd.DataFrame(data={'visitDate' : timesForVisits, 'localPatientID' : patientsForVisits.index.values, 
                                           'year' : [self.year] * len(patientsForVisits), 'age' : patientsForVisits.age.values,
                                           'gender' : patientsForVisits.gender.values, 'raceEth' : patientsForVisits.raceEth.values, 'bmi' : patientsForVisits.bmi.values,
                                           'smokingStatus' : patientsForVisits.smokingStatus.values, 'selfReportHtn' : patientsForVisits.selfReportHtn.values,
                                           'selfReportHyperlipidemia': patientsForVisits.selfReportHyperlipidemia.values,
                                           'selfReportDiabetes' : patientsForVisits.selfReportDiabetes.values,
                                          'visitDuration' : np.random.normal(Parameters.meanVisitDuration, Parameters.sdVisitDuration, len(patientsForVisits))})
            self.visits = pd.concat([self.visits, newVisits])
        return newVisits


## Build a single Provider...and export the data

In [199]:
pd.options.mode.chained_assignment = 'warn'
provider = Provider()
provider.advancePanelByYear(10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




In [None]:
provider.panel.to_excel("SingleProvider10YearPanel.xlsx")
provider.lifetimePanel.to_excel("SingleProvider10YearLifetimePanel.xlsx")
provider.visits.to_excel("SingleProvider10YearLifetimeVisits.xlsx")
provider.screeningServices.to_excel("SingleProvider10YearLifetimeScreeningServices.xlsx")

## Build a pool of providers

In [204]:
def buildOneProvider(x):
    provider = Provider()
    provider.advancePanelByYear(10)
    #pbar.update(1)
    if (x % 10 == 0):
        print x

    return provider

In [206]:
from multiprocessing import Pool
from tqdm import tqdm

tasks = xrange(1000)
pbar = tqdm(total=len(tasks))
pool = Pool(12)

providers = pool.imap(buildOneProvider, tasks)

#rs = p.map_async(do_work, xrange(num_tasks))
#pool.close() # No more work
#pool.join()
#pbar.close()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is tryin

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas 

10
0
20
30
40
50
60
70
80
90
100
110
120
130
140
150
160
170
180
190
200
210
220
230
240
250
260
270
280
290
300
310
320
330
340
350
360
370
380
390
400
410
420
430
440
450
460
470
480
490
500
510
520
530
540
550
560
570
580
590
600
610
620
630
640
650
660
670
680
690
700
710
720
730
740
750
760
770
780
790
800
810
820
830
840
850
860
870
880
890
900
910
920
940
930
950
960
970
980
990


Process PoolWorker-39:
Process PoolWorker-47:
Process PoolWorker-38:
Process PoolWorker-43:
Process PoolWorker-45:
Process PoolWorker-46:
  File "/anaconda2/lib/python2.7/multiprocessing/pool.py", line 102, in worker
  File "/anaconda2/lib/python2.7/multiprocessing/pool.py", line 102, in worker
  File "/anaconda2/lib/python2.7/multiprocessing/pool.py", line 102, in worker
  File "/anaconda2/lib/python2.7/multiprocessing/pool.py", line 102, in worker
  File "/anaconda2/lib/python2.7/multiprocessing/pool.py", line 102, in worker
KeyboardInterrupt
KeyboardInterrupt
    racquire()
    racquire()
    racquire()
    racquire()
    racquire()
    racquire()
    racquire()
    task = get()
    racquire()
    task = get()
    task = get()
    task = get()
    task = get()
    task = get()
    task = get()
    task = get()
  File "/anaconda2/lib/python2.7/multiprocessing/queues.py", line 374, in get
  File "/anaconda2/lib/python2.7/multiprocessing/queues.py", line 374, in get
  File "/anaconda2/

In [None]:
providers.

In [None]:
providerID = 1
providerFields = []
allVisits = pd.DataFrame(data=None, columns=Provider.visitColumns.extend("ProviderID"))
allScreenings = pd.DataFrame(data=None, columns=Provider.screeningColumns.extend("ProviderID"))

for provider in providers:
    providerFields.append({"providerID" : providerID, "hoursWorked" : provider.hoursWorkedPerYear, "proportionClinical" : provider.proportionClinicalFaceTime})
    providerID += 1

allProviders = pd.DataFrame(providerFields)
allProviders.to_excel("allProviders.xlsx")

for provider in providers:
    allVisits = pd.concat([allVisits, provider.visits])
allVisits.to_excel("allVisits.xlsx")

for provider in providers:
    allScreenings = pd.concat([allScreenings, provider.screeningServices])
allScreenings.to_excel("allScreenings.xlsx")


Process PoolWorker-65:
Process PoolWorker-57:
Process PoolWorker-56:
Process PoolWorker-59:
Process PoolWorker-72:
Process PoolWorker-76:
Process PoolWorker-73:
Process PoolWorker-55:
Process PoolWorker-53:
Process PoolWorker-54:
Process PoolWorker-82:
Process PoolWorker-64:
Process PoolWorker-62:
Process PoolWorker-60:
Process PoolWorker-61:
Process PoolWorker-78:
Process PoolWorker-67:
Process PoolWorker-69:
Process PoolWorker-50:
Process PoolWorker-75:
Process PoolWorker-66:
Process PoolWorker-74:
Process PoolWorker-79:
Process PoolWorker-63:
Process PoolWorker-49:
Process PoolWorker-77:
Process PoolWorker-58:
Process PoolWorker-51:
Process PoolWorker-68:
Process PoolWorker-52:
Process PoolWorker-71:
Process PoolWorker-70:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most rec

  File "/anaconda2/lib/python2.7/multiprocessing/process.py", line 267, in _bootstrap
  File "/anaconda2/lib/python2.7/multiprocessing/process.py", line 267, in _bootstrap
  File "/anaconda2/lib/python2.7/multiprocessing/process.py", line 267, in _bootstrap
  File "/anaconda2/lib/python2.7/multiprocessing/process.py", line 267, in _bootstrap
  File "/anaconda2/lib/python2.7/multiprocessing/process.py", line 267, in _bootstrap
  File "/anaconda2/lib/python2.7/multiprocessing/process.py", line 267, in _bootstrap
  File "/anaconda2/lib/python2.7/multiprocessing/process.py", line 267, in _bootstrap
  File "/anaconda2/lib/python2.7/multiprocessing/process.py", line 267, in _bootstrap
  File "/anaconda2/lib/python2.7/multiprocessing/process.py", line 267, in _bootstrap
  File "/anaconda2/lib/python2.7/multiprocessing/process.py", line 267, in _bootstrap
  File "/anaconda2/lib/python2.7/multiprocessing/process.py", line 267, in _bootstrap
  File "/anaconda2/lib/python2.7/multiprocessing/proce

  File "/anaconda2/lib/python2.7/multiprocessing/pool.py", line 102, in worker
    self._target(*self._args, **self._kwargs)
  File "/anaconda2/lib/python2.7/multiprocessing/pool.py", line 102, in worker
  File "/anaconda2/lib/python2.7/multiprocessing/pool.py", line 102, in worker
  File "/anaconda2/lib/python2.7/multiprocessing/pool.py", line 102, in worker
  File "/anaconda2/lib/python2.7/multiprocessing/pool.py", line 102, in worker
  File "/anaconda2/lib/python2.7/multiprocessing/pool.py", line 102, in worker
  File "/anaconda2/lib/python2.7/multiprocessing/pool.py", line 102, in worker
  File "/anaconda2/lib/python2.7/multiprocessing/pool.py", line 102, in worker
  File "/anaconda2/lib/python2.7/multiprocessing/process.py", line 267, in _bootstrap
  File "/anaconda2/lib/python2.7/multiprocessing/pool.py", line 102, in worker
  File "/anaconda2/lib/python2.7/multiprocessing/pool.py", line 102, in worker
  File "/anaconda2/lib/python2.7/multiprocessing/pool.py", line 102, in worker

    racquire()
    racquire()
    self._target(*self._args, **self._kwargs)
    racquire()
    racquire()
    racquire()
    racquire()
    return recv()
    racquire()
    self._target(*self._args, **self._kwargs)
    racquire()
    racquire()
    self._target(*self._args, **self._kwargs)
    self._target(*self._args, **self._kwargs)
    self._target(*self._args, **self._kwargs)
    self._target(*self._args, **self._kwargs)
    self._target(*self._args, **self._kwargs)
    self._target(*self._args, **self._kwargs)
    self._target(*self._args, **self._kwargs)
  File "/anaconda2/lib/python2.7/multiprocessing/pool.py", line 102, in worker
    self._target(*self._args, **self._kwargs)
    racquire()
    racquire()
  File "/anaconda2/lib/python2.7/multiprocessing/pool.py", line 102, in worker
    return recv()
  File "/anaconda2/lib/python2.7/multiprocessing/pool.py", line 102, in worker
  File "/anaconda2/lib/python2.7/multiprocessing/pool.py", line 102, in worker
  File "/anaconda2/lib/

  File "/anaconda2/lib/python2.7/multiprocessing/process.py", line 267, in _bootstrap
  File "/anaconda2/lib/python2.7/multiprocessing/process.py", line 267, in _bootstrap
  File "/anaconda2/lib/python2.7/multiprocessing/process.py", line 267, in _bootstrap
  File "/anaconda2/lib/python2.7/multiprocessing/process.py", line 267, in _bootstrap
  File "/anaconda2/lib/python2.7/multiprocessing/process.py", line 267, in _bootstrap
  File "/anaconda2/lib/python2.7/multiprocessing/process.py", line 267, in _bootstrap
  File "/anaconda2/lib/python2.7/multiprocessing/process.py", line 267, in _bootstrap
  File "/anaconda2/lib/python2.7/multiprocessing/process.py", line 267, in _bootstrap
  File "/anaconda2/lib/python2.7/multiprocessing/process.py", line 267, in _bootstrap
  File "/anaconda2/lib/python2.7/multiprocessing/process.py", line 267, in _bootstrap
  File "/anaconda2/lib/python2.7/multiprocessing/process.py", line 267, in _bootstrap
  File "/anaconda2/lib/python2.7/multiprocessing/proce

    self._target(*self._args, **self._kwargs)
    self._target(*self._args, **self._kwargs)
    self._target(*self._args, **self._kwargs)
    self._target(*self._args, **self._kwargs)
    self._target(*self._args, **self._kwargs)
    self._target(*self._args, **self._kwargs)
    self._target(*self._args, **self._kwargs)
  File "/anaconda2/lib/python2.7/multiprocessing/pool.py", line 102, in worker
    self._target(*self._args, **self._kwargs)
    self._target(*self._args, **self._kwargs)
    self._target(*self._args, **self._kwargs)
    self._target(*self._args, **self._kwargs)
    self._target(*self._args, **self._kwargs)
  File "/anaconda2/lib/python2.7/multiprocessing/pool.py", line 102, in worker
    self._target(*self._args, **self._kwargs)
    self._target(*self._args, **self._kwargs)
    self._target(*self._args, **self._kwargs)
  File "/anaconda2/lib/python2.7/multiprocessing/pool.py", line 102, in worker
  File "/anaconda2/lib/python2.7/multiprocessing/pool.py", line 102, in w

  File "/anaconda2/lib/python2.7/multiprocessing/queues.py", line 374, in get
  File "/anaconda2/lib/python2.7/multiprocessing/queues.py", line 374, in get
  File "/anaconda2/lib/python2.7/multiprocessing/queues.py", line 374, in get
  File "/anaconda2/lib/python2.7/multiprocessing/queues.py", line 374, in get
  File "/anaconda2/lib/python2.7/multiprocessing/queues.py", line 374, in get
  File "/anaconda2/lib/python2.7/multiprocessing/queues.py", line 374, in get
  File "/anaconda2/lib/python2.7/multiprocessing/queues.py", line 374, in get
    racquire()
  File "/anaconda2/lib/python2.7/multiprocessing/queues.py", line 374, in get
    racquire()
  File "/anaconda2/lib/python2.7/multiprocessing/queues.py", line 376, in get
  File "/anaconda2/lib/python2.7/multiprocessing/queues.py", line 374, in get
    racquire()
    racquire()
    racquire()
    racquire()
    racquire()
    racquire()
    racquire()
    racquire()
    racquire()
    racquire()
    racquire()
    racquire()
    racqui

## Unit Tests of the Logic of applying screens to visits

In [29]:
import unittest

def generateScreeningRule(name, grade, freq, minAge, maxAge, gender, vrf, bmi, time, smoking, propPop, index):
    return pd.Series({"Service" : name, "Grade" : grade, "Frequency" : freq, "minAge" : minAge, "maxAge" : maxAge, 
                 "Gender" : gender, "VascularRiskFactor" : vrf, "bmiOver" : bmi, "Time" : time, "Smoking" : smoking,
                 "proportionOfPopulationAtRisk" : propPop, "screeningIndex" : index, "timeDelta" : datetime.timedelta(days=(int(365*float(freq[1:]))))})

ageScreeningRule = generateScreeningRule("AgeFilter","A","q1",18,40,"Male",None, None, 2, None, None,  42)
genderScreeningRule = generateScreeningRule("GenderFilter","A","q-1",None,None,"Male",None, None, 2, None, None, 42)
currentSmokerScreeningRule = generateScreeningRule("GenderFilter","A", "q1",None, None,  None,  None, None, 2, "Current",  None, 42)
formerSmokerScreeningRule = generateScreeningRule("formerSmokerFilter","A","q1",None,None,None,None,None,2,"Former",None,42)
neverSmokerScreeningRule = generateScreeningRule("neverSmoker","A","q1",None, None, None, None, None, 2, "Never", None, 42)
hasVFScreeningRule = generateScreeningRule("vfFil","A","q1",None, None,None,1,None,2, None,None,42)
hasNoVFScreeningRule = generateScreeningRule("noVF","A","q1",None,None,None, 0, None, 2, None, None, 42)
bmiScreeningRule = generateScreeningRule("bmi","A","q1",None,None,None,0,30,2,None, None,42)
completeRiskScreeningRule = generateScreeningRule("compRF","A","q1",None,None,None,None, None, 2,None, 1.0, 42)
zeroRiskScreeningRule = generateScreeningRule("zeroRF","A","q1",None,None,None,None, None, 2, None,0.0, 42)
universalOnceScreeningRule = generateScreeningRule("uni","A","q-1", None, None,None,None,None,2,None, None, 42)

baseVisitDate = datetime.datetime.strptime("1/1/2019"  , "%m/%d/%Y")
visit15YearOldMale = pd.Series({'visitDate' : baseVisitDate, 'patientID' : 123, 'year' : 2015, 'age' : 15, 'gender' : 'Male', 
                      'raceEth' : 1, 'bmi' : None, 'smokingStatus' : 1, 'selfReportHtn' : None ,
                      'selfReportHyperlipidemia' : "No", 'selfReportDiabetes' : "No"})

visit19YearOldMale = visit15YearOldMale.copy()
visit19YearOldMale['age'] = 19 
visit39YearOldMale = visit15YearOldMale.copy()
visit39YearOldMale['age'] = 39
visit41YearOldMale = visit15YearOldMale.copy()
visit41YearOldMale['age'] =  41
visit39YearOldFemale = visit39YearOldMale.copy()
visit39YearOldFemale['gender'] = 'Female'
visit15YearOldMaleFormerSmoker= visit15YearOldMale.copy()
visit15YearOldMaleFormerSmoker['smokingStatus'] = 2 # former
visit15YearOldMaleNeverSmoker= visit15YearOldMale.copy()
visit15YearOldMaleNeverSmoker['smokingStatus'] = 0 # never
visit15YearOldMaleHypertension= visit15YearOldMale.copy()
visit15YearOldMaleHypertension['selfReportHtn'] = "Yes"
visit15YearOldMaleHyperlipidemia= visit15YearOldMale.copy()
visit15YearOldMaleHyperlipidemia['selfReportHyperlipidemia'] = "Yes"
visit15YearOldMaleDiabetes= visit15YearOldMale.copy()
visit15YearOldMaleDiabetes['selfReportDiabetes'] = "Yes"
visit15YearOldMaleAllRiskFactors= visit15YearOldMale.copy()
visit15YearOldMaleAllRiskFactors['selfReportDiabetes'] = "Yes"
visit15YearOldMaleAllRiskFactors['selfReportHyperlipidemia'] = "Yes"
visit15YearOldMaleAllRiskFactors['selfReportHtn'] = "Yes"
visit15YearOldMaleLowBMI = visit15YearOldMale.copy()
visit15YearOldMaleLowBMI['bmi'] = 18
visit15YearOldMaleHighBMI = visit15YearOldMale.copy()
visit15YearOldMaleHighBMI['bmi'] = 35

dummyProvider = Provider(panelSize=0)



class TestScreeningRules(unittest.TestCase):   
    def testTimingInterval(self):
        # apply the rule so, that there is a screening visit on 1/1/19
        self.assertIsNotNone(dummyProvider.applyScreeningRuleToVisit(visit19YearOldMale, ageScreeningRule))
        
        visit19YearOldMaleAdvance6Months = visit19YearOldMale.copy()
        visit19YearOldMaleAdvance6Months['visitDate'] = datetime.datetime.strptime("7/1/2019" , "%m/%d/%Y")
        self.assertIsNone( dummyProvider.applyScreeningRuleToVisit(visit19YearOldMaleAdvance6Months, ageScreeningRule))
        
        visit19YearOldMaleAdvance11Months = visit19YearOldMale.copy()
        visit19YearOldMaleAdvance11Months['visitDate'] = datetime.datetime.strptime("12/31/2019" , "%m/%d/%Y")
        self.assertIsNone( dummyProvider.applyScreeningRuleToVisit(visit19YearOldMaleAdvance11Months, ageScreeningRule))
        
        visit19YearOldMaleAdvance13Months = visit19YearOldMale.copy()
        visit19YearOldMaleAdvance13Months['visitDate'] = datetime.datetime.strptime("1/2/2020" , "%m/%d/%Y")

        self.assertIsNotNone( dummyProvider.applyScreeningRuleToVisit(visit19YearOldMaleAdvance13Months, ageScreeningRule))

        visit19YearOldMaleAdvance24Months = visit19YearOldMale.copy()
        visit19YearOldMaleAdvance24Months['visitDate'] = datetime.datetime.strptime("1/2/2021" , "%m/%d/%Y")
        self.assertIsNotNone( dummyProvider.applyScreeningRuleToVisit(visit19YearOldMaleAdvance24Months, ageScreeningRule))
    
    def testTimingOnce(self):
        # apply the rule the first time, and it shoudl go through...
        self.assertIsNotNone(dummyProvider.applyScreeningRuleToVisit(visit19YearOldMale, universalOnceScreeningRule))
        # apply it subsequent and it should fail
        self.assertIsNone(dummyProvider.applyScreeningRuleToVisit(visit19YearOldMale, universalOnceScreeningRule))

    # hard to do this test determinisstically, will just test the extreme probabilities
    def testRiskScreening(self):
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit19YearOldMale, completeRiskScreeningRule))
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMaleAllRiskFactors, completeRiskScreeningRule))
        self.assertIsNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMaleAllRiskFactors, zeroRiskScreeningRule))
        self.assertIsNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMaleAllRiskFactors, zeroRiskScreeningRule))
    
    def testBMI(self):
        self.assertIsNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMale, bmiScreeningRule))
        self.assertIsNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMaleLowBMI, bmiScreeningRule))
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMaleHighBMI, bmiScreeningRule))
    
    def testVascularRiskFactorFilter(self):
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMale, hasNoVFScreeningRule))
        self.assertIsNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMale, hasVFScreeningRule))

        self.assertIsNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMaleHypertension, hasNoVFScreeningRule))
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMaleHypertension, hasVFScreeningRule))

        self.assertIsNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMaleHyperlipidemia, hasNoVFScreeningRule))
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMaleHyperlipidemia, hasVFScreeningRule))

        self.assertIsNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMaleDiabetes, hasNoVFScreeningRule))
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMaleDiabetes, hasVFScreeningRule))

        self.assertIsNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMaleAllRiskFactors, hasNoVFScreeningRule))
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMaleAllRiskFactors, hasVFScreeningRule))
    
    def testSmokingFilter(self):
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMale, currentSmokerScreeningRule))
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMale, formerSmokerScreeningRule))
        self.assertIsNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMale, neverSmokerScreeningRule))

        self.assertIsNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMaleFormerSmoker, currentSmokerScreeningRule))
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMaleFormerSmoker, formerSmokerScreeningRule))
        self.assertIsNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMaleFormerSmoker, neverSmokerScreeningRule))
        
        self.assertIsNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMaleNeverSmoker, currentSmokerScreeningRule))
        self.assertIsNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMaleNeverSmoker, formerSmokerScreeningRule))
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMaleNeverSmoker, neverSmokerScreeningRule))
    
    def testGenderFilter(self):
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMale, genderScreeningRule))
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit19YearOldMale, genderScreeningRule))
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit39YearOldMale, genderScreeningRule))
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit39YearOldMale, genderScreeningRule))
        self.assertIsNone(Provider(0).applyScreeningRuleToVisit(visit39YearOldFemale, genderScreeningRule))
    
    def testAgeFilter(self):
        self.assertIsNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMale, ageScreeningRule))
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit19YearOldMale, ageScreeningRule))
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit39YearOldMale, ageScreeningRule))
        self.assertIsNone(Provider(0).applyScreeningRuleToVisit(visit41YearOldMale, ageScreeningRule))
        
    # on visual inspection — this didn't seem to be picking up and it shoudl have picked up in almost everybody...
    # it caught that my test cases used "None" while the loaded data used "NaN"
    def testIntimatePartnerVioloence(self):
        #print "**intimate partner"
        intimateParterRule = screeningRules.loc[20,]
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit15YearOldMale, intimateParterRule))
        self.assertIsNotNone(Provider(0).applyScreeningRuleToVisit(visit39YearOldFemale, intimateParterRule))  
        
# this is another probabilistic test — it'll fail on one of these parameters 5% of the time.
class TestProviderParameters(unittest.TestCase):
    def testBaselineProviderPanel(self):
        dummyProvider = Provider()
        self.assertGreater(dummyProvider.panelSize, Parameters.meanPanelSize - Parameters.sdPanelSize* 1.96)
        self.assertLess(dummyProvider.panelSize, Parameters.meanPanelSize + Parameters.sdPanelSize* 1.96)

    def testBaselineProviderClinicalFaceTime(self):
        dummyProvider = Provider()
        self.assertGreater(dummyProvider.proportionClinicalFaceTime, Parameters.meanProportionOfClinicalFaceTime - Parameters.seProportionOfClinicalFaceTime* 1.96)
        self.assertLess(dummyProvider.proportionClinicalFaceTime, Parameters.meanProportionOfClinicalFaceTime + Parameters.seProportionOfClinicalFaceTime* 1.96)

        
class TestVisitGeneration(unittest.TestCase):    
    def testConsistentPanelSizeOverYears(self):
        pop = [visit39YearOldFemale.copy() for x in range(0,100) ]
        df = pd.DataFrame(data=pop)
        df.index = np.arange(1, len(df)+1)
        df['probWeight'] = float(1.0 / len(df))
        dummyProvider = Provider(100, df)
        dummyProvider.advancePanelByYear(1)
        self.assertEquals(len(dummyProvider.panel), 100)
    
    # this test is probabilistic...it shoudl fail about 5% of the time by chance.
    def testGenerateVisitsForHomogeneousPopulation(self):
        pop = [visit39YearOldFemale.copy() for x in range(0,100) ]
        df = pd.DataFrame(data=pop)
        df['probWeight'] = float(1.0 / len(df))
        dummyProvider = Provider(100, df)
        dummyProvider.advancePanelByYear(1)
        
        print len(dummyProvider.panel)
        print len(dummyProvider.visits)
        
        # expect to have 302.7/100 [rate of visits in age/gender band] * 0.51 [propotion to PCP] * 100 (pop size) = 154
        # poisson 95% CI = 130-180...
        self.assertGreater(len(dummyProvider.visits),130)
        self.assertLess(len(dummyProvider.visits),  180)

   
suite = unittest.TestLoader().loadTestsFromTestCase(TestScreeningRules)
suite.addTest(unittest.makeSuite(TestVisitGeneration))
suite.addTest(unittest.makeSuite(TestProviderParameters))
unittest.TextTestRunner(verbosity=2).run(suite)      

testAgeFilter (__main__.TestScreeningRules) ... ok
testBMI (__main__.TestScreeningRules) ... ok
testGenderFilter (__main__.TestScreeningRules) ... ok
testIntimatePartnerVioloence (__main__.TestScreeningRules) ... ok
testRiskScreening (__main__.TestScreeningRules) ... ok
testSmokingFilter (__main__.TestScreeningRules) ... ok
testTimingInterval (__main__.TestScreeningRules) ... ok
testTimingOnce (__main__.TestScreeningRules) ... ok
testVascularRiskFactorFilter (__main__.TestScreeningRules) ... ok
testConsistentPanelSizeOverYears (__main__.TestVisitGeneration) ... ok
testGenerateVisitsForHomogeneousPopulation (__main__.TestVisitGeneration) ... ok
testBaselineProviderClinicalFaceTime (__main__.TestProviderParameters) ... ok
testBaselineProviderPanel (__main__.TestProviderParameters) ... 

100
140


ok

----------------------------------------------------------------------
Ran 13 tests in 14.922s

OK


<unittest.runner.TextTestResult run=13 errors=0 failures=0>

## Some profiling code

In [None]:
from pycallgraph import PyCallGraph
from pycallgraph.output import GraphvizOutput

with PyCallGraph(output=GraphvizOutput()):
    cProfile.run('provider.advancePanelByYear(1)')

In [None]:
import cProfile
cProfile.run('provider.advancePanelByYear(1)')

In [197]:
np.random.normal(5,2,1)

array([4.25975382])