In [1]:
import math
import numpy as np
import os
import pandas as pd
from itertools import islice

# CLASSES

  ## main data
   ### model.df['mof','sf','mofbp','sfbp']
   ### model.dates.df_dates['dow','block','date','wkend','day','ts']
   ### model.dates.mat
   
how do we deal with an imperfect model? it's much harder to troubleshoot when all the pieces are laid out. It requires systematically looking for viable trades. 
- identify residents with not enough shifts. They need mores shifts. Have them "search" for shifts occupied by people with >8 shifts and try to take them over (just don't take buddy shifts)
- just don't trade buddy shifts
- identify calls that could not be filled

In [356]:
# access date-related information]
os.chdir("/users/jkgerdts/Documents/")
residents = pd.read_csv('residents.csv')
assignments = np.genfromtxt('assignments.csv',delimiter=",",skip_header=1)
rotations = pd.read_csv('rotations.csv')

rotations.set_index('rotID',inplace=True)
residents.set_index('resID',inplace=True)

# for a specific resident, can make the rotations DF


class selector:
    
    def __init__(self,arr):
        self.arr = arr
        self.index = np.array([i for i in range(len(arr))])
        self.counts = np.array([0 for i in range(len(arr))])
        self.shuffle()
        self.s=0
        
    def shuffle(self):
        it = np.array([])
        for i in range(max(self.counts)+1):
            if i in self.counts:
                x = self.index[self.counts==i]
                np.random.shuffle(x)
                it = np.concatenate([it,x])
        self.it = iter(list(it.astype(int)))
        
    def sel(self):
        try:
            self.s = next(self.it)
            return self.arr[self.s]
        except StopIteration:
            self.shuffle()
            self.s = next(self.it)
            return self.arr[self.s]
        
    def count(self):
        self.counts[self.s]=self.counts[self.s]+1
        

class res:
    def __init__(self,rotations,assignments,residents,resID):
        ### PROPERTIES
        # rotations is a pandas dataframe with columns:
            # block (index), rotID, name, weight, inpatient(bool)
        # assignments is a 33x24 numerical matrix

        
        self.resID=resID
        self.name = residents.loc[resID]['name']
        self.junior=residents.loc[resID]['junior']
        self.year=residents.loc[resID]['year']
        self.block = rotations.loc[[x for x in assignments[resID-1,:]]].copy()
        self.block['block']=np.array(range(1,25)) # sets block integer for indexing
        self.block = self.block.reset_index() # this step allows keeping the rotID column
        self.block.set_index('block',inplace=True)
            # block (index), rotID, name, weight, inpatient
        self.dates = dates()
        self.calls = pd.DataFrame(columns=['dateID','site','buddy','dow','month','wkend','block'])
        self.requests = [] # can add day off requests as a customization
        
        
    def check_date(self,dateID,site):
        # returns boolean to accept or reject date and reason for failure
        
        block = self.dates.get_block(dateID)
        wkend = self.dates.get_wkend(dateID)
        dow = self.dates.get_dow(dateID)
        weight = self.block.loc[block]['weight']
        month = self.dates.get_month(dateID)
        

            # acceptable date based on "weight" in block table
        if weight==0:
            return (0,'block')

            # two calls same_weekend
        if wkend in self.calls['wkend'].values:
            return (0,'same_wkend')
        
            # two weeks in a row
        if wkend-1 in self.calls['wkend'].values:
            return (0,'prior_wkend')
        
        if wkend+1 in self.calls['wkend'].values:
            return (0, 'next_wkend')
        
            # repeat same DOW in a month
        if len(self.calls[(self.calls['dow']==dow) & (self.calls['month']==month)])>0:
            return (0, 'same_dow_month')
        
            # repeat same site in a month
        if len(self.calls[(self.calls['site']==site) & (self.calls['month']==month)])>0:
            return (0, 'same_site_month')
        
            # repeat same site/DOW as buddy shifts
        if self.isbuddy(site):
            if len(self.calls[(self.calls['site']==site) & (self.calls['dow']==dow) & (self.calls['buddy']==True)])>0:
                return (0, 'same_site_dow_buddy')
        
            # repeat same site/DOW > 3x for year
        if len(self.calls[(self.calls['site']==site) & (self.calls['dow']==dow)])>2:
            return (0, 'same_site_dow_year_3')
        
            # repeat same DOW > 5x for year
        if len(self.calls[self.calls['dow']==dow])>4:
            return (0, 'same_dow_year_5')
        
            # repeat same site > 5x for year
        if len(self.calls[self.calls['site']==site])>4:
            return (0, 'same_site_year_5')
        
            # more than 9 calls for year
        if len(self.calls)>8:
            return (0, 'total_calls_9')
        
            # last day before inpatient
        last = {(k+1):v for k,v in enumerate([15,14,15,15,15,15,15,15,15,15,15,15])}
        if last[month]==self.dates.get_ts(dateID).day:
            if block<24:
                if self.block.loc[block+1]['inpatient']:
                    return (0, 'last_day_before_inpt')
                
            # late_buddy
        if self.junior:
            if self.dates.get_ts(dateID) > pd.to_datetime('3/1/20'):
                if self.isbuddy(site):
                    return (0, 'late_buddy')
                
            # early solo
        if self.junior:
            if not self.isbuddy(site):
                if self.isbuddy({'sf':'mof','mof':'sf'}[site]):
                    return (0, 'early_solo')
                
            # birthday ##############################################
            
        return (weight, 'N/A') # no disqualifiers found
    
    def isbuddy(self,site):
        # returns True if next shift would be buddy for a given site
        return self.junior and len(self.calls[self.calls['site']==site])<2

    def set_call(self,dateID,site):
        
        block = self.dates.get_block(dateID)
        wkend = self.dates.get_wkend(dateID)
        dow = self.dates.get_dow(dateID)
        weight = self.block.loc[block]['weight']
        buddy = self.isbuddy(site)
        month = self.dates.get_month(dateID)
        
        self.calls = self.calls.append({'dateID':dateID,'site':site,'buddy':buddy,'dow':dow,
                           'month':month,'wkend':wkend,'block':block},ignore_index=True)
        return buddy

    
    def disp(self):
        print('Resident: {}'.format(self.name))
        print('  junior={}'.format(self.junior))
        print('  year={}'.format(self.year))
        print('  isbuddy moffitt: {} sfgh: {}'.format(self.isbuddy('mof'),self.isbuddy('sf')))
        print(self.block)
        print(self.calls)
        


        
    class dates:
        def __init__(self):
            os.chdir("/users/jkgerdts/Documents/")
            # dates is a matrix of dates, day of week, timestamps, week of year, weekend of year
            df_dates = pd.read_csv('dates.csv')
            df_dates['ts']=pd.to_datetime(df_dates['date'])
            df_dates['month']=[x.month for x in df_dates['ts']]
            # mat is matrix of acceptability generated outside
            mat = np.genfromtxt('mat.csv',delimiter=",",skip_header=1)
            assignments = np.genfromtxt('assignments.csv',delimiter=",",skip_header=1)
            rotations = pd.read_csv('rotations.csv')

            self.df_dates=df_dates
            self.mat = mat
            self.assignments=assignments
            self.rotations=rotations

        def get_val(self,resID,dateID):
            return self.mat[dateID-1,resID-1]

        def get_next(self,resID,dateID):
            if dateID==104:
                return 1
            else:
                return self.mat[dateID,resID-1]

        def get_block(self,dateID):
            return self.df_dates['block'][dateID-1]

        def get_wkend(self,dateID):
            return self.df_dates['wkend'][dateID-1]

        def get_month(self,dateID):
            return self.df_dates['month'][dateID-1]

        def get_ts(self,dateID):
            return self.df_dates['ts'][dateID-1]

        def get_dow(self,dateID):
            return self.df_dates['dow'][dateID-1]

        def get_assignment(self,resID,dateID):
            return self.assignments[resID-1,self.get_block(dateID)-1]

        def get_inpatient(self,resID,dateID):
            if dateID>104:
                return False
            else:
                return self.rotations['inpatient'][self.get_assignment(resID,dateID)-1]

In [166]:
R = res(rotations,assignments,residents,1)

In [360]:
juniors = selector([res(rotations,assignments,residents,i) for i in range(1,14)])
seniors = selector([res(rotations,assignments,residents,i) for i in range(14,34)])

log = pd.DataFrame(columns=['filled','dateID','site','resident','reason'])

def tryfill(dateID,site,pool,n):
    log = pd.DataFrame(columns=['filled','dateID','site','resident','reason'])
    filled = False
    buddy = False
    for i in range(40):
        R = pool.sel()
        check = R.check_date(dateID,site)
        if check[0]>0:
            pool.count()
            buddy = R.set_call(dateID,site)
            filled = True
            log = log.append({'filled':True,'dateID':dateID,'site':site,'resident':R.name,'reason':check[1]},ignore_index=True)
            break
        else:
            log = log.append({'filled':False,'dateID':dateID,'site':site,'resident':R.name,'reason':check[1]},ignore_index=True)
            #log.append({'dateID':dateID,'site':site,'resident':R.name,'reason':check[1]},ignore_index=True)
    return (filled,buddy,log)
            

for dateID in range(1,105):
    for site in ['mof','sf']:
        result = tryfill(dateID,site,juniors,28)
        log = pd.concat([log,result[2]],0)
        if result[0]:
            if result[1]:
                #buddyshifts.append((dateID,site))
                result = tryfill(dateID,site,seniors,38)
                log = pd.concat([log,result[2]],0)
        else:
            result = tryfill(dateID,site,seniors,38)
            log = pd.concat([log,result[2]],0)
            if not result[0]:
                print('failed to fill date {}'.format(dateID))
        

failed to fill date 91


In [None]:
with pd.option_context('display.max_rows', None):
    print(log[log['dateID']==91])

In [359]:
log.to_csv('log.csv')

In [368]:
juniors.shuffle()
for i in range(len(juniors.arr)):
    juniors.sel().disp()

Resident: Mehta_N
  junior=True
  year=R2
  isbuddy moffitt: False sfgh: False
       rotID           name  weight  inpatient
block                                         
1          5  Physicianship    0.00       True
2          5  Physicianship    0.00       True
3          9             NV    0.00       True
4          9             NV    0.00       True
5         13            Vac    0.00       True
6         34    QI Training    1.00      False
7          8         UCC Jr    0.00       True
8          4       VA NPhys    0.75      False
9          7          UC Jr    0.00       True
10         7          UC Jr    0.00       True
11        14       VA Rehab    0.75      False
12         2           Head    1.00      False
13        15         NSvc-M    0.00       True
14        10  Selective-TBD    1.00      False
15         6         ZSF Jr    0.00       True
16         6         ZSF Jr    0.00       True
17        26      UC Clinic    1.00      False
18        10  Selective-TBD 

In [None]:
seniors.shuffle()
for i in range(len(seniors.arr)):
    seniors.sel().disp()

In [372]:
R = juniors.arr[9]
for col in ['mof','sf']:
    for dateID in range(1,105):
        if R.check_date(dateID,site)[0]>0:
            print('date: {}   site: {}'.format(dateID,site))

date: 51   site: sf
date: 51   site: sf


Unnamed: 0,mof,sf,mofbp,sfbp
0,0.0,1.0,1.0,1.0
1,0.0,0.0,0.0,1.0
2,0.0,0.0,0.75,0.0
3,0.0,0.0,0.0,0.0
4,0.75,0.0,0.75,1.0


Unnamed: 0,mof,sf,mofbp,sfbp
0,22,23,0,0
1,2,6,24,30
2,10,2,18,22
3,8,8,28,27
4,9,16,32,0


# key functions

In [474]:
# shuffle positions ------------



    
    


In [475]:
D = dates()

In [477]:
D.get_val(4,1)

0.75

# 

np.cumsum([1,2,3,4])

In [423]:
# buddy call after november 0.1


#solo before 4 buddies 0
# lastbuddy, firstsolo
for x in range(1,14):
    lastbuddy = max([[j for j,i in enumerate(mof) if i==x][1],[j for j,i in enumerate(sf) if i==x][1]])
    firstsolo = min([[j for j,i in enumerate(mof) if i==x][2],[j for j,i in enumerate(sf) if i==x][2]])
    print('{}, {}'.format(lastbuddy,firstsolo))
    if firstsolo<lastbuddy:
        print('--problem: {}'.format(x))
    if lastbuddy>44:
        print('--lastbuddy > 44')


#two weekends in a row 0.1
    #twice same weekend 0
    # given column wkendID
wkendID = [i for i in range(104)]

for x in range(1,34):
    wkends = []
    for i,j in enumerate(mof):
        if j==x:
            wkends.append(wkendID[i])
    for i,j in enumerate(sf):
        if j==x:
            wkends.append(wkendID[i])
    for i,j in enumerate(mofbp):
        if j==x:
            wkends.append(wkendID[i])
    for i,j in enumerate(sfbp):
        if j==x:
            wkends.append(wkendID[i])
    wkends.sort()
    deltas = [j-i for i,j in zip(wkends[0:-1],wkends[1:])]
    print(deltas)
    if 0 in deltas:
        print('--two shifts same weekend')
    if 1 in deltas:
        print('--two weekends in a row...')

#saturday before starting night float 0


#one day before inpatient 0


#3 calls one month 0


# two calls same block 0


#same site twice same month 0.1


#same DOW 2x same month 0.5

[6, 10, 7, 7, 1, 9, 18]
--two weekends in a row...
[1, 24, 4, 10, 12, 6, 14]
--two weekends in a row...
[1, 29, 5, 7, 11, 10, 14]
--two weekends in a row...
[9, 14, 3, 37, 9, 10, 1]
--two weekends in a row...
[20, 11, 6, 7, 9, 10, 19]
[2, 6, 19, 21, 6, 9, 13]
[5, 16, 7, 21, 23, 10, 0]
--two shifts same weekend
[27, 4, 3, 26, 5, 3, 1]
--two weekends in a row...
[12, 4, 3, 26, 4, 26, 18]
[1, 6, 14, 46, 9, 1, 15]
--two weekends in a row...
[3, 2, 13, 16, 10, 10, 23]
[14, 30, 0, 3, 11, 7, 25]
--two shifts same weekend
[45, 0, 11, 8, 8, 18, 0]
--two shifts same weekend
[20, 25, 1, 7, 1, 11, 10]
--two weekends in a row...
[18, 7, 6, 2, 4, 4, 6]
[5, 5, 20, 24, 3, 7, 32]
[10, 14, 3, 34, 0, 14, 4]
--two shifts same weekend
[6, 4, 10, 11, 4, 19, 3]
[13, 1, 8, 3, 7, 12, 26]
--two weekends in a row...
[3, 19, 5, 13, 14, 32, 6]
[12, 27, 0, 20, 8, 2, 31]
--two shifts same weekend
[3, 7, 27, 1, 26, 15, 10]
--two weekends in a row...
[17, 2, 2, 27, 4, 37, 6]
[4, 12, 3, 33, 11, 7, 2]
[16, 2, 14, 1, 1, 

In [411]:
[i-j for i,j in zip(wkends[0:],wkends[:-1])]

[0, 0, 0, 0, 0, 0]

# Import data from CSV

In [433]:
ts = df_dates['ts'][0]

In [440]:
# if last day of block and going to inpatient
if all([ts.month==2,ts.day==14]) or all([ts.month not in [2],ts.day==15]):
    

7

In [443]:
any([ts.month==2,ts.day==14])

False

# extracting features

# OLD

In [None]:

class reslist:
    def __init__(self,low,high,reps):
        self.arr = []
        for j in range(reps):
            for i in range(low,high+1):
                self.arr.append(i)
        self.discard = []
        self.returncount = 0
        self.arr = np.array(self.arr)
        np.random.shuffle(self.arr)
        self.arr = list(self.arr)
        
    def shuffle(self):
        self.arr = np.array(self.arr)
        np.random.shuffle(self.arr)
        self.arr = list(self.arr)
        
    def sel(self):
        if len(self.arr)<1:
            self.arr=self.discard[:]
            self.discard = []
            self.returncount=self.returncount+1
        s = self.arr.pop()
        return s
    
    def ret(self,value):
        self.discard.append(value)
        
    def report(self):
        print('len(arr)={}, len(discard)={}, returncount={}'.format(len(self.arr),len(self.discard),self.returncount))
        
seniors = reslist(14,33,8)
juniors = reslist(1,13,8)

In [480]:
df_dates

Unnamed: 0,date,dow,block,wkend,day,ts
0,July 5 2019,6,1,1,1,2019-07-05
1,July 6 2019,7,1,1,2,2019-07-06
2,July 12 2019,6,1,2,3,2019-07-12
3,July 13 2019,7,1,2,4,2019-07-13
4,July 19 2019,6,2,3,5,2019-07-19
5,July 20 2019,7,2,3,6,2019-07-20
6,July 26 2019,6,2,4,7,2019-07-26
7,July 27 2019,7,2,4,8,2019-07-27
8,August 2 2019,6,3,5,9,2019-08-02
9,August 3 2019,7,3,5,10,2019-08-03


In [488]:
df

Unnamed: 0,mof,sf,mofbp,sfbp
0,16,13,0,28
1,5,8,24,33
2,32,28,0,0
3,13,20,17,0
4,11,1,30,20
5,7,15,18,0
6,15,4,0,24
7,14,1,0,15
8,12,11,18,33
9,24,22,0,0


In [625]:
df['mof','sf','mofbp'][4:5].values.flatten()

SyntaxError: invalid syntax (<ipython-input-625-74824a0fa0e8>, line 1)

In [614]:
ts = M.dates.get_ts(1)

In [615]:
ts.day

5

In [627]:
M.df[(M.dates.df_dates['dow']==6) & (M.dates.df_dates['day']<20)]

Unnamed: 0,mof,sf,mofbp,sfbp
0,4,33,26,0
2,15,25,0,0
4,7,7,19,15
6,11,19,25,0
8,7,32,25,0
10,25,32,0,0
12,31,14,0,0
14,32,29,0,0
16,18,7,0,0
18,15,17,0,0


In [640]:
next((x for i,x in enumerate(range(5)) if x in [1,3]))

TypeError: 'int' object is not subscriptable

In [648]:
from itertools import islice
[i for i in islice((x for i,x in enumerate(range(5))),3)]

[0, 1, 2]

[1, 3, 5]

In [664]:
M.dates.df_dates['dow'][[j for j in islice((i for i,x in enumerate(M.df['mof'].values) if x==1),2)]].values

array([7, 6])

In [665]:
arr = np.array([6,7])

In [666]:
np.unique(arr)

array([6, 7])

In [667]:
arr[0]==arr[1]

False

In [683]:
rotations = pd.read_csv('rotations.csv')
rotations

Unnamed: 0,rotID,name,weight,inpatient
0,1,ZSF Cons,0.0,True
1,2,Head,1.0,False
2,3,VA Jr,0.0,True
3,4,VA NPhys,0.75,False
4,5,Physicianship,0.0,True
5,6,ZSF Jr,0.0,True
6,7,UC Jr,0.0,True
7,8,UCC Jr,0.0,True
8,9,NV,0.0,True
9,10,Selective-TBD,1.0,False


In [682]:
rotations['inpatient'][rotations['rotID']>4].values

array([ True,  True,  True,  True,  True, False,  True,  True,  True,
       False,  True,  True, False,  True,  True, False,  True, False,
       False, False, False, False, False, False, False, False,  True,
       False, False, False, False,  True,  True, False, False, False,
       False])

In [688]:
test = M.df.copy()

In [690]:
test[:]=0

In [692]:
M.df

Unnamed: 0,mof,sf,mofbp,sfbp
0,25,6,0,16
1,31,11,0,18
2,21,3,0,20
3,18,9,0,15
4,1,1,30,24
5,8,20,25,0
6,10,13,19,27
7,21,17,0,0
8,26,25,0,0
9,4,30,26,0


In [746]:
M.df[:][M.dates.df_dates['dow']==7]

Unnamed: 0,mof,sf,mofbp,sfbp
1,2,4,24,19
3,4,13,14,19
5,11,17,29,0
7,19,12,0,26
9,14,26,0,0
11,32,4,0,22
13,9,5,22,17
15,6,25,15,0
17,31,11,0,25
19,1,1,14,27


In [750]:
sum([x==1 for x in M.df[:][M.dates.df_dates['dow']==7].values.flatten()])

2

In [765]:
test = [0,1]
test[::-1]

[1, 0]

In [767]:
M.df['mof'][M.dates.df_dates['wkend']==2]

2    28
3     8
Name: mof, dtype: int64

In [768]:
vals = M.df['mof'][M.dates.df_dates['wkend']==2].values
M.df['mof'][M.dates.df_dates['wkend']==2]=vals[::-1]

In [769]:
M.df['mof'][M.dates.df_dates['wkend']==2]

2     8
3    28
Name: mof, dtype: int64

In [773]:
counter = iter(range(100))

In [778]:
next(counter)

2

AttributeError: 'numpy.ndarray' object has no attribute 'random'

In [954]:
missing = []
missing.append(('test',1))

In [955]:
missing

[('test', 1)]

In [957]:
missing[0][1]

1

In [1059]:
D.rotations


Unnamed: 0,rotID,name,weight,inpatient
0,1,ZSF Cons,0.0,True
1,2,Head,1.0,False
2,3,VA Jr,0.0,True
3,4,VA NPhys,0.75,False
4,5,Physicianship,0.0,True
5,6,ZSF Jr,0.0,True
6,7,UC Jr,0.0,True
7,8,UCC Jr,0.0,True
8,9,NV,0.0,True
9,10,Selective-TBD,1.0,False


<bound method NDFrame.keys of     resID              name  junior year
0       1        Adjepong_K    True   R2
1       2      Breithaupt_A    True   R2
2       3          Chiang_S    True   R2
3       4            Diaz_M    True   R2
4       5         Dilwali_S    True   R2
5       6         Harnenz_Z    True   R2
6       7           Hines_H    True   R2
7       8         Kaufman_S    True   R2
8       9         Mahmood_N    True   R2
9      10           Mehta_N    True   R2
10     11         Moseley_C    True   R2
11     12             Pet_D    True   R2
12     13          Vassar_R    True   R2
13     14        Goslinga_J   False   R3
14     15          Holmes_B   False   R3
15     16    Lane-Donovan_C   False   R3
16     17       Lindquist_B   False   R3
17     18          Martin_P   False   R3
18     19         Mefford_A   False   R3
19     20        Nylander_A   False   R3
20     21       Parikshak_N   False   R3
21     22          Sriram_S   False   R3
22     23  Zahed Kargaran_H

In [4]:
os.chdir("/users/jkgerdts/Documents/")
residents = pd.read_csv('residents.csv')
assignments = np.genfromtxt('assignments.csv',delimiter=",",skip_header=1)
rotations = pd.read_csv('rotations.csv')
rotations.set_index('rotID',inplace=True)
residents.set_index('resID',inplace=True)


In [38]:

test

Unnamed: 0_level_0,rotID,name,weight,inpatient
block,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,2,Head,1.0,False
2,1,ZSF Cons,0.0,True
3,4,VA NPhys,0.75,False
4,2,Head,1.0,False
5,7,UC Jr,0.0,True
6,7,UC Jr,0.0,True
7,3,VA Jr,0.0,True
8,3,VA Jr,0.0,True
9,29,Psych,1.0,False
10,29,Psych,1.0,False


In [49]:
{'dateID':4,'site':4,'buddy':4,
     'dow':4,'month':4,
     'wkend':4,'block':4}

{'dateID': 4,
 'site': 4,
 'buddy': 4,
 'dow': 4,
 'month': 4,
 'wkend': 4,
 'block': 4}

Unnamed: 0,dateID,site,buddy,dow,month,wkend,block
0,1,mof,True,6,5,22,7


In [56]:
len(residents[(residents['junior']==False) & (residents['year']=='R2')])

0

'sf'

In [73]:
format('test{}test{}'.format(R.isbuddy('mof'),R.isbuddy('sf')))

'testTruetestTrue'

In [None]:
class model:
    def __init__(self,dates):
        self.dates=dates
        
        ############ another version:
        mof = []
        sf = []
        buddy = []
        remainder = []
        for i in range(1,5):
            for j in range(1,14):
                mof.append(j)
                sf.append(j)
        for i in range(1,3):
            for j in range(14,34):
                mof.append(j)
                sf.append(j)
        for i in range(1,3):
            for j in range(14,34):
                buddy.append(j)

        for j in range(2):
            remainder=np.array([i for i in range(14,34)])
            np.random.shuffle(remainder)
            remainder = list(remainder)
            r = iter(remainder)
            for i in range(6):
                mof.append(next(r))
                sf.append(next(r))
                buddy.append(next(r))


        # shuffle all:
        def shuffle(lst):
            n = np.array(lst)
            np.random.shuffle(n)
            return list(n)

        mof = shuffle(mof)
        sf = shuffle(sf)
        buddy= shuffle(buddy)
        mofb = buddy[:26]
        sfb = buddy[26:]

        # pad the buddy lists with zeros
        mof_counters={}
        for i in range(1,14):
            mof_counters[i]=0
        sf_counters={}
        for i in range(1,14):
            sf_counters[i]=0

        mofbp = []
        sfbp = []
        for i in range(104):
            if mof[i]<14:
                if mof_counters[mof[i]]<2:
                    mofbp.append(mofb.pop())
                    mof_counters[mof[i]]=mof_counters[mof[i]]+1
                else:
                    mofbp.append(0)
            else:
                mofbp.append(0)
            if sf[i]<14:
                if sf_counters[sf[i]]<2:
                    sfbp.append(sfb.pop())
                    sf_counters[sf[i]]=sf_counters[sf[i]]+1
                else:
                    sfbp.append(0)
            else:
                sfbp.append(0)

        final = numpy.concatenate([[mof],[sf],[mofbp],[sfbp]])

        df = pd.DataFrame(final.transpose(),columns=['mof','sf','mofbp','sfbp'])
        
        self.df = df
        
        
    def count_block(self,resID,dateID):
        return sum([x==resID for x in self.df[:][self.dates.df_dates['block']==self.dates.get_block(dateID)].values.flatten()])
            
    def count_wkend(self,resID,dateID):
        return sum([x==resID for x in self.df[:][self.dates.df_dates['wkend']==self.dates.get_wkend(dateID)].values.flatten()])
        
    def prior_wkend(self,resID,dateID):
        # returns boolean indicating whether resident worked prior weekend
        if self.dates.get_wkend(dateID)==1:
            return False
        else:
            return sum([x==resID for x in self.df[:][self.dates.df_dates['wkend']==self.dates.get_wkend(dateID)-1].values.flatten()])>0
        
    def latebuddy(self,resID,dateID,col):
        if resID < 15 and self.dates.get_ts(dateID) > pd.to_datetime('11/1/19'):

            # ensure that there are indeed 14 R2s next year ##################################################

            # if R2 and it's November or later, ensure there have been at least 2 prior shifts
            return 2>sum([x==resID for x in self.df[col][self.dates.df_dates['day']<dateID].values])
        else:
            return False
        
    def earlysolo(self,resID,dateID,col):
        # if junior and specified date is solo, check to see if there have been at least two prior calls at other site
        if resID < 15 and 2<=sum([x==resID for x in self.df[col][self.dates.df_dates['day']<dateID].values]) and col in ['mof','sf']:
            othercol = {'mof':'sf','sf':'mof'}[col]
            return 2>sum([x==resID for x in self.df[othercol][self.dates.df_dates['day']<dateID].values])
        else:
            return False
        
    ############ last day before inpatient
    def day_before_inpt(self,resID,dateID):
        ###################
        last = {(k+1):v for k,v in enumerate([15,14,15,15,15,15,15,15,15,15,15,15])}
        if last[self.dates.get_month(dateID)]==self.dates.get_ts(dateID).day:
            return self.dates.get_inpatient(resID,dateID+1)
        else:
            return False
            
    
    ###############################################
    # still need to balance DOW
    def dow_issue_mat(self):
        scores = self.df.copy()
        scores[:]=1.0
        for col in ['mof','sf','mofbp','sfbp']:
            for row in range(104):
                dateID = row+1
                resID = self.df[col][row]
                if resID>0:
                    scores[col][row]=self.check_dow(resID,dateID,col)
        return scores
        
        # create a boolean matrix of DOW issues
            # 1: >5 of a given DOW for the year
            # 2: >1 DOW in same month
            # 3: both buddy shifts at a site are same DOW

    
    def check_dow(self,resID,dateID,col):
        dow = self.dates.get_dow(dateID)
        month=self.dates.get_month(dateID)
        countyear = sum([x==resID for x in self.df[:][self.dates.df_dates['dow']==dow].values.flatten()])
        monthsame = 1<sum([x==resID for x in self.df[:][(self.dates.df_dates['dow']==dow) and (self.dates.df_dates['month']==month)].values.flatten()])
        if resID>14:
            buddysame = False
        else:
            # filter by 
            sel = [j for j in islice((i for i,x in enumerate(self.df[col].values) if x==resID),2)]
            buddy = self.dates.df_dates['dow'][sel].values
            buddysame = 1<len(np.unique(buddy))
        if buddysame:
            #print('buddysame')
            return 0
        else:
            if any([monthsame,countyear>5]):
                return 0.1
            else:
                return 1
                 
    def dow_swap(self,wkend,col):
        vals = self.df[col][self.dates.df_dates['wkend']==wkend].values
        self.df[col][self.dates.df_dates['wkend']==wkend]=vals[::-1]
        
    
    def multi_dow_swap(self):
        dowmat = self.dow_issue_mat()
        d = {vu:sum([v==vu for v in dowmat.values.flatten()]) for vu in np.unique(dowmat.values.flatten())}
        print(d)
        imperfect = sum([v<1 for v in dowmat.values.flatten()])
        
            
        for col in ['mof','sf','mofbp','sfbp']:
            for row in range(104):
                wkend = self.dates.df_dates['wkend'][row]
                if dowmat[col][row]==0:
                    if np.random.rand()<1.0/(imperfect/4.0):
                        print('swap')
                        self.dow_swap(wkend,col)
                elif dowmat[col][row]<0.2:
                    if np.random.rand()<1.0/(imperfect/2.0):
                        print('swap')
                        self.dow_swap(wkend,col)
                elif dowmat[col][row]<1:
                    if np.random.rand()<1.0/imperfect:
                        print('swap')
                        self.dow_swap(wkend,col)
                        
        

        
    def score_matrix(self):
        ### create a matrix of scores for each current assignment
        #scores = pd.DataFrame(columns=['mof','sf','mofbp','sfbp'],)
        categories = ['countblock','countwkend','priorwkend','latebuddy','earlysolo','daybefore','other']
        catdic = {k:0 for k in categories}
        scores = self.df.copy()
        scores[:]=1.0
        for col in ['mof','sf','mofbp','sfbp']:
            for row in range(104):
                dateID = row+1
                resID = int(self.df[col][row])
                if resID>0:
                    #print('resID={}, dateID={}, row={}, col={}'.format(resID,dateID,row,col))
                    score = self.dates.mat[row,resID-1]
                    # *** here enter additional filters to adjust score
                    factors = [self.count_block(resID,dateID)>1]
                    factors.append(self.count_wkend(resID,dateID)>1)
                    factors.append(self.prior_wkend(resID,dateID))
                    factors.append(self.latebuddy(resID,dateID,col))
                    factors.append(self.earlysolo(resID,dateID,col))
                    factors.append(self.day_before_inpt(resID,dateID))
                    #print(factors)
                    if any(factors):
                        score = 0
                        for i in range(len(factors)):
                            if factors[i]:
                                catdic[categories[i]]=catdic[categories[i]]+1
                    scores[col][row]=score
        print(catdic)
        return scores
    
    def check_single(self,col,row):
        categories = ['countblock','countwkend','priorwkend','latebuddy','earlysolo','daybefore','other']
        dateID = row+1
        resID = int(self.df[col][row])
        if resID>0:
            score = self.dates.mat[row,resID-1]
            factors = [self.count_block(resID,dateID)>1]
            factors.append(self.count_wkend(resID,dateID)>1)
            factors.append(self.prior_wkend(resID,dateID))
            factors.append(self.latebuddy(resID,dateID,col))
            factors.append(self.earlysolo(resID,dateID,col))
            factors.append(self.day_before_inpt(resID,dateID))
            #print(factors)
            if any(factors):
                score = 0
                for i in range(len(factors)):
                    if factors[i]:
                        catdic[categories[i]]=catdic[categories[i]]+1
    
    def shuffle(self,verbose):
        counts = {0:0,0.2:0,0.5:0}
        scores = self.score_matrix()
        for col in ['mof','sf','mofbp','sfbp']:
            sel = []
            for row in range(104):
                if scores[col][row]==0:
                    counts[0]=counts[0]+1
                    if np.random.rand()>0.5:
                        sel.append(row)
                elif scores[col][row]<0.25:
                    counts[0.2]=counts[0.2]+1
                    if np.random.rand()>0.95:
                        sel.append(row)
                elif scores[col][row]<0.8:
                    counts[0.5]=counts[0.5]+1
                    if np.random.rand()>0.97:
                        sel.append(row)
                    if np.random.rand()>0.99:
                        sel.append(row)

            self.swap(sel,[col for i in range(len(sel))])
            if verbose:
                print('moved {} positions'.format(len(sel)))
        
        # now move buddy shifts to account for movements
        for col,buddycol in {'mof':'mofbp','sf':'sfbp'}.items():
            # have to identify buddy spots once again
            buddyrows = []
            for res in range(14):
                c = 0
                for row in range(104):
                    if self.df[col][row]==res:
                        buddyrows.append(row)
                        c=c+1
                        if c>1:
                            break
            #####

            missing = []
            extra = []
            vals = []
            for row in range(104):
                if self.df[buddycol][row]>0:
                    if row not in buddyrows:
                        extra.append((buddycol,row))
                        vals.append(self.df[buddycol][row])
                        self.df[buddycol][row]=0
                if row in buddyrows:
                    if self.df[buddycol][row]<1:
                        missing.append((buddycol,row))
        if verbose:
            print('---missing---{}'.format(missing))
            print('---extra---{}'.format(extra))
            print(counts)
        vals = np.array(vals)
        np.random.shuffle(vals)
        for i in range(len(vals)):
            self.df[missing[i][0]][missing[i][1]]=vals[i]
            

            
            
            
    def swap(self,rows,cols):
        vals = []
        for i in range(len(rows)):
            vals.append(self.df[cols[i]][rows[i]])
        vals = np.array(vals)
        np.random.shuffle(vals)
        for i in range(len(rows)):
            self.df[cols[i]][rows[i]]=vals[i]



In [198]:
test = selector(['A','B','C'])

In [339]:
test = pd.DataFrame(columns=['A','B'])
test = test.append({'A':1,'B':2},ignore_index=True)
test2 = pd.DataFrame(columns=['A','B'])
test2 = test2.append({'A':3,'B':4},ignore_index=True)

In [340]:
test3 = pd.concat([test,test2],0)

In [341]:
test3

Unnamed: 0,A,B
0,1,2
0,3,4
