In [1]:
import numpy as np
import pandas as pd
from scipy.ndimage.interpolation import shift

In [2]:
class timetable:
    # Arrays for calculating the costs
    cost_multiplier = np.array([0,0,9,9,9,18,18,36,36,235])
    cost_additive = np.array([0,50,50,100,200,200,300,300,400,500])

    cost_multiplier_matrix = np.resize(cost_multiplier, (100,10))
    cost_additive_matrix = np.resize(cost_additive, (100, 10))

    # Init a 100 day by 5000 families by 10 choices matrix as the timetable
    def __init__(self):
        self.timetable = np.zeros((100, 5000, 10))
        
    # Function for assigning a family their specific choice
    def assign_family_by_choice(self, fam_id, choice):
        self.timetable[:,fam_id,:] = np.zeros((100,10))
        day = df.iloc[fam_id, choice + 1]
        self.timetable[day - 1, fam_id, choice] = df.loc[fam_id, 'n_people']
    
    # Function for assigning a family a specific day
    def assign_family_by_day(self, fam_id, day):
        self.timetable[:,fam_id,:] = np.zeros((100,10))
        choice = np.where(df.iloc[fam_id] == day)[0][0]
        self.timetable[day - 1, fam_id, choice] = df.loc[fam_id, 'n_people']
        
    # Function for calculating the preference cost to santa of the current timetable
    def get_preference_cost(self):
        multiple_cost = np.sum(self.timetable, axis=1) * self.cost_multiplier_matrix
        additive_cost = np.count_nonzero(self.timetable, axis=1) * self.cost_additive_matrix
        
        return np.sum(multiple_cost + additive_cost)
    
    # Function for calculating the tax cost to Santa
    def get_tax_cost(self):
        occupancy_days = np.sum(np.sum(self.timetable, axis=1), axis=1)
        prior_day_occupancy = shift(occupancy_days, -1, cval=100)
        
        return np.sum(((occupancy_days-125)/400)*(occupancy_days**(0.5 + (abs(occupancy_days - prior_day_occupancy) / 50))))
    
    # Function to reuturn the total number of people on each day, vector [100]
    def get_day_totals(self):
        return np.sum(np.sum(schedule.timetable, axis = 1), axis = 1)
    
    def get_specific_day(self, day):
        return self.get_day_totals()[day]
    
    # Function for calculating the total cost to Santa
    def get_total_cost(self):
        return self.get_tax_cost() + self.get_preference_cost()

In [3]:
df = pd.read_csv('family_data.csv')
df.head()

Unnamed: 0,family_id,choice_0,choice_1,choice_2,choice_3,choice_4,choice_5,choice_6,choice_7,choice_8,choice_9,n_people
0,0,52,38,12,82,33,75,64,76,10,28,4
1,1,26,4,82,5,11,47,38,6,66,61,4
2,2,100,54,25,12,27,82,10,89,80,33,3
3,3,2,95,1,96,32,6,40,31,9,59,2
4,4,53,1,47,93,26,3,46,16,42,39,4


In [8]:
# Create timetable object
schedule = timetable()

In [38]:
# assign a family one of their choices
schedule.assign_family_by_choice(fam_id=0, choice=3)
print(schedule.timetable[83,0])

[0. 0. 0. 4. 0. 0. 0. 0. 0. 0.]


In [39]:
# calculate the preference cost
schedule.get_preference_cost()

136.0

In [36]:
def test_day_limit(day, choice, npeople):
    if schedule.get_specific_day(day-1) + npeople >= 300:
        return False, choice + 1
    else:
        return True, choice

In [9]:
schedule = timetable()
for key, row in df.iterrows():
    fam_id = row.family_id
    choice = 0
    day = row.iloc[choice + 1]
    npeople = row.n_people
    
    # check current choice is within limits
    acceptable, choice = test_day_limit(day, choice, npeople)
    
    while choice < 8 and not acceptable:
        choice += 1
        day = row.iloc[choice + 1]
        acceptable, choice = test_day_limit(day, choice, npeople)
    else:
        if not acceptable:
            print("No choices are acceptable for, " + str(fam_id))
        else:
            schedule.assign_family_by_choice(fam_id=fam_id, choice=choice)
    

No choices are acceptable for, 3993
No choices are acceptable for, 4294
No choices are acceptable for, 4351
No choices are acceptable for, 4370
No choices are acceptable for, 4403
No choices are acceptable for, 4451
No choices are acceptable for, 4507
No choices are acceptable for, 4541
No choices are acceptable for, 4549
No choices are acceptable for, 4563
No choices are acceptable for, 4566
No choices are acceptable for, 4590
No choices are acceptable for, 4599
No choices are acceptable for, 4638
No choices are acceptable for, 4645
No choices are acceptable for, 4678
No choices are acceptable for, 4687
No choices are acceptable for, 4742
No choices are acceptable for, 4748
No choices are acceptable for, 4754
No choices are acceptable for, 4774
No choices are acceptable for, 4776
No choices are acceptable for, 4795
No choices are acceptable for, 4815
No choices are acceptable for, 4816
No choices are acceptable for, 4821
No choices are acceptable for, 4835
No choices are acceptable fo

In [14]:
schedule.get_day_totals() < 125

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False,  True, False,
       False, False, False, False,  True, False, False, False, False,
       False, False, False, False, False, False, False,  True,  True,
        True,  True, False, False, False,  True,  True,  True,  True,
       False, False, False,  True,  True,  True,  True, False, False,
       False,  True,  True,  True,  True, False, False, False,  True,
        True,  True,  True, False, False, False,  True,  True,  True,
        True])

In [21]:
days = np.arange(100)
low_popularity_days = days[schedule.get_day_totals() < 125]

In [40]:
schedule = timetable()
for key, row in df.iterrows():
    fam_id = row.family_id
    npeople = row.n_people
    
    # if n people > 6, give them their first or second choice at random
    # otherwise check if any of the family's first three choices are in the low popularity days
    if npeople > 6:
        round(np.random.uniform(0, 1),0)
        
    elif row.choice_0 in low_popularity_days:
        choice = 0
    elif row.choice_1 in low_popularity_days:
        choice = 1
    elif row.choice_2 in low_popularity_days:
        choice = 2
    elif row.choice_3 in low_popularity_days:
        choice = 3
    else:
        round(np.random.uniform(0, 1),0)
    
    # select the day based on the preference choice of the family   
    day = row.iloc[choice + 1] # need plus one to skip the id column
    
    # check current day is within acceptable limits of people
    acceptable, choice = test_day_limit(day, choice, npeople)
    
    while choice < 8 and not acceptable:
        choice += 1
        day = row.iloc[choice + 1]
        acceptable, choice = test_day_limit(day, choice, npeople)
    else:
        if not acceptable:
            print("No choices are acceptable for, " + str(fam_id))
        else:
            schedule.assign_family_by_choice(fam_id=fam_id, choice=choice)
    

No choices are acceptable for, 2162
No choices are acceptable for, 2250
No choices are acceptable for, 2461
No choices are acceptable for, 2464
No choices are acceptable for, 2832
No choices are acceptable for, 2951
No choices are acceptable for, 2977
No choices are acceptable for, 3210
No choices are acceptable for, 3261
No choices are acceptable for, 3598
No choices are acceptable for, 3754
No choices are acceptable for, 4090
No choices are acceptable for, 4092
No choices are acceptable for, 4138
No choices are acceptable for, 4237
No choices are acceptable for, 4378
No choices are acceptable for, 4457
No choices are acceptable for, 4494
No choices are acceptable for, 4512
No choices are acceptable for, 4519
No choices are acceptable for, 4521
No choices are acceptable for, 4543
No choices are acceptable for, 4544
No choices are acceptable for, 4594
No choices are acceptable for, 4632
No choices are acceptable for, 4661
No choices are acceptable for, 4662
No choices are acceptable fo

IndexError: single positional indexer is out-of-bounds

In [30]:
round(np.random.uniform(0, 1),0)

0.0

In [41]:
choice

11

In [43]:
day

3