In [1]:
import numpy as np
import pandas as pd
from scipy.ndimage.interpolation import shift

In [2]:
class timetable:
    # Arrays for calculating the costs
    cost_multiplier = np.array([0,0,9,9,9,18,18,36,36,235])
    cost_additive = np.array([0,50,50,100,200,200,300,300,400,500])

    cost_multiplier_matrix = np.resize(cost_multiplier, (100,10))
    cost_additive_matrix = np.resize(cost_additive, (100, 10))

    # Init a 100 day by 5000 families by 10 choices matrix as the timetable
    def __init__(self):
        self.timetable = np.zeros((100, 5000, 10))
        
    # Function for assigning a family their specific choice
    def assign_family_by_choice(self, fam_id, choice):
        self.timetable[:,fam_id,:] = np.zeros((100,10))
        day = df.iloc[fam_id, choice + 1] # add one to skip the fam id column
        self.timetable[day - 1, fam_id, choice] = df.loc[fam_id, 'n_people']
    
    # Function for assigning a family a specific day
    def assign_family_by_day(self, fam_id, day):
        self.timetable[:,fam_id,:] = np.zeros((100,10))
        choice = np.where(df.iloc[fam_id] == day)[0][0]
        self.timetable[day - 1, fam_id, choice] = df.loc[fam_id, 'n_people']
        
    # Function for calculating the preference cost to santa of the current timetable
    def get_preference_cost(self):
        multiple_cost = np.sum(self.timetable, axis=1) * self.cost_multiplier_matrix
        additive_cost = np.count_nonzero(self.timetable, axis=1) * self.cost_additive_matrix
        
        return np.sum(multiple_cost + additive_cost)
    
    # Function for calculating the tax cost to Santa
    def get_tax_cost(self):
        occupancy_days = self.get_day_totals()
        prior_day_occupancy = shift(occupancy_days, -1, cval=occupancy_days[99])
        
        return np.sum(((occupancy_days-125)/400)*(occupancy_days**(0.5 + (abs(occupancy_days - prior_day_occupancy) / 50))))
    
    # Function to reuturn the total number of people on each day, vector [100]
    def get_day_totals(self):
        return np.sum(np.sum(schedule.timetable, axis = 1), axis = 1)
    
    def get_specific_day(self, day):
        return self.get_day_totals()[day]
    
    # Function for calculating the total cost to Santa
    def get_total_cost(self):
        return self.get_tax_cost() + self.get_preference_cost()

In [3]:
df = pd.read_csv('family_data.csv')
df.head()

Unnamed: 0,family_id,choice_0,choice_1,choice_2,choice_3,choice_4,choice_5,choice_6,choice_7,choice_8,choice_9,n_people
0,0,52,38,12,82,33,75,64,76,10,28,4
1,1,26,4,82,5,11,47,38,6,66,61,4
2,2,100,54,25,12,27,82,10,89,80,33,3
3,3,2,95,1,96,32,6,40,31,9,59,2
4,4,53,1,47,93,26,3,46,16,42,39,4


In [40]:
# Create timetable object
schedule = timetable()

In [41]:
# assign a family one of their choices
schedule.assign_family_by_choice(fam_id=0, choice=3)
schedule.assign_family_by_choice(fam_id=1, choice=2)
print(schedule.timetable[81,0])
print(schedule.timetable[81,1])

[0. 0. 0. 4. 0. 0. 0. 0. 0. 0.]
[0. 0. 4. 0. 0. 0. 0. 0. 0. 0.]


In [44]:
# calculate the preference cost
schedule.get_preference_cost()

222.0

In [43]:
schedule.get_day_totals()

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 8., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [45]:
df.n_people.sum()

21003

In [46]:
21003/100

210.03

In [7]:
def test_day_limit(day, choice, npeople):
    if schedule.get_specific_day(day-1) + npeople >= 300:
        return False, choice + 1
    else:
        return True, choice

In [49]:
schedule = timetable()
for key, row in df.iterrows():
    fam_id = row.family_id
    choice = 0
    day = row.iloc[choice + 1]
    npeople = row.n_people
    
    # check current choice is within limits
    acceptable, choice = test_day_limit(day, choice, npeople)
    
    while choice < 9 and not acceptable:
        day = row.iloc[choice + 1]
        acceptable, choice = test_day_limit(day, choice, npeople)
    else:
        if not acceptable:
            print("No choices are acceptable for, " + str(fam_id))
        else:
            schedule.assign_family_by_choice(fam_id=fam_id, choice=choice)
    

No choices are acceptable for, 4515
No choices are acceptable for, 4599
No choices are acceptable for, 4679
No choices are acceptable for, 4754
No choices are acceptable for, 4774
No choices are acceptable for, 4815
No choices are acceptable for, 4826
No choices are acceptable for, 4846
No choices are acceptable for, 4851
No choices are acceptable for, 4894
No choices are acceptable for, 4935
No choices are acceptable for, 4956
No choices are acceptable for, 4990


In [50]:
schedule.get_day_totals() < 125

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
        True, False, False, False, False, False, False,  True,  True,
        True,  True, False, False, False,  True,  True,  True,  True,
       False, False, False,  True,  True,  True,  True, False, False,
       False,  True,  True,  True,  True, False, False, False,  True,
        True,  True,  True, False, False, False,  True,  True,  True,
        True])

In [48]:
schedule.get_day_totals()

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 8., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [51]:
days = np.arange(100)
low_popularity_days = days[schedule.get_day_totals() < 125]

In [59]:
schedule = timetable()
df.sort_values(by='n_people')
for key, row in df.iterrows():
    fam_id = row.family_id
    npeople = row.n_people
    
    # if n people > 6, give them their first or second choice at random
    # otherwise check if any of the family's first three choices are in the low popularity days
    if npeople > 6:
        choice = round(np.random.uniform(0, 1),0)
        
    elif row.choice_0 in low_popularity_days:
        choice = 0
    elif row.choice_1 in low_popularity_days:
        choice = 1
    elif row.choice_2 in low_popularity_days:
        choice = 2
    elif row.choice_3 in low_popularity_days:
        choice = 3
    elif row.choice_4 in low_popularity_days:
        choice = 4
    elif row.choice_5 in low_popularity_days:
        choice = 5
    elif row.choice_6 in low_popularity_days:
        choice = 6
    elif row.choice_6 in low_popularity_days:
        choice = 6
    elif row.choice_7 in low_popularity_days:
        choice = 7
    elif row.choice_8 in low_popularity_days:
        choice = 8
    else:
        choice = round(np.random.uniform(0, 1),0)
    
    choice = int(choice)
    # select the day based on the preference choice of the family   
    day = row.iloc[choice + 1] # need plus one to skip the id column
    
    # check current day is within acceptable limits of people
    acceptable, choice = test_day_limit(day, choice, npeople)
    
    while choice < 9 and not acceptable:
        day = row.iloc[choice + 1]
        acceptable, choice = test_day_limit(day, choice, npeople)
    else:
        if not acceptable:
            print("No choices are acceptable for, " + str(fam_id))
        else:
            schedule.assign_family_by_choice(fam_id=fam_id, choice=min(choice, 9))
    

No choices are acceptable for, 4894
No choices are acceptable for, 4935


In [57]:
int(choice)

1

In [60]:
schedule.get_total_cost()

71918142475.32771

In [65]:
schedule.get_day_totals()
days[schedule.get_day_totals() < 125]

array([35, 36, 40, 41, 42, 47, 48, 54, 55, 57, 64, 69, 71, 78, 85, 92, 96,
       99])

In [19]:
very_unpopular_days = days[schedule.get_day_totals() < 125]
very_unpopular_days

array([33, 34, 35, 36, 40, 41, 48, 54, 64, 69, 71, 78, 85, 92, 96, 99])

In [None]:
# We must prioritise the occupation of these very low popularity days, it looks like it may be hard to get 125 people on some of them
# the cost may be great.

In [32]:
df

Unnamed: 0,family_id,choice_0,choice_1,choice_2,choice_3,choice_4,choice_5,choice_6,choice_7,choice_8,choice_9,n_people
0,0,52,38,12,82,33,75,64,76,10,28,4
1,1,26,4,82,5,11,47,38,6,66,61,4
2,2,100,54,25,12,27,82,10,89,80,33,3
3,3,2,95,1,96,32,6,40,31,9,59,2
4,4,53,1,47,93,26,3,46,16,42,39,4
5,5,32,59,12,3,60,26,35,50,5,2,4
6,6,88,4,1,3,91,32,39,57,28,99,2
7,7,25,11,52,48,10,17,88,50,95,66,5
8,8,18,60,1,12,89,33,16,10,53,67,4
9,9,1,88,39,50,26,18,96,47,46,28,7
