In [10]:
import pandas as pd
import numpy as np

In [11]:
# Load the per-family input table, keyed by family_id. Later cells read its
# 'n_people' column and its 'choice_0' .. 'choice_9' columns.
data = pd.read_csv('../data/family_data.csv', index_col='family_id')

In [12]:
# Number of schedulable days; the `days` list is built by counting down from
# N_DAYS to 1.
N_DAYS = 100
# Bounds on a single day's head count. cost_function adds a fixed penalty for
# every day whose total is above MAX_OCCUPANCY or below MIN_OCCUPANCY.
MAX_OCCUPANCY = 300
MIN_OCCUPANCY = 125

In [13]:
# family_id -> number of people in that family
family_size_dict = data['n_people'].to_dict()

# 'choice_N' -> {family_id -> day}, for N = 0 .. 9 (0 is the top preference)
cols = [f'choice_{i}' for i in range(10)]
choice_dict = data[cols].to_dict()

# Day numbers counted down from N_DAYS to 1, so days[0] is the
# highest-numbered day and days[-1] is day 1.
days = list(range(N_DAYS, 0, -1))

In [14]:
def cost_function(prediction):
    """Score a schedule as preference fees + occupancy penalties + accounting cost.

    Parameters
    ----------
    prediction : sequence
        ``prediction[f]`` is the day assigned to family ``f`` (families are
        taken in positional order). Every value must be one of the entries of
        the module-level ``days`` list, otherwise a ``KeyError`` is raised.

    Returns
    -------
    tuple
        A one-element tuple ``(total_cost,)``.

    Notes
    -----
    Reads the module-level ``days``, ``family_size_dict``, ``choice_dict``,
    ``MAX_OCCUPANCY`` and ``MIN_OCCUPANCY``. The accounting term uses a literal
    125.0 rather than ``MIN_OCCUPANCY``.
    """
    # (flat fee, per-person fee) charged when the assigned day is the family's
    # choice_0, choice_1, ..., choice_9 respectively.
    preference_fees = (
        (0, 0),
        (50, 0),
        (50, 9),
        (100, 9),
        (200, 9),
        (200, 18),
        (300, 18),
        (300, 36),
        (400, 36),
        (500, 36 + 199),
    )
    # Fee when the assigned day is none of the ten listed choices.
    unlisted_flat, unlisted_per_person = 500, 36 + 398
    choice_keys = [f'choice_{rank}' for rank in range(len(preference_fees))]

    total = 0

    # People scheduled on each day, starting from zero for every known day.
    headcount = dict.fromkeys(days, 0)

    for family, assigned in enumerate(prediction):
        members = family_size_dict[family]
        wishes = [choice_dict[key][family] for key in choice_keys]

        headcount[assigned] += members

        # The best-ranked matching choice decides the fee; later duplicates of
        # the same day are never reached.
        for (flat, per_person), wish in zip(preference_fees, wishes):
            if assigned == wish:
                total += flat + per_person * members
                break
        else:
            total += unlisted_flat + unlisted_per_person * members

    # Soft occupancy constraint: a large fixed penalty per offending day
    # instead of rejecting the schedule outright.
    offending_days = [
        count for count in headcount.values()
        if count > MAX_OCCUPANCY or count < MIN_OCCUPANCY
    ]
    total += 100000000 * len(offending_days)

    # Accounting cost. days[0] has no predecessor, so its exponent is a plain
    # 0.5. max(0, ...) keeps the term non-negative because the soft
    # constraint lets a head count fall below 125.
    opening = headcount[days[0]]
    accounting = max(0, (opening - 125.0) / 400.0 * opening ** 0.5)

    # Every later entry of `days` is compared with the entry just before it.
    for previous_day, day in zip(days, days[1:]):
        crowd = headcount[day]
        swing = abs(crowd - headcount[previous_day])
        accounting += max(0, (crowd - 125.0) / 400.0 * crowd ** (0.5 + swing / 50.0))

    total += accounting

    return total,

def mutFlipBit(individual, indpb):
    """Randomly move families to one of their own listed choices, in place.

    Despite the name, no bits are flipped: each position of ``individual`` is
    selected independently with probability ``indpb``, and a selected
    family's day is replaced by one of its ten ``choice_N`` entries drawn
    uniformly at random. The drawn choice may equal the current day, in which
    case that position is left effectively unchanged.

    Parameters
    ----------
    individual : list
        ``individual[family_id]`` is the day assigned to that family. The list
        is modified in place; pass a copy to keep the original.
    indpb : float
        Per-family probability of being reassigned.

    Returns
    -------
    tuple
        A one-element tuple containing the same (mutated) ``individual``.
    """
    for family_id in range(len(individual)):
        if np.random.rand() < indpb:
            # randint's upper bound is exclusive, so key is in 0 .. 9.
            key = np.random.randint(0, 10)
            individual[family_id] = choice_dict[f'choice_{key}'][family_id]

    return individual,

In [15]:
# Load the current best schedule, keyed by family_id. save_progress writes its
# results back to this same path, so re-running this cell resumes from the
# last saved state.
submission = pd.read_csv('../data/submission.csv', index_col='family_id')

In [34]:
# Starting point of the search: the saved day assignment as a plain list,
# one entry per row of `submission`, together with its cost.
individual = submission['assigned_day'].tolist()
score = cost_function(individual)[0]

print('cs:', score)

cs: 306959.02706530236


In [24]:
def save_progress(iteration, individual):
    """Persist the given schedule and report its cost.

    Parameters
    ----------
    iteration : int
        Label printed next to the score.
    individual : list
        Day assignment to store, one entry per row of ``submission``.

    Side effects: overwrites the ``assigned_day`` column of the module-level
    ``submission`` frame, rewrites ``../data/submission.csv``, and prints one
    ``Score: ... @ ...`` line.
    """
    # Author's earlier notes, apparently past scores: 672,254 and
    # 152064 ("top 150") -- TODO confirm or remove.
    submission['assigned_day'] = individual

    current_score = cost_function(individual)[0]
    submission.to_csv('../data/submission.csv')

    print(f'Score: {current_score} @ {iteration}')

In [35]:
# Search budget: N_ROUNDS checkpoints, each after TRIALS_PER_ROUND candidate
# mutations; MUTATION_RATE is the per-family reassignment probability.
N_ROUNDS = 20
TRIALS_PER_ROUND = 1000
MUTATION_RATE = .0005

print(f'Score: {score} @ 0')

# Stochastic hill climbing: mutate a copy of the incumbent and keep it only
# when it is strictly cheaper.
for iteration in range(N_ROUNDS):

    for _ in range(TRIALS_PER_ROUND):
        new_individual = mutFlipBit(individual.copy(), indpb=MUTATION_RATE)[0]
        new_score = cost_function(new_individual)[0]

        if new_score >= score:
            continue

        score = new_score
        individual = new_individual

    # Checkpoint the incumbent to disk after every round.
    save_progress(iteration + 1, individual)

Score: 306959.02706530236 @ 0
Score: 306866.03783746687 @ 1
Score: 306775.883562263 @ 2
Score: 306521.59377441433 @ 3
Score: 306454.42343050905 @ 4
Score: 306445.35513966443 @ 5
Score: 306127.05917865556 @ 6
Score: 305973.38905464456 @ 7
Score: 305800.3476680362 @ 8
Score: 305698.51953450474 @ 9
Score: 305687.09823579766 @ 10
Score: 305605.24214374216 @ 11
Score: 305453.24574198923 @ 12
Score: 305397.23329882737 @ 13
Score: 305287.85509499774 @ 14
Score: 305249.0276203258 @ 15
Score: 305234.7745246154 @ 16
Score: 305223.56381028256 @ 17
Score: 305223.56381028256 @ 18
Score: 305209.2349250498 @ 19
Score: 305141.2314293301 @ 20
