In [55]:
%matplotlib inline
import pandas as pd
import numpy as np
import seaborn as sb

In [56]:
types = ['ball', 'bike', 'blocks', 'book', 'doll', 'horse', 'train']
key_to_item = dict((key, index) for key, index in zip(types, range(len(types))))

In [57]:
weight_gen_map = {
    'horse': lambda: max(0, np.random.normal(5,2,1)[0]),
    'ball': lambda: max(0, 1 + np.random.normal(1,0.3,1)[0]),
    'bike': lambda: max(0, np.random.normal(20,10,1)[0]),
    'train': lambda: max(0, np.random.normal(10,5,1)[0]),
    'coal': lambda: 47 * np.random.beta(0.5,0.5,1)[0],
    'book': lambda: np.random.chisquare(2,1)[0],
    'doll': lambda: np.random.gamma(5,1,1)[0],
    'blocks': lambda: np.random.triangular(5,10,20,1)[0],
    'gloves': lambda: 3.0 + np.random.rand(1)[0] if np.random.rand(1) < 0.3 else np.random.rand(1)[0]
}

In [58]:
step = 0.2
def normal_above_zero(x, mu, sigma):
    return 1 / (sigma * np.sqrt(2 * np.pi)) * np.exp(-(x - mu)**2 / (2 * sigma**2))

def normal(x, mu, sigma):
    if x > 0:
        return normal_above_zero(x, mu, sigma)
    return sum(normal_above_zero(new_x, mu, sigma) * 0.05 for new_x in np.arange(-30, step / 2, 0.05))

distribution_map = {
    'horse': lambda x: normal(x, *(5, 2)),
    'ball': lambda x: normal(x, *(2, 0.3)),
    'bike': lambda x: normal(x, *(20,10)),
    'train': lambda x: normal(x, *(10,5)),
    'coal': lambda x: ((x + (step / 2)) / 47)**(-0.5) * (1 - ((x - (step / 2)) / 47))**(-0.5) / (47 * np.pi) if x <= 47 else 0,
    'book': lambda x: np.exp(-(x / 2)) / 2,
    'doll': lambda x: (x**4) * np.exp(-x) / 25,
    'blocks': lambda x: (2 * (x - 5) / 75) if (5 < x and x <= 10) else (2 * (20 - x) / 150) if (10 < x and x < 20) else 0,
    'gloves': lambda x: 0.7 if (0 < x and x < 1) else 0.3 if (3 < x and x < 4) else 0
}

bound_map = {
    'horse': (0, 14),
    'ball': (0.5, 3.5),
    'bike': (0, 60),
    'train': (0, 30),
    'coal': (0, 50),
    'book': (0, 15),
    'doll': (0, 20),
    'blocks': (5, 20),
    'gloves': (0, 4),
}

In [59]:
def bag_to_key(bag):
    return tuple(bag)

def item_to_key(item):
    return bag_to_key(item_to_full_item(item))

def item_to_full_item(item):
    item_full = np.zeros((7,), dtype=np.int)
    item_full[item] += 1
    return item_full

In [60]:
weight_slices = np.arange(0, 50 + step, step)
weight_indices = list(range(251))
weight_map = {}
for key in types:
    probs = [step * distribution_map[key](weight) for weight in weight_slices]
    weight_map[item_to_key(key_to_item[key])] = probs

In [61]:
def get_expected(old_bag, item=-1):
    if item != -1:
        bag = old_bag.copy()
        bag[item] += 1
    else:
        bag = old_bag

    key = bag_to_key(bag)
    if key in expected_weight_map:
        return expected_weight_map[key]
    expected_weight = sum(prob * weight for prob, weight in zip(weight_map[key], weight_slices))
    expected_weight_map[key] = expected_weight
    return expected_weight

expected_weight_map = {(0,0,0,0,0,0,0): 0}
for key in types:
    expected_weight_map[item_to_key(key_to_item[key])] = get_expected(item_to_full_item(key_to_item[key]))

def get_combined_distribution(old_bag, item):
    bag = old_bag.copy()
    bag[item] += 1
    key = bag_to_key(bag)
    # already caculated
    if key in weight_map:
        return weight_map[key]
    
    distribution = np.zeros((251,), dtype=np.float)
    d1 = weight_map[bag_to_key(old_bag)]
    d2 = weight_map[item_to_key(item)]
    for p1, w1 in zip(d1, weight_indices):
        for p2, w2 in zip(d2, weight_indices):
            new_weight = w1 + w2
            if new_weight < 251:
                distribution[new_weight] += (p1 * p2)
    weight_map[key] = distribution
    return distribution

In [62]:
def add(bag, item):
    counts[item] -= 1
    if counts[item] < 0:
        print ('Out of {}'.format(types[item]))
    else:
        bag[item] += 1
        
def replace(bag, old, new):
    counts[new] -= 1
    counts[old] += 1
    if counts[new] < 0:
        print ('Out of {}'.format(types[new]))
    else:
        bag[old] -= 1
        bag[new] += 1

def score():
    assert(len(bags) == num_bags)
    score_sum = 0
    for bag in bags:
        weight = sum(weight_gen_map[item_type]() for item_type, item_num in zip(types, bag) for i in range(item_num))
        if weight > 50:
            continue
        score_sum += weight
    return score_sum

num_bags = 1000
def generate_bags_and_counts():
    gifts = pd.read_csv('data/gifts.csv').values
    counts = dict((name, 0) for name in types)
    for item in gifts:
        key = item[0].split('_')[0]
        if key in counts:
            counts[key] += 1
    counts = [counts[key] for key in types]
    bags = np.zeros((num_bags, 7), dtype=np.int)
    return counts, bags

In [63]:
def fill_bags():
    gifts = [gift for count, item in zip(counts, range(len(counts))) for gift in [item] * count] # , 'coal', 'gloves'
    np.random.shuffle(gifts)
    for item in gifts:
        for bag in bags:
            expected = get_expected(bag)
            
            #distributions = [get_combined_distribution(bag, test) for test in types]
            #new_expecteds = [get_expected(sorted(bag + [item])) for test in types]
            
            distribution = get_combined_distribution(bag, item)
            new_expected = get_expected(bag, item)
            
            if new_expected > expected: # and new_expected == max(new_expecteds):
                add(bag, item)
                break
    #gifts = [gift for key in ['bike', 'train', 'blocks', 'horse', 'doll', 'ball', 'book', 'coal', 'gloves'] for gift in [key] * counts[key]]
    #for item in gifts:
    #    for bag in bags:
    #        if 

In [64]:
counts, bags = generate_bags_and_counts()

In [None]:
%%time
fill_bags()
print (sum(score() for i in range(20)) / 20)
print (max ((score() for i in range(20))))

In [None]:
counts

In [None]:
test = sorted(expected_weight_map, key=lambda key: expected_weight_map[key])
test.reverse()

In [52]:
[(a, expected_weight_map[a]) for a in test][:20]

[((4, 0, 2, 3, 0, 1, 0), 42.717538896855388),
 ((4, 0, 2, 4, 0, 1, 0), 42.615048572202419),
 ((4, 0, 2, 2, 0, 1, 0), 41.449969203997938),
 ((5, 0, 2, 3, 0, 1, 0), 41.169070814299005),
 ((3, 0, 2, 2, 0, 1, 0), 40.58765378965586),
 ((2, 0, 2, 2, 0, 1, 0), 39.093197929239501),
 ((3, 0, 3, 1, 0, 0, 0), 38.839653830466233),
 ((0, 0, 3, 1, 0, 1, 0), 38.548890723477584),
 ((0, 0, 3, 2, 0, 1, 0), 38.402823756167884),
 ((1, 0, 2, 1, 0, 3, 0), 38.308222098752559),
 ((3, 0, 2, 2, 1, 1, 0), 38.257088252720948),
 ((3, 0, 3, 0, 0, 0, 0), 38.243082767398185),
 ((3, 0, 3, 2, 0, 0, 0), 38.05799008366521),
 ((1, 0, 2, 2, 0, 3, 0), 37.903332476622261),
 ((2, 0, 1, 3, 1, 1, 1), 37.872021428581931),
 ((2, 0, 1, 4, 1, 1, 1), 37.834333989157621),
 ((2, 0, 3, 0, 0, 0, 0), 37.754005925980515),
 ((4, 0, 3, 0, 0, 0, 0), 37.6041546564567),
 ((0, 0, 3, 0, 0, 1, 0), 37.479592037585974),
 ((1, 0, 2, 0, 0, 3, 0), 37.41292108678838)]

In [50]:
[(a, expected_weight_map[a]) for a in test if a[0] > 0][:10]

[((4, 0, 2, 3, 0, 1, 0), 42.717538896855388),
 ((4, 0, 2, 4, 0, 1, 0), 42.615048572202419),
 ((4, 0, 2, 2, 0, 1, 0), 41.449969203997938),
 ((5, 0, 2, 3, 0, 1, 0), 41.169070814299005),
 ((3, 0, 2, 2, 0, 1, 0), 40.58765378965586),
 ((2, 0, 2, 2, 0, 1, 0), 39.093197929239501),
 ((3, 0, 3, 1, 0, 0, 0), 38.839653830466233),
 ((1, 0, 2, 1, 0, 3, 0), 38.308222098752559),
 ((3, 0, 2, 2, 1, 1, 0), 38.257088252720948),
 ((3, 0, 3, 0, 0, 0, 0), 38.243082767398185)]

In [None]:
# ball, bike, block, book, doll, horse, train
a = (np.array([1, 0, 2, 2, 1, 2, 0]), 36.443725537916428)
b = (np.array([3, 0, 3, 1, 0, 0, 0]), 38.839653830466233)
c = (np.array([2, 0, 0, 4, 1, 1, 2]), 36.752024286774869)

In [None]:
def accumulate(rows):
    counts = np.array([0, 0, 0, 0, 0, 0, 0])
    score = 0
    for row in rows:
        counts += row[0]
        score += row[1]
    print (counts)
    print (score)