In [245]:
%matplotlib inline
import pandas as pd
import numpy as np
import seaborn as sb

In [246]:
types = ['book', 'train', 'bike', 'coal', 'blocks', 'ball', 'gloves', 'horse', 'doll']

In [247]:
weight_gen_map = {
    'horse': lambda: max(0, np.random.normal(5,2,1)[0]),
    'ball': lambda: max(0, 1 + np.random.normal(1,0.3,1)[0]),
    'bike': lambda: max(0, np.random.normal(20,10,1)[0]),
    'train': lambda: max(0, np.random.normal(10,5,1)[0]),
    'coal': lambda: 47 * np.random.beta(0.5,0.5,1)[0],
    'book': lambda: np.random.chisquare(2,1)[0],
    'doll': lambda: np.random.gamma(5,1,1)[0],
    'blocks': lambda: np.random.triangular(5,10,20,1)[0],
    'gloves': lambda: 3.0 + np.random.rand(1)[0] if np.random.rand(1) < 0.3 else np.random.rand(1)[0]
}

In [248]:
step = 0.2
def normal_above_zero(x, mu, sigma):
    return 1 / (sigma * np.sqrt(2 * np.pi)) * np.exp(-(x - mu)**2 / (2 * sigma**2))

def normal(x, mu, sigma):
    if x > 0:
        return normal_above_zero(x, mu, sigma)
    return sum(normal_above_zero(new_x, mu, sigma) * 0.05 for new_x in np.arange(-30, step / 2, 0.05))

distribution_map = {
    'horse': lambda x: normal(x, *(5, 2)),
    'ball': lambda x: normal(x, *(2, 0.3)),
    'bike': lambda x: normal(x, *(20,10)),
    'train': lambda x: normal(x, *(10,5)),
    'coal': lambda x: ((x + (step / 2)) / 47)**(-0.5) * (1 - ((x - (step / 2)) / 47))**(-0.5) / (47 * np.pi) if x <= 47 else 0,
    'book': lambda x: np.exp(-(x / 2)) / 2,
    'doll': lambda x: (x**4) * np.exp(-x) / 25,
    'blocks': lambda x: (2 * (x - 5) / 75) if (5 < x and x <= 10) else (2 * (20 - x) / 150) if (10 < x and x < 20) else 0,
    'gloves': lambda x: 0.7 if (0 < x and x < 1) else 0.3 if (3 < x and x < 4) else 0
}

bound_map = {
    'horse': (0, 14),
    'ball': (0.5, 3.5),
    'bike': (0, 60),
    'train': (0, 30),
    'coal': (0, 50),
    'book': (0, 15),
    'doll': (0, 20),
    'blocks': (5, 20),
    'gloves': (0, 4),
}

In [249]:
weight_slices = np.arange(0, 50 + step, step)
weight_indices = list(range(251))
weight_map = {}
for key in distribution_map:
    probs = [step * distribution_map[key](weight, ) for weight in weight_slices]
    weight_map[key] = probs

In [250]:
def plot_weight(key):
    x = weight_slices
    y = weight_map[key]
    data = pd.DataFrame(np.flipud(np.rot90([x, y])), columns=['weight {}'.format(key), 'prob'])
    sb.factorplot(data=data, x='weight {}'.format(key), y='prob')

In [251]:
def get_expected(bag):
    key = '~'.join(bag)
    if key in expected_weight_map:
        return expected_weight_map[key]
    expected_weight = sum(prob * weight for prob, weight in zip(weight_map[key], weight_slices))
    expected_weight_map[key] = expected_weight
    return expected_weight

expected_weight_map = {'': 0}
for key in types:
    expected_weight_map[key] = get_expected([key])

def get_combined_distribution(bag, item):
    new_key = '~'.join(sorted(bag + [item]))
    # already caculated
    if new_key in weight_map:
        return weight_map[new_key]
    if len(bag) == 0:
        return weight_map[item]
    
    distribution = np.zeros((251,), dtype=np.float)
    d1 = weight_map['~'.join(bag)]
    d2 = weight_map[item]
    for p1, w1 in zip(d1, weight_indices):
        for p2, w2 in zip(d2, weight_indices):
            new_weight = w1 + w2
            if new_weight < 251:
                #print (p1 * p2)
                distribution[new_weight] += (p1 * p2)
    weight_map[new_key] = distribution
    return distribution

In [252]:
def add(bag, item):
    counts[item] -= 1
    if counts[item] < 0:
        print ('Out of {}'.format(item))
    else:
        bag.append(item)
        bag.sort()
        
def replace(bag, old, new):
    counts[new] -= 1
    counts[old] += 1
    if counts[new] < 0:
        print ('Out of {}'.format(item))
    else:
        bag.remove(old)
        bag.append(new)
        bag.sort()

def score():
    assert(len(bags) == 1000)
    score_sum = 0
    for bag in bags:
        weight = sum(weight_gen_map[item]() for item in bag)
        if weight < 50:
            score_sum += weight
    return score_sum

def generate_bags_and_counts():
    gifts = pd.read_csv('data/gifts.csv').values
    counts = dict((name, 0) for name in types)
    for item in gifts:
        counts[item[0].split('_')[0]] += 1
        #sum(item[0].split('_')[0] == name for item in gifts)
    bags = [[] for i in range(1000)]
    return counts, bags

In [266]:
def fill_bags():
    gifts = [gift for key in ['bike', 'train', 'blocks', 'horse', 'doll', 'ball', 'book', 'coal', 'gloves'] for gift in [key] * counts[key]]
    for item in gifts:
        for bag in bags:
            expected = get_expected(bag)
            
            #distributions = [get_combined_distribution(bag, test) for test in types]
            #new_expecteds = [get_expected(sorted(bag + [item])) for test in types]
            
            new_expected = get_expected(sorted(bag + [item]))
            if new_expected > expected# and new_expected == max(new_expecteds):
                add(bag, item)
                break
    #gifts = [gift for key in ['bike', 'train', 'blocks', 'horse', 'doll', 'ball', 'book', 'coal', 'gloves'] for gift in [key] * counts[key]]
    #for item in gifts:
    #    for bag in bags:
    #        if 

 'ball': 1100,
 'bike': 500,
 'blocks': 1000,
 'book': 1200,
 'coal': 166,
 'doll': 1000,
 'gloves': 200,
 'horse': 1000,
 'train': 1000

In [267]:
counts, bags = generate_bags_and_counts()

In [268]:
fill_bags()
print (sum(score() for i in range(20)) / 20)
max ((score() for i in range(20)))

32192.8443742


33158.585800463763

In [172]:
sum(counts_orig[key] * expected_weight_map[key] for key in types if not key in ['coal', 'gloves', 'bike'])

36111.064320427533

In [243]:
expected_weight_map['~'.join(['book', 'doll', 'doll', 'doll', 'doll', 'doll', 'doll', 'doll', 'doll'])]

27.187031943188451

In [230]:
counts

{'ball': 1100,
 'bike': 0,
 'blocks': 0,
 'book': 201,
 'coal': 166,
 'doll': 333,
 'gloves': 200,
 'horse': 0,
 'train': 0}

In [225]:
test = sorted(expected_weight_map, key=lambda key: expected_weight_map[key])
test.reverse()