In [2]:
%matplotlib inline
import pandas as pd
import numpy as np
from scipy.stats import norm
from scipy.stats import beta
from scipy.stats import chi2
from scipy.stats import gamma
from scipy.stats import triang
from pulp import LpVariable, LpInteger, LpProblem, LpMaximize, LpStatus, lpSum, value

In [3]:
# Toy categories in alphabetical order.
types = ['ball', 'bike', 'blocks', 'book', 'coal', 'doll', 'gloves', 'horse', 'train']
num_types = len(types)
# Reverse lookup: toy name -> its position in `types`.
key_to_item = {name: position for position, name in enumerate(types)}

In [4]:
def _clipped_normal(mean, spread):
    """Draw one normal sample and replace a negative draw with 0."""
    return max(0, np.random.normal(mean, spread, 1)[0])


def _gloves_weight():
    """Draw one uniform sample, shifted up by 3.0 when a first uniform draw is below 0.3."""
    if np.random.rand(1) < 0.3:
        return 3.0 + np.random.rand(1)[0]
    return np.random.rand(1)[0]


# Nine zero-argument weight samplers. gen_weight pairs them positionally with
# the counts of a bag, so the order here presumably follows `types` -- confirm
# before reordering either list.
weight_gens = [
    lambda: max(0, 1 + np.random.normal(1, 0.3, 1)[0]),
    lambda: _clipped_normal(20, 10),
    lambda: np.random.triangular(5, 10, 20, 1)[0],
    lambda: np.random.chisquare(2, 1)[0],
    lambda: 47 * np.random.beta(0.5, 0.5, 1)[0],
    lambda: np.random.gamma(5, 1, 1)[0],
    _gloves_weight,
    lambda: _clipped_normal(5, 2),
    lambda: _clipped_normal(10, 5),
]
# Number of simulated draws get_expected makes for each bag.
num_tests = 2000

In [5]:
def save_expected_weights():
    """Write the global weight maps to CSV files under ``data/``.

    Reads the module-level ``expected_weight_map`` and ``over_weight_map``.
    Each dict entry becomes one row: the elements of the key followed by the
    stored value. ``expected_weight_map`` goes to ``data/expected_weights``
    and ``over_weight_map`` to ``data/over_weights``.
    """
    def _dump(weight_map, path):
        # One row per entry: key elements first, value last.
        rows = []
        for key in weight_map:
            rows.append(list(key) + [weight_map[key]])
        pd.DataFrame(rows).to_csv(path, index=False)

    _dump(expected_weight_map, 'data/expected_weights')
    _dump(over_weight_map, 'data/over_weights')

def load_expected_weights():
    """Read the two weight maps back from the CSV files under ``data/``.

    Each row is split into a key (its first nine values, converted to ``int``
    and packed in a tuple) and a value (the tenth column).

    Returns:
        A pair ``(expected_weights, over_weights)`` of dicts built from
        ``data/expected_weights`` and ``data/over_weights`` respectively.
    """
    def _read_map(path):
        table = pd.read_csv(path)
        loaded = {}
        for row in table.values:
            bag_key = tuple(int(val) for val in row[:9])
            loaded[bag_key] = row[9]
        return loaded

    expected_weight_dict = _read_map('data/expected_weights')
    over_weight_map = _read_map('data/over_weights')
    return expected_weight_dict, over_weight_map

In [6]:
def bag_to_key(bag):
    """Return the contents of ``bag`` as a tuple, usable as a dict key."""
    key = tuple(bag)
    return key

def item_to_key(item):
    """Return a ``num_types``-long tuple with 1 at index ``item`` and 0 elsewhere."""
    return tuple(int(item == slot) for slot in range(num_types))

def item_to_full_item(item):
    """Return an integer array of length ``num_types`` with 1 at index ``item``.

    Args:
        item: index of the slot to set.

    Returns:
        A new ``numpy`` integer array that is 0 everywhere except at ``item``.
    """
    # The `np.int` alias was removed in NumPy 1.24; the builtin `int` selects
    # the same dtype and works on every NumPy version.
    item_full = np.zeros((num_types,), dtype=int)
    item_full[item] += 1
    return item_full

In [7]:
def gen_weight(bag):
    """Simulate one weight for ``bag`` and zero it out when it exceeds 50.

    ``bag`` is paired positionally with ``weight_gens``: for each position the
    matching sampler is called once per counted item and the draws are summed.

    Returns:
        The summed weight if it is at most 50, otherwise 0.
    """
    total = 0
    for sampler, copies in zip(weight_gens, bag):
        for _ in range(copies):
            total += sampler()
    if total <= 50:
        return total
    return 0

def get_expected(old_bag, item = None):
    """Estimate the expected weight of a bag by simulation and cache the result.

    Args:
        old_bag: bag to start from; it is copied, never modified.
        item: optional index whose count is incremented by one in the copy.

    Returns:
        ``(bag, None)`` when the resulting bag is already a key of the global
        ``expected_weight_map``; otherwise ``(bag, expected_weight)`` where
        ``expected_weight`` is the adjusted estimate described below.

    Side effects:
        For a bag that is not cached yet, stores the estimate in the global
        ``expected_weight_map`` and the fraction of non-zero trials in the
        global ``over_weight_map``.
    """
    bag = old_bag.copy()
    # Identity test: `!= None` would be evaluated element-wise for array-like
    # values and is discouraged by PEP 8.
    if item is not None:
        bag[item] += 1

    key = bag_to_key(bag)
    # Already calculated: report that with None instead of re-simulating.
    if key in expected_weight_map:
        return bag, None

    expected_weight = 0
    zero_trials = 0
    for _ in range(num_tests):
        weight = gen_weight(bag)
        expected_weight += weight
        if weight == 0:
            zero_trials += 1
    expected_weight /= num_tests
    # Fraction of trials whose weight was NOT zero. It is stored in
    # over_weight_map even though that name suggests the opposite.
    kept_fraction = (float(num_tests - zero_trials) / num_tests)
    if kept_fraction > 0:
        # Scale the mean back up by kept_fraction**0.8.
        expected_weight /= kept_fraction**0.8
    over_weight_map[key] = kept_fraction
    expected_weight_map[key] = expected_weight
    return bag, expected_weight

# TODOS
- actually submit

In [8]:
# use this to populate map with combinations
# TODO make this faster and lower trial area
def hydrate_map(iterations):
    """Fill the global weight maps by growing bags one item at a time.

    Starts from the empty bag. In each iteration every bag of the previous
    layer is extended by one item of each type through ``get_expected``. An
    extended bag enters the next layer only if it was not cached before
    (``get_expected`` returned an estimate rather than ``None``) and its
    estimate is larger than that of the bag it was grown from.

    Args:
        iterations: number of layers to expand.

    Prints the iteration number and the layer size at the start of each
    iteration. Returns nothing; the results are whatever ``get_expected``
    stored in the global maps.
    """
    # Derive the empty bag from num_types instead of hard-coding nine zeros.
    last_bags = [(np.zeros(num_types, dtype=int), 0)]
    for i in range(iterations):
        print ('Iteration {0} Layer Size {1}'.format(i, len(last_bags)))
        new_bags = []
        for bag, expected in last_bags:
            for j in range(num_types):
                new_bag, new_expected = get_expected(bag, j)
                # `is not None` avoids comparing a NumPy scalar with None.
                if new_expected is not None and new_expected > expected:
                    new_bags.append((new_bag, new_expected))
        last_bags = new_bags

In [9]:
def solve_best_bags(useable_bags):
    """Pick how many copies of each candidate bag to pack, via an integer program.

    Args:
        useable_bags: sequence of ``(contents, score)`` pairs, where
            ``contents[i]`` is the number of items of type ``i`` in the bag
            and ``score`` is the value the bag adds to the objective.

    Returns:
        A list of ``(contents, count)`` pairs for every candidate the solver
        uses at least once, in the order of ``prob.variables()``.

    Prints the solver status and the objective value.
    """
    # Available items per type, indexed like the bag contents.
    counts = [1100, 500, 1000, 1200, 166, 1000, 200, 1000, 1000]
    max_bags = 1000

    bag_keys = LpVariable.dicts("bag", [str(i) for i in range(len(useable_bags))], 0, None, LpInteger)
    bag_names = [bag_keys[str(i)] for i in range(len(useable_bags))]
    # PuLP warns about spaces in a problem name and converts them to "_", so
    # use underscores directly.
    prob = LpProblem("The_Santa_Uncertain_Bags_Problem", LpMaximize)

    # Objective: total score of the chosen bags.
    prob += lpSum([bag[1] * bag_name for bag, bag_name in zip(useable_bags, bag_names)]), "objective"

    # Do not use more items of any type than are available.
    for i, count in enumerate(counts):
        prob += lpSum([bag[0][i] * bag_name for bag, bag_name in zip(useable_bags, bag_names)]) <= count, ""

    # Do not use more bags than allowed.
    prob += lpSum(bag_names) <= max_bags, ""

    prob.solve()
    print ("Status:", LpStatus[prob.status])
    print ("Score:", value(prob.objective))

    chosen = []
    for var in prob.variables():
        # A variable the solver left unset has no count to report.
        if var.varValue is None:
            continue
        # Solvers return floats, so round rather than truncate: int(2.9999999)
        # would silently drop a bag.
        copies = int(round(var.varValue))
        if copies != 0:
            # Variable names look like "bag_<index>"; recover the index.
            bag_index = int(var.name.split('_')[1])
            chosen.append((useable_bags[bag_index][0], copies))
    return chosen

In [10]:
def score_bag(bag_counts):
    """Simulate the total weight of a full packing.

    Args:
        bag_counts: ``(bag, count)`` pairs; the counts must add up to 1000.

    Returns:
        The sum of one ``gen_weight`` draw per packed bag.

    Raises:
        AssertionError: if the counts do not add up to 1000.
    """
    total_bags = sum(count for bag, count in bag_counts)
    assert (total_bags == 1000)
    return sum(gen_weight(bag) for bag, count in bag_counts for _ in range(count))

In [None]:
%%time
expected_weight_map = {tuple([0] * num_types): 0}
over_weight_map = {tuple([0] * num_types): 0}
hydrate_map(35)
print (len(expected_weight_map))

In [23]:
# Replace both in-memory maps with the ones read from the CSV files under
# data/ (see load_expected_weights); anything computed earlier in this
# session and not saved is discarded.
expected_weight_map, over_weight_map = load_expected_weights()

In [27]:
# Sweep the penalty exponent from 0.5 down to 0 in steps of 0.05. The original
# list repeated 0.5 where 0.05 belongs, so 0.05 was never evaluated.
for penalty in [0.5, 0.45, 0.4, 0.35, 0.3, 0.25, 0.2, 0.15, 0.1, 0.05, 0]:
    print ('Penalty: {}'.format(penalty))
    # Candidate bags: more than two items, scored by the cached estimate
    # divided by (stored fraction ** penalty); a stored fraction of 0 is
    # replaced by 1 so the division is always defined.
    useable_bags = []
    for key in expected_weight_map:
        if sum(key) > 2:
            fraction = over_weight_map[key] if over_weight_map[key] > 0 else 1
            useable_bags.append([np.array(key), expected_weight_map[key] / fraction**penalty])
    bag_counts = solve_best_bags(useable_bags)
    # 30 repetitions of "best of 70 simulated scores", printed in ascending order.
    print (sorted([max(score_bag(bag_counts) for i in range(70)) for j in range(30)]))
    print ('')

Penalty: 0.5
Status: Optimal
Score: 36959.83809802585
[35031.888250628668, 35102.46301605129, 35111.094167529947, 35125.903381395576, 35141.158085450974, 35170.750478976363, 35191.555552458776, 35250.832837975249, 35261.268894565954, 35271.700980369962, 35283.919605456977, 35306.739120375911, 35308.121018650476, 35322.246084689723, 35352.269277349042, 35363.992774757615, 35372.322195247703, 35376.24712865043, 35378.405761047296, 35381.260080222884, 35383.829387248523, 35395.702679864589, 35429.943138346127, 35434.935363075099, 35435.231537773172, 35447.104762900941, 35514.957281125113, 35522.658048799662, 35569.336521125915, 35604.779621683687]

Penalty: 0.45
Status: Optimal
Score: 36765.92418635625
[35447.672943693833, 35459.489298605731, 35465.933451131357, 35549.338188089547, 35554.466569323202, 35580.337392462294, 35619.944041738119, 35629.480080230009, 35643.444310338826, 35682.062034785667, 35683.11187541343, 35690.162853686372, 35690.617367089166, 35702.01249809139, 35711.908078