In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
from scipy.stats import norm
from scipy.stats import beta
from scipy.stats import chi2
from scipy.stats import gamma
from scipy.stats import triang
from pulp import LpVariable, LpInteger, LpProblem, LpMaximize, LpStatus, lpSum, value

In [2]:
# Item type names; a type's position in this list is its index.
types = [
    'ball', 'bike', 'blocks', 'book', 'coal',
    'doll', 'gloves', 'horse', 'train',
]
num_types = len(types)
# Reverse lookup: type name -> position in `types`.
key_to_item = {name: position for position, name in enumerate(types)}

In [3]:
def _clamped(draw):
    """Wrap the sampler ``draw`` so that negative samples are replaced by 0."""
    return lambda: max(0, draw())


def _two_mode_uniform():
    """Return 3.0 plus a uniform [0, 1) draw when a first uniform draw is below 0.3, else a plain uniform draw."""
    if np.random.rand(1) < 0.3:
        return 3.0 + np.random.rand(1)[0]
    return np.random.rand(1)[0]


# One zero-argument weight sampler per item index; gen_weight pairs them
# positionally with a bag's per-item counts.
# NOTE(review): presumably listed in the same order as `types` — confirm.
weight_gens = [
    _clamped(lambda: 1 + np.random.normal(1, 0.3, 1)[0]),
    _clamped(lambda: np.random.normal(20, 10, 1)[0]),
    lambda: np.random.triangular(5, 10, 20, 1)[0],
    lambda: np.random.chisquare(2, 1)[0],
    lambda: 47 * np.random.beta(0.5, 0.5, 1)[0],
    lambda: np.random.gamma(5, 1, 1)[0],
    _two_mode_uniform,
    _clamped(lambda: np.random.normal(5, 2, 1)[0]),
    _clamped(lambda: np.random.normal(10, 5, 1)[0]),
]
# Number of simulated draws get_expected performs per bag.
num_tests = 2000

In [14]:
def save_expected_weights():
    """Write the global ``expected_weight_map`` and ``over_weight_map`` to CSV.

    Each map becomes one table whose rows are the elements of the key tuple
    followed by the stored value. The tables are written without the
    DataFrame index to ``data/expected_weights`` and ``data/over_weights``.
    """
    def _dump(weight_map, path):
        # One row per entry: the key's elements, then the value as the last column.
        rows = [list(bag_key) + [stored] for bag_key, stored in weight_map.items()]
        pd.DataFrame(rows).to_csv(path, index=False)

    _dump(expected_weight_map, 'data/expected_weights')
    _dump(over_weight_map, 'data/over_weights')

def load_expected_weights():
    """Load the two weight maps written by ``save_expected_weights``.

    Reads ``data/expected_weights`` and ``data/over_weights`` with
    ``pd.read_csv``. In every row, all columns but the last are cast to int
    and form the key tuple; the last column is the stored value.

    Returns:
        tuple: ``(expected_weight_dict, over_weight_map)``, each a dict
        mapping a tuple of ints to the value stored for that key.
    """
    def _read_map(path):
        table = pd.read_csv(path)
        # Split on the last column instead of a fixed index 9 so the loader
        # matches the "key elements + value" rows for any number of item types.
        return dict((tuple(int(val) for val in row[:-1]), row[-1]) for row in table.values)

    expected_weight_dict = _read_map('data/expected_weights')
    over_weight_map = _read_map('data/over_weights')
    return expected_weight_dict, over_weight_map

In [5]:
def bag_to_key(bag):
    """Return the elements of ``bag`` as a tuple, usable as a dictionary key."""
    key = tuple(bag)
    return key

def item_to_key(item):
    """Return a tuple of length ``num_types`` holding 1 at position ``item`` and 0 elsewhere.

    If ``item`` equals none of the positions the tuple is all zeros.
    """
    flags = []
    for position in range(num_types):
        flags.append(1 if item == position else 0)
    return tuple(flags)

def item_to_full_item(item):
    """Return an integer array of length ``num_types`` with a 1 at index ``item``.

    Args:
        item: index of the item type to mark.

    Returns:
        numpy.ndarray: zeros everywhere except a single 1 at ``item``.
    """
    # Use the builtin int: the `np.int` alias was removed from NumPy and
    # raises AttributeError on current releases.
    item_full = np.zeros((num_types,), dtype=int)
    item_full[item] += 1
    return item_full

In [6]:
def gen_weight(bag):
    """Draw one random total weight for ``bag``.

    ``bag[k]`` is the number of draws taken from ``weight_gens[k]``; all
    draws are summed. A total above 50 is reported as 0.
    """
    def draws():
        # Samplers are visited in list order, each called once per item it contributes.
        for sampler, quantity in zip(weight_gens, bag):
            for _ in range(quantity):
                yield sampler()

    total = sum(draws())
    if total <= 50:
        return total
    return 0

def get_expected(old_bag, item = None):
    """Simulate a bag and cache its adjusted expected weight.

    Args:
        old_bag: bag contents; it is copied, never modified.
        item: optional index whose count is incremented in the copy.

    Returns:
        tuple: ``(bag, expected_weight)`` for a newly simulated bag, or
        ``(bag, None)`` when the bag's key is already in
        ``expected_weight_map``.

    Side effects:
        Stores the adjusted expected weight in ``expected_weight_map`` and
        the fraction of trials that did not score 0 in ``over_weight_map``.
    """
    bag = old_bag.copy()
    # Identity test against None, so item index 0 is never confused with "no item".
    if item is not None:
        bag[item] += 1

    key = bag_to_key(bag)
    # already calculated
    if key in expected_weight_map:
        return bag, None

    expected_weight = 0
    over_weights = 0
    for i in range(num_tests):
        weight = gen_weight(bag)
        expected_weight += weight
        # gen_weight reports an over-limit bag as 0, so a 0 counts as "over".
        if weight == 0:
            over_weights += 1
    expected_weight /= num_tests
    # Fraction of trials that were NOT over; this is the value kept in over_weight_map.
    under_weight_fraction = (float(num_tests - over_weights) / num_tests)
    # Inflate the mean by 1 / fraction**0.8; skipped when every trial was over (avoids dividing by zero).
    if under_weight_fraction > 0:
        expected_weight /= under_weight_fraction**0.8
    over_weight_map[key] = under_weight_fraction
    expected_weight_map[key] = expected_weight
    return bag, expected_weight

# TODOS
- try increasing variance by inflating the expected value when a bag has a high chance of going over the weight limit (multiply expected by (1/chance_under)^0.5)
- iteratively try different subset swaps
- actually submit

In [7]:
# use this to populate map with combinations
# TODO make this faster and lower trial area
def hydrate_map(iterations):
    """Populate the expected-weight cache by growing bags one item at a time.

    Starts from the empty bag. In each iteration every bag of the current
    layer is extended by one item of each type via ``get_expected``; an
    extension joins the next layer only if it was newly simulated and its
    expected value exceeds that of the bag it was grown from.

    Args:
        iterations: number of layers to expand.
    """
    # Size the empty start bag from num_types rather than a literal list of nine zeros.
    last_bags = [(np.zeros(num_types, dtype=int), 0)]
    for i in range(iterations):
        print ('Iteration {0} Layer Size {1}'.format(i, len(last_bags)))
        new_bags = []
        for bag, expected in last_bags:
            for j in range(num_types):
                new_bag, new_expected = get_expected(bag, j)
                # None means the bag was already in the cache; only improvements are expanded further.
                if new_expected is not None and new_expected > expected:
                    new_bags.append((new_bag, new_expected))
        last_bags = new_bags

In [8]:
def solve_best_bags(useable_bags):
    """Choose how many copies of each candidate bag to use with an integer program.

    Maximizes the sum of ``bag[1]`` times the number of copies of that bag,
    subject to the hard-coded per-item limits in ``counts`` and to at most
    1000 bags overall. Prints the solver status and the objective value.

    Args:
        useable_bags: sequence of ``[contents, value]`` pairs, where
            ``contents[i]`` is the number of items of type ``i`` in the bag.

    Returns:
        list: ``(contents, copies)`` tuples for every bag used at least
        once, in the order of ``prob.variables()``.
    """
    counts = [1100, 500, 1000, 1200, 166, 1000, 200, 1000, 1000]
    bag_keys = LpVariable.dicts("bag", [str(i) for i in range(len(useable_bags))], 0, None, LpInteger)
    bag_names = [bag_keys[str(i)] for i in range(len(useable_bags))]
    prob = LpProblem("The Santa Uncertain Bags Problem", LpMaximize)

    # Add bag expected values
    prob += lpSum([bag[1] * bag_name for bag, bag_name in zip(useable_bags, bag_names)]), "objective"

    # Add item max constraints
    for i, count in enumerate(counts):
        prob += lpSum([bag[0][i] * bag_name for bag, bag_name in zip(useable_bags, bag_names)]) <= count, ""

    # Add bag maximum constraint
    prob += lpSum(bag_names) <= 1000, ""

    prob.solve()
    print ("Status:", LpStatus[prob.status])
    print ("Score:", value(prob.objective))

    chosen = []
    for var in prob.variables():
        # Skip variables the solver left unset (None) or at zero.
        if not var.varValue:
            continue
        # Solver values are floats; round rather than truncate so a value
        # such as 2.9999999 is counted as 3 copies, not 2.
        copies = int(round(var.varValue))
        if copies:
            # Variable names have the form "bag_<index>"; recover the index into useable_bags.
            chosen.append((useable_bags[int(var.name.split('_')[1])][0], copies))
    return chosen

In [9]:
def score_bag(bag_counts):
    """Simulate one total score for a selection of bags.

    Args:
        bag_counts: sequence of ``(bag, count)`` pairs; the counts must add up to 1000.

    Returns:
        The sum of one ``gen_weight`` draw per bag copy.
    """
    total_copies = sum(quantity for _, quantity in bag_counts)
    assert total_copies == 1000

    def every_copy():
        # Yield each bag once per requested copy, in the order given.
        for contents, quantity in bag_counts:
            for _ in range(quantity):
                yield contents

    running_total = 0
    for contents in every_copy():
        running_total += gen_weight(contents)
    return running_total

In [10]:
%%time
# Seed both caches with the all-zero (empty) bag.
expected_weight_map = {(0,) * num_types: 0}
over_weight_map = {(0,) * num_types: 0}
hydrate_map(35)
print (len(expected_weight_map))
# Candidate bags for the solver: only keys holding more than two items in total.
useable_bags = [
    [np.array(bag_key), bag_value]
    for bag_key, bag_value in expected_weight_map.items()
    if sum(bag_key) > 2
]

Iteration 0 Layer Size 1
Iteration 1 Layer Size 9
Iteration 2 Layer Size 45
Iteration 3 Layer Size 148
Iteration 4 Layer Size 412
Iteration 5 Layer Size 937
Iteration 6 Layer Size 1735
Iteration 7 Layer Size 2800
Iteration 8 Layer Size 4022
Iteration 9 Layer Size 5145
Iteration 10 Layer Size 6231
Iteration 11 Layer Size 7191
Iteration 12 Layer Size 7730
Iteration 13 Layer Size 8013
Iteration 14 Layer Size 7870
Iteration 15 Layer Size 7711
Iteration 16 Layer Size 7227
Iteration 17 Layer Size 6532
Iteration 18 Layer Size 5765
Iteration 19 Layer Size 4786
Iteration 20 Layer Size 3938
Iteration 21 Layer Size 3140
Iteration 22 Layer Size 2402
Iteration 23 Layer Size 1770
Iteration 24 Layer Size 1217
Iteration 25 Layer Size 822
Iteration 26 Layer Size 510
Iteration 27 Layer Size 331
Iteration 28 Layer Size 189
Iteration 29 Layer Size 79
Iteration 30 Layer Size 50
Iteration 31 Layer Size 21
Iteration 32 Layer Size 9
Iteration 33 Layer Size 3
Iteration 34 Layer Size 1
369388
Status: Optimal
Sc

NameError: name 'scores_1' is not defined

In [15]:
# Write the current expected_weight_map and over_weight_map to the CSV files under data/.
save_expected_weights()

In [16]:
# Read the saved maps back from data/; the results are bound to `expected` and `over`.
expected, over = load_expected_weights()

In [None]:
# Solve for how many copies of each candidate bag to use.
bag_counts = solve_best_bags(useable_bags)

In [None]:
# Draw one simulated total score for the chosen bags (random, so it varies between runs).
score_bag(bag_counts)

In [None]:
# Write the current expected_weight_map and over_weight_map to the CSV files under data/ again.
save_expected_weights()

In [None]:
%%time
def get_expected(old_bag, item = None):
    """Simulate a bag and cache its adjusted expected weight (exponent 0.3 variant).

    Args:
        old_bag: bag contents; it is copied, never modified.
        item: optional index whose count is incremented in the copy.

    Returns:
        tuple: ``(bag, expected_weight)`` for a newly simulated bag, or
        ``(bag, None)`` when the bag's key is already in
        ``expected_weight_map``.

    Side effects:
        Stores the adjusted expected weight in ``expected_weight_map``.
    """
    bag = old_bag.copy()
    # Identity test against None, so item index 0 is never confused with "no item".
    if item is not None:
        bag[item] += 1

    key = bag_to_key(bag)
    # already calculated
    if key in expected_weight_map:
        return bag, None

    expected_weight = 0
    over_weights = 0
    for i in range(num_tests):
        weight = gen_weight(bag)
        expected_weight += weight
        # gen_weight reports an over-limit bag as 0, so a 0 counts as "over".
        if weight == 0:
            over_weights += 1
    expected_weight /= num_tests
    under_weight_fraction = float(num_tests - over_weights) / num_tests
    # Guard the division: when every trial is over, the fraction is 0 and
    # dividing by 0.0**0.3 would raise ZeroDivisionError.
    if under_weight_fraction > 0:
        expected_weight /= under_weight_fraction**0.3
    expected_weight_map[key] = expected_weight
    return bag, expected_weight

# Rebuild the cache from scratch using the get_expected variant defined in this cell.
expected_weight_map = {tuple([0] * num_types): 0}
hydrate_map(50)
print (len(expected_weight_map))

# Candidate bags for the solver: only keys holding more than two items in total.
useable_bags = [[np.array(key), expected_weight_map[key]] for key in expected_weight_map if sum(key) > 2]
bag_counts = solve_best_bags(useable_bags)
# scores_2 is not created anywhere else, so appending to it raised NameError;
# start a fresh list so the cell runs on a clean kernel.
scores_2 = []
for i in range(500):
    scores_2.append(score_bag(bag_counts))

In [None]:
%%time
def get_expected(old_bag, item = None):
    """Simulate a bag and cache its plain mean simulated weight (no adjustment).

    Args:
        old_bag: bag contents; it is copied, never modified.
        item: optional index whose count is incremented in the copy.

    Returns:
        tuple: ``(bag, expected_weight)`` for a newly simulated bag, or
        ``(bag, None)`` when the bag's key is already in
        ``expected_weight_map``.

    Side effects:
        Stores the mean in ``expected_weight_map``.
    """
    bag = old_bag.copy()
    # Identity test against None, so item index 0 is never confused with "no item".
    if item is not None:
        bag[item] += 1

    key = bag_to_key(bag)
    # already calculated
    if key in expected_weight_map:
        return bag, None

    # Mean of num_tests independent gen_weight draws.
    expected_weight = sum((gen_weight(bag)) for i in range(num_tests)) / num_tests
    expected_weight_map[key] = expected_weight
    return bag, expected_weight


# Rebuild the cache from scratch using the get_expected variant defined in this cell.
expected_weight_map = {tuple([0] * num_types): 0}
hydrate_map(50)
print (len(expected_weight_map))

# Candidate bags for the solver: only keys holding more than two items in total.
useable_bags = [[np.array(key), expected_weight_map[key]] for key in expected_weight_map if sum(key) > 2]
bag_counts = solve_best_bags(useable_bags)
# scores_3 is not created anywhere else, so appending to it raised NameError;
# start a fresh list so the cell runs on a clean kernel.
scores_3 = []
for i in range(500):
    scores_3.append(score_bag(bag_counts))

In [None]:
# Per-item penalty factors, one per item index, used only to rank the candidate bags.
usefulness = [0.8, 0, 2.3, 2.0, 0, 1.05, 0.5, 1.2, 0.8]
# Candidate bags: only keys holding more than two items in total.
useable_bags = [
    [np.array(bag_key), bag_value]
    for bag_key, bag_value in expected_weight_map.items()
    if sum(bag_key) > 2
]
# Rank by the bag's value minus the usefulness-weighted item counts, then
# flip the ascending order so the highest-ranked bags come first.
sorted_normalized = sorted(
    useable_bags,
    key=lambda row: row[1] - sum(float(item) * factor for item, factor in zip(row[0], usefulness)),
)[::-1]

len(sorted_normalized)

# Hand only the first 60000 ranked candidates to the solver.
bag_counts = solve_best_bags(sorted_normalized[:60000])

# Print 100 independent simulated scores for the solution.
for i in range(100):
    print (score_bag(bag_counts))