In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
from scipy.stats import norm
from scipy.stats import beta
from scipy.stats import chi2
from scipy.stats import gamma
from scipy.stats import triang

In [2]:
types = ['ball', 'bike', 'blocks', 'book', 'coal', 'doll', 'gloves', 'horse', 'train']
num_types = len(types)
key_to_item = dict((key, index) for key, index in zip(types, range(len(types))))
step = 0.1

In [23]:
def normal(x, mu, sigma):
    if x > 0:
        return norm.pdf(x, mu, sigma)
    return norm.cdf(0, mu, sigma)

weight_gen_map = {
    'horse': lambda: max(0, np.random.normal(5,2,1)[0]),
    'ball': lambda: max(0, 1 + np.random.normal(1,0.3,1)[0]),
    'bike': lambda: max(0, np.random.normal(20,10,1)[0]),
    'train': lambda: max(0, np.random.normal(10,5,1)[0]),
    'coal': lambda: 47 * np.random.beta(0.5,0.5,1)[0],
    'book': lambda: np.random.chisquare(2,1)[0],
    'doll': lambda: np.random.gamma(5,1,1)[0],
    'blocks': lambda: np.random.triangular(5,10,20,1)[0],
    'gloves': lambda: 3.0 + np.random.rand(1)[0] if np.random.rand(1) < 0.3 else np.random.rand(1)[0]
}

distribution_map = {
    'horse': lambda x: normal(x, *(5, 2)),
    'ball': lambda x: normal(x - 1, *(2, 0.3)),
    'bike': lambda x: normal(x, *(20,10)),
    'train': lambda x: normal(x, *(10,5)),
    'coal': lambda x: beta.pdf(x / 47 , 0.5, 0.5) / 47,
    'book': lambda x: chi2.pdf(x, 2),
    'doll': lambda x: (x**4) * np.exp(-x) / 25,
    'blocks': lambda x: triang.pdf(x, c = 1.0/3, loc = 5, scale = 15),
    'gloves': lambda x: 0.7 if (0 < x and x < 1) else 0.3 if (3 < x and x < 4) else 0
}

In [32]:
def save_expected_weights():
    expected_weight_list = [list(key) + [expected_weight_map[key]] for key in expected_weight_map]
    pd.DataFrame(expected_weight_list).to_csv('data/expected_weights', index=False)

def load_expected_weights():
    expected_weight_list = pd.read_csv('data/expected_weights')
    return dict([(tuple(int(val) for val in row[:9]), row[9]) for row in expected_weight_list.values])

In [5]:
def bag_to_key(bag):
    return tuple(bag)

def item_to_key(item):
    return tuple(1 if item == i else 0 for i in range(num_types))

def item_to_full_item(item):
    item_full = np.zeros((num_types,), dtype=np.int)
    item_full[item] += 1
    return item_full

In [6]:
def get_expected(bag):
    key = bag_to_key(bag)
    if key in expected_weight_map:
        return expected_weight_map[key]
    expected_weight = sum(prob * weight for prob, weight in zip(weight_map[key], weight_slices))
    expected_weight_map[key] = expected_weight
    return expected_weight

variance_reduce = 0.3
def get_expected_high_variance(bag):
    key = bag_to_key(bag)
    if key in expected_weight_map:
        return expected_weight_map[key]
    expected_weight = sum(prob * weight for prob, weight in zip(weight_map[key], weight_slices))
    chance_under = sum(weight_map[key])
    expected_weight /= (chance_under ** variance_reduce)
    expected_weight_map[key] = expected_weight
    return expected_weight

def get_chance_too_full(bag):
    key = bag_to_key(bag)
    if key in chance_too_full_map:
        return chance_too_full_map[key]
    chance_too_full = sum(weight_map[key])
    chance_too_full_map[key] = chance_too_full
    return 1 - chance_too_full

def calculate_combined_distribution(old_bag, item):
    bag = old_bag.copy()
    bag[item] += 1
    key = bag_to_key(bag)
    # already caculated
    if key in weight_map:
        return None
    
    distribution = np.zeros((500,), dtype=np.float)
    d1 = weight_map[bag_to_key(old_bag)]
    d2 = weight_array_singles[item]
    for p1, w1 in zip(d1, weight_indices):
        for p2, w2 in zip(d2, weight_indices):
            new_weight = w1 + w2
            if new_weight < 500:
                distribution[new_weight] += (p1 * p2)
    weight_map[key] = distribution
    return bag

# TODOS
- figure out how to do add_weights beter given that step/2 + step/2 isnt 3/2 step
- try increasing variance by increasing expected value if high chance of going over (multiply expected by (1/chance_under)^(0.5))
- iteratively try different subset swaps
- actually submit

In [7]:
# use this to populate map with combinations
# TODO make this faster and lower trial area
def hydrate_map(iterations=10):
    last_bags = [np.array([0, 0, 0, 0, 0, 0, 0, 0, 0])]
    new_bags = []
    for i in range(iterations):
        for bag in last_bags:
            expected = get_expected(bag)
            for j in range(num_types):
                new_bag = calculate_combined_distribution(bag, j)
                if new_bag != None and get_expected(new_bag) > expected:
                    new_bags.append(new_bag)
        last_bags = new_bags
        new_bags = []

In [36]:
weight_map = {tuple([0] * num_types): np.zeros((500,), dtype=np.float)}
weight_map[tuple([0] * num_types)][0] = 1
expected_weight_map = {tuple([0] * num_types): 0}
chance_too_full_map = {tuple([0] * num_types): 0}

weight_slices = np.arange(step / 2, 50, step)
weight_indices = list(range(500))
weight_array_singles = [np.array([step * distribution_map[key](weight) for weight in weight_slices]) for key in types]
for weight_single in weight_array_singles:
    total_prob = sum(weight_single)
    for i in range(len(weight_single)):
        weight_single[i] /= total_prob

In [37]:
%%time
hydrate_map(50)
print (len(weight_map))
#save_expected_weights()



116168
CPU times: user 2h 34min 9s, sys: 33 s, total: 2h 34min 42s
Wall time: 2h 39min 39s


In [49]:
#expected_weight_map = load_expected_weights()

In [108]:
useable_bags = [[np.array(key), expected_weight_map[key]] for key in expected_weight_map if sum(key) > 2]

In [106]:
def accumulate(rows):
    counts = np.array([0] * num_types)
    score = 0
    for row in rows:
        counts += np.array(row[0])
        score += row[1]
    print (len(rows))
    print (counts)
    print (np.array([1100, 500, 1000, 1200, 166, 1000, 200, 1000, 1000]) - counts)
    print (score)

In [20]:
# add normally as same in test_usefullness
# iteratively
    # remove some subset of elements and add in other possible combinations

In [145]:
def test_usefullness(usefulness, semi_sorted):
    sorted_normalized = sorted(semi_sorted, key=lambda row: row[1] - sum(float(item) * factor for item, factor in zip(row[0], usefulness)))
    sorted_normalized.reverse()

    items_left = np.array([1100, 500, 1000, 1200, 166, 1000, 200, 1000, 1000])
    score = 0
    items = 0
    bag_counts = []
    for bag, bag_score in sorted_normalized:
        num_of_item = 0
        while items < 1000 and sum(item < 0 for item in (items_left - bag)) == 0:
            items_left -= bag
            score += bag_score
            items += 1
            num_of_item += 1
        
        if num_of_item > 0:
            bag_counts.append((num_of_item, (bag, expected_weight_map[tuple(bag)])))
            #bag_counts.append((num_of_item, bag))
        
        if items == 1000:
            break

    return (score, bag_counts)

In [132]:
usefulness_base = [0.8, 0, 2.3, 2.0, 0, 1.05, 0.5, 1.2, 0.8]
jitters = [0, 0.05, 0.1, 0.2]

for i in range(10):
    best_usefulness = None
    best_score = 0
    sorted_normalized_base = sorted(useable_bags, key=lambda row: row[1] - sum(float(item) * factor for item, factor in zip(row[0], usefulness_base)))
    sorted_normalized_base.reverse()
    sorted_normalized_base = sorted_normalized_base[:50000]
    for j in range(len(usefulness_base)):
        for k in range(j, len(usefulness_base)):
            for jitter1 in jitters:
                for jitter2 in jitters:
                    usefulness = usefulness_base.copy()
                    usefulness[j] += jitter1
                    usefulness[k] += jitter2
                    score, bag_counts = test_usefullness(usefulness, sorted_normalized_base)
                    if score > best_score:
                        best_score = score
                        best_usefulness = usefulness

    print (best_usefulness)
    print (best_score)
    usefulness_base = best_usefulness

[0.8, 0, 2.3, 2.0, 0, 1.05, 0.5, 1.2, 0.8]
35574.3688132


KeyboardInterrupt: 

In [54]:
def score_bag(bag_counts):
    assert (sum(count for count, bag in bag_counts) == 1000)
    score_sum = 0
    for count, bag in bag_counts:
        for i in range(count):
            weight = sum(weight_gen_map[item_type]() for item_type, item_num in zip(types, bag) for j in range(item_num))
            if weight > 50:
                continue
            score_sum += weight
    return score_sum

In [146]:
usefulness = [0.8, 0, 2.3, 2.0, 0, 1.05, 0.5, 1.2, 0.8]
score, bag_counts = test_usefullness(usefulness, useable_bags)
#print (max(score_bag(bag_counts) for i in range(10)))
print (score)

35574.3688132


In [147]:
bag_counts

[(68, (array([16,  0,  0,  0,  0,  0,  0,  0,  0]), 46.69832242138849)),
 (1, (array([11,  0,  0,  0,  0,  0,  0,  2,  0]), 42.180295486424562)),
 (333, (array([0, 0, 0, 0, 0, 1, 0, 0, 3]), 33.05762270341819)),
 (333, (array([0, 0, 3, 0, 0, 0, 0, 1, 0]), 37.590305116183387)),
 (1, (array([1, 0, 1, 0, 0, 0, 0, 5, 0]), 38.08393658515736)),
 (1, (array([0, 1, 0, 0, 0, 1, 0, 0, 1]), 30.398699664499279)),
 (82, (array([0, 0, 0, 0, 0, 0, 0, 8, 0]), 38.088429484764525)),
 (1, (array([0, 0, 0, 0, 0, 4, 0, 4, 0]), 37.239938843658067)),
 (82, (array([0, 0, 0, 0, 0, 8, 0, 0, 0]), 36.518449093987364)),
 (2, (array([0, 1, 0, 0, 0, 3, 0, 0, 0]), 30.982752179742899)),
 (22, (array([0, 1, 0, 0, 0, 0, 9, 0, 0]), 29.924870417867957)),
 (2, (array([0, 2, 0, 0, 0, 0, 1, 0, 0]), 25.872064396729197)),
 (72, (array([0, 2, 0, 1, 0, 0, 0, 0, 0]), 25.491655947877572))]

In [75]:
usefulness = [1.0, 0, 2.0, 2.0, 0.7, 0.95, 1.0, 1.2, 0.8]
sorted_normalized = sorted(expected_weight_map, key=lambda key: normalize(key, usefulness) )
sorted_normalized.reverse()
[(a, expected_weight_map[a]) for a in sorted_normalized][:20]