# Current problems

- regrets are not accumulated in a sane way
    - depending on first iteration get trapped

In [385]:
#Pkg.add("StatsBase")
using StatsBase

n = 7
best_score = 0
current_weights = zeros(n)
true_weights = zeros(n)
regrets = zeros(n)
discount = 0.5

# Initialize weights
for i in 1:n
    true_weights[i] = rand(1:n)
    current_weights[i] = 1/n
end


"""
Normalize a vector to values in [0,1] that sum to 1
"""
function normalize(ordered_set)
    size = length(ordered_set)
    total = sum(normal)
    result = zeros(size)
    for i in 1:size
        result[i] = normal[i] / total
    end
    
    return result
end


"""
Evaluates objective function based on position and object's 'true weight'
"""
function score(ordered_set, discount)
    result = 0
    for i in 1:length(ordered_set)
        result += discount^i * ordered_set[i]
    end
    return result
end


"""
Draws an index based on a vector of weights
"""
function WeightedDraw(available_indeces, weight_vec)
    return findin(available_indeces,
            StatsBase.sample(
              available_indeces, 
              StatsBase.Weights(weight_vec)
            ))[1]
end


"""
returns a random ordering of indeces of objects from a vector of weights
"""
function get_ordering(strat)
    size = length(strat)
    available_indeces = Array(1:n)
    ordering = Array{Int32, 1}()
    temp_strat = copy(strat)
    
    for i in 1:size
        draw = WeightedDraw(available_indeces, temp_strat)
        push!(ordering, available_indeces[draw])
        deleteat!(temp_strat, draw)
        deleteat!(available_indeces, draw)
    end
    return ordering
end


"""
randomly draws new score and ordering based on vector of weights
"""
function new_score_and_ordering(strat, true_w, discount)    
    size = length(strat)
    new_order = get_ordering(strat)
    new_weights = zeros(size)
    for i in 1:size
        new_weights[i] = true_w[new_order[i]]
    end
    
    print("\nNew Weights: ", new_weights, "\n")
    
    return score(new_weights, discount), new_order
end


"""
randomly draws from current weights then gets the score
then updates regrets and current training weights
"""
function training_iteration(best_score, current_weights,
                            true_weights, regrets, discount)
    size = length(regrets)
    this_score, this_order = 
        new_score_and_ordering(current_weights, 
                               true_weights, 
                               discount)
    
    print("Score ", this_score, "\n")
    print("Order ", this_order, "\n")
    print("Regret ", this_score - best_score, "\n")
    
    this_regret = this_score - best_score
    if this_score > best_score
        best_score = this_score
    end
    
    for i in 1:size
        regrets[i] += discount^this_order[i] * this_regret
        if regrets[i] < 0
            regrets[i] = 0
        end
    end
    
    current_weights = normalize(regrets)
    print("----Regrets-----\n", regrets, "\n")
    print("----Weights-----\n", current_weights, "\n")
    
    return current_weights, regrets, best_score
end



training_iteration

In [386]:
best_score = score(true_weights, discount)

for i in 1:3
    current_weights, regrets, best_score = 
        training_iteration(
            best_score, current_weights,
            true_weights, regrets, discount)
end


New Weights: [2.0, 5.0, 6.0, 6.0, 3.0, 3.0, 6.0]
Score 3.5625
Order Int32[7, 1, 5, 3, 6, 2, 4]
Regret -1.0625
----Regrets-----
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
----Weights-----
[0.195905, 0.0, 0.180634, 0.111764, 0.151664, 0.14807, 0.211962]

New Weights: [6.0, 2.0, 6.0, 5.0, 3.0, 6.0, 3.0]
Score 4.7734375
Order Int32[3, 7, 4, 1, 6, 5, 2]
Regret 0.1484375
----Regrets-----
[0.0185547, 0.00115967, 0.00927734, 0.0742188, 0.00231934, 0.00463867, 0.0371094]
----Weights-----
[0.195905, 0.0, 0.180634, 0.111764, 0.151664, 0.14807, 0.211962]

New Weights: [6.0, 3.0, 2.0, 6.0, 5.0, 6.0, 3.0]
Score 4.6484375
Order Int32[3, 6, 7, 5, 1, 4, 2]
Regret -0.125
----Regrets-----
[0.00292969, 0.0, 0.00830078, 0.0703125, 0.0, 0.0, 0.00585938]
----Weights-----
[0.195905, 0.0, 0.180634, 0.111764, 0.151664, 0.14807, 0.211962]


In [382]:
true_weights

7-element Array{Float64,1}:
 4.0
 3.0
 4.0
 1.0
 1.0
 3.0
 4.0

In [319]:
best_score

4.3046875

In [358]:
ordered_set = [-0.032959, -0.671875, -0.0827637, -0.307373, -0.177246, -0.188965, 0.0194092]

this_min = minimum(ordered_set)
this_max = maximum(ordered_set)
size = length(ordered_set)

normal = zeros(size)
for i in 1:size
    normal[i] = (ordered_set[i] - this_min) / (this_max - this_min)
end

total = sum(normal)

result = zeros(size)
for i in 1:size
    result[i] = normal[i] / total
end

print(result)
print("\n", sum(result))

[0.195905, 0.0, 0.180634, 0.111764, 0.151664, 0.14807, 0.211962]
0.9999999999999999

In [355]:
minimum(ordered_set)

-0.671875