In [2]:
using POMDPs
using Random # for AbstractRNG
using POMDPModelTools
using Pkg
Pkg.add("JSON")

[32m[1m Resolving[22m[39m package versions...
[32m[1m  Updating[22m[39m `~/.julia/environments/v1.0/Project.toml`
[90m [no changes][39m
[32m[1m  Updating[22m[39m `~/.julia/environments/v1.0/Manifest.toml`
[90m [no changes][39m


In [3]:
struct ChainMDP <: MDP{Int, Symbol}
    len::Int
    p_success::Float64
    discount::Float64
    theta::Int
end


In [4]:
function POMDPs.generate_s(p::ChainMDP, s::Int, a::Symbol, rng::AbstractRNG)
    if a == :right
        success = min(s+1, p.len)
        failure = max(s-1, 1)
    else # a == :left
        success = max(s-1, 1)
        failure = min(s+1, p.len)
    end
    if s + 1 == p.len
        return p.len 
    elseif  s == 2
        return 1
    end
    return rand(rng) < p.p_success ? success : failure
end


In [5]:
# theta = 10
function POMDPs.reward(p::ChainMDP, s::Int, a::Symbol)
    if s == 2
        return p.theta
    end
    if s + 1 == p.len
        return -p.theta
    end
    if s == 0 || s == p.len
        return 0
    end
    return -1
end

In [6]:
POMDPs.initialstate_distribution(m::ChainMDP) = Deterministic(4)

In [7]:
function POMDPs.isterminal(p::ChainMDP, s::Int)
    if s == 1
        return true
    end
    if s == p.len
        return true
    end
    return false
end        

In [8]:
using POMDPSimulators
using POMDPPolicies

ChainMDP() = ChainMDP(6+2,1.0,.9, 10)
m = ChainMDP()

# policy that maps every input to a right action
policy = FunctionPolicy(s->:right)

for (s, a, r) in stepthrough(m, policy, "s,a,r", max_steps=10)
    @show s
    @show a
    @show r
    render(m, (s,a,r))
    println()
end


┌ Info: Recompiling stale cache file /Users/efan/.julia/compiled/v1.0/POMDPSimulators/i1HOp.ji for POMDPSimulators [e0d0a172-29c6-5d4e-96d0-f262df5d01fd]
└ @ Base loading.jl:1184


s = 4
a = :right
r = -1

s = 5
a = :right
r = -1

s = 6
a = :right
r = -1

s = 7
a = :right
r = -10



In [9]:
using POMDPSimulators
using POMDPPolicies

ChainMDP() = ChainMDP(6+2,1.0,.9, 10)
m = ChainMDP()

# policy that maps every input to a left action
policy = FunctionPolicy(s->:left)

function POMDPs.initialstate_distribution(m::ChainMDP)
    return Deterministic(4)
end
for (s, a, r) in stepthrough(m, policy, "s,a,r", max_steps=10)
    # @show s
    # @show a
    # @show r
    render(m, (s,a,r))
    println("s,a,r:($s,$a,$r)")
end


s,a,r:(4,left,-1)
s,a,r:(3,left,-1)
s,a,r:(2,left,10)


In [45]:

include("./ChainMDP.jl")
ChainMDP() = PFChainMDP.PChainMDP(6+2,1.0,.9, 10)
m = ChainMDP()

# policy that maps every input to a right action
policy = FunctionPolicy(s->:right)

function POMDPs.initialstate_distribution(m::PFChainMDP.PChainMDP)
    return Deterministic(4)
end
for (s, a, r) in stepthrough(m, policy, "s,a,r", max_steps=10)
    # @show s
    # @show a
    # @show r
    render(m, (s,a,r))
    println("s,a,r:($s,$a,$r)")
end


s,a,r:(4,right,-1)
s,a,r:(5,right,-1)
s,a,r:(6,right,-1)
s,a,r:(7,right,-10)




In [11]:
n_agents = 1
agents = Any[]
mdps = Any[]
n_states = 6
n_actions = 2
epochs = 10
H = 10
actions = [1, 2]
action_map = Dict(1 => :left, :2 => :right)
rev_action_map = Dict(:left => 1, :right => 2)
states = 1:(n_states+2)
# setup agents
Q_tables = []
N_tables = zeros((n_agents, n_states+2, n_actions))
print(N_tables)
policies = []
theta = 10
# policy that maps every input to a right action
for i in 1:n_agents
    push!(Q_tables, Dict{Int32,Float32}())
    push!(policies, s->:right)
    push!(mdps, ChainMDP(n_states+2,.9,.9, theta))
end
# policy = s->:right

function update_Q(Q_table, s, a, r, sp, t)
    alpha=.95
    gamma=.95
    # print("s:$s, a:$a, sp:$sp, r:$r")
    Q_table[s][rev_action_map[a]] +=  alpha * (r + gamma * 
        findmax(Q_table[sp])[1] - Q_table[s][rev_action_map[a]])
    #println(Q_table)
end
function POMDPs.initialstate_distribution(m::ChainMDP)
    return Deterministic(Int64((n_states+2)/2))
end
function run_chain!(;policies, mdps, reward_reveal_condition, true_mdp, update_Q, n_agents, n_states,
                    Q_tables, N_tables, epochs, steps)
    for e in 1:epochs
        agents = []
        for i in 1:n_agents
            m = mdps[i]
            push!(agents, 
                  Iterators.Stateful(stepthrough(m, FunctionPolicy(policies[i]), "s,a,r,sp,t", max_steps=steps)))
        end

        println("epoch: $e")
        done = false
        t = 0
        while ! done
           done = true
            
           for i in 1:n_agents
                if isempty(agents[i])
                    println("agent $i is done")
                    continue
                end
                res = popfirst!(agents[i])
                r = res[:r]
                if reward_reveal_condition(r)
                    for mdp in mpds
                        mdp = true_mdp
                    end
                end
                #println("before update: N_table: $N_tables")
                N_tables[i,res[:s],rev_action_map[res[:a]]] += 1
                #println("after update:  N_table: $N_tables")
                # println("update Q")
                update_Q(Q_tables[i],res...)
                t = res[:t]
                println("t: $t, print agent $i result: $res")
          
           end
           # println("t:$t")
           for i in 1:n_agents
                if ! isempty(agents[i])
                    done = false
                end
            end
        end
    end
end
run_chain!(policies=policies,
           mdps=mdps,  
           true_mdp=mdps[1],
           reward_reveal_condition= r -> false,
           update_Q=update_Q,
           n_agents=n_agents,
           n_states=n_states,
           Q_tables=Q_tables,
           N_tables=N_tables,
           epochs=epochs,
           steps=H)

[0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0]

[0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0]epoch: 1


KeyError: KeyError: key 4 not found

In [12]:
function square(x)
    return x * x
end

vals = Dict(1 => 4, 2 => 5, 3 => 6)
Dict(key=> square(val) for (key, val) in vals)

Dict{Int64,Int64} with 3 entries:
  2 => 25
  3 => 36
  1 => 16

In [13]:
A = [1 2; 3 4; 5 6]
B = A .+ 6
C = B .+ 6
D = hcat([A , B , C])

3×1 Array{Array{Int64,2},2}:
 [1 2; 3 4; 5 6]      
 [7 8; 9 10; 11 12]   
 [13 14; 15 16; 17 18]

In [14]:
D[2,:]

1-element Array{Array{Int64,2},1}:
 [7 8; 9 10; 11 12]

In [15]:
function ucb_pol(Q_tables, N_tables, i, actions, s)
    # return 
    C = 10.0
    if haskey(Q_tables[i], s)
       # println("ucb_pol:s:$s, N_tables:$N_tables")
       ucbs = Dict(key => sqrt(log(sum(N_tables[i][s,:])/N_tables[i][s,key])) 
                for (key,val) in Q_tables[i][s])
       # println("ucb: $(ucbs)")
       val, idx = findmax(Dict(key => Q_tables[i][s][key] + C* sqrt(log(sum(N_tables[i][s,:])/N_tables[i][s,key])) 
                for (key,val) in Q_tables[i][s])) # need to test
       return action_map[idx]
    else
       act = action_map[rand(actions, 1)[1]]
       println("selected random action $act!!!!!!!!!!!!!!!!!!!!!") 
       return act
    end
end




ucb_pol (generic function with 1 method)

In [16]:
function q_lookup_pol(Q_tables, N_tables, i, actions, s)
    # return 
    if haskey(Q_tables[i], s)
       val, idx = findmax(Q_tables[i][s]) # need to test
       return action_map[idx]
    else
       act = action_map[rand(actions, 1)[1]]
       println("selected random action $act!!!!!!!!!!!!!!!!!!!!!") 
       return act
    end
end

q_lookup_pol (generic function with 1 method)

In [17]:
function ran_pol(Q_tables, N_tables, i, actions, s)
    # return 
    act = action_map[rand(actions, 1)[1]]
    return act
end

ran_pol (generic function with 1 method)

In [18]:
curry(f, a) = (xs...) -> f(a, xs...)
curry2(f, a, b) = (xs...) -> f(a, b, xs...)
curry3(f, a, b, c) = (xs...) -> f(a, b, c, xs...)
curry4(f, a, b, c, d) = (xs...) -> f(a, b, c, d, xs...)
curry5(f, a, b, c, d, e) = (xs...) -> f(a, b, c, d, e, xs...)
curry6(f, a, b, c, d, e, g) = (xs...) -> f(a, b, c, d, e, g, xs...)

curry6 (generic function with 1 method)

In [19]:
n_states = 10
curry(f, x) = (xs...) -> f(x, xs...)
Q_tables = []
mdps = Any[]
N_tables = zeros((n_agents, n_states+2, n_actions))
policies = []
epochs = 500
theta = 10
H = 10
# Setup Q table according to MDP

n_agents = 1
agents = Any[]
n_states = 10


10

In [20]:
function POMDPs.reward(p::ChainMDP, s::Int, a::Symbol)
    if s == 2
        return 10
    end
    if s + 1 == p.len
        return -10
    end
    if s == 0 || s == p.len
        return 0
    end
    return -1
end

In [21]:
# UCB




#setup
for i in 1:n_agents
    push!(Q_tables, Dict{Int32, Dict{Int32, Float32}}())
    for state in states
      Q_tables[i][state] = Dict{Int32, Float32}()
      # print(Q_tables[i])
      for action in actions
         Q_tables[i][state][action] = 0 
      end
    end
    N_table = N_tables[i,:,:]
    push!(policies, curry(curry(curry(curry(ucb_pol, Q_tables), N_tables),i), actions))
    push!(mdps, ChainMDP(n_states+2,.9,.9, 10))
end

run_chain!(policies, mdps, theta, update_Q, n_agents,  n_states, Q_tables, N_tables, epochs, H)

MethodError: MethodError: no method matching run_chain!(::Array{Any,1}, ::Array{Any,1}, ::Int64, ::typeof(update_Q), ::Int64, ::Int64, ::Array{Any,1}, ::Array{Float64,3}, ::Int64, ::Int64)

In [22]:

n_agents = 10
Q_tables = [deepcopy(Q_tables[1]) for i in range(1,n_agents)]
policies = [deepcopy(policies[1]) for i in range(1,n_agents)]
mdps = [deepcopy(mdps[1]) for i in range(1,n_agents)]
N_tables = zeros((n_agents, n_states+2, n_actions))
run_chain!(policies, mdps, update_Q, n_agents,  n_states, Q_tables, N_tables, epochs, H)

MethodError: MethodError: no method matching run_chain!(::Array{getfield(Main, Symbol("##32#33")){getfield(Main, Symbol("##32#33")){getfield(Main, Symbol("##32#33")){getfield(Main, Symbol("##32#33")){typeof(ucb_pol),Array{Any,1}},Array{Float64,3}},Int64},Array{Int64,1}},1}, ::Array{ChainMDP,1}, ::typeof(update_Q), ::Int64, ::Int64, ::Array{Dict{Int32,Dict{Int32,Float32}},1}, ::Array{Float64,3}, ::Int64, ::Int64)

In [23]:
# paramter for seed sampling
using Distributions
theta = 10 * sign(rand(Bernoulli(0.5))-.5)
ntheta = - theta
print("theta: $theta, ntheta: $ntheta")
# Thompson sampling
#Should just be seed sampling, but you do it every step

theta: -10.0, ntheta: 10.0

In [24]:

using POMDPs
include("./ChainMDP.jl")
num_states = 10

mdp = PFChainMDP.PChainMDP(num_states+2,.9,.9, 10)
POMDPs.reward(mdp, 1, :left)

-1

In [25]:
function update_Q(Q_table, s, a, r, sp, t, rev_action_map)
    alpha=.95
    gamma=.95
    # print("s:$s, a:$a, sp:$sp, r:$r")
    Q_table[s][rev_action_map[a]] +=  alpha * (r + gamma * 
        findmax(Q_table[sp])[1] - Q_table[s][rev_action_map[a]])
    #println(Q_table)
end

update_Q (generic function with 2 methods)

In [26]:
mdps = [deepcopy(mdp) for i in 1:2]
isequal(mdps[1], mdps[2])

mdp1 = PFChainMDP.PChainMDP(num_states+2,.9,.9, 10)

mdp2 = PFChainMDP.PChainMDP(num_states+2,.9,.9, 10)
mdp1 === mdp2

true

In [27]:

using DataFrames
function get_average_regret(df)
    num_states = 20
    R = (num_states ) / 2 
    max_theta_state = 2
    #df2 = groupby(groupby(df, :epoch), :agent)
    # Think best way is to just post process to fix it up.
    # find the last state for each epoch for each agent and then fill it out by appending rows
    dfa = deepcopy(df)
    df2 = by(df, [:run, :epoch, :agent], max_t = [:time] => x -> maximum(x.time))
    for row in eachrow(df2)
        state = df[(df[:run].==row.run).&(df[:epoch].==row.epoch).&(
                df[:agent].==row.agent).&(df[:time].==row.max_t),:].state[1]
        #println(state)
        new_time = row.max_t[1]
        new_state = state[1]
        if state == num_states - 1
            continue
        end
        for i in state:max_theta_state
            #println("$state, $max_theta_state, $i")
            new_time += 1
            new_state -=1
            new_row = DataFrame(run = row.run, epoch = row.epoch, agent= row.agent,
                   time = new_time, state = new_state, reward = (new_state == 2) ? 10 : -1)
            append!(dfa,new_row)
        end
    end

    dfa
    #print(dfa)
    df2 = by(dfa, [:run, :epoch, :agent], Regret = [:state, :reward] =>  x ->   R  - sum( x.reward))
    print(df2)
    # df3 = by(df2, [:agent], AverageRegret = [:Regret ] => mean)
    # df3
    #aggregate(df, :Species, [sum, mean])
    #average regret
    mean(df2.Regret)
end

get_average_regret (generic function with 1 method)

In [28]:
# UCB
using Distributions
include("./Agent.jl")
num_agents = 5
agents = Any[]
num_states = 20
num_actions = 2
epochs = 30
H = floor(Int, 3 * num_states / 2)
actions = [1, 2]
action_map = Dict(1 => :left, :2 => :right)
rev_action_map = Dict(:left => 1, :right => 2)
states = 1:(num_states+2)

theta = 10
# Setup Q table according to MDP



#setup
#agmdp = PFChainMDP.PChainMDP(num_states+2,1.0,.9, theta * sign(rand(Bernoulli(0.5))-.5))
(Q_tables, N_tables, policies) = PFAgent.setup_agents(states, num_states, num_agents,
                                                            actions, num_actions, ucb_pol)
true_mdp = PFChainMDP.PChainMDP(num_states+2,1.0,.9, theta)

function chain_found_target(r)
   if r == -10 || r == 10
        return true
   end
   return false
end

PFAgent.run_chain!(
           policies=policies,
           found_target=chain_found_target,
           mdps=[deepcopy(true_mdp) for i in 1:num_agents],
           update_Q=update_Q,
           n_agents=num_agents,
           n_states=num_states,
           Q_tables=Q_tables,
           N_tables=N_tables,
           epochs=epochs,
           steps=H,
           rev_action_map=rev_action_map,
           stop_early=false)
print(Q_tables)

setup agents
agent 1 is done
agent 1 is done
agent 2 is done
agent 1 is done
agent 2 is done
agent 3 is done
agent 1 is done
agent 2 is done
agent 3 is done
agent 4 is done
agent 1 is done
agent 1 is done
agent 2 is done
agent 1 is done
agent 2 is done
agent 1 is done
agent 2 is done
agent 3 is done
agent 1 is done
agent 2 is done
agent 3 is done
agent 4 is done
e: 3, t: 1, agent 1, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 3, t: 2, agent 1, result: (s = 13, a = :left, r = -1, sp = 12, t = 2)
e: 3, t: 1, agent 2, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 3, t: 3, agent 1, result: (s = 12, a = :right, r = -1, sp = 13, t = 3)
e: 3, t: 2, agent 2, result: (s = 13, a = :left, r = -1, sp = 12, t = 2)
e: 3, t: 1, agent 3, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 3, t: 4, agent 1, result: (s = 13, a = :right, r = -1, sp = 14, t = 4)
e: 3, t: 3, agent 2, result: (s = 12, a = :right, r = -1, sp = 13, t = 3)
e: 3, t: 2, agent 3, result: (s = 13, a = :ri

e: 3, t: 28, agent 3, result: (s = 9, a = :right, r = -1, sp = 10, t = 28)
e: 3, t: 27, agent 4, result: (s = 16, a = :right, r = -1, sp = 17, t = 27)
e: 3, t: 26, agent 5, result: (s = 17, a = :right, r = -1, sp = 18, t = 26)
agent 1 is done
e: 3, t: 30, agent 2, result: (s = 17, a = :left, r = -1, sp = 16, t = 30)
e: 3, t: 29, agent 3, result: (s = 10, a = :right, r = -1, sp = 11, t = 29)
e: 3, t: 28, agent 4, result: (s = 17, a = :left, r = -1, sp = 16, t = 28)
e: 3, t: 27, agent 5, result: (s = 18, a = :left, r = -1, sp = 17, t = 27)
agent 1 is done
agent 2 is done
e: 3, t: 30, agent 3, result: (s = 11, a = :right, r = -1, sp = 12, t = 30)
e: 3, t: 29, agent 4, result: (s = 16, a = :left, r = -1, sp = 15, t = 29)
e: 3, t: 28, agent 5, result: (s = 17, a = :left, r = -1, sp = 16, t = 28)
agent 1 is done
agent 2 is done
agent 3 is done
e: 3, t: 30, agent 4, result: (s = 15, a = :left, r = -1, sp = 14, t = 30)
e: 3, t: 29, agent 5, result: (s = 16, a = :left, r = -1, sp = 15, t = 29)


e: 6, t: 24, agent 4, result: (s = 15, a = :left, r = -1, sp = 14, t = 24)
e: 6, t: 23, agent 5, result: (s = 18, a = :right, r = -1, sp = 19, t = 23)
agent 1 is done
e: 6, t: 27, agent 2, result: (s = 6, a = :right, r = -1, sp = 7, t = 27)
e: 6, t: 26, agent 3, result: (s = 7, a = :right, r = -1, sp = 8, t = 26)
e: 6, t: 25, agent 4, result: (s = 14, a = :left, r = -1, sp = 13, t = 25)
e: 6, t: 24, agent 5, result: (s = 19, a = :right, r = -1, sp = 20, t = 24)
agent 1 is done
e: 6, t: 28, agent 2, result: (s = 7, a = :right, r = -1, sp = 8, t = 28)
e: 6, t: 27, agent 3, result: (s = 8, a = :left, r = -1, sp = 7, t = 27)
e: 6, t: 26, agent 4, result: (s = 13, a = :left, r = -1, sp = 12, t = 26)
e: 6, t: 25, agent 5, result: (s = 20, a = :right, r = -1, sp = 21, t = 25)
agent 1 is done
e: 6, t: 29, agent 2, result: (s = 8, a = :left, r = -1, sp = 7, t = 29)
e: 6, t: 28, agent 3, result: (s = 7, a = :left, r = -1, sp = 6, t = 28)
e: 6, t: 27, agent 4, result: (s = 12, a = :left, r = -1, 

e: 9, t: 20, agent 5, result: (s = 11, a = :right, r = -1, sp = 12, t = 20)
e: 9, t: 25, agent 1, result: (s = 8, a = :left, r = -1, sp = 7, t = 25)
e: 9, t: 24, agent 2, result: (s = 7, a = :left, r = -1, sp = 6, t = 24)
e: 9, t: 23, agent 3, result: (s = 8, a = :right, r = -1, sp = 9, t = 23)
e: 9, t: 22, agent 4, result: (s = 7, a = :right, r = -1, sp = 8, t = 22)
e: 9, t: 21, agent 5, result: (s = 12, a = :left, r = -1, sp = 11, t = 21)
e: 9, t: 26, agent 1, result: (s = 7, a = :left, r = -1, sp = 6, t = 26)
e: 9, t: 25, agent 2, result: (s = 6, a = :left, r = -1, sp = 5, t = 25)
e: 9, t: 24, agent 3, result: (s = 9, a = :right, r = -1, sp = 10, t = 24)
e: 9, t: 23, agent 4, result: (s = 8, a = :right, r = -1, sp = 9, t = 23)
e: 9, t: 22, agent 5, result: (s = 11, a = :left, r = -1, sp = 10, t = 22)
e: 9, t: 27, agent 1, result: (s = 6, a = :left, r = -1, sp = 5, t = 27)
e: 9, t: 26, agent 2, result: (s = 5, a = :left, r = -1, sp = 4, t = 26)
e: 9, t: 25, agent 3, result: (s = 10, 

e: 12, t: 18, agent 3, result: (s = 5, a = :left, r = -1, sp = 4, t = 18)
e: 12, t: 17, agent 4, result: (s = 6, a = :right, r = -1, sp = 7, t = 17)
e: 12, t: 16, agent 5, result: (s = 9, a = :left, r = -1, sp = 8, t = 16)
e: 12, t: 21, agent 1, result: (s = 16, a = :left, r = -1, sp = 15, t = 21)
e: 12, t: 20, agent 2, result: (s = 5, a = :left, r = -1, sp = 4, t = 20)
e: 12, t: 19, agent 3, result: (s = 4, a = :left, r = -1, sp = 3, t = 19)
e: 12, t: 18, agent 4, result: (s = 7, a = :left, r = -1, sp = 6, t = 18)
e: 12, t: 17, agent 5, result: (s = 8, a = :left, r = -1, sp = 7, t = 17)
e: 12, t: 22, agent 1, result: (s = 15, a = :left, r = -1, sp = 14, t = 22)
e: 12, t: 21, agent 2, result: (s = 4, a = :left, r = -1, sp = 3, t = 21)
e: 12, t: 20, agent 3, result: (s = 3, a = :right, r = -1, sp = 4, t = 20)
e: 12, t: 19, agent 4, result: (s = 6, a = :left, r = -1, sp = 5, t = 19)
e: 12, t: 18, agent 5, result: (s = 7, a = :right, r = -1, sp = 8, t = 18)
e: 12, t: 23, agent 1, result: 

e: 15, t: 17, agent 2, result: (s = 10, a = :right, r = -1, sp = 11, t = 17)
e: 15, t: 16, agent 3, result: (s = 11, a = :left, r = -1, sp = 10, t = 16)
e: 15, t: 15, agent 4, result: (s = 20, a = :left, r = -1, sp = 19, t = 15)
e: 15, t: 14, agent 5, result: (s = 7, a = :left, r = -1, sp = 6, t = 14)
e: 15, t: 19, agent 1, result: (s = 14, a = :left, r = -1, sp = 13, t = 19)
e: 15, t: 18, agent 2, result: (s = 11, a = :left, r = -1, sp = 10, t = 18)
e: 15, t: 17, agent 3, result: (s = 10, a = :left, r = -1, sp = 9, t = 17)
e: 15, t: 16, agent 4, result: (s = 19, a = :left, r = -1, sp = 18, t = 16)
e: 15, t: 15, agent 5, result: (s = 6, a = :right, r = -1, sp = 7, t = 15)
e: 15, t: 20, agent 1, result: (s = 13, a = :left, r = -1, sp = 12, t = 20)
e: 15, t: 19, agent 2, result: (s = 10, a = :left, r = -1, sp = 9, t = 19)
e: 15, t: 18, agent 3, result: (s = 9, a = :right, r = -1, sp = 10, t = 18)
e: 15, t: 17, agent 4, result: (s = 18, a = :left, r = -1, sp = 17, t = 17)
e: 15, t: 16, ag

e: 18, t: 9, agent 5, result: (s = 12, a = :left, r = -1, sp = 11, t = 9)
e: 18, t: 14, agent 1, result: (s = 9, a = :right, r = -1, sp = 10, t = 14)
e: 18, t: 13, agent 2, result: (s = 10, a = :left, r = -1, sp = 9, t = 13)
e: 18, t: 12, agent 3, result: (s = 11, a = :right, r = -1, sp = 12, t = 12)
e: 18, t: 11, agent 4, result: (s = 10, a = :left, r = -1, sp = 9, t = 11)
e: 18, t: 10, agent 5, result: (s = 11, a = :right, r = -1, sp = 12, t = 10)
e: 18, t: 15, agent 1, result: (s = 10, a = :right, r = -1, sp = 11, t = 15)
e: 18, t: 14, agent 2, result: (s = 9, a = :left, r = -1, sp = 8, t = 14)
e: 18, t: 13, agent 3, result: (s = 12, a = :right, r = -1, sp = 13, t = 13)
e: 18, t: 12, agent 4, result: (s = 9, a = :right, r = -1, sp = 10, t = 12)
e: 18, t: 11, agent 5, result: (s = 12, a = :left, r = -1, sp = 11, t = 11)
e: 18, t: 16, agent 1, result: (s = 11, a = :right, r = -1, sp = 12, t = 16)
e: 18, t: 15, agent 2, result: (s = 8, a = :left, r = -1, sp = 7, t = 15)
e: 18, t: 14, a

e: 21, t: 5, agent 5, result: (s = 8, a = :left, r = -1, sp = 7, t = 5)
e: 21, t: 10, agent 1, result: (s = 13, a = :right, r = -1, sp = 14, t = 10)
e: 21, t: 9, agent 2, result: (s = 8, a = :left, r = -1, sp = 7, t = 9)
e: 21, t: 8, agent 3, result: (s = 13, a = :left, r = -1, sp = 12, t = 8)
e: 21, t: 7, agent 4, result: (s = 14, a = :left, r = -1, sp = 13, t = 7)
e: 21, t: 6, agent 5, result: (s = 7, a = :right, r = -1, sp = 8, t = 6)
e: 21, t: 11, agent 1, result: (s = 14, a = :left, r = -1, sp = 13, t = 11)
e: 21, t: 10, agent 2, result: (s = 7, a = :left, r = -1, sp = 6, t = 10)
e: 21, t: 9, agent 3, result: (s = 12, a = :left, r = -1, sp = 11, t = 9)
e: 21, t: 8, agent 4, result: (s = 13, a = :left, r = -1, sp = 12, t = 8)
e: 21, t: 7, agent 5, result: (s = 8, a = :right, r = -1, sp = 9, t = 7)
e: 21, t: 12, agent 1, result: (s = 13, a = :left, r = -1, sp = 12, t = 12)
e: 21, t: 11, agent 2, result: (s = 6, a = :left, r = -1, sp = 5, t = 11)
e: 21, t: 10, agent 3, result: (s = 1

e: 21, t: 30, agent 4, result: (s = 15, a = :left, r = -1, sp = 14, t = 30)
e: 21, t: 29, agent 5, result: (s = 4, a = :right, r = -1, sp = 5, t = 29)
agent 1 is done
agent 2 is done
agent 3 is done
agent 4 is done
e: 21, t: 30, agent 5, result: (s = 5, a = :right, r = -1, sp = 6, t = 30)
agent 4 is done
agent 3 is done
agent 4 is done
agent 3 is done
agent 4 is done
agent 3 is done
agent 4 is done
agent 3 is done
agent 4 is done
agent 3 is done
agent 4 is done
agent 3 is done
agent 4 is done
agent 3 is done
agent 4 is done
agent 3 is done
agent 4 is done
agent 3 is done
agent 4 is done
agent 1 is done
agent 3 is done
agent 4 is done
agent 1 is done
agent 2 is done
agent 3 is done
agent 4 is done
agent 1 is done
agent 2 is done
agent 3 is done
agent 4 is done
agent 1 is done
agent 2 is done
agent 3 is done
agent 4 is done
agent 1 is done
agent 1 is done
agent 2 is done
agent 1 is done
agent 2 is done
agent 3 is done
agent 1 is done
agent 2 is done
agent 3 is done
agent 4 is done
e: 24,

e: 24, t: 27, agent 4, result: (s = 14, a = :right, r = -1, sp = 15, t = 27)
e: 24, t: 26, agent 5, result: (s = 9, a = :left, r = -1, sp = 8, t = 26)
agent 1 is done
e: 24, t: 30, agent 2, result: (s = 11, a = :left, r = -1, sp = 10, t = 30)
e: 24, t: 29, agent 3, result: (s = 10, a = :right, r = -1, sp = 11, t = 29)
e: 24, t: 28, agent 4, result: (s = 15, a = :left, r = -1, sp = 14, t = 28)
e: 24, t: 27, agent 5, result: (s = 8, a = :left, r = -1, sp = 7, t = 27)
agent 1 is done
agent 2 is done
e: 24, t: 30, agent 3, result: (s = 11, a = :left, r = -1, sp = 10, t = 30)
e: 24, t: 29, agent 4, result: (s = 14, a = :left, r = -1, sp = 13, t = 29)
e: 24, t: 28, agent 5, result: (s = 7, a = :right, r = -1, sp = 8, t = 28)
agent 1 is done
agent 2 is done
agent 3 is done
e: 24, t: 30, agent 4, result: (s = 13, a = :left, r = -1, sp = 12, t = 30)
e: 24, t: 29, agent 5, result: (s = 8, a = :left, r = -1, sp = 7, t = 29)
agent 1 is done
agent 2 is done
agent 3 is done
agent 4 is done
e: 24, t:

e: 27, t: 25, agent 3, result: (s = 8, a = :left, r = -1, sp = 7, t = 25)
e: 27, t: 24, agent 4, result: (s = 3, a = :left, r = -1, sp = 2, t = 24)
e: 27, t: 23, agent 5, result: (s = 8, a = :right, r = -1, sp = 9, t = 23)
agent 1 is done
e: 27, t: 27, agent 2, result: (s = 6, a = :left, r = -1, sp = 5, t = 27)
e: 27, t: 26, agent 3, result: (s = 7, a = :left, r = -1, sp = 6, t = 26)
e: 27, t: 25, agent 4, result: (s = 2, a = :right, r = 10, sp = 1, t = 25)
e: 27, t: 24, agent 5, result: (s = 9, a = :right, r = -1, sp = 10, t = 24)
agent 1 is done
e: 27, t: 28, agent 2, result: (s = 5, a = :left, r = -1, sp = 4, t = 28)
e: 27, t: 27, agent 3, result: (s = 6, a = :left, r = -1, sp = 5, t = 27)
agent 4 is done
e: 27, t: 25, agent 5, result: (s = 10, a = :right, r = -1, sp = 11, t = 25)
agent 1 is done
e: 27, t: 29, agent 2, result: (s = 4, a = :left, r = -1, sp = 3, t = 29)
e: 27, t: 28, agent 3, result: (s = 5, a = :right, r = -1, sp = 6, t = 28)
agent 4 is done
e: 27, t: 26, agent 5, r

e: 30, t: 24, agent 1, result: (s = 11, a = :left, r = -1, sp = 10, t = 24)
e: 30, t: 23, agent 2, result: (s = 12, a = :left, r = -1, sp = 11, t = 23)
e: 30, t: 22, agent 3, result: (s = 13, a = :left, r = -1, sp = 12, t = 22)
e: 30, t: 21, agent 4, result: (s = 10, a = :left, r = -1, sp = 9, t = 21)
e: 30, t: 20, agent 5, result: (s = 11, a = :right, r = -1, sp = 12, t = 20)
e: 30, t: 25, agent 1, result: (s = 10, a = :left, r = -1, sp = 9, t = 25)
e: 30, t: 24, agent 2, result: (s = 11, a = :left, r = -1, sp = 10, t = 24)
e: 30, t: 23, agent 3, result: (s = 12, a = :left, r = -1, sp = 11, t = 23)
e: 30, t: 22, agent 4, result: (s = 9, a = :right, r = -1, sp = 10, t = 22)
e: 30, t: 21, agent 5, result: (s = 12, a = :right, r = -1, sp = 13, t = 21)
e: 30, t: 26, agent 1, result: (s = 9, a = :left, r = -1, sp = 8, t = 26)
e: 30, t: 25, agent 2, result: (s = 10, a = :right, r = -1, sp = 11, t = 25)
e: 30, t: 24, agent 3, result: (s = 11, a = :left, r = -1, sp = 10, t = 24)
e: 30, t: 23,

In [29]:

include("./Agent.jl")
# Base setup 

# Solution

# solve mdp in both directions
# Then each agent will pick on of the policies.
# common
num_agents = 1
agents = Any[]
num_states = 20
num_actions = 2
epochs = 10000
H = floor(Int, 3 * num_states / 2)
actions = [1, 2]
action_map = Dict(1 => :left, :2 => :right)
rev_action_map = Dict(:left => 1, :right => 2)
states = 1:(num_states+2)


theta = 10


(Q_tables1, N_tables1, policies1) = PFAgent.setup_agents(states, num_states, num_agents,
                                                            actions, num_actions, ucb_pol)

true_mdp = PFChainMDP.PChainMDP(num_states+2,1.0,.99, theta)
PFAgent.run_chain!(policies=policies1,
           found_target=chain_found_target,
           mdps=[true_mdp],
           update_Q=update_Q,
           n_agents=num_agents,
           n_states=num_states,
           Q_tables=Q_tables1,
           N_tables=N_tables1,
           epochs=epochs,
           steps=H,
           rev_action_map=rev_action_map,
           stop_early=false)

# theta = -10
using JSON
println("Q1:$(json(Q_tables1,2))")
println("N1:$(json(N_tables1,2))")

true_mdp2 = PFChainMDP.PChainMDP(num_states+2,1.0,.99, -theta)
(Q_tables2, N_tables2, policies2) = PFAgent.setup_agents(states, num_states, num_agents,
                                                            actions, num_actions, ucb_pol)
PFAgent.run_chain!(policies=policies2,
           found_target=chain_found_target,
           mdps=[true_mdp2],
           update_Q=update_Q,
           n_agents=num_agents,
           n_states=num_states,
           Q_tables=Q_tables2,
           N_tables=N_tables2,
           epochs=epochs,
           steps=H,
           rev_action_map=rev_action_map,
           stop_early=false)



println("Q2: $(json(Q_tables2[1],2))")
println("N2: $(json(N_tables2[1],2))")

setup agents




e: 1000, t: 1, agent 1, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 1000, t: 2, agent 1, result: (s = 13, a = :left, r = -1, sp = 12, t = 2)
e: 1000, t: 3, agent 1, result: (s = 12, a = :left, r = -1, sp = 11, t = 3)
e: 1000, t: 4, agent 1, result: (s = 11, a = :right, r = -1, sp = 12, t = 4)
e: 1000, t: 5, agent 1, result: (s = 12, a = :left, r = -1, sp = 11, t = 5)
e: 1000, t: 6, agent 1, result: (s = 11, a = :left, r = -1, sp = 10, t = 6)
e: 1000, t: 7, agent 1, result: (s = 10, a = :right, r = -1, sp = 11, t = 7)
e: 1000, t: 8, agent 1, result: (s = 11, a = :right, r = -1, sp = 12, t = 8)
e: 1000, t: 9, agent 1, result: (s = 12, a = :right, r = -1, sp = 13, t = 9)
e: 1000, t: 10, agent 1, result: (s = 13, a = :right, r = -1, sp = 14, t = 10)
e: 1000, t: 11, agent 1, result: (s = 14, a = :right, r = -1, sp = 15, t = 11)
e: 1000, t: 12, agent 1, result: (s = 15, a = :right, r = -1, sp = 16, t = 12)
e: 1000, t: 13, agent 1, result: (s = 16, a = :left, r = -1, sp = 15, t = 

e: 6000, t: 1, agent 1, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 6000, t: 2, agent 1, result: (s = 13, a = :right, r = -1, sp = 14, t = 2)
e: 6000, t: 3, agent 1, result: (s = 14, a = :right, r = -1, sp = 15, t = 3)
e: 6000, t: 4, agent 1, result: (s = 15, a = :left, r = -1, sp = 14, t = 4)
e: 6000, t: 5, agent 1, result: (s = 14, a = :left, r = -1, sp = 13, t = 5)
e: 6000, t: 6, agent 1, result: (s = 13, a = :left, r = -1, sp = 12, t = 6)
e: 6000, t: 7, agent 1, result: (s = 12, a = :left, r = -1, sp = 11, t = 7)
e: 6000, t: 8, agent 1, result: (s = 11, a = :right, r = -1, sp = 12, t = 8)
e: 6000, t: 9, agent 1, result: (s = 12, a = :right, r = -1, sp = 13, t = 9)
e: 6000, t: 10, agent 1, result: (s = 13, a = :right, r = -1, sp = 14, t = 10)
e: 6000, t: 11, agent 1, result: (s = 14, a = :left, r = -1, sp = 13, t = 11)
e: 6000, t: 12, agent 1, result: (s = 13, a = :left, r = -1, sp = 12, t = 12)
e: 6000, t: 13, agent 1, result: (s = 12, a = :left, r = -1, sp = 11, t = 13

Q1:[
  {
    "18": {
      "2": -8.083571,
      "1": -6.7962008
    },
    "2": {
      "2": 10.0,
      "1": 10.0
    },
    "16": {
      "2": -6.7962008,
      "1": -5.3697515
    },
    "11": {
      "2": -2.9359977,
      "1": -1.0925183
    },
    "21": {
      "2": -10.0,
      "1": -10.0
    },
    "7": {
      "2": 0.95011824,
      "1": 3.2134275
    },
    "9": {
      "2": -1.0925183,
      "1": 0.95011824
    },
    "10": {
      "2": -2.0378923,
      "1": -0.09738767
    },
    "19": {
      "2": -8.679393,
      "1": -7.456391
    },
    "17": {
      "2": -7.456391,
      "1": -6.101264
    },
    "8": {
      "2": -0.09738767,
      "1": 2.052756
    },
    "22": {
      "2": 0.0,
      "1": 0.0
    },
    "6": {
      "2": 2.052756,
      "1": 4.435187
    },
    "4": {
      "2": 4.435187,
      "1": 7.075
    },
    "3": {
      "2": 5.7212496,
      "1": 8.5
    },
    "5": {
      "2": 3.2134275,
      "1": 5.7212496
    },
    "20": {
      "2": -10.5,
      "1

e: 5000, t: 1, agent 1, result: (s = 12, a = :left, r = -1, sp = 11, t = 1)
e: 5000, t: 2, agent 1, result: (s = 11, a = :left, r = -1, sp = 10, t = 2)
e: 5000, t: 3, agent 1, result: (s = 10, a = :right, r = -1, sp = 11, t = 3)
e: 5000, t: 4, agent 1, result: (s = 11, a = :right, r = -1, sp = 12, t = 4)
e: 5000, t: 5, agent 1, result: (s = 12, a = :right, r = -1, sp = 13, t = 5)
e: 5000, t: 6, agent 1, result: (s = 13, a = :right, r = -1, sp = 14, t = 6)
e: 5000, t: 7, agent 1, result: (s = 14, a = :right, r = -1, sp = 15, t = 7)
e: 5000, t: 8, agent 1, result: (s = 15, a = :right, r = -1, sp = 16, t = 8)
e: 5000, t: 9, agent 1, result: (s = 16, a = :right, r = -1, sp = 17, t = 9)
e: 5000, t: 10, agent 1, result: (s = 17, a = :right, r = -1, sp = 18, t = 10)
e: 5000, t: 11, agent 1, result: (s = 18, a = :right, r = -1, sp = 19, t = 11)
e: 5000, t: 12, agent 1, result: (s = 19, a = :right, r = -1, sp = 20, t = 12)
e: 5000, t: 13, agent 1, result: (s = 20, a = :right, r = -1, sp = 21, t

e: 10000, t: 1, agent 1, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 10000, t: 2, agent 1, result: (s = 13, a = :right, r = -1, sp = 14, t = 2)
e: 10000, t: 3, agent 1, result: (s = 14, a = :right, r = -1, sp = 15, t = 3)
e: 10000, t: 4, agent 1, result: (s = 15, a = :left, r = -1, sp = 14, t = 4)
e: 10000, t: 5, agent 1, result: (s = 14, a = :left, r = -1, sp = 13, t = 5)
e: 10000, t: 6, agent 1, result: (s = 13, a = :left, r = -1, sp = 12, t = 6)
e: 10000, t: 7, agent 1, result: (s = 12, a = :left, r = -1, sp = 11, t = 7)
e: 10000, t: 8, agent 1, result: (s = 11, a = :left, r = -1, sp = 10, t = 8)
e: 10000, t: 9, agent 1, result: (s = 10, a = :left, r = -1, sp = 9, t = 9)
e: 10000, t: 10, agent 1, result: (s = 9, a = :right, r = -1, sp = 10, t = 10)
e: 10000, t: 11, agent 1, result: (s = 10, a = :right, r = -1, sp = 11, t = 11)
e: 10000, t: 12, agent 1, result: (s = 11, a = :right, r = -1, sp = 12, t = 12)
e: 10000, t: 13, agent 1, result: (s = 12, a = :right, r = -1, sp 

In [30]:
#Now we run with that policy
include("./Agent.jl")
function run_ucb_chain_simulations(nruns, Q_tables1, Q_tables2, N_tables1, N_tables2, num_agents)
    runs = []
    for i in 1:nruns
        agents = Any[]
        num_states = 20
        num_actions = 2
        epochs=1
        ucb_policies = []
        H = floor(Int, 3 * num_states / 2)
        actions = [1, 2]
        action_map = Dict(1 => :left, :2 => :right)
        rev_action_map = Dict(:left => 1, :right => 2)
        states = 1:(num_states+2)
        #print("Before q's")
        true_mdp = PFChainMDP.PChainMDP(num_states+2,1.0,.9, 10 * sign(rand(Bernoulli(0.5))-.5))
#         Q_tables_ucb = Dict(1 => Q_tables1[1])
#         N_tables_ucb = Dict(1 => N_tables1[1])
        Q_tables_list = [Q_tables1[1], Q_tables2[1]]
        N_tables_list = [N_tables1[1], N_tables2[1]]
        Q_tables_ucb = Dict(i => Q_tables_list[rand(1:length(Q_tables_list))] for i in 1:num_agents)
        N_tables_ucb = Dict(i => N_tables_list[rand(1:length(N_tables_list))] for i in 1:num_agents)
        #println("Q_T: $Q_tables_thomp")
        #println("N_T: $N_tables_thomp")
        for ag in 1:num_agents
          #push!(ucb_policies, curry(curry(curry(curry(ucb_pol, Q_tables_ucb), N_tables_ucb),1), actions))

          push!(ucb_policies, curry(curry(curry(curry(q_lookup_pol, Q_tables_ucb), N_tables_ucb),1), actions))

        end
        # push!(thomp_policies, curry(curry(curry(curry(thomp_pol_func, Q_tables_thomp), N_tables_thomp),0), actions))
        #println("after policies")
        (Q_tables3, N_tables3, trash) = PFAgent.setup_agents(states, num_states, num_agents,
                                                                    actions, num_actions, ucb_pol)

        mdps = [deepcopy(true_mdp) for ag in 1:num_agents]
        r_history = PFAgent.run_chain!(
                   policies=ucb_policies,
                   found_target=chain_found_target,
                   mdps=mdps,
                   update_Q=update_Q,
                   n_agents=num_agents,
                   n_states=num_states,
                   Q_tables=deepcopy(Q_tables_ucb),
                   N_tables=deepcopy(N_tables_ucb),
                   epochs=epochs,
                   steps=H,
                   rev_action_map=rev_action_map,
                   stop_early=true)

        R = (num_states - 2) / 2
        #println([r for (e,i,t,r) in r_history if i == 1])
        #println([r for (e,i,t,r) in r_history if i == 2])
        for (e, ag, t, st, r)  in r_history
          push!(runs, (i, e, ag, t, st, r)) 
        end
    end
    return runs
end



run_ucb_chain_simulations (generic function with 1 method)

In [31]:
num_agents = 1
results = run_ucb_chain_simulations(30, Q_tables1, Q_tables2, N_tables1, N_tables2, num_agents)
using CSV
using DataFrames
df_ucb = DataFrame(run = [x[1] for x in results], epoch = [x[2] for x in results], agent=[x[3] for x in results],
               time = [x[4] for x in results], state = [x[5] for x in results], reward = [x[6] for x in results])


setup agents
e: 1, t: 1, agent 1, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 1, t: 2, agent 1, result: (s = 13, a = :right, r = -1, sp = 14, t = 2)
e: 1, t: 3, agent 1, result: (s = 14, a = :right, r = -1, sp = 15, t = 3)
e: 1, t: 4, agent 1, result: (s = 15, a = :right, r = -1, sp = 16, t = 4)
e: 1, t: 5, agent 1, result: (s = 16, a = :right, r = -1, sp = 17, t = 5)
e: 1, t: 6, agent 1, result: (s = 17, a = :right, r = -1, sp = 18, t = 6)
e: 1, t: 7, agent 1, result: (s = 18, a = :right, r = -1, sp = 19, t = 7)
e: 1, t: 8, agent 1, result: (s = 19, a = :right, r = -1, sp = 20, t = 8)
e: 1, t: 9, agent 1, result: (s = 20, a = :right, r = -1, sp = 21, t = 9)
e: 1, t: 10, agent 1, result: (s = 21, a = :right, r = -10, sp = 22, t = 10)
setup agents
e: 1, t: 1, agent 1, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 1, t: 2, agent 1, result: (s = 13, a = :right, r = -1, sp = 14, t = 2)
e: 1, t: 3, agent 1, result: (s = 14, a = :right, r = -1, sp = 15, t = 3)
e: 1, t: 

e: 1, t: 5, agent 1, result: (s = 8, a = :left, r = -1, sp = 7, t = 5)
e: 1, t: 6, agent 1, result: (s = 7, a = :left, r = -1, sp = 6, t = 6)
e: 1, t: 7, agent 1, result: (s = 6, a = :left, r = -1, sp = 5, t = 7)
e: 1, t: 8, agent 1, result: (s = 5, a = :left, r = -1, sp = 4, t = 8)
e: 1, t: 9, agent 1, result: (s = 4, a = :left, r = -1, sp = 3, t = 9)
e: 1, t: 10, agent 1, result: (s = 3, a = :left, r = -1, sp = 2, t = 10)
e: 1, t: 11, agent 1, result: (s = 2, a = :right, r = -10, sp = 1, t = 11)
setup agents
e: 1, t: 1, agent 1, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 1, t: 2, agent 1, result: (s = 13, a = :right, r = -1, sp = 14, t = 2)
e: 1, t: 3, agent 1, result: (s = 14, a = :right, r = -1, sp = 15, t = 3)
e: 1, t: 4, agent 1, result: (s = 15, a = :right, r = -1, sp = 16, t = 4)
e: 1, t: 5, agent 1, result: (s = 16, a = :right, r = -1, sp = 17, t = 5)
e: 1, t: 6, agent 1, result: (s = 17, a = :right, r = -1, sp = 18, t = 6)
e: 1, t: 7, agent 1, result: (s = 18, a 

e: 1, t: 6, agent 1, result: (s = 7, a = :left, r = -1, sp = 6, t = 6)
e: 1, t: 7, agent 1, result: (s = 6, a = :left, r = -1, sp = 5, t = 7)
e: 1, t: 8, agent 1, result: (s = 5, a = :left, r = -1, sp = 4, t = 8)
e: 1, t: 9, agent 1, result: (s = 4, a = :left, r = -1, sp = 3, t = 9)
e: 1, t: 10, agent 1, result: (s = 3, a = :left, r = -1, sp = 2, t = 10)
e: 1, t: 11, agent 1, result: (s = 2, a = :right, r = 10, sp = 1, t = 11)
setup agents
e: 1, t: 1, agent 1, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 1, t: 2, agent 1, result: (s = 13, a = :right, r = -1, sp = 14, t = 2)
e: 1, t: 3, agent 1, result: (s = 14, a = :right, r = -1, sp = 15, t = 3)
e: 1, t: 4, agent 1, result: (s = 15, a = :right, r = -1, sp = 16, t = 4)
e: 1, t: 5, agent 1, result: (s = 16, a = :right, r = -1, sp = 17, t = 5)
e: 1, t: 6, agent 1, result: (s = 17, a = :right, r = -1, sp = 18, t = 6)
e: 1, t: 7, agent 1, result: (s = 18, a = :right, r = -1, sp = 19, t = 7)
e: 1, t: 8, agent 1, result: (s = 19, 

┌ Info: Recompiling stale cache file /Users/efan/.julia/compiled/v1.0/CSV/HHBkp.ji for CSV [336ed68f-0bac-5ca0-87d4-7b16caf5d00b]
└ @ Base loading.jl:1184


Unnamed: 0_level_0,run,epoch,agent,time,state,reward
Unnamed: 0_level_1,Int64,Int64,Int64,Int64,Int64,Int64
1,1,1,1,1,12,-1
2,1,1,1,2,13,-1
3,1,1,1,3,14,-1
4,1,1,1,4,15,-1
5,1,1,1,5,16,-1
6,1,1,1,6,17,-1
7,1,1,1,7,18,-1
8,1,1,1,8,19,-1
9,1,1,1,9,20,-1
10,1,1,1,10,21,-10


In [32]:

get_average_regret(df_ucb)

30×4 DataFrame
│ Row │ run   │ epoch │ agent │ Regret  │
│     │ [90mInt64[39m │ [90mInt64[39m │ [90mInt64[39m │ [90mFloat64[39m │
├─────┼───────┼───────┼───────┼─────────┤
│ 1   │ 1     │ 1     │ 1     │ 29.0    │
│ 2   │ 2     │ 1     │ 1     │ 29.0    │
│ 3   │ 3     │ 1     │ 1     │ 31.0    │
│ 4   │ 4     │ 1     │ 1     │ 29.0    │
│ 5   │ 5     │ 1     │ 1     │ 29.0    │
│ 6   │ 6     │ 1     │ 1     │ 9.0     │
│ 7   │ 7     │ 1     │ 1     │ 11.0    │
│ 8   │ 8     │ 1     │ 1     │ 11.0    │
│ 9   │ 9     │ 1     │ 1     │ 31.0    │
│ 10  │ 10    │ 1     │ 1     │ 9.0     │
│ 11  │ 11    │ 1     │ 1     │ 9.0     │
│ 12  │ 12    │ 1     │ 1     │ 29.0    │
│ 13  │ 13    │ 1     │ 1     │ 9.0     │
│ 14  │ 14    │ 1     │ 1     │ 31.0    │
│ 15  │ 15    │ 1     │ 1     │ 29.0    │
│ 16  │ 16    │ 1     │ 1     │ 31.0    │
│ 17  │ 17    │ 1     │ 1     │ 29.0    │
│ 18  │ 18    │ 1     │ 1     │ 31.0    │
│ 19  │ 19    │ 1     │ 1     │ 11.0    │
│ 20  │ 20    │ 1    

19.266666666666666

In [33]:
# Now we need a policy which randomly picks between the 2
function thomp_pol_func(Q_tables, N_tables, i, actions, s)
    # passed in i ignored, kept for consistency
    i = rand(1:length(Q_tables))
    #println("i:$i")
    # return 
    if haskey(Q_tables[i], s)
       # println("ucb: $(ucbs)")
       val, idx = findmax(Q_tables[i][s])
       #println("Selected $val, $idx from $(Q_tables[i][s]) for $i, $s")
       #print("$(Q_tables[i][s])")
       return action_map[idx]
    else
       act = action_map[rand(actions, 1)[1]]
       println("random action $act") 
       return act
    end
end

thomp_pol_func (generic function with 1 method)

In [34]:
#Now we run with that policy
include("./Agent.jl")
function run_thompson_chain_simulations(nruns, Q_tables1, Q_tables2, N_tables1, N_tables2, num_agents)
    runs = []
    for i in 1:nruns
        agents = Any[]
        num_states = 20
        num_actions = 2
        epochs=1
        thomp_policies = []
        H = floor(Int, 3 * num_states / 2)
        actions = [1, 2]
        action_map = Dict(1 => :left, :2 => :right)
        rev_action_map = Dict(:left => 1, :right => 2)
        states = 1:(num_states+2)
        #print("Before q's")
        true_mdp = PFChainMDP.PChainMDP(num_states+2,1.0,.9, theta)
        Q_tables_thomp = Dict(1 => Q_tables1[1], 2 => Q_tables2[1])
        N_tables_thomp = Dict(1 => N_tables1[1], 2 => N_tables2[1])
        #println("Q_T: $Q_tables_thomp")
        #println("N_T: $N_tables_thomp")
        for ag in 1:num_agents
          push!(thomp_policies, curry(curry(curry(curry(thomp_pol_func, Q_tables_thomp), N_tables_thomp),0), actions))
        end
        # push!(thomp_policies, curry(curry(curry(curry(thomp_pol_func, Q_tables_thomp), N_tables_thomp),0), actions))
        #println("after policies")
        (Q_tables3, N_tables3, trash) = PFAgent.setup_agents(states, num_states, num_agents,
                                                                    actions, num_actions, ucb_pol)

        mdps = [deepcopy(true_mdp) for ag in 1:num_agents]
        r_history = PFAgent.run_chain!(
                   policies=thomp_policies,
                   found_target=chain_found_target,
                   mdps=mdps,
                   update_Q=update_Q,
                   n_agents=num_agents,
                   n_states=num_states,
                   Q_tables=Q_tables3,
                   N_tables=N_tables3,
                   epochs=epochs,
                   steps=H,
                   rev_action_map=rev_action_map,
                   stop_early=true)

        R = (num_states - 2) / 2
        #println([r for (e,i,t,r) in r_history if i == 1])
        #println([r for (e,i,t,r) in r_history if i == 2])
        for (e, ag, t, st, r)  in r_history
          push!(runs, (i, e, ag, t, st, r)) 
        end
    end
    return runs
end



run_thompson_chain_simulations (generic function with 1 method)

In [35]:
num_agents = 10
results = run_thompson_chain_simulations(100, Q_tables1, Q_tables2, N_tables1, N_tables2, num_agents)
using CSV
using DataFrames
df_thomp = DataFrame(run = [x[1] for x in results], epoch = [x[2] for x in results], agent=[x[3] for x in results],
               time = [x[4] for x in results], state = [x[5] for x in results], reward = [x[6] for x in results])
#println(df_thomp)
#df_thomp

setup agents
e: 1, t: 1, agent 1, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 1, t: 2, agent 1, result: (s = 13, a = :left, r = -1, sp = 12, t = 2)
e: 1, t: 1, agent 2, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 1, t: 3, agent 1, result: (s = 12, a = :right, r = -1, sp = 13, t = 3)
e: 1, t: 2, agent 2, result: (s = 13, a = :left, r = -1, sp = 12, t = 2)
e: 1, t: 1, agent 3, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 1, t: 4, agent 1, result: (s = 13, a = :left, r = -1, sp = 12, t = 4)
e: 1, t: 3, agent 2, result: (s = 12, a = :right, r = -1, sp = 13, t = 3)
e: 1, t: 2, agent 3, result: (s = 13, a = :right, r = -1, sp = 14, t = 2)
e: 1, t: 1, agent 4, result: (s = 12, a = :left, r = -1, sp = 11, t = 1)
e: 1, t: 5, agent 1, result: (s = 12, a = :right, r = -1, sp = 13, t = 5)
e: 1, t: 4, agent 2, result: (s = 13, a = :left, r = -1, sp = 12, t = 4)
e: 1, t: 3, agent 3, result: (s = 14, a = :right, r = -1, sp = 15, t = 3)
e: 1, t: 2, agent 4, result: (

e: 1, t: 15, agent 5, result: (s = 18, a = :left, r = -1, sp = 17, t = 15)
e: 1, t: 14, agent 6, result: (s = 11, a = :left, r = -1, sp = 10, t = 14)
e: 1, t: 13, agent 7, result: (s = 18, a = :right, r = -1, sp = 19, t = 13)
e: 1, t: 12, agent 8, result: (s = 13, a = :left, r = -1, sp = 12, t = 12)
e: 1, t: 11, agent 9, result: (s = 14, a = :left, r = -1, sp = 13, t = 11)
e: 1, t: 10, agent 10, result: (s = 7, a = :right, r = -1, sp = 8, t = 10)
e: 1, t: 20, agent 1, result: (s = 17, a = :right, r = -1, sp = 18, t = 20)
e: 1, t: 19, agent 2, result: (s = 10, a = :right, r = -1, sp = 11, t = 19)
e: 1, t: 18, agent 3, result: (s = 15, a = :right, r = -1, sp = 16, t = 18)
e: 1, t: 17, agent 4, result: (s = 14, a = :right, r = -1, sp = 15, t = 17)
e: 1, t: 16, agent 5, result: (s = 17, a = :left, r = -1, sp = 16, t = 16)
e: 1, t: 15, agent 6, result: (s = 10, a = :right, r = -1, sp = 11, t = 15)
e: 1, t: 14, agent 7, result: (s = 19, a = :right, r = -1, sp = 20, t = 14)
e: 1, t: 13, agent

e: 1, t: 6, agent 3, result: (s = 11, a = :right, r = -1, sp = 12, t = 6)
e: 1, t: 5, agent 4, result: (s = 12, a = :right, r = -1, sp = 13, t = 5)
e: 1, t: 4, agent 5, result: (s = 13, a = :left, r = -1, sp = 12, t = 4)
e: 1, t: 3, agent 6, result: (s = 14, a = :right, r = -1, sp = 15, t = 3)
e: 1, t: 2, agent 7, result: (s = 11, a = :left, r = -1, sp = 10, t = 2)
e: 1, t: 1, agent 8, result: (s = 12, a = :left, r = -1, sp = 11, t = 1)
e: 1, t: 9, agent 1, result: (s = 10, a = :right, r = -1, sp = 11, t = 9)
e: 1, t: 8, agent 2, result: (s = 13, a = :left, r = -1, sp = 12, t = 8)
e: 1, t: 7, agent 3, result: (s = 12, a = :left, r = -1, sp = 11, t = 7)
e: 1, t: 6, agent 4, result: (s = 13, a = :left, r = -1, sp = 12, t = 6)
e: 1, t: 5, agent 5, result: (s = 12, a = :right, r = -1, sp = 13, t = 5)
e: 1, t: 4, agent 6, result: (s = 15, a = :right, r = -1, sp = 16, t = 4)
e: 1, t: 3, agent 7, result: (s = 10, a = :right, r = -1, sp = 11, t = 3)
e: 1, t: 2, agent 8, result: (s = 11, a = :l

e: 1, t: 19, agent 4, result: (s = 18, a = :left, r = -1, sp = 17, t = 19)
e: 1, t: 18, agent 5, result: (s = 13, a = :right, r = -1, sp = 14, t = 18)
e: 1, t: 17, agent 6, result: (s = 16, a = :left, r = -1, sp = 15, t = 17)
e: 1, t: 16, agent 7, result: (s = 19, a = :right, r = -1, sp = 20, t = 16)
e: 1, t: 15, agent 8, result: (s = 8, a = :left, r = -1, sp = 7, t = 15)
e: 1, t: 14, agent 9, result: (s = 13, a = :left, r = -1, sp = 12, t = 14)
e: 1, t: 13, agent 10, result: (s = 6, a = :left, r = -1, sp = 5, t = 13)
e: 1, t: 23, agent 1, result: (s = 12, a = :left, r = -1, sp = 11, t = 23)
e: 1, t: 22, agent 2, result: (s = 13, a = :left, r = -1, sp = 12, t = 22)
e: 1, t: 21, agent 3, result: (s = 2, a = :right, r = 10, sp = 1, t = 21)
setup agents
e: 1, t: 1, agent 1, result: (s = 12, a = :left, r = -1, sp = 11, t = 1)
e: 1, t: 2, agent 1, result: (s = 11, a = :right, r = -1, sp = 12, t = 2)
e: 1, t: 1, agent 2, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 1, t: 3, agent 

e: 1, t: 14, agent 5, result: (s = 11, a = :right, r = -1, sp = 12, t = 14)
e: 1, t: 13, agent 6, result: (s = 14, a = :left, r = -1, sp = 13, t = 13)
e: 1, t: 12, agent 7, result: (s = 11, a = :left, r = -1, sp = 10, t = 12)
e: 1, t: 11, agent 8, result: (s = 10, a = :left, r = -1, sp = 9, t = 11)
e: 1, t: 10, agent 9, result: (s = 13, a = :left, r = -1, sp = 12, t = 10)
e: 1, t: 9, agent 10, result: (s = 8, a = :left, r = -1, sp = 7, t = 9)
e: 1, t: 19, agent 1, result: (s = 6, a = :left, r = -1, sp = 5, t = 19)
e: 1, t: 18, agent 2, result: (s = 15, a = :right, r = -1, sp = 16, t = 18)
e: 1, t: 17, agent 3, result: (s = 16, a = :right, r = -1, sp = 17, t = 17)
e: 1, t: 16, agent 4, result: (s = 9, a = :right, r = -1, sp = 10, t = 16)
e: 1, t: 15, agent 5, result: (s = 12, a = :left, r = -1, sp = 11, t = 15)
e: 1, t: 14, agent 6, result: (s = 13, a = :right, r = -1, sp = 14, t = 14)
e: 1, t: 13, agent 7, result: (s = 10, a = :right, r = -1, sp = 11, t = 13)
e: 1, t: 12, agent 8, resu

e: 1, t: 29, agent 4, result: (s = 4, a = :right, r = -1, sp = 5, t = 29)
e: 1, t: 28, agent 5, result: (s = 9, a = :left, r = -1, sp = 8, t = 28)
e: 1, t: 27, agent 6, result: (s = 12, a = :left, r = -1, sp = 11, t = 27)
e: 1, t: 26, agent 7, result: (s = 17, a = :left, r = -1, sp = 16, t = 26)
e: 1, t: 25, agent 8, result: (s = 10, a = :right, r = -1, sp = 11, t = 25)
e: 1, t: 24, agent 9, result: (s = 13, a = :right, r = -1, sp = 14, t = 24)
e: 1, t: 23, agent 10, result: (s = 6, a = :left, r = -1, sp = 5, t = 23)
agent 1 is done
agent 2 is done
agent 3 is done
e: 1, t: 30, agent 4, result: (s = 5, a = :right, r = -1, sp = 6, t = 30)
e: 1, t: 29, agent 5, result: (s = 8, a = :left, r = -1, sp = 7, t = 29)
e: 1, t: 28, agent 6, result: (s = 11, a = :left, r = -1, sp = 10, t = 28)
e: 1, t: 27, agent 7, result: (s = 16, a = :right, r = -1, sp = 17, t = 27)
e: 1, t: 26, agent 8, result: (s = 11, a = :left, r = -1, sp = 10, t = 26)
e: 1, t: 25, agent 9, result: (s = 14, a = :right, r = -

e: 1, t: 4, agent 3, result: (s = 13, a = :left, r = -1, sp = 12, t = 4)
e: 1, t: 3, agent 4, result: (s = 10, a = :right, r = -1, sp = 11, t = 3)
e: 1, t: 2, agent 5, result: (s = 11, a = :right, r = -1, sp = 12, t = 2)
e: 1, t: 1, agent 6, result: (s = 12, a = :left, r = -1, sp = 11, t = 1)
e: 1, t: 7, agent 1, result: (s = 10, a = :right, r = -1, sp = 11, t = 7)
e: 1, t: 6, agent 2, result: (s = 7, a = :right, r = -1, sp = 8, t = 6)
e: 1, t: 5, agent 3, result: (s = 12, a = :right, r = -1, sp = 13, t = 5)
e: 1, t: 4, agent 4, result: (s = 11, a = :right, r = -1, sp = 12, t = 4)
e: 1, t: 3, agent 5, result: (s = 12, a = :right, r = -1, sp = 13, t = 3)
e: 1, t: 2, agent 6, result: (s = 11, a = :left, r = -1, sp = 10, t = 2)
e: 1, t: 1, agent 7, result: (s = 12, a = :left, r = -1, sp = 11, t = 1)
e: 1, t: 8, agent 1, result: (s = 11, a = :right, r = -1, sp = 12, t = 8)
e: 1, t: 7, agent 2, result: (s = 8, a = :right, r = -1, sp = 9, t = 7)
e: 1, t: 6, agent 3, result: (s = 13, a = :rig

e: 1, t: 21, agent 1, result: (s = 10, a = :left, r = -1, sp = 9, t = 21)
e: 1, t: 20, agent 2, result: (s = 9, a = :right, r = -1, sp = 10, t = 20)
e: 1, t: 19, agent 3, result: (s = 18, a = :left, r = -1, sp = 17, t = 19)
e: 1, t: 18, agent 4, result: (s = 15, a = :left, r = -1, sp = 14, t = 18)
e: 1, t: 17, agent 5, result: (s = 12, a = :right, r = -1, sp = 13, t = 17)
e: 1, t: 16, agent 6, result: (s = 15, a = :right, r = -1, sp = 16, t = 16)
e: 1, t: 15, agent 7, result: (s = 12, a = :right, r = -1, sp = 13, t = 15)
e: 1, t: 14, agent 8, result: (s = 13, a = :left, r = -1, sp = 12, t = 14)
e: 1, t: 13, agent 9, result: (s = 14, a = :left, r = -1, sp = 13, t = 13)
e: 1, t: 12, agent 10, result: (s = 13, a = :left, r = -1, sp = 12, t = 12)
e: 1, t: 22, agent 1, result: (s = 9, a = :right, r = -1, sp = 10, t = 22)
e: 1, t: 21, agent 2, result: (s = 10, a = :right, r = -1, sp = 11, t = 21)
e: 1, t: 20, agent 3, result: (s = 17, a = :left, r = -1, sp = 16, t = 20)
e: 1, t: 19, agent 4,

e: 1, t: 27, agent 9, result: (s = 16, a = :right, r = -1, sp = 17, t = 27)
e: 1, t: 26, agent 10, result: (s = 11, a = :left, r = -1, sp = 10, t = 26)
agent 1 is done
agent 2 is done
agent 3 is done
agent 4 is done
agent 5 is done
agent 6 is done
e: 1, t: 30, agent 7, result: (s = 15, a = :left, r = -1, sp = 14, t = 30)
e: 1, t: 29, agent 8, result: (s = 16, a = :left, r = -1, sp = 15, t = 29)
e: 1, t: 28, agent 9, result: (s = 17, a = :left, r = -1, sp = 16, t = 28)
e: 1, t: 27, agent 10, result: (s = 10, a = :left, r = -1, sp = 9, t = 27)
agent 1 is done
agent 2 is done
agent 3 is done
agent 4 is done
agent 5 is done
agent 6 is done
agent 7 is done
e: 1, t: 30, agent 8, result: (s = 15, a = :left, r = -1, sp = 14, t = 30)
e: 1, t: 29, agent 9, result: (s = 16, a = :left, r = -1, sp = 15, t = 29)
e: 1, t: 28, agent 10, result: (s = 9, a = :left, r = -1, sp = 8, t = 28)
agent 1 is done
agent 2 is done
agent 3 is done
agent 4 is done
agent 5 is done
agent 6 is done
agent 7 is done
agen

e: 1, t: 11, agent 7, result: (s = 16, a = :right, r = -1, sp = 17, t = 11)
e: 1, t: 10, agent 8, result: (s = 11, a = :right, r = -1, sp = 12, t = 10)
e: 1, t: 9, agent 9, result: (s = 12, a = :left, r = -1, sp = 11, t = 9)
e: 1, t: 8, agent 10, result: (s = 5, a = :right, r = -1, sp = 6, t = 8)
e: 1, t: 18, agent 1, result: (s = 11, a = :left, r = -1, sp = 10, t = 18)
e: 1, t: 17, agent 2, result: (s = 10, a = :right, r = -1, sp = 11, t = 17)
e: 1, t: 16, agent 3, result: (s = 11, a = :left, r = -1, sp = 10, t = 16)
e: 1, t: 15, agent 4, result: (s = 14, a = :right, r = -1, sp = 15, t = 15)
e: 1, t: 14, agent 5, result: (s = 17, a = :right, r = -1, sp = 18, t = 14)
e: 1, t: 13, agent 6, result: (s = 10, a = :left, r = -1, sp = 9, t = 13)
e: 1, t: 12, agent 7, result: (s = 17, a = :left, r = -1, sp = 16, t = 12)
e: 1, t: 11, agent 8, result: (s = 12, a = :left, r = -1, sp = 11, t = 11)
e: 1, t: 10, agent 9, result: (s = 11, a = :right, r = -1, sp = 12, t = 10)
e: 1, t: 9, agent 10, re

e: 1, t: 9, agent 2, result: (s = 14, a = :left, r = -1, sp = 13, t = 9)
e: 1, t: 8, agent 3, result: (s = 11, a = :left, r = -1, sp = 10, t = 8)
e: 1, t: 7, agent 4, result: (s = 14, a = :left, r = -1, sp = 13, t = 7)
e: 1, t: 6, agent 5, result: (s = 13, a = :left, r = -1, sp = 12, t = 6)
e: 1, t: 5, agent 6, result: (s = 10, a = :left, r = -1, sp = 9, t = 5)
e: 1, t: 4, agent 7, result: (s = 13, a = :right, r = -1, sp = 14, t = 4)
e: 1, t: 3, agent 8, result: (s = 12, a = :right, r = -1, sp = 13, t = 3)
e: 1, t: 2, agent 9, result: (s = 11, a = :right, r = -1, sp = 12, t = 2)
e: 1, t: 1, agent 10, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 1, t: 11, agent 1, result: (s = 14, a = :left, r = -1, sp = 13, t = 11)
e: 1, t: 10, agent 2, result: (s = 13, a = :right, r = -1, sp = 14, t = 10)
e: 1, t: 9, agent 3, result: (s = 10, a = :left, r = -1, sp = 9, t = 9)
e: 1, t: 8, agent 4, result: (s = 13, a = :left, r = -1, sp = 12, t = 8)
e: 1, t: 7, agent 5, result: (s = 12, a = :

e: 1, t: 1, agent 7, result: (s = 12, a = :left, r = -1, sp = 11, t = 1)
e: 1, t: 8, agent 1, result: (s = 9, a = :right, r = -1, sp = 10, t = 8)
e: 1, t: 7, agent 2, result: (s = 16, a = :left, r = -1, sp = 15, t = 7)
e: 1, t: 6, agent 3, result: (s = 13, a = :right, r = -1, sp = 14, t = 6)
e: 1, t: 5, agent 4, result: (s = 12, a = :left, r = -1, sp = 11, t = 5)
e: 1, t: 4, agent 5, result: (s = 11, a = :right, r = -1, sp = 12, t = 4)
e: 1, t: 3, agent 6, result: (s = 14, a = :left, r = -1, sp = 13, t = 3)
e: 1, t: 2, agent 7, result: (s = 11, a = :left, r = -1, sp = 10, t = 2)
e: 1, t: 1, agent 8, result: (s = 12, a = :left, r = -1, sp = 11, t = 1)
e: 1, t: 9, agent 1, result: (s = 10, a = :right, r = -1, sp = 11, t = 9)
e: 1, t: 8, agent 2, result: (s = 15, a = :left, r = -1, sp = 14, t = 8)
e: 1, t: 7, agent 3, result: (s = 14, a = :right, r = -1, sp = 15, t = 7)
e: 1, t: 6, agent 4, result: (s = 11, a = :right, r = -1, sp = 12, t = 6)
e: 1, t: 5, agent 5, result: (s = 12, a = :rig

e: 1, t: 22, agent 1, result: (s = 17, a = :left, r = -1, sp = 16, t = 22)
e: 1, t: 21, agent 2, result: (s = 14, a = :right, r = -1, sp = 15, t = 21)
e: 1, t: 20, agent 3, result: (s = 13, a = :right, r = -1, sp = 14, t = 20)
e: 1, t: 19, agent 4, result: (s = 18, a = :right, r = -1, sp = 19, t = 19)
e: 1, t: 18, agent 5, result: (s = 11, a = :right, r = -1, sp = 12, t = 18)
e: 1, t: 17, agent 6, result: (s = 10, a = :left, r = -1, sp = 9, t = 17)
e: 1, t: 16, agent 7, result: (s = 11, a = :left, r = -1, sp = 10, t = 16)
e: 1, t: 15, agent 8, result: (s = 14, a = :right, r = -1, sp = 15, t = 15)
e: 1, t: 14, agent 9, result: (s = 15, a = :left, r = -1, sp = 14, t = 14)
e: 1, t: 13, agent 10, result: (s = 18, a = :left, r = -1, sp = 17, t = 13)
e: 1, t: 23, agent 1, result: (s = 16, a = :left, r = -1, sp = 15, t = 23)
e: 1, t: 22, agent 2, result: (s = 15, a = :left, r = -1, sp = 14, t = 22)
e: 1, t: 21, agent 3, result: (s = 14, a = :left, r = -1, sp = 13, t = 21)
e: 1, t: 20, agent 4

e: 1, t: 2, agent 3, result: (s = 13, a = :left, r = -1, sp = 12, t = 2)
e: 1, t: 1, agent 4, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 1, t: 5, agent 1, result: (s = 10, a = :right, r = -1, sp = 11, t = 5)
e: 1, t: 4, agent 2, result: (s = 13, a = :left, r = -1, sp = 12, t = 4)
e: 1, t: 3, agent 3, result: (s = 12, a = :right, r = -1, sp = 13, t = 3)
e: 1, t: 2, agent 4, result: (s = 13, a = :left, r = -1, sp = 12, t = 2)
e: 1, t: 1, agent 5, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 1, t: 6, agent 1, result: (s = 11, a = :right, r = -1, sp = 12, t = 6)
e: 1, t: 5, agent 2, result: (s = 12, a = :left, r = -1, sp = 11, t = 5)
e: 1, t: 4, agent 3, result: (s = 13, a = :left, r = -1, sp = 12, t = 4)
e: 1, t: 3, agent 4, result: (s = 12, a = :left, r = -1, sp = 11, t = 3)
e: 1, t: 2, agent 5, result: (s = 13, a = :left, r = -1, sp = 12, t = 2)
e: 1, t: 1, agent 6, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 1, t: 7, agent 1, result: (s = 12, a = :ri

e: 1, t: 18, agent 3, result: (s = 9, a = :right, r = -1, sp = 10, t = 18)
e: 1, t: 17, agent 4, result: (s = 10, a = :left, r = -1, sp = 9, t = 17)
e: 1, t: 16, agent 5, result: (s = 11, a = :left, r = -1, sp = 10, t = 16)
e: 1, t: 15, agent 6, result: (s = 6, a = :right, r = -1, sp = 7, t = 15)
e: 1, t: 14, agent 7, result: (s = 9, a = :right, r = -1, sp = 10, t = 14)
e: 1, t: 13, agent 8, result: (s = 12, a = :right, r = -1, sp = 13, t = 13)
e: 1, t: 12, agent 9, result: (s = 17, a = :left, r = -1, sp = 16, t = 12)
e: 1, t: 11, agent 10, result: (s = 16, a = :left, r = -1, sp = 15, t = 11)
e: 1, t: 21, agent 1, result: (s = 14, a = :left, r = -1, sp = 13, t = 21)
e: 1, t: 20, agent 2, result: (s = 15, a = :right, r = -1, sp = 16, t = 20)
e: 1, t: 19, agent 3, result: (s = 10, a = :left, r = -1, sp = 9, t = 19)
e: 1, t: 18, agent 4, result: (s = 9, a = :right, r = -1, sp = 10, t = 18)
e: 1, t: 17, agent 5, result: (s = 10, a = :right, r = -1, sp = 11, t = 17)
e: 1, t: 16, agent 6, re

e: 1, t: 26, agent 9, result: (s = 15, a = :right, r = -1, sp = 16, t = 26)
e: 1, t: 25, agent 10, result: (s = 6, a = :left, r = -1, sp = 5, t = 25)
agent 1 is done
agent 2 is done
agent 3 is done
agent 4 is done
agent 5 is done
e: 1, t: 30, agent 6, result: (s = 7, a = :left, r = -1, sp = 6, t = 30)
e: 1, t: 29, agent 7, result: (s = 10, a = :right, r = -1, sp = 11, t = 29)
e: 1, t: 28, agent 8, result: (s = 5, a = :left, r = -1, sp = 4, t = 28)
e: 1, t: 27, agent 9, result: (s = 16, a = :left, r = -1, sp = 15, t = 27)
e: 1, t: 26, agent 10, result: (s = 5, a = :left, r = -1, sp = 4, t = 26)
agent 1 is done
agent 2 is done
agent 3 is done
agent 4 is done
agent 5 is done
agent 6 is done
e: 1, t: 30, agent 7, result: (s = 11, a = :right, r = -1, sp = 12, t = 30)
e: 1, t: 29, agent 8, result: (s = 4, a = :right, r = -1, sp = 5, t = 29)
e: 1, t: 28, agent 9, result: (s = 15, a = :right, r = -1, sp = 16, t = 28)
e: 1, t: 27, agent 10, result: (s = 4, a = :right, r = -1, sp = 5, t = 27)
ag

e: 1, t: 7, agent 10, result: (s = 6, a = :right, r = -1, sp = 7, t = 7)
e: 1, t: 17, agent 1, result: (s = 14, a = :left, r = -1, sp = 13, t = 17)
e: 1, t: 16, agent 2, result: (s = 11, a = :left, r = -1, sp = 10, t = 16)
e: 1, t: 15, agent 3, result: (s = 12, a = :right, r = -1, sp = 13, t = 15)
e: 1, t: 14, agent 4, result: (s = 7, a = :right, r = -1, sp = 8, t = 14)
e: 1, t: 13, agent 5, result: (s = 14, a = :left, r = -1, sp = 13, t = 13)
e: 1, t: 12, agent 6, result: (s = 15, a = :right, r = -1, sp = 16, t = 12)
e: 1, t: 11, agent 7, result: (s = 6, a = :right, r = -1, sp = 7, t = 11)
e: 1, t: 10, agent 8, result: (s = 11, a = :right, r = -1, sp = 12, t = 10)
e: 1, t: 9, agent 9, result: (s = 14, a = :right, r = -1, sp = 15, t = 9)
e: 1, t: 8, agent 10, result: (s = 7, a = :left, r = -1, sp = 6, t = 8)
e: 1, t: 18, agent 1, result: (s = 13, a = :right, r = -1, sp = 14, t = 18)
e: 1, t: 17, agent 2, result: (s = 10, a = :right, r = -1, sp = 11, t = 17)
e: 1, t: 16, agent 3, result

e: 1, t: 24, agent 7, result: (s = 5, a = :left, r = -1, sp = 4, t = 24)
e: 1, t: 23, agent 8, result: (s = 16, a = :left, r = -1, sp = 15, t = 23)
e: 1, t: 22, agent 9, result: (s = 15, a = :right, r = -1, sp = 16, t = 22)
e: 1, t: 21, agent 10, result: (s = 12, a = :left, r = -1, sp = 11, t = 21)
agent 1 is done
e: 1, t: 30, agent 2, result: (s = 11, a = :right, r = -1, sp = 12, t = 30)
e: 1, t: 29, agent 3, result: (s = 8, a = :left, r = -1, sp = 7, t = 29)
e: 1, t: 28, agent 4, result: (s = 9, a = :right, r = -1, sp = 10, t = 28)
e: 1, t: 27, agent 5, result: (s = 16, a = :left, r = -1, sp = 15, t = 27)
e: 1, t: 26, agent 6, result: (s = 19, a = :left, r = -1, sp = 18, t = 26)
e: 1, t: 25, agent 7, result: (s = 4, a = :right, r = -1, sp = 5, t = 25)
e: 1, t: 24, agent 8, result: (s = 15, a = :right, r = -1, sp = 16, t = 24)
e: 1, t: 23, agent 9, result: (s = 16, a = :right, r = -1, sp = 17, t = 23)
e: 1, t: 22, agent 10, result: (s = 11, a = :right, r = -1, sp = 12, t = 22)
agent 1

e: 1, t: 10, agent 6, result: (s = 13, a = :left, r = -1, sp = 12, t = 10)
e: 1, t: 9, agent 7, result: (s = 14, a = :left, r = -1, sp = 13, t = 9)
e: 1, t: 8, agent 8, result: (s = 13, a = :left, r = -1, sp = 12, t = 8)
e: 1, t: 7, agent 9, result: (s = 10, a = :left, r = -1, sp = 9, t = 7)
e: 1, t: 6, agent 10, result: (s = 11, a = :right, r = -1, sp = 12, t = 6)
e: 1, t: 16, agent 1, result: (s = 9, a = :right, r = -1, sp = 10, t = 16)
e: 1, t: 15, agent 2, result: (s = 6, a = :left, r = -1, sp = 5, t = 15)
e: 1, t: 14, agent 3, result: (s = 9, a = :right, r = -1, sp = 10, t = 14)
e: 1, t: 13, agent 4, result: (s = 12, a = :left, r = -1, sp = 11, t = 13)
e: 1, t: 12, agent 5, result: (s = 17, a = :left, r = -1, sp = 16, t = 12)
e: 1, t: 11, agent 6, result: (s = 12, a = :left, r = -1, sp = 11, t = 11)
e: 1, t: 10, agent 7, result: (s = 13, a = :right, r = -1, sp = 14, t = 10)
e: 1, t: 9, agent 8, result: (s = 12, a = :left, r = -1, sp = 11, t = 9)
e: 1, t: 8, agent 9, result: (s = 9

e: 1, t: 5, agent 4, result: (s = 10, a = :left, r = -1, sp = 9, t = 5)
e: 1, t: 4, agent 5, result: (s = 11, a = :right, r = -1, sp = 12, t = 4)
e: 1, t: 3, agent 6, result: (s = 12, a = :left, r = -1, sp = 11, t = 3)
e: 1, t: 2, agent 7, result: (s = 11, a = :left, r = -1, sp = 10, t = 2)
e: 1, t: 1, agent 8, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 1, t: 9, agent 1, result: (s = 10, a = :left, r = -1, sp = 9, t = 9)
e: 1, t: 8, agent 2, result: (s = 11, a = :left, r = -1, sp = 10, t = 8)
e: 1, t: 7, agent 3, result: (s = 10, a = :right, r = -1, sp = 11, t = 7)
e: 1, t: 6, agent 4, result: (s = 9, a = :right, r = -1, sp = 10, t = 6)
e: 1, t: 5, agent 5, result: (s = 12, a = :left, r = -1, sp = 11, t = 5)
e: 1, t: 4, agent 6, result: (s = 11, a = :left, r = -1, sp = 10, t = 4)
e: 1, t: 3, agent 7, result: (s = 10, a = :left, r = -1, sp = 9, t = 3)
e: 1, t: 2, agent 8, result: (s = 13, a = :right, r = -1, sp = 14, t = 2)
e: 1, t: 1, agent 9, result: (s = 12, a = :left, r

e: 1, t: 17, agent 6, result: (s = 6, a = :right, r = -1, sp = 7, t = 17)
e: 1, t: 16, agent 7, result: (s = 7, a = :right, r = -1, sp = 8, t = 16)
e: 1, t: 15, agent 8, result: (s = 18, a = :left, r = -1, sp = 17, t = 15)
e: 1, t: 14, agent 9, result: (s = 13, a = :left, r = -1, sp = 12, t = 14)
e: 1, t: 13, agent 10, result: (s = 8, a = :right, r = -1, sp = 9, t = 13)
e: 1, t: 23, agent 1, result: (s = 8, a = :left, r = -1, sp = 7, t = 23)
e: 1, t: 22, agent 2, result: (s = 7, a = :right, r = -1, sp = 8, t = 22)
e: 1, t: 21, agent 3, result: (s = 14, a = :right, r = -1, sp = 15, t = 21)
e: 1, t: 20, agent 4, result: (s = 13, a = :right, r = -1, sp = 14, t = 20)
e: 1, t: 19, agent 5, result: (s = 6, a = :left, r = -1, sp = 5, t = 19)
e: 1, t: 18, agent 6, result: (s = 7, a = :right, r = -1, sp = 8, t = 18)
e: 1, t: 17, agent 7, result: (s = 8, a = :left, r = -1, sp = 7, t = 17)
e: 1, t: 16, agent 8, result: (s = 17, a = :right, r = -1, sp = 18, t = 16)
e: 1, t: 15, agent 9, result: (s

e: 1, t: 13, agent 2, result: (s = 14, a = :right, r = -1, sp = 15, t = 13)
e: 1, t: 12, agent 3, result: (s = 13, a = :left, r = -1, sp = 12, t = 12)
e: 1, t: 11, agent 4, result: (s = 8, a = :left, r = -1, sp = 7, t = 11)
e: 1, t: 10, agent 5, result: (s = 17, a = :left, r = -1, sp = 16, t = 10)
e: 1, t: 9, agent 6, result: (s = 6, a = :right, r = -1, sp = 7, t = 9)
e: 1, t: 8, agent 7, result: (s = 11, a = :left, r = -1, sp = 10, t = 8)
e: 1, t: 7, agent 8, result: (s = 16, a = :right, r = -1, sp = 17, t = 7)
e: 1, t: 6, agent 9, result: (s = 9, a = :left, r = -1, sp = 8, t = 6)
e: 1, t: 5, agent 10, result: (s = 10, a = :right, r = -1, sp = 11, t = 5)
e: 1, t: 15, agent 1, result: (s = 14, a = :left, r = -1, sp = 13, t = 15)
e: 1, t: 14, agent 2, result: (s = 15, a = :right, r = -1, sp = 16, t = 14)
e: 1, t: 13, agent 3, result: (s = 12, a = :left, r = -1, sp = 11, t = 13)
e: 1, t: 12, agent 4, result: (s = 7, a = :right, r = -1, sp = 8, t = 12)
e: 1, t: 11, agent 5, result: (s = 1

e: 1, t: 19, agent 9, result: (s = 8, a = :right, r = -1, sp = 9, t = 19)
e: 1, t: 18, agent 10, result: (s = 13, a = :right, r = -1, sp = 14, t = 18)
e: 1, t: 28, agent 1, result: (s = 11, a = :left, r = -1, sp = 10, t = 28)
e: 1, t: 27, agent 2, result: (s = 14, a = :right, r = -1, sp = 15, t = 27)
e: 1, t: 26, agent 3, result: (s = 15, a = :right, r = -1, sp = 16, t = 26)
e: 1, t: 25, agent 4, result: (s = 10, a = :right, r = -1, sp = 11, t = 25)
e: 1, t: 24, agent 5, result: (s = 15, a = :right, r = -1, sp = 16, t = 24)
e: 1, t: 23, agent 6, result: (s = 4, a = :left, r = -1, sp = 3, t = 23)
e: 1, t: 22, agent 7, result: (s = 15, a = :right, r = -1, sp = 16, t = 22)
e: 1, t: 21, agent 8, result: (s = 20, a = :right, r = -1, sp = 21, t = 21)
e: 1, t: 20, agent 9, result: (s = 9, a = :left, r = -1, sp = 8, t = 20)
e: 1, t: 19, agent 10, result: (s = 14, a = :left, r = -1, sp = 13, t = 19)
e: 1, t: 29, agent 1, result: (s = 10, a = :left, r = -1, sp = 9, t = 29)
e: 1, t: 28, agent 2, 

e: 1, t: 14, agent 4, result: (s = 11, a = :left, r = -1, sp = 10, t = 14)
e: 1, t: 13, agent 5, result: (s = 14, a = :right, r = -1, sp = 15, t = 13)
e: 1, t: 12, agent 6, result: (s = 15, a = :right, r = -1, sp = 16, t = 12)
e: 1, t: 11, agent 7, result: (s = 18, a = :right, r = -1, sp = 19, t = 11)
e: 1, t: 10, agent 8, result: (s = 17, a = :left, r = -1, sp = 16, t = 10)
e: 1, t: 9, agent 9, result: (s = 12, a = :right, r = -1, sp = 13, t = 9)
e: 1, t: 8, agent 10, result: (s = 17, a = :right, r = -1, sp = 18, t = 8)
e: 1, t: 18, agent 1, result: (s = 15, a = :right, r = -1, sp = 16, t = 18)
e: 1, t: 17, agent 2, result: (s = 14, a = :right, r = -1, sp = 15, t = 17)
e: 1, t: 16, agent 3, result: (s = 17, a = :left, r = -1, sp = 16, t = 16)
e: 1, t: 15, agent 4, result: (s = 10, a = :right, r = -1, sp = 11, t = 15)
e: 1, t: 14, agent 5, result: (s = 15, a = :left, r = -1, sp = 14, t = 14)
e: 1, t: 13, agent 6, result: (s = 16, a = :right, r = -1, sp = 17, t = 13)
e: 1, t: 12, agent 

e: 1, t: 3, agent 2, result: (s = 12, a = :left, r = -1, sp = 11, t = 3)
e: 1, t: 2, agent 3, result: (s = 13, a = :right, r = -1, sp = 14, t = 2)
e: 1, t: 1, agent 4, result: (s = 12, a = :left, r = -1, sp = 11, t = 1)
e: 1, t: 5, agent 1, result: (s = 10, a = :right, r = -1, sp = 11, t = 5)
e: 1, t: 4, agent 2, result: (s = 11, a = :left, r = -1, sp = 10, t = 4)
e: 1, t: 3, agent 3, result: (s = 14, a = :right, r = -1, sp = 15, t = 3)
e: 1, t: 2, agent 4, result: (s = 11, a = :left, r = -1, sp = 10, t = 2)
e: 1, t: 1, agent 5, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 1, t: 6, agent 1, result: (s = 11, a = :left, r = -1, sp = 10, t = 6)
e: 1, t: 5, agent 2, result: (s = 10, a = :right, r = -1, sp = 11, t = 5)
e: 1, t: 4, agent 3, result: (s = 15, a = :left, r = -1, sp = 14, t = 4)
e: 1, t: 3, agent 4, result: (s = 10, a = :right, r = -1, sp = 11, t = 3)
e: 1, t: 2, agent 5, result: (s = 13, a = :left, r = -1, sp = 12, t = 2)
e: 1, t: 1, agent 6, result: (s = 12, a = :ri

e: 1, t: 19, agent 2, result: (s = 18, a = :right, r = -1, sp = 19, t = 19)
e: 1, t: 18, agent 3, result: (s = 9, a = :left, r = -1, sp = 8, t = 18)
e: 1, t: 17, agent 4, result: (s = 14, a = :right, r = -1, sp = 15, t = 17)
e: 1, t: 16, agent 5, result: (s = 13, a = :right, r = -1, sp = 14, t = 16)
e: 1, t: 15, agent 6, result: (s = 16, a = :right, r = -1, sp = 17, t = 15)
e: 1, t: 14, agent 7, result: (s = 13, a = :left, r = -1, sp = 12, t = 14)
e: 1, t: 13, agent 8, result: (s = 18, a = :left, r = -1, sp = 17, t = 13)
e: 1, t: 12, agent 9, result: (s = 15, a = :left, r = -1, sp = 14, t = 12)
e: 1, t: 11, agent 10, result: (s = 16, a = :right, r = -1, sp = 17, t = 11)
e: 1, t: 21, agent 1, result: (s = 4, a = :left, r = -1, sp = 3, t = 21)
e: 1, t: 20, agent 2, result: (s = 19, a = :left, r = -1, sp = 18, t = 20)
e: 1, t: 19, agent 3, result: (s = 8, a = :right, r = -1, sp = 9, t = 19)
e: 1, t: 18, agent 4, result: (s = 15, a = :right, r = -1, sp = 16, t = 18)
e: 1, t: 17, agent 5, r

e: 1, t: 12, agent 5, result: (s = 5, a = :right, r = -1, sp = 6, t = 12)
e: 1, t: 11, agent 6, result: (s = 12, a = :left, r = -1, sp = 11, t = 11)
e: 1, t: 10, agent 7, result: (s = 13, a = :left, r = -1, sp = 12, t = 10)
e: 1, t: 9, agent 8, result: (s = 12, a = :left, r = -1, sp = 11, t = 9)
e: 1, t: 8, agent 9, result: (s = 17, a = :left, r = -1, sp = 16, t = 8)
e: 1, t: 7, agent 10, result: (s = 8, a = :right, r = -1, sp = 9, t = 7)
e: 1, t: 17, agent 1, result: (s = 6, a = :left, r = -1, sp = 5, t = 17)
e: 1, t: 16, agent 2, result: (s = 11, a = :left, r = -1, sp = 10, t = 16)
e: 1, t: 15, agent 3, result: (s = 16, a = :left, r = -1, sp = 15, t = 15)
e: 1, t: 14, agent 4, result: (s = 7, a = :right, r = -1, sp = 8, t = 14)
e: 1, t: 13, agent 5, result: (s = 6, a = :left, r = -1, sp = 5, t = 13)
e: 1, t: 12, agent 6, result: (s = 11, a = :right, r = -1, sp = 12, t = 12)
e: 1, t: 11, agent 7, result: (s = 12, a = :right, r = -1, sp = 13, t = 11)
e: 1, t: 10, agent 8, result: (s = 

e: 1, t: 11, agent 4, result: (s = 10, a = :right, r = -1, sp = 11, t = 11)
e: 1, t: 10, agent 5, result: (s = 11, a = :right, r = -1, sp = 12, t = 10)
e: 1, t: 9, agent 6, result: (s = 8, a = :right, r = -1, sp = 9, t = 9)
e: 1, t: 8, agent 7, result: (s = 7, a = :right, r = -1, sp = 8, t = 8)
e: 1, t: 7, agent 8, result: (s = 14, a = :right, r = -1, sp = 15, t = 7)
e: 1, t: 6, agent 9, result: (s = 9, a = :left, r = -1, sp = 8, t = 6)
e: 1, t: 5, agent 10, result: (s = 12, a = :right, r = -1, sp = 13, t = 5)
e: 1, t: 15, agent 1, result: (s = 12, a = :right, r = -1, sp = 13, t = 15)
e: 1, t: 14, agent 2, result: (s = 9, a = :right, r = -1, sp = 10, t = 14)
e: 1, t: 13, agent 3, result: (s = 10, a = :left, r = -1, sp = 9, t = 13)
e: 1, t: 12, agent 4, result: (s = 11, a = :left, r = -1, sp = 10, t = 12)
e: 1, t: 11, agent 5, result: (s = 12, a = :left, r = -1, sp = 11, t = 11)
e: 1, t: 10, agent 6, result: (s = 9, a = :left, r = -1, sp = 8, t = 10)
e: 1, t: 9, agent 7, result: (s = 8,

e: 1, t: 28, agent 1, result: (s = 13, a = :right, r = -1, sp = 14, t = 28)
e: 1, t: 27, agent 2, result: (s = 10, a = :left, r = -1, sp = 9, t = 27)
e: 1, t: 26, agent 3, result: (s = 5, a = :right, r = -1, sp = 6, t = 26)
e: 1, t: 25, agent 4, result: (s = 12, a = :left, r = -1, sp = 11, t = 25)
e: 1, t: 24, agent 5, result: (s = 11, a = :right, r = -1, sp = 12, t = 24)
e: 1, t: 23, agent 6, result: (s = 8, a = :right, r = -1, sp = 9, t = 23)
e: 1, t: 22, agent 7, result: (s = 13, a = :left, r = -1, sp = 12, t = 22)
e: 1, t: 21, agent 8, result: (s = 16, a = :right, r = -1, sp = 17, t = 21)
e: 1, t: 20, agent 9, result: (s = 5, a = :right, r = -1, sp = 6, t = 20)
e: 1, t: 19, agent 10, result: (s = 12, a = :right, r = -1, sp = 13, t = 19)
e: 1, t: 29, agent 1, result: (s = 14, a = :left, r = -1, sp = 13, t = 29)
e: 1, t: 28, agent 2, result: (s = 9, a = :left, r = -1, sp = 8, t = 28)
e: 1, t: 27, agent 3, result: (s = 6, a = :right, r = -1, sp = 7, t = 27)
e: 1, t: 26, agent 4, resul

e: 1, t: 1, agent 10, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 1, t: 11, agent 1, result: (s = 10, a = :right, r = -1, sp = 11, t = 11)
e: 1, t: 10, agent 2, result: (s = 9, a = :right, r = -1, sp = 10, t = 10)
e: 1, t: 9, agent 3, result: (s = 8, a = :right, r = -1, sp = 9, t = 9)
e: 1, t: 8, agent 4, result: (s = 11, a = :left, r = -1, sp = 10, t = 8)
e: 1, t: 7, agent 5, result: (s = 10, a = :left, r = -1, sp = 9, t = 7)
e: 1, t: 6, agent 6, result: (s = 11, a = :right, r = -1, sp = 12, t = 6)
e: 1, t: 5, agent 7, result: (s = 12, a = :right, r = -1, sp = 13, t = 5)
e: 1, t: 4, agent 8, result: (s = 13, a = :right, r = -1, sp = 14, t = 4)
e: 1, t: 3, agent 9, result: (s = 14, a = :right, r = -1, sp = 15, t = 3)
e: 1, t: 2, agent 10, result: (s = 13, a = :left, r = -1, sp = 12, t = 2)
e: 1, t: 12, agent 1, result: (s = 11, a = :left, r = -1, sp = 10, t = 12)
e: 1, t: 11, agent 2, result: (s = 10, a = :right, r = -1, sp = 11, t = 11)
e: 1, t: 10, agent 3, result: (s = 9

e: 1, t: 7, agent 5, result: (s = 10, a = :right, r = -1, sp = 11, t = 7)
e: 1, t: 6, agent 6, result: (s = 13, a = :left, r = -1, sp = 12, t = 6)
e: 1, t: 5, agent 7, result: (s = 16, a = :right, r = -1, sp = 17, t = 5)
e: 1, t: 4, agent 8, result: (s = 13, a = :right, r = -1, sp = 14, t = 4)
e: 1, t: 3, agent 9, result: (s = 12, a = :left, r = -1, sp = 11, t = 3)
e: 1, t: 2, agent 10, result: (s = 13, a = :right, r = -1, sp = 14, t = 2)
e: 1, t: 12, agent 1, result: (s = 13, a = :left, r = -1, sp = 12, t = 12)
e: 1, t: 11, agent 2, result: (s = 8, a = :left, r = -1, sp = 7, t = 11)
e: 1, t: 10, agent 3, result: (s = 15, a = :right, r = -1, sp = 16, t = 10)
e: 1, t: 9, agent 4, result: (s = 10, a = :left, r = -1, sp = 9, t = 9)
e: 1, t: 8, agent 5, result: (s = 11, a = :right, r = -1, sp = 12, t = 8)
e: 1, t: 7, agent 6, result: (s = 12, a = :left, r = -1, sp = 11, t = 7)
e: 1, t: 6, agent 7, result: (s = 17, a = :right, r = -1, sp = 18, t = 6)
e: 1, t: 5, agent 8, result: (s = 14, a 

e: 1, t: 24, agent 2, result: (s = 11, a = :left, r = -1, sp = 10, t = 24)
e: 1, t: 23, agent 3, result: (s = 18, a = :right, r = -1, sp = 19, t = 23)
e: 1, t: 22, agent 4, result: (s = 15, a = :right, r = -1, sp = 16, t = 22)
e: 1, t: 21, agent 5, result: (s = 6, a = :left, r = -1, sp = 5, t = 21)
e: 1, t: 20, agent 6, result: (s = 7, a = :left, r = -1, sp = 6, t = 20)
e: 1, t: 19, agent 7, result: (s = 18, a = :right, r = -1, sp = 19, t = 19)
e: 1, t: 18, agent 8, result: (s = 13, a = :right, r = -1, sp = 14, t = 18)
e: 1, t: 17, agent 9, result: (s = 12, a = :left, r = -1, sp = 11, t = 17)
e: 1, t: 16, agent 10, result: (s = 19, a = :left, r = -1, sp = 18, t = 16)
e: 1, t: 26, agent 1, result: (s = 13, a = :left, r = -1, sp = 12, t = 26)
e: 1, t: 25, agent 2, result: (s = 10, a = :right, r = -1, sp = 11, t = 25)
e: 1, t: 24, agent 3, result: (s = 19, a = :left, r = -1, sp = 18, t = 24)
e: 1, t: 23, agent 4, result: (s = 16, a = :left, r = -1, sp = 15, t = 23)
e: 1, t: 22, agent 5, r

e: 1, t: 5, agent 10, result: (s = 12, a = :right, r = -1, sp = 13, t = 5)
e: 1, t: 15, agent 1, result: (s = 12, a = :left, r = -1, sp = 11, t = 15)
e: 1, t: 14, agent 2, result: (s = 7, a = :left, r = -1, sp = 6, t = 14)
e: 1, t: 13, agent 3, result: (s = 16, a = :right, r = -1, sp = 17, t = 13)
e: 1, t: 12, agent 4, result: (s = 11, a = :right, r = -1, sp = 12, t = 12)
e: 1, t: 11, agent 5, result: (s = 10, a = :right, r = -1, sp = 11, t = 11)
e: 1, t: 10, agent 6, result: (s = 15, a = :left, r = -1, sp = 14, t = 10)
e: 1, t: 9, agent 7, result: (s = 12, a = :left, r = -1, sp = 11, t = 9)
e: 1, t: 8, agent 8, result: (s = 13, a = :right, r = -1, sp = 14, t = 8)
e: 1, t: 7, agent 9, result: (s = 14, a = :left, r = -1, sp = 13, t = 7)
e: 1, t: 6, agent 10, result: (s = 13, a = :left, r = -1, sp = 12, t = 6)
e: 1, t: 16, agent 1, result: (s = 11, a = :left, r = -1, sp = 10, t = 16)
e: 1, t: 15, agent 2, result: (s = 6, a = :right, r = -1, sp = 7, t = 15)
e: 1, t: 14, agent 3, result: (

e: 1, t: 22, agent 7, result: (s = 15, a = :right, r = -1, sp = 16, t = 22)
e: 1, t: 21, agent 8, result: (s = 12, a = :right, r = -1, sp = 13, t = 21)
e: 1, t: 20, agent 9, result: (s = 11, a = :left, r = -1, sp = 10, t = 20)
e: 1, t: 19, agent 10, result: (s = 4, a = :left, r = -1, sp = 3, t = 19)
e: 1, t: 29, agent 1, result: (s = 16, a = :left, r = -1, sp = 15, t = 29)
e: 1, t: 28, agent 2, result: (s = 13, a = :right, r = -1, sp = 14, t = 28)
e: 1, t: 27, agent 3, result: (s = 12, a = :left, r = -1, sp = 11, t = 27)
e: 1, t: 26, agent 4, result: (s = 7, a = :right, r = -1, sp = 8, t = 26)
e: 1, t: 25, agent 5, result: (s = 16, a = :right, r = -1, sp = 17, t = 25)
e: 1, t: 24, agent 6, result: (s = 9, a = :right, r = -1, sp = 10, t = 24)
e: 1, t: 23, agent 7, result: (s = 16, a = :right, r = -1, sp = 17, t = 23)
e: 1, t: 22, agent 8, result: (s = 13, a = :right, r = -1, sp = 14, t = 22)
e: 1, t: 21, agent 9, result: (s = 10, a = :left, r = -1, sp = 9, t = 21)
e: 1, t: 20, agent 10,

e: 1, t: 6, agent 7, result: (s = 13, a = :left, r = -1, sp = 12, t = 6)
e: 1, t: 5, agent 8, result: (s = 16, a = :left, r = -1, sp = 15, t = 5)
e: 1, t: 4, agent 9, result: (s = 11, a = :right, r = -1, sp = 12, t = 4)
e: 1, t: 3, agent 10, result: (s = 14, a = :left, r = -1, sp = 13, t = 3)
e: 1, t: 13, agent 1, result: (s = 20, a = :right, r = -1, sp = 21, t = 13)
e: 1, t: 12, agent 2, result: (s = 7, a = :left, r = -1, sp = 6, t = 12)
e: 1, t: 11, agent 3, result: (s = 8, a = :left, r = -1, sp = 7, t = 11)
e: 1, t: 10, agent 4, result: (s = 15, a = :right, r = -1, sp = 16, t = 10)
e: 1, t: 9, agent 5, result: (s = 10, a = :right, r = -1, sp = 11, t = 9)
e: 1, t: 8, agent 6, result: (s = 7, a = :right, r = -1, sp = 8, t = 8)
e: 1, t: 7, agent 7, result: (s = 12, a = :left, r = -1, sp = 11, t = 7)
e: 1, t: 6, agent 8, result: (s = 15, a = :right, r = -1, sp = 16, t = 6)
e: 1, t: 5, agent 9, result: (s = 12, a = :right, r = -1, sp = 13, t = 5)
e: 1, t: 4, agent 10, result: (s = 13, a 

e: 1, t: 8, agent 10, result: (s = 9, a = :left, r = -1, sp = 8, t = 8)
e: 1, t: 18, agent 1, result: (s = 13, a = :right, r = -1, sp = 14, t = 18)
e: 1, t: 17, agent 2, result: (s = 10, a = :right, r = -1, sp = 11, t = 17)
e: 1, t: 16, agent 3, result: (s = 19, a = :left, r = -1, sp = 18, t = 16)
e: 1, t: 15, agent 4, result: (s = 12, a = :right, r = -1, sp = 13, t = 15)
e: 1, t: 14, agent 5, result: (s = 9, a = :right, r = -1, sp = 10, t = 14)
e: 1, t: 13, agent 6, result: (s = 12, a = :right, r = -1, sp = 13, t = 13)
e: 1, t: 12, agent 7, result: (s = 15, a = :left, r = -1, sp = 14, t = 12)
e: 1, t: 11, agent 8, result: (s = 14, a = :left, r = -1, sp = 13, t = 11)
e: 1, t: 10, agent 9, result: (s = 9, a = :right, r = -1, sp = 10, t = 10)
e: 1, t: 9, agent 10, result: (s = 8, a = :left, r = -1, sp = 7, t = 9)
e: 1, t: 19, agent 1, result: (s = 14, a = :left, r = -1, sp = 13, t = 19)
e: 1, t: 18, agent 2, result: (s = 11, a = :left, r = -1, sp = 10, t = 18)
e: 1, t: 17, agent 3, resul

e: 1, t: 23, agent 10, result: (s = 10, a = :right, r = -1, sp = 11, t = 23)
agent 1 is done
agent 2 is done
agent 3 is done
e: 1, t: 30, agent 4, result: (s = 9, a = :left, r = -1, sp = 8, t = 30)
e: 1, t: 29, agent 5, result: (s = 2, a = :right, r = 10, sp = 1, t = 29)
setup agents
e: 1, t: 1, agent 1, result: (s = 12, a = :left, r = -1, sp = 11, t = 1)
e: 1, t: 2, agent 1, result: (s = 11, a = :left, r = -1, sp = 10, t = 2)
e: 1, t: 1, agent 2, result: (s = 12, a = :left, r = -1, sp = 11, t = 1)
e: 1, t: 3, agent 1, result: (s = 10, a = :left, r = -1, sp = 9, t = 3)
e: 1, t: 2, agent 2, result: (s = 11, a = :left, r = -1, sp = 10, t = 2)
e: 1, t: 1, agent 3, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 1, t: 4, agent 1, result: (s = 9, a = :left, r = -1, sp = 8, t = 4)
e: 1, t: 3, agent 2, result: (s = 10, a = :right, r = -1, sp = 11, t = 3)
e: 1, t: 2, agent 3, result: (s = 13, a = :right, r = -1, sp = 14, t = 2)
e: 1, t: 1, agent 4, result: (s = 12, a = :left, r = -1, s

e: 1, t: 7, agent 2, result: (s = 14, a = :right, r = -1, sp = 15, t = 7)
e: 1, t: 6, agent 3, result: (s = 11, a = :left, r = -1, sp = 10, t = 6)
e: 1, t: 5, agent 4, result: (s = 14, a = :left, r = -1, sp = 13, t = 5)
e: 1, t: 4, agent 5, result: (s = 9, a = :right, r = -1, sp = 10, t = 4)
e: 1, t: 3, agent 6, result: (s = 12, a = :right, r = -1, sp = 13, t = 3)
e: 1, t: 2, agent 7, result: (s = 13, a = :left, r = -1, sp = 12, t = 2)
e: 1, t: 1, agent 8, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 1, t: 9, agent 1, result: (s = 12, a = :right, r = -1, sp = 13, t = 9)
e: 1, t: 8, agent 2, result: (s = 15, a = :left, r = -1, sp = 14, t = 8)
e: 1, t: 7, agent 3, result: (s = 10, a = :right, r = -1, sp = 11, t = 7)
e: 1, t: 6, agent 4, result: (s = 13, a = :left, r = -1, sp = 12, t = 6)
e: 1, t: 5, agent 5, result: (s = 10, a = :right, r = -1, sp = 11, t = 5)
e: 1, t: 4, agent 6, result: (s = 13, a = :left, r = -1, sp = 12, t = 4)
e: 1, t: 3, agent 7, result: (s = 12, a = :le

e: 1, t: 4, agent 7, result: (s = 9, a = :right, r = -1, sp = 10, t = 4)
e: 1, t: 3, agent 8, result: (s = 12, a = :right, r = -1, sp = 13, t = 3)
e: 1, t: 2, agent 9, result: (s = 11, a = :left, r = -1, sp = 10, t = 2)
e: 1, t: 1, agent 10, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 1, t: 11, agent 1, result: (s = 14, a = :right, r = -1, sp = 15, t = 11)
e: 1, t: 10, agent 2, result: (s = 11, a = :left, r = -1, sp = 10, t = 10)
e: 1, t: 9, agent 3, result: (s = 12, a = :right, r = -1, sp = 13, t = 9)
e: 1, t: 8, agent 4, result: (s = 13, a = :left, r = -1, sp = 12, t = 8)
e: 1, t: 7, agent 5, result: (s = 10, a = :left, r = -1, sp = 9, t = 7)
e: 1, t: 6, agent 6, result: (s = 11, a = :right, r = -1, sp = 12, t = 6)
e: 1, t: 5, agent 7, result: (s = 10, a = :left, r = -1, sp = 9, t = 5)
e: 1, t: 4, agent 8, result: (s = 13, a = :right, r = -1, sp = 14, t = 4)
e: 1, t: 3, agent 9, result: (s = 10, a = :right, r = -1, sp = 11, t = 3)
e: 1, t: 2, agent 10, result: (s = 13, a 

e: 1, t: 20, agent 5, result: (s = 7, a = :left, r = -1, sp = 6, t = 20)
e: 1, t: 19, agent 6, result: (s = 14, a = :left, r = -1, sp = 13, t = 19)
e: 1, t: 18, agent 7, result: (s = 7, a = :left, r = -1, sp = 6, t = 18)
e: 1, t: 17, agent 8, result: (s = 16, a = :right, r = -1, sp = 17, t = 17)
e: 1, t: 16, agent 9, result: (s = 7, a = :left, r = -1, sp = 6, t = 16)
e: 1, t: 15, agent 10, result: (s = 12, a = :right, r = -1, sp = 13, t = 15)
e: 1, t: 25, agent 1, result: (s = 12, a = :left, r = -1, sp = 11, t = 25)
e: 1, t: 24, agent 2, result: (s = 11, a = :right, r = -1, sp = 12, t = 24)
e: 1, t: 23, agent 3, result: (s = 12, a = :right, r = -1, sp = 13, t = 23)
e: 1, t: 22, agent 4, result: (s = 13, a = :left, r = -1, sp = 12, t = 22)
e: 1, t: 21, agent 5, result: (s = 6, a = :left, r = -1, sp = 5, t = 21)
e: 1, t: 20, agent 6, result: (s = 13, a = :left, r = -1, sp = 12, t = 20)
e: 1, t: 19, agent 7, result: (s = 6, a = :right, r = -1, sp = 7, t = 19)
e: 1, t: 18, agent 8, result:

e: 1, t: 5, agent 6, result: (s = 14, a = :left, r = -1, sp = 13, t = 5)
e: 1, t: 4, agent 7, result: (s = 13, a = :right, r = -1, sp = 14, t = 4)
e: 1, t: 3, agent 8, result: (s = 12, a = :right, r = -1, sp = 13, t = 3)
e: 1, t: 2, agent 9, result: (s = 11, a = :left, r = -1, sp = 10, t = 2)
e: 1, t: 1, agent 10, result: (s = 12, a = :left, r = -1, sp = 11, t = 1)
e: 1, t: 11, agent 1, result: (s = 10, a = :right, r = -1, sp = 11, t = 11)
e: 1, t: 10, agent 2, result: (s = 13, a = :left, r = -1, sp = 12, t = 10)
e: 1, t: 9, agent 3, result: (s = 16, a = :right, r = -1, sp = 17, t = 9)
e: 1, t: 8, agent 4, result: (s = 15, a = :left, r = -1, sp = 14, t = 8)
e: 1, t: 7, agent 5, result: (s = 12, a = :right, r = -1, sp = 13, t = 7)
e: 1, t: 6, agent 6, result: (s = 13, a = :left, r = -1, sp = 12, t = 6)
e: 1, t: 5, agent 7, result: (s = 14, a = :right, r = -1, sp = 15, t = 5)
e: 1, t: 4, agent 8, result: (s = 13, a = :left, r = -1, sp = 12, t = 4)
e: 1, t: 3, agent 9, result: (s = 10, a 

e: 1, t: 5, agent 5, result: (s = 12, a = :left, r = -1, sp = 11, t = 5)
e: 1, t: 4, agent 6, result: (s = 13, a = :left, r = -1, sp = 12, t = 4)
e: 1, t: 3, agent 7, result: (s = 12, a = :left, r = -1, sp = 11, t = 3)
e: 1, t: 2, agent 8, result: (s = 13, a = :right, r = -1, sp = 14, t = 2)
e: 1, t: 1, agent 9, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 1, t: 10, agent 1, result: (s = 11, a = :left, r = -1, sp = 10, t = 10)
e: 1, t: 9, agent 2, result: (s = 10, a = :right, r = -1, sp = 11, t = 9)
e: 1, t: 8, agent 3, result: (s = 13, a = :right, r = -1, sp = 14, t = 8)
e: 1, t: 7, agent 4, result: (s = 12, a = :left, r = -1, sp = 11, t = 7)
e: 1, t: 6, agent 5, result: (s = 11, a = :right, r = -1, sp = 12, t = 6)
e: 1, t: 5, agent 6, result: (s = 12, a = :left, r = -1, sp = 11, t = 5)
e: 1, t: 4, agent 7, result: (s = 11, a = :left, r = -1, sp = 10, t = 4)
e: 1, t: 3, agent 8, result: (s = 14, a = :left, r = -1, sp = 13, t = 3)
e: 1, t: 2, agent 9, result: (s = 13, a = :r

e: 1, t: 21, agent 3, result: (s = 12, a = :right, r = -1, sp = 13, t = 21)
e: 1, t: 20, agent 4, result: (s = 13, a = :right, r = -1, sp = 14, t = 20)
e: 1, t: 19, agent 5, result: (s = 10, a = :left, r = -1, sp = 9, t = 19)
e: 1, t: 18, agent 6, result: (s = 13, a = :right, r = -1, sp = 14, t = 18)
e: 1, t: 17, agent 7, result: (s = 14, a = :right, r = -1, sp = 15, t = 17)
e: 1, t: 16, agent 8, result: (s = 9, a = :left, r = -1, sp = 8, t = 16)
e: 1, t: 15, agent 9, result: (s = 10, a = :left, r = -1, sp = 9, t = 15)
e: 1, t: 14, agent 10, result: (s = 15, a = :right, r = -1, sp = 16, t = 14)
e: 1, t: 24, agent 1, result: (s = 13, a = :left, r = -1, sp = 12, t = 24)
e: 1, t: 23, agent 2, result: (s = 14, a = :left, r = -1, sp = 13, t = 23)
e: 1, t: 22, agent 3, result: (s = 13, a = :left, r = -1, sp = 12, t = 22)
e: 1, t: 21, agent 4, result: (s = 14, a = :left, r = -1, sp = 13, t = 21)
e: 1, t: 20, agent 5, result: (s = 9, a = :right, r = -1, sp = 10, t = 20)
e: 1, t: 19, agent 6, r

e: 1, t: 2, agent 2, result: (s = 11, a = :right, r = -1, sp = 12, t = 2)
e: 1, t: 1, agent 3, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 1, t: 4, agent 1, result: (s = 9, a = :left, r = -1, sp = 8, t = 4)
e: 1, t: 3, agent 2, result: (s = 12, a = :right, r = -1, sp = 13, t = 3)
e: 1, t: 2, agent 3, result: (s = 13, a = :right, r = -1, sp = 14, t = 2)
e: 1, t: 1, agent 4, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 1, t: 5, agent 1, result: (s = 8, a = :left, r = -1, sp = 7, t = 5)
e: 1, t: 4, agent 2, result: (s = 13, a = :left, r = -1, sp = 12, t = 4)
e: 1, t: 3, agent 3, result: (s = 14, a = :left, r = -1, sp = 13, t = 3)
e: 1, t: 2, agent 4, result: (s = 13, a = :left, r = -1, sp = 12, t = 2)
e: 1, t: 1, agent 5, result: (s = 12, a = :left, r = -1, sp = 11, t = 1)
e: 1, t: 6, agent 1, result: (s = 7, a = :right, r = -1, sp = 8, t = 6)
e: 1, t: 5, agent 2, result: (s = 12, a = :right, r = -1, sp = 13, t = 5)
e: 1, t: 4, agent 3, result: (s = 13, a = :left, r

e: 1, t: 11, agent 9, result: (s = 10, a = :right, r = -1, sp = 11, t = 11)
e: 1, t: 10, agent 10, result: (s = 13, a = :right, r = -1, sp = 14, t = 10)
e: 1, t: 20, agent 1, result: (s = 15, a = :left, r = -1, sp = 14, t = 20)
e: 1, t: 19, agent 2, result: (s = 14, a = :right, r = -1, sp = 15, t = 19)
e: 1, t: 18, agent 3, result: (s = 9, a = :left, r = -1, sp = 8, t = 18)
e: 1, t: 17, agent 4, result: (s = 8, a = :right, r = -1, sp = 9, t = 17)
e: 1, t: 16, agent 5, result: (s = 7, a = :right, r = -1, sp = 8, t = 16)
e: 1, t: 15, agent 6, result: (s = 14, a = :left, r = -1, sp = 13, t = 15)
e: 1, t: 14, agent 7, result: (s = 13, a = :right, r = -1, sp = 14, t = 14)
e: 1, t: 13, agent 8, result: (s = 12, a = :right, r = -1, sp = 13, t = 13)
e: 1, t: 12, agent 9, result: (s = 11, a = :right, r = -1, sp = 12, t = 12)
e: 1, t: 11, agent 10, result: (s = 14, a = :left, r = -1, sp = 13, t = 11)
e: 1, t: 21, agent 1, result: (s = 14, a = :left, r = -1, sp = 13, t = 21)
e: 1, t: 20, agent 2,

e: 1, t: 24, agent 10, result: (s = 11, a = :left, r = -1, sp = 10, t = 24)
agent 1 is done
agent 2 is done
agent 3 is done
agent 4 is done
e: 1, t: 30, agent 5, result: (s = 7, a = :left, r = -1, sp = 6, t = 30)
e: 1, t: 29, agent 6, result: (s = 14, a = :left, r = -1, sp = 13, t = 29)
e: 1, t: 28, agent 7, result: (s = 17, a = :right, r = -1, sp = 18, t = 28)
e: 1, t: 27, agent 8, result: (s = 18, a = :right, r = -1, sp = 19, t = 27)
e: 1, t: 26, agent 9, result: (s = 17, a = :left, r = -1, sp = 16, t = 26)
e: 1, t: 25, agent 10, result: (s = 10, a = :left, r = -1, sp = 9, t = 25)
agent 1 is done
agent 2 is done
agent 3 is done
agent 4 is done
agent 5 is done
e: 1, t: 30, agent 6, result: (s = 13, a = :right, r = -1, sp = 14, t = 30)
e: 1, t: 29, agent 7, result: (s = 18, a = :right, r = -1, sp = 19, t = 29)
e: 1, t: 28, agent 8, result: (s = 19, a = :right, r = -1, sp = 20, t = 28)
e: 1, t: 27, agent 9, result: (s = 16, a = :left, r = -1, sp = 15, t = 27)
e: 1, t: 26, agent 10, resu

e: 1, t: 15, agent 3, result: (s = 18, a = :left, r = -1, sp = 17, t = 15)
e: 1, t: 14, agent 4, result: (s = 15, a = :left, r = -1, sp = 14, t = 14)
e: 1, t: 13, agent 5, result: (s = 12, a = :left, r = -1, sp = 11, t = 13)
e: 1, t: 12, agent 6, result: (s = 11, a = :right, r = -1, sp = 12, t = 12)
e: 1, t: 11, agent 7, result: (s = 12, a = :right, r = -1, sp = 13, t = 11)
e: 1, t: 10, agent 8, result: (s = 9, a = :right, r = -1, sp = 10, t = 10)
e: 1, t: 9, agent 9, result: (s = 8, a = :left, r = -1, sp = 7, t = 9)
e: 1, t: 8, agent 10, result: (s = 11, a = :right, r = -1, sp = 12, t = 8)
e: 1, t: 18, agent 1, result: (s = 7, a = :left, r = -1, sp = 6, t = 18)
e: 1, t: 17, agent 2, result: (s = 10, a = :left, r = -1, sp = 9, t = 17)
e: 1, t: 16, agent 3, result: (s = 17, a = :right, r = -1, sp = 18, t = 16)
e: 1, t: 15, agent 4, result: (s = 14, a = :left, r = -1, sp = 13, t = 15)
e: 1, t: 14, agent 5, result: (s = 11, a = :right, r = -1, sp = 12, t = 14)
e: 1, t: 13, agent 6, result

e: 1, t: 21, agent 10, result: (s = 8, a = :right, r = -1, sp = 9, t = 21)
agent 1 is done
e: 1, t: 30, agent 2, result: (s = 5, a = :left, r = -1, sp = 4, t = 30)
e: 1, t: 29, agent 3, result: (s = 18, a = :right, r = -1, sp = 19, t = 29)
e: 1, t: 28, agent 4, result: (s = 9, a = :right, r = -1, sp = 10, t = 28)
e: 1, t: 27, agent 5, result: (s = 18, a = :left, r = -1, sp = 17, t = 27)
e: 1, t: 26, agent 6, result: (s = 15, a = :left, r = -1, sp = 14, t = 26)
e: 1, t: 25, agent 7, result: (s = 14, a = :left, r = -1, sp = 13, t = 25)
e: 1, t: 24, agent 8, result: (s = 11, a = :left, r = -1, sp = 10, t = 24)
e: 1, t: 23, agent 9, result: (s = 6, a = :right, r = -1, sp = 7, t = 23)
e: 1, t: 22, agent 10, result: (s = 9, a = :right, r = -1, sp = 10, t = 22)
agent 1 is done
agent 2 is done
e: 1, t: 30, agent 3, result: (s = 19, a = :left, r = -1, sp = 18, t = 30)
e: 1, t: 29, agent 4, result: (s = 10, a = :left, r = -1, sp = 9, t = 29)
e: 1, t: 28, agent 5, result: (s = 17, a = :right, r =

e: 1, t: 5, agent 9, result: (s = 16, a = :left, r = -1, sp = 15, t = 5)
e: 1, t: 4, agent 10, result: (s = 11, a = :left, r = -1, sp = 10, t = 4)
e: 1, t: 14, agent 1, result: (s = 7, a = :left, r = -1, sp = 6, t = 14)
e: 1, t: 13, agent 2, result: (s = 18, a = :right, r = -1, sp = 19, t = 13)
e: 1, t: 12, agent 3, result: (s = 9, a = :right, r = -1, sp = 10, t = 12)
e: 1, t: 11, agent 4, result: (s = 16, a = :right, r = -1, sp = 17, t = 11)
e: 1, t: 10, agent 5, result: (s = 13, a = :right, r = -1, sp = 14, t = 10)
e: 1, t: 9, agent 6, result: (s = 8, a = :right, r = -1, sp = 9, t = 9)
e: 1, t: 8, agent 7, result: (s = 13, a = :right, r = -1, sp = 14, t = 8)
e: 1, t: 7, agent 8, result: (s = 12, a = :left, r = -1, sp = 11, t = 7)
e: 1, t: 6, agent 9, result: (s = 15, a = :left, r = -1, sp = 14, t = 6)
e: 1, t: 5, agent 10, result: (s = 10, a = :right, r = -1, sp = 11, t = 5)
e: 1, t: 15, agent 1, result: (s = 6, a = :left, r = -1, sp = 5, t = 15)
e: 1, t: 14, agent 2, result: (s = 19

e: 1, t: 3, agent 10, result: (s = 10, a = :left, r = -1, sp = 9, t = 3)
e: 1, t: 13, agent 1, result: (s = 12, a = :right, r = -1, sp = 13, t = 13)
e: 1, t: 12, agent 2, result: (s = 13, a = :right, r = -1, sp = 14, t = 12)
e: 1, t: 11, agent 3, result: (s = 16, a = :right, r = -1, sp = 17, t = 11)
e: 1, t: 10, agent 4, result: (s = 11, a = :right, r = -1, sp = 12, t = 10)
e: 1, t: 9, agent 5, result: (s = 16, a = :right, r = -1, sp = 17, t = 9)
e: 1, t: 8, agent 6, result: (s = 13, a = :left, r = -1, sp = 12, t = 8)
e: 1, t: 7, agent 7, result: (s = 8, a = :left, r = -1, sp = 7, t = 7)
e: 1, t: 6, agent 8, result: (s = 13, a = :left, r = -1, sp = 12, t = 6)
e: 1, t: 5, agent 9, result: (s = 14, a = :left, r = -1, sp = 13, t = 5)
e: 1, t: 4, agent 10, result: (s = 9, a = :right, r = -1, sp = 10, t = 4)
e: 1, t: 14, agent 1, result: (s = 13, a = :right, r = -1, sp = 14, t = 14)
e: 1, t: 13, agent 2, result: (s = 14, a = :left, r = -1, sp = 13, t = 13)
e: 1, t: 12, agent 3, result: (s =

Excessive output truncated after 524299 bytes.

e: 1, t: 19, agent 8, result: (s = 12, a = :right, r = -1, sp = 13, t = 19)
e: 1, t: 18, agent 9, result: (s = 13, a = :right, r = -1, sp = 14, t = 18)
e: 1, t: 17, agent 10, result: (s = 2, a = :right, r = 10, sp = 1, t = 17)
setup agents
e: 1, t: 1, agent 1, result: (s = 12, a = :left, r = -1, sp = 11, t = 1)
e: 1, t: 2, agent 1, result: (s = 11, a = :left, r = -1, sp = 10, t = 2)
e: 1, t: 1, agent 2, result: (s = 12, a = :left, r = -1, sp = 11, t = 1)
e: 1, t: 3, agent 1, result: (s = 10, a = :right, r = -1, sp = 11, t = 3)
e: 1, t: 2, agent 2, result: (s = 11, a = :right, r = -1, sp = 12, t = 2)
e: 1, t: 1, agent 3, result: (s = 12, a = :left, r = -1, sp = 11, t = 1)
e: 1, t: 4, agent 1, result: (s = 11, a = :right, r = -1, sp = 12, t = 4)
e: 1, t: 3, agent 2, result: (s = 12, a = :left, r = -1, sp = 11, t = 3)
e: 1, t: 2, agent 3, result: (s = 11, a = :left, r = -1, sp = 10, t = 2)
e: 1, t: 1, agent 4, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 1, t: 5, agent 1, resul

Unnamed: 0_level_0,run,epoch,agent,time,state,reward
Unnamed: 0_level_1,Int64,Int64,Int64,Int64,Int64,Int64
1,1,1,1,1,12,-1
2,1,1,1,2,13,-1
3,1,1,2,1,12,-1
4,1,1,1,3,12,-1
5,1,1,2,2,13,-1
6,1,1,3,1,12,-1
7,1,1,1,4,13,-1
8,1,1,2,3,12,-1
9,1,1,3,2,13,-1
10,1,1,4,1,12,-1


In [36]:

get_average_regret(df_thomp)

1000×4 DataFrame
│ Row  │ run   │ epoch │ agent │ Regret  │
│      │ [90mInt64[39m │ [90mInt64[39m │ [90mInt64[39m │ [90mFloat64[39m │
├──────┼───────┼───────┼───────┼─────────┤
│ 1    │ 1     │ 1     │ 1     │ 49.0    │
│ 2    │ 1     │ 1     │ 2     │ 38.0    │
│ 3    │ 1     │ 1     │ 3     │ 37.0    │
│ 4    │ 1     │ 1     │ 4     │ 36.0    │
│ 5    │ 1     │ 1     │ 5     │ 35.0    │
│ 6    │ 1     │ 1     │ 6     │ 34.0    │
│ 7    │ 1     │ 1     │ 7     │ 33.0    │
│ 8    │ 1     │ 1     │ 8     │ 32.0    │
│ 9    │ 1     │ 1     │ 9     │ 31.0    │
│ 10   │ 1     │ 1     │ 10    │ 30.0    │
│ 11   │ 2     │ 1     │ 1     │ 33.0    │
│ 12   │ 2     │ 1     │ 2     │ 32.0    │
│ 13   │ 2     │ 1     │ 3     │ 21.0    │
│ 14   │ 2     │ 1     │ 4     │ 29.0    │
│ 15   │ 2     │ 1     │ 5     │ 28.0    │
│ 16   │ 2     │ 1     │ 6     │ 27.0    │
│ 17   │ 2     │ 1     │ 7     │ 26.0    │
│ 18   │ 2     │ 1     │ 8     │ 25.0    │
│ 19   │ 2     │ 1     │ 9     │ 24.0   

│ 299  │ 30    │ 1     │ 9     │ 40.0    │
│ 300  │ 30    │ 1     │ 10    │ 40.0    │
│ 301  │ 31    │ 1     │ 1     │ 29.0    │
│ 302  │ 31    │ 1     │ 2     │ 37.0    │
│ 303  │ 31    │ 1     │ 3     │ 26.0    │
│ 304  │ 31    │ 1     │ 4     │ 25.0    │
│ 305  │ 31    │ 1     │ 5     │ 24.0    │
│ 306  │ 31    │ 1     │ 6     │ 23.0    │
│ 307  │ 31    │ 1     │ 7     │ 22.0    │
│ 308  │ 31    │ 1     │ 8     │ 21.0    │
│ 309  │ 31    │ 1     │ 9     │ 20.0    │
│ 310  │ 31    │ 1     │ 10    │ 19.0    │
│ 311  │ 32    │ 1     │ 1     │ 36.0    │
│ 312  │ 32    │ 1     │ 2     │ 35.0    │
│ 313  │ 32    │ 1     │ 3     │ 34.0    │
│ 314  │ 32    │ 1     │ 4     │ 33.0    │
│ 315  │ 32    │ 1     │ 5     │ 32.0    │
│ 316  │ 32    │ 1     │ 6     │ 31.0    │
│ 317  │ 32    │ 1     │ 7     │ 30.0    │
│ 318  │ 32    │ 1     │ 8     │ 29.0    │
│ 319  │ 32    │ 1     │ 9     │ 28.0    │
│ 320  │ 32    │ 1     │ 10    │ 17.0    │
│ 321  │ 33    │ 1     │ 1     │ 40.0    │
│ 322  │ 33

│ 567  │ 57    │ 1     │ 7     │ 23.0    │
│ 568  │ 57    │ 1     │ 8     │ 22.0    │
│ 569  │ 57    │ 1     │ 9     │ 11.0    │
│ 570  │ 57    │ 1     │ 10    │ 19.0    │
│ 571  │ 58    │ 1     │ 1     │ 38.0    │
│ 572  │ 58    │ 1     │ 2     │ 27.0    │
│ 573  │ 58    │ 1     │ 3     │ 35.0    │
│ 574  │ 58    │ 1     │ 4     │ 34.0    │
│ 575  │ 58    │ 1     │ 5     │ 33.0    │
│ 576  │ 58    │ 1     │ 6     │ 32.0    │
│ 577  │ 58    │ 1     │ 7     │ 31.0    │
│ 578  │ 58    │ 1     │ 8     │ 30.0    │
│ 579  │ 58    │ 1     │ 9     │ 29.0    │
│ 580  │ 58    │ 1     │ 10    │ 28.0    │
│ 581  │ 59    │ 1     │ 1     │ 38.0    │
│ 582  │ 59    │ 1     │ 2     │ 37.0    │
│ 583  │ 59    │ 1     │ 3     │ 36.0    │
│ 584  │ 59    │ 1     │ 4     │ 35.0    │
│ 585  │ 59    │ 1     │ 5     │ 34.0    │
│ 586  │ 59    │ 1     │ 6     │ 33.0    │
│ 587  │ 59    │ 1     │ 7     │ 32.0    │
│ 588  │ 59    │ 1     │ 8     │ 31.0    │
│ 589  │ 59    │ 1     │ 9     │ 30.0    │
│ 590  │ 59

│ 859  │ 86    │ 1     │ 9     │ 40.0    │
│ 860  │ 86    │ 1     │ 10    │ 40.0    │
│ 861  │ 87    │ 1     │ 1     │ 15.0    │
│ 862  │ 87    │ 1     │ 2     │ 23.0    │
│ 863  │ 87    │ 1     │ 3     │ 22.0    │
│ 864  │ 87    │ 1     │ 4     │ 21.0    │
│ 865  │ 87    │ 1     │ 5     │ 20.0    │
│ 866  │ 87    │ 1     │ 6     │ 19.0    │
│ 867  │ 87    │ 1     │ 7     │ 18.0    │
│ 868  │ 87    │ 1     │ 8     │ 17.0    │
│ 869  │ 87    │ 1     │ 9     │ 16.0    │
│ 870  │ 87    │ 1     │ 10    │ 15.0    │
│ 871  │ 88    │ 1     │ 1     │ 45.0    │
│ 872  │ 88    │ 1     │ 2     │ 34.0    │
│ 873  │ 88    │ 1     │ 3     │ 33.0    │
│ 874  │ 88    │ 1     │ 4     │ 32.0    │
│ 875  │ 88    │ 1     │ 5     │ 31.0    │
│ 876  │ 88    │ 1     │ 6     │ 30.0    │
│ 877  │ 88    │ 1     │ 7     │ 29.0    │
│ 878  │ 88    │ 1     │ 8     │ 28.0    │
│ 879  │ 88    │ 1     │ 9     │ 27.0    │
│ 880  │ 88    │ 1     │ 10    │ 26.0    │
│ 881  │ 89    │ 1     │ 1     │ 38.0    │
│ 882  │ 89

30.698

In [37]:
# Policies are chosen outside.
function seed_pol_func(Q_tables, N_tables, i, actions, s)
    # passed in i ignored, kept for consistency
    # i = rand(1:length(Q_tables))
    #println("i:$i")
    # return 
    if haskey(Q_tables[i], s)
       # println("ucb: $(ucbs)")
       val, idx = findmax(Q_tables[i][s])
       #println("Selected $val, $idx from $(Q_tables[i][s]) for $i, $s")
       #print("$(Q_tables[i][s])")
       return action_map[idx]
    else
       act = action_map[rand(actions, 1)[1]]
       println("random action $act") 
       return act
    end
end

seed_pol_func (generic function with 1 method)

In [38]:
#Now we run with that policy
include("./Agent.jl")
using StatsBase
function run_seed_chain_simulations(nruns, Q_tables1, Q_tables2, N_tables1, N_tables2, num_agents)
    runs = []
    for i in 1:nruns
        agents = Any[]
        num_states = 20
        num_actions = 2
        epochs=1
        seed_policies = []
        H = floor(Int, 3 * num_states / 2)
        actions = [1, 2]
        action_map = Dict(1 => :left, :2 => :right)
        rev_action_map = Dict(:left => 1, :right => 2)
        states = 1:(num_states+2)
        #print("Before q's")
        true_mdp = PFChainMDP.PChainMDP(num_states+2,1.0,1.0, theta)
        Q_tables_list = [Q_tables1[1], Q_tables2[1]]
        N_tables_list = [N_tables1[1], N_tables2[1]]
        # so diff with thompson sampling is that we should do the rand now
        Q_tables_seed = Dict(i => Q_tables_list[rand(1:length(Q_tables_list))] for i in 1:num_agents)
        N_tables_seed = Dict(i => N_tables_list[rand(1:length(N_tables_list))] for i in 1:num_agents)
        
        for ag in 1:num_agents
            #println("Q_T: $Q_tables_thomp")
            #println("N_T: $N_tables_thomp")
            push!(seed_policies, curry(curry(curry(curry(seed_pol_func, Q_tables_seed), N_tables_seed),ag), actions))
            # push!(seed_policies, curry(curry(curry(curry(seed_pol_func, Q_tables_seed), N_tables_seed),2), actions))
        end
        #println("after policies")
        (Q_tables3, N_tables3, trash) = PFAgent.setup_agents(states, num_states, num_agents,
                                                                    actions, num_actions, ucb_pol)
        mdps = [deepcopy(true_mdp) for ag in 1:num_agents]
        r_history = PFAgent.run_chain!(
                   policies=seed_policies,
                   found_target=chain_found_target,
                   mdps=mdps,
                   update_Q=update_Q,
                   n_agents=num_agents,
                   n_states=num_states,
                   Q_tables=Q_tables3,
                   N_tables=N_tables3,
                   epochs=epochs,
                   steps=H,
                   rev_action_map=rev_action_map,
                   stop_early=true)

        R = (num_states - 2) / 2
        #println([r for (e,i,t,r) in r_history if i == 1])
        #println([r for (e,i,t,r) in r_history if i == 2])
        print(r_history)
        #reg = R - sum([r for (e,ag,t,r) in r_history if ag == an_ag end 0])
        for (e, ag, t, st, r)  in r_history
          push!(runs, (i, e, ag, t, st, r)) 
        end
        #reg_a2 = R - sum([r for (e,ag,t,r) in r_history if ag == 2])
    end
    return runs
end                                    
                                    



run_seed_chain_simulations (generic function with 1 method)

In [39]:
num_agents = 5
results = run_seed_chain_simulations(100, Q_tables1, Q_tables2, N_tables1, N_tables2, num_agents)
using CSV
using DataFrames
df = DataFrame(run = [x[1] for x in results], epoch = [x[2] for x in results], agent=[x[3] for x in results],
               time = [x[4] for x in results], state = [x[5] for x in results], reward = [x[6] for x in results])
println(df)
CSV.write("seed_chain_simulation.csv", df)
df

setup agents
e: 1, t: 1, agent 1, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 1, t: 2, agent 1, result: (s = 13, a = :right, r = -1, sp = 14, t = 2)
e: 1, t: 1, agent 2, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 1, t: 3, agent 1, result: (s = 14, a = :right, r = -1, sp = 15, t = 3)
e: 1, t: 2, agent 2, result: (s = 13, a = :right, r = -1, sp = 14, t = 2)
e: 1, t: 1, agent 3, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 1, t: 4, agent 1, result: (s = 15, a = :right, r = -1, sp = 16, t = 4)
e: 1, t: 3, agent 2, result: (s = 14, a = :right, r = -1, sp = 15, t = 3)
e: 1, t: 2, agent 3, result: (s = 13, a = :right, r = -1, sp = 14, t = 2)
e: 1, t: 1, agent 4, result: (s = 12, a = :left, r = -1, sp = 11, t = 1)
e: 1, t: 5, agent 1, result: (s = 16, a = :right, r = -1, sp = 17, t = 5)
e: 1, t: 4, agent 2, result: (s = 15, a = :right, r = -1, sp = 16, t = 4)
e: 1, t: 3, agent 3, result: (s = 14, a = :right, r = -1, sp = 15, t = 3)
e: 1, t: 2, agent 4, resul

e: 1, t: 8, agent 3, result: (s = 19, a = :right, r = -1, sp = 20, t = 8)
e: 1, t: 7, agent 4, result: (s = 18, a = :right, r = -1, sp = 19, t = 7)
e: 1, t: 6, agent 5, result: (s = 17, a = :right, r = -1, sp = 18, t = 6)
e: 1, t: 11, agent 1, result: (s = 2, a = :right, r = 10, sp = 1, t = 11)
Any[(1, 1, 1, 12, -1), (1, 1, 2, 11, -1), (1, 2, 1, 12, -1), (1, 1, 3, 10, -1), (1, 2, 2, 11, -1), (1, 3, 1, 12, -1), (1, 1, 4, 9, -1), (1, 2, 3, 10, -1), (1, 3, 2, 13, -1), (1, 4, 1, 12, -1), (1, 1, 5, 8, -1), (1, 2, 4, 9, -1), (1, 3, 3, 14, -1), (1, 4, 2, 13, -1), (1, 5, 1, 12, -1), (1, 1, 6, 7, -1), (1, 2, 5, 8, -1), (1, 3, 4, 15, -1), (1, 4, 3, 14, -1), (1, 5, 2, 13, -1), (1, 1, 7, 6, -1), (1, 2, 6, 7, -1), (1, 3, 5, 16, -1), (1, 4, 4, 15, -1), (1, 5, 3, 14, -1), (1, 1, 8, 5, -1), (1, 2, 7, 6, -1), (1, 3, 6, 17, -1), (1, 4, 5, 16, -1), (1, 5, 4, 15, -1), (1, 1, 9, 4, -1), (1, 2, 8, 5, -1), (1, 3, 7, 18, -1), (1, 4, 6, 17, -1), (1, 5, 5, 16, -1), (1, 1, 10, 3, -1), (1, 2, 9, 4, -1), (1, 3, 8,

e: 1, t: 9, agent 1, result: (s = 4, a = :left, r = -1, sp = 3, t = 9)
e: 1, t: 8, agent 2, result: (s = 19, a = :right, r = -1, sp = 20, t = 8)
e: 1, t: 7, agent 3, result: (s = 18, a = :right, r = -1, sp = 19, t = 7)
e: 1, t: 6, agent 4, result: (s = 7, a = :left, r = -1, sp = 6, t = 6)
e: 1, t: 5, agent 5, result: (s = 16, a = :right, r = -1, sp = 17, t = 5)
e: 1, t: 10, agent 1, result: (s = 3, a = :left, r = -1, sp = 2, t = 10)
e: 1, t: 9, agent 2, result: (s = 20, a = :right, r = -1, sp = 21, t = 9)
e: 1, t: 8, agent 3, result: (s = 19, a = :right, r = -1, sp = 20, t = 8)
e: 1, t: 7, agent 4, result: (s = 6, a = :left, r = -1, sp = 5, t = 7)
e: 1, t: 6, agent 5, result: (s = 17, a = :right, r = -1, sp = 18, t = 6)
e: 1, t: 11, agent 1, result: (s = 2, a = :right, r = 10, sp = 1, t = 11)
Any[(1, 1, 1, 12, -1), (1, 1, 2, 11, -1), (1, 2, 1, 12, -1), (1, 1, 3, 10, -1), (1, 2, 2, 13, -1), (1, 3, 1, 12, -1), (1, 1, 4, 9, -1), (1, 2, 3, 14, -1), (1, 3, 2, 13, -1), (1, 4, 1, 12, -1), (1,

e: 1, t: 9, agent 2, result: (s = 4, a = :left, r = -1, sp = 3, t = 9)
e: 1, t: 8, agent 3, result: (s = 19, a = :right, r = -1, sp = 20, t = 8)
e: 1, t: 7, agent 4, result: (s = 6, a = :left, r = -1, sp = 5, t = 7)
e: 1, t: 6, agent 5, result: (s = 17, a = :right, r = -1, sp = 18, t = 6)
e: 1, t: 11, agent 1, result: (s = 2, a = :right, r = 10, sp = 1, t = 11)
Any[(1, 1, 1, 12, -1), (1, 1, 2, 11, -1), (1, 2, 1, 12, -1), (1, 1, 3, 10, -1), (1, 2, 2, 11, -1), (1, 3, 1, 12, -1), (1, 1, 4, 9, -1), (1, 2, 3, 10, -1), (1, 3, 2, 13, -1), (1, 4, 1, 12, -1), (1, 1, 5, 8, -1), (1, 2, 4, 9, -1), (1, 3, 3, 14, -1), (1, 4, 2, 11, -1), (1, 5, 1, 12, -1), (1, 1, 6, 7, -1), (1, 2, 5, 8, -1), (1, 3, 4, 15, -1), (1, 4, 3, 10, -1), (1, 5, 2, 13, -1), (1, 1, 7, 6, -1), (1, 2, 6, 7, -1), (1, 3, 5, 16, -1), (1, 4, 4, 9, -1), (1, 5, 3, 14, -1), (1, 1, 8, 5, -1), (1, 2, 7, 6, -1), (1, 3, 6, 17, -1), (1, 4, 5, 8, -1), (1, 5, 4, 15, -1), (1, 1, 9, 4, -1), (1, 2, 8, 5, -1), (1, 3, 7, 18, -1), (1, 4, 6, 7, -1), 

e: 1, t: 4, agent 4, result: (s = 9, a = :left, r = -1, sp = 8, t = 4)
e: 1, t: 3, agent 5, result: (s = 14, a = :right, r = -1, sp = 15, t = 3)
e: 1, t: 8, agent 1, result: (s = 19, a = :right, r = -1, sp = 20, t = 8)
e: 1, t: 7, agent 2, result: (s = 6, a = :left, r = -1, sp = 5, t = 7)
e: 1, t: 6, agent 3, result: (s = 7, a = :left, r = -1, sp = 6, t = 6)
e: 1, t: 5, agent 4, result: (s = 8, a = :left, r = -1, sp = 7, t = 5)
e: 1, t: 4, agent 5, result: (s = 15, a = :right, r = -1, sp = 16, t = 4)
e: 1, t: 9, agent 1, result: (s = 20, a = :right, r = -1, sp = 21, t = 9)
e: 1, t: 8, agent 2, result: (s = 5, a = :left, r = -1, sp = 4, t = 8)
e: 1, t: 7, agent 3, result: (s = 6, a = :left, r = -1, sp = 5, t = 7)
e: 1, t: 6, agent 4, result: (s = 7, a = :left, r = -1, sp = 6, t = 6)
e: 1, t: 5, agent 5, result: (s = 16, a = :right, r = -1, sp = 17, t = 5)
e: 1, t: 10, agent 1, result: (s = 21, a = :right, r = -10, sp = 22, t = 10)
Any[(1, 1, 1, 12, -1), (1, 1, 2, 13, -1), (1, 2, 1, 12, 

Any[(1, 1, 1, 12, -1), (1, 1, 2, 13, -1), (1, 2, 1, 12, -1), (1, 1, 3, 14, -1), (1, 2, 2, 13, -1), (1, 3, 1, 12, -1), (1, 1, 4, 15, -1), (1, 2, 3, 14, -1), (1, 3, 2, 13, -1), (1, 4, 1, 12, -1), (1, 1, 5, 16, -1), (1, 2, 4, 15, -1), (1, 3, 3, 14, -1), (1, 4, 2, 11, -1), (1, 5, 1, 12, -1), (1, 1, 6, 17, -1), (1, 2, 5, 16, -1), (1, 3, 4, 15, -1), (1, 4, 3, 10, -1), (1, 5, 2, 11, -1), (1, 1, 7, 18, -1), (1, 2, 6, 17, -1), (1, 3, 5, 16, -1), (1, 4, 4, 9, -1), (1, 5, 3, 10, -1), (1, 1, 8, 19, -1), (1, 2, 7, 18, -1), (1, 3, 6, 17, -1), (1, 4, 5, 8, -1), (1, 5, 4, 9, -1), (1, 1, 9, 20, -1), (1, 2, 8, 19, -1), (1, 3, 7, 18, -1), (1, 4, 6, 7, -1), (1, 5, 5, 8, -1), (1, 1, 10, 21, -10)]setup agents
e: 1, t: 1, agent 1, result: (s = 12, a = :left, r = -1, sp = 11, t = 1)
e: 1, t: 2, agent 1, result: (s = 11, a = :left, r = -1, sp = 10, t = 2)
e: 1, t: 1, agent 2, result: (s = 12, a = :left, r = -1, sp = 11, t = 1)
e: 1, t: 3, agent 1, result: (s = 10, a = :left, r = -1, sp = 9, t = 3)
e: 1, t: 2, 

Any[(1, 1, 1, 12, -1), (1, 1, 2, 13, -1), (1, 2, 1, 12, -1), (1, 1, 3, 14, -1), (1, 2, 2, 13, -1), (1, 3, 1, 12, -1), (1, 1, 4, 15, -1), (1, 2, 3, 14, -1), (1, 3, 2, 13, -1), (1, 4, 1, 12, -1), (1, 1, 5, 16, -1), (1, 2, 4, 15, -1), (1, 3, 3, 14, -1), (1, 4, 2, 13, -1), (1, 5, 1, 12, -1), (1, 1, 6, 17, -1), (1, 2, 5, 16, -1), (1, 3, 4, 15, -1), (1, 4, 3, 14, -1), (1, 5, 2, 11, -1), (1, 1, 7, 18, -1), (1, 2, 6, 17, -1), (1, 3, 5, 16, -1), (1, 4, 4, 15, -1), (1, 5, 3, 10, -1), (1, 1, 8, 19, -1), (1, 2, 7, 18, -1), (1, 3, 6, 17, -1), (1, 4, 5, 16, -1), (1, 5, 4, 9, -1), (1, 1, 9, 20, -1), (1, 2, 8, 19, -1), (1, 3, 7, 18, -1), (1, 4, 6, 17, -1), (1, 5, 5, 8, -1), (1, 1, 10, 21, -10)]setup agents
e: 1, t: 1, agent 1, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 1, t: 2, agent 1, result: (s = 13, a = :right, r = -1, sp = 14, t = 2)
e: 1, t: 1, agent 2, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 1, t: 3, agent 1, result: (s = 14, a = :right, r = -1, sp = 15, t = 3)
e: 1

e: 1, t: 6, agent 5, result: (s = 17, a = :right, r = -1, sp = 18, t = 6)
e: 1, t: 11, agent 1, result: (s = 2, a = :right, r = 10, sp = 1, t = 11)
Any[(1, 1, 1, 12, -1), (1, 1, 2, 11, -1), (1, 2, 1, 12, -1), (1, 1, 3, 10, -1), (1, 2, 2, 13, -1), (1, 3, 1, 12, -1), (1, 1, 4, 9, -1), (1, 2, 3, 14, -1), (1, 3, 2, 11, -1), (1, 4, 1, 12, -1), (1, 1, 5, 8, -1), (1, 2, 4, 15, -1), (1, 3, 3, 10, -1), (1, 4, 2, 13, -1), (1, 5, 1, 12, -1), (1, 1, 6, 7, -1), (1, 2, 5, 16, -1), (1, 3, 4, 9, -1), (1, 4, 3, 14, -1), (1, 5, 2, 13, -1), (1, 1, 7, 6, -1), (1, 2, 6, 17, -1), (1, 3, 5, 8, -1), (1, 4, 4, 15, -1), (1, 5, 3, 14, -1), (1, 1, 8, 5, -1), (1, 2, 7, 18, -1), (1, 3, 6, 7, -1), (1, 4, 5, 16, -1), (1, 5, 4, 15, -1), (1, 1, 9, 4, -1), (1, 2, 8, 19, -1), (1, 3, 7, 6, -1), (1, 4, 6, 17, -1), (1, 5, 5, 16, -1), (1, 1, 10, 3, -1), (1, 2, 9, 20, -1), (1, 3, 8, 5, -1), (1, 4, 7, 18, -1), (1, 5, 6, 17, -1), (1, 1, 11, 2, 10)]setup agents
e: 1, t: 1, agent 1, result: (s = 12, a = :left, r = -1, sp = 11, t 

e: 1, t: 7, agent 3, result: (s = 6, a = :left, r = -1, sp = 5, t = 7)
e: 1, t: 6, agent 4, result: (s = 17, a = :right, r = -1, sp = 18, t = 6)
e: 1, t: 5, agent 5, result: (s = 16, a = :right, r = -1, sp = 17, t = 5)
e: 1, t: 10, agent 1, result: (s = 21, a = :right, r = -10, sp = 22, t = 10)
Any[(1, 1, 1, 12, -1), (1, 1, 2, 13, -1), (1, 2, 1, 12, -1), (1, 1, 3, 14, -1), (1, 2, 2, 13, -1), (1, 3, 1, 12, -1), (1, 1, 4, 15, -1), (1, 2, 3, 14, -1), (1, 3, 2, 11, -1), (1, 4, 1, 12, -1), (1, 1, 5, 16, -1), (1, 2, 4, 15, -1), (1, 3, 3, 10, -1), (1, 4, 2, 13, -1), (1, 5, 1, 12, -1), (1, 1, 6, 17, -1), (1, 2, 5, 16, -1), (1, 3, 4, 9, -1), (1, 4, 3, 14, -1), (1, 5, 2, 13, -1), (1, 1, 7, 18, -1), (1, 2, 6, 17, -1), (1, 3, 5, 8, -1), (1, 4, 4, 15, -1), (1, 5, 3, 14, -1), (1, 1, 8, 19, -1), (1, 2, 7, 18, -1), (1, 3, 6, 7, -1), (1, 4, 5, 16, -1), (1, 5, 4, 15, -1), (1, 1, 9, 20, -1), (1, 2, 8, 19, -1), (1, 3, 7, 6, -1), (1, 4, 6, 17, -1), (1, 5, 5, 16, -1), (1, 1, 10, 21, -10)]setup agents
e: 1, 

e: 1, t: 6, agent 1, result: (s = 7, a = :left, r = -1, sp = 6, t = 6)
e: 1, t: 5, agent 2, result: (s = 16, a = :right, r = -1, sp = 17, t = 5)
e: 1, t: 4, agent 3, result: (s = 9, a = :left, r = -1, sp = 8, t = 4)
e: 1, t: 3, agent 4, result: (s = 14, a = :right, r = -1, sp = 15, t = 3)
e: 1, t: 2, agent 5, result: (s = 11, a = :left, r = -1, sp = 10, t = 2)
e: 1, t: 7, agent 1, result: (s = 6, a = :left, r = -1, sp = 5, t = 7)
e: 1, t: 6, agent 2, result: (s = 17, a = :right, r = -1, sp = 18, t = 6)
e: 1, t: 5, agent 3, result: (s = 8, a = :left, r = -1, sp = 7, t = 5)
e: 1, t: 4, agent 4, result: (s = 15, a = :right, r = -1, sp = 16, t = 4)
e: 1, t: 3, agent 5, result: (s = 10, a = :left, r = -1, sp = 9, t = 3)
e: 1, t: 8, agent 1, result: (s = 5, a = :left, r = -1, sp = 4, t = 8)
e: 1, t: 7, agent 2, result: (s = 18, a = :right, r = -1, sp = 19, t = 7)
e: 1, t: 6, agent 3, result: (s = 7, a = :left, r = -1, sp = 6, t = 6)
e: 1, t: 5, agent 4, result: (s = 16, a = :right, r = -1, s

e: 1, t: 2, agent 3, result: (s = 11, a = :left, r = -1, sp = 10, t = 2)
e: 1, t: 1, agent 4, result: (s = 12, a = :left, r = -1, sp = 11, t = 1)
e: 1, t: 5, agent 1, result: (s = 16, a = :right, r = -1, sp = 17, t = 5)
e: 1, t: 4, agent 2, result: (s = 9, a = :left, r = -1, sp = 8, t = 4)
e: 1, t: 3, agent 3, result: (s = 10, a = :left, r = -1, sp = 9, t = 3)
e: 1, t: 2, agent 4, result: (s = 11, a = :left, r = -1, sp = 10, t = 2)
e: 1, t: 1, agent 5, result: (s = 12, a = :left, r = -1, sp = 11, t = 1)
e: 1, t: 6, agent 1, result: (s = 17, a = :right, r = -1, sp = 18, t = 6)
e: 1, t: 5, agent 2, result: (s = 8, a = :left, r = -1, sp = 7, t = 5)
e: 1, t: 4, agent 3, result: (s = 9, a = :left, r = -1, sp = 8, t = 4)
e: 1, t: 3, agent 4, result: (s = 10, a = :left, r = -1, sp = 9, t = 3)
e: 1, t: 2, agent 5, result: (s = 11, a = :left, r = -1, sp = 10, t = 2)
e: 1, t: 7, agent 1, result: (s = 18, a = :right, r = -1, sp = 19, t = 7)
e: 1, t: 6, agent 2, result: (s = 7, a = :left, r = -1, 

e: 1, t: 9, agent 1, result: (s = 20, a = :right, r = -1, sp = 21, t = 9)
e: 1, t: 8, agent 2, result: (s = 5, a = :left, r = -1, sp = 4, t = 8)
e: 1, t: 7, agent 3, result: (s = 6, a = :left, r = -1, sp = 5, t = 7)
e: 1, t: 6, agent 4, result: (s = 7, a = :left, r = -1, sp = 6, t = 6)
e: 1, t: 5, agent 5, result: (s = 8, a = :left, r = -1, sp = 7, t = 5)
e: 1, t: 10, agent 1, result: (s = 21, a = :right, r = -10, sp = 22, t = 10)
Any[(1, 1, 1, 12, -1), (1, 1, 2, 13, -1), (1, 2, 1, 12, -1), (1, 1, 3, 14, -1), (1, 2, 2, 11, -1), (1, 3, 1, 12, -1), (1, 1, 4, 15, -1), (1, 2, 3, 10, -1), (1, 3, 2, 11, -1), (1, 4, 1, 12, -1), (1, 1, 5, 16, -1), (1, 2, 4, 9, -1), (1, 3, 3, 10, -1), (1, 4, 2, 11, -1), (1, 5, 1, 12, -1), (1, 1, 6, 17, -1), (1, 2, 5, 8, -1), (1, 3, 4, 9, -1), (1, 4, 3, 10, -1), (1, 5, 2, 11, -1), (1, 1, 7, 18, -1), (1, 2, 6, 7, -1), (1, 3, 5, 8, -1), (1, 4, 4, 9, -1), (1, 5, 3, 10, -1), (1, 1, 8, 19, -1), (1, 2, 7, 6, -1), (1, 3, 6, 7, -1), (1, 4, 5, 8, -1), (1, 5, 4, 9, -1), (

e: 1, t: 5, agent 4, result: (s = 16, a = :right, r = -1, sp = 17, t = 5)
e: 1, t: 4, agent 5, result: (s = 15, a = :right, r = -1, sp = 16, t = 4)
e: 1, t: 9, agent 1, result: (s = 20, a = :right, r = -1, sp = 21, t = 9)
e: 1, t: 8, agent 2, result: (s = 5, a = :left, r = -1, sp = 4, t = 8)
e: 1, t: 7, agent 3, result: (s = 18, a = :right, r = -1, sp = 19, t = 7)
e: 1, t: 6, agent 4, result: (s = 17, a = :right, r = -1, sp = 18, t = 6)
e: 1, t: 5, agent 5, result: (s = 16, a = :right, r = -1, sp = 17, t = 5)
e: 1, t: 10, agent 1, result: (s = 21, a = :right, r = -10, sp = 22, t = 10)
Any[(1, 1, 1, 12, -1), (1, 1, 2, 13, -1), (1, 2, 1, 12, -1), (1, 1, 3, 14, -1), (1, 2, 2, 11, -1), (1, 3, 1, 12, -1), (1, 1, 4, 15, -1), (1, 2, 3, 10, -1), (1, 3, 2, 13, -1), (1, 4, 1, 12, -1), (1, 1, 5, 16, -1), (1, 2, 4, 9, -1), (1, 3, 3, 14, -1), (1, 4, 2, 13, -1), (1, 5, 1, 12, -1), (1, 1, 6, 17, -1), (1, 2, 5, 8, -1), (1, 3, 4, 15, -1), (1, 4, 3, 14, -1), (1, 5, 2, 13, -1), (1, 1, 7, 18, -1), (1, 2, 

e: 1, t: 9, agent 1, result: (s = 20, a = :right, r = -1, sp = 21, t = 9)
e: 1, t: 8, agent 2, result: (s = 5, a = :left, r = -1, sp = 4, t = 8)
e: 1, t: 7, agent 3, result: (s = 18, a = :right, r = -1, sp = 19, t = 7)
e: 1, t: 6, agent 4, result: (s = 7, a = :left, r = -1, sp = 6, t = 6)
e: 1, t: 5, agent 5, result: (s = 8, a = :left, r = -1, sp = 7, t = 5)
e: 1, t: 10, agent 1, result: (s = 21, a = :right, r = -10, sp = 22, t = 10)
Any[(1, 1, 1, 12, -1), (1, 1, 2, 13, -1), (1, 2, 1, 12, -1), (1, 1, 3, 14, -1), (1, 2, 2, 11, -1), (1, 3, 1, 12, -1), (1, 1, 4, 15, -1), (1, 2, 3, 10, -1), (1, 3, 2, 13, -1), (1, 4, 1, 12, -1), (1, 1, 5, 16, -1), (1, 2, 4, 9, -1), (1, 3, 3, 14, -1), (1, 4, 2, 11, -1), (1, 5, 1, 12, -1), (1, 1, 6, 17, -1), (1, 2, 5, 8, -1), (1, 3, 4, 15, -1), (1, 4, 3, 10, -1), (1, 5, 2, 11, -1), (1, 1, 7, 18, -1), (1, 2, 6, 7, -1), (1, 3, 5, 16, -1), (1, 4, 4, 9, -1), (1, 5, 3, 10, -1), (1, 1, 8, 19, -1), (1, 2, 7, 6, -1), (1, 3, 6, 17, -1), (1, 4, 5, 8, -1), (1, 5, 4, 9, 

e: 1, t: 5, agent 4, result: (s = 8, a = :left, r = -1, sp = 7, t = 5)
e: 1, t: 4, agent 5, result: (s = 9, a = :left, r = -1, sp = 8, t = 4)
e: 1, t: 9, agent 1, result: (s = 20, a = :right, r = -1, sp = 21, t = 9)
e: 1, t: 8, agent 2, result: (s = 19, a = :right, r = -1, sp = 20, t = 8)
e: 1, t: 7, agent 3, result: (s = 6, a = :left, r = -1, sp = 5, t = 7)
e: 1, t: 6, agent 4, result: (s = 7, a = :left, r = -1, sp = 6, t = 6)
e: 1, t: 5, agent 5, result: (s = 8, a = :left, r = -1, sp = 7, t = 5)
e: 1, t: 10, agent 1, result: (s = 21, a = :right, r = -10, sp = 22, t = 10)
Any[(1, 1, 1, 12, -1), (1, 1, 2, 13, -1), (1, 2, 1, 12, -1), (1, 1, 3, 14, -1), (1, 2, 2, 13, -1), (1, 3, 1, 12, -1), (1, 1, 4, 15, -1), (1, 2, 3, 14, -1), (1, 3, 2, 11, -1), (1, 4, 1, 12, -1), (1, 1, 5, 16, -1), (1, 2, 4, 15, -1), (1, 3, 3, 10, -1), (1, 4, 2, 11, -1), (1, 5, 1, 12, -1), (1, 1, 6, 17, -1), (1, 2, 5, 16, -1), (1, 3, 4, 9, -1), (1, 4, 3, 10, -1), (1, 5, 2, 11, -1), (1, 1, 7, 18, -1), (1, 2, 6, 17, -1),

Any[(1, 1, 1, 12, -1), (1, 1, 2, 11, -1), (1, 2, 1, 12, -1), (1, 1, 3, 10, -1), (1, 2, 2, 13, -1), (1, 3, 1, 12, -1), (1, 1, 4, 9, -1), (1, 2, 3, 14, -1), (1, 3, 2, 11, -1), (1, 4, 1, 12, -1), (1, 1, 5, 8, -1), (1, 2, 4, 15, -1), (1, 3, 3, 10, -1), (1, 4, 2, 13, -1), (1, 5, 1, 12, -1), (1, 1, 6, 7, -1), (1, 2, 5, 16, -1), (1, 3, 4, 9, -1), (1, 4, 3, 14, -1), (1, 5, 2, 13, -1), (1, 1, 7, 6, -1), (1, 2, 6, 17, -1), (1, 3, 5, 8, -1), (1, 4, 4, 15, -1), (1, 5, 3, 14, -1), (1, 1, 8, 5, -1), (1, 2, 7, 18, -1), (1, 3, 6, 7, -1), (1, 4, 5, 16, -1), (1, 5, 4, 15, -1), (1, 1, 9, 4, -1), (1, 2, 8, 19, -1), (1, 3, 7, 6, -1), (1, 4, 6, 17, -1), (1, 5, 5, 16, -1), (1, 1, 10, 3, -1), (1, 2, 9, 20, -1), (1, 3, 8, 5, -1), (1, 4, 7, 18, -1), (1, 5, 6, 17, -1), (1, 1, 11, 2, 10)]setup agents
e: 1, t: 1, agent 1, result: (s = 12, a = :left, r = -1, sp = 11, t = 1)
e: 1, t: 2, agent 1, result: (s = 11, a = :left, r = -1, sp = 10, t = 2)
e: 1, t: 1, agent 2, result: (s = 12, a = :left, r = -1, sp = 11, t = 

e: 1, t: 9, agent 1, result: (s = 20, a = :right, r = -1, sp = 21, t = 9)
e: 1, t: 8, agent 2, result: (s = 19, a = :right, r = -1, sp = 20, t = 8)
e: 1, t: 7, agent 3, result: (s = 6, a = :left, r = -1, sp = 5, t = 7)
e: 1, t: 6, agent 4, result: (s = 7, a = :left, r = -1, sp = 6, t = 6)
e: 1, t: 5, agent 5, result: (s = 8, a = :left, r = -1, sp = 7, t = 5)
e: 1, t: 10, agent 1, result: (s = 21, a = :right, r = -10, sp = 22, t = 10)
Any[(1, 1, 1, 12, -1), (1, 1, 2, 13, -1), (1, 2, 1, 12, -1), (1, 1, 3, 14, -1), (1, 2, 2, 13, -1), (1, 3, 1, 12, -1), (1, 1, 4, 15, -1), (1, 2, 3, 14, -1), (1, 3, 2, 11, -1), (1, 4, 1, 12, -1), (1, 1, 5, 16, -1), (1, 2, 4, 15, -1), (1, 3, 3, 10, -1), (1, 4, 2, 11, -1), (1, 5, 1, 12, -1), (1, 1, 6, 17, -1), (1, 2, 5, 16, -1), (1, 3, 4, 9, -1), (1, 4, 3, 10, -1), (1, 5, 2, 11, -1), (1, 1, 7, 18, -1), (1, 2, 6, 17, -1), (1, 3, 5, 8, -1), (1, 4, 4, 9, -1), (1, 5, 3, 10, -1), (1, 1, 8, 19, -1), (1, 2, 7, 18, -1), (1, 3, 6, 7, -1), (1, 4, 5, 8, -1), (1, 5, 4, 9,

e: 1, t: 6, agent 4, result: (s = 17, a = :right, r = -1, sp = 18, t = 6)
e: 1, t: 5, agent 5, result: (s = 16, a = :right, r = -1, sp = 17, t = 5)
e: 1, t: 10, agent 1, result: (s = 3, a = :left, r = -1, sp = 2, t = 10)
e: 1, t: 9, agent 2, result: (s = 20, a = :right, r = -1, sp = 21, t = 9)
e: 1, t: 8, agent 3, result: (s = 5, a = :left, r = -1, sp = 4, t = 8)
e: 1, t: 7, agent 4, result: (s = 18, a = :right, r = -1, sp = 19, t = 7)
e: 1, t: 6, agent 5, result: (s = 17, a = :right, r = -1, sp = 18, t = 6)
e: 1, t: 11, agent 1, result: (s = 2, a = :right, r = 10, sp = 1, t = 11)
Any[(1, 1, 1, 12, -1), (1, 1, 2, 11, -1), (1, 2, 1, 12, -1), (1, 1, 3, 10, -1), (1, 2, 2, 13, -1), (1, 3, 1, 12, -1), (1, 1, 4, 9, -1), (1, 2, 3, 14, -1), (1, 3, 2, 11, -1), (1, 4, 1, 12, -1), (1, 1, 5, 8, -1), (1, 2, 4, 15, -1), (1, 3, 3, 10, -1), (1, 4, 2, 13, -1), (1, 5, 1, 12, -1), (1, 1, 6, 7, -1), (1, 2, 5, 16, -1), (1, 3, 4, 9, -1), (1, 4, 3, 14, -1), (1, 5, 2, 13, -1), (1, 1, 7, 6, -1), (1, 2, 6, 17, 

e: 1, t: 8, agent 1, result: (s = 19, a = :right, r = -1, sp = 20, t = 8)
e: 1, t: 7, agent 2, result: (s = 18, a = :right, r = -1, sp = 19, t = 7)
e: 1, t: 6, agent 3, result: (s = 17, a = :right, r = -1, sp = 18, t = 6)
e: 1, t: 5, agent 4, result: (s = 8, a = :left, r = -1, sp = 7, t = 5)
e: 1, t: 4, agent 5, result: (s = 15, a = :right, r = -1, sp = 16, t = 4)
e: 1, t: 9, agent 1, result: (s = 20, a = :right, r = -1, sp = 21, t = 9)
e: 1, t: 8, agent 2, result: (s = 19, a = :right, r = -1, sp = 20, t = 8)
e: 1, t: 7, agent 3, result: (s = 18, a = :right, r = -1, sp = 19, t = 7)
e: 1, t: 6, agent 4, result: (s = 7, a = :left, r = -1, sp = 6, t = 6)
e: 1, t: 5, agent 5, result: (s = 16, a = :right, r = -1, sp = 17, t = 5)
e: 1, t: 10, agent 1, result: (s = 21, a = :right, r = -10, sp = 22, t = 10)
Any[(1, 1, 1, 12, -1), (1, 1, 2, 13, -1), (1, 2, 1, 12, -1), (1, 1, 3, 14, -1), (1, 2, 2, 13, -1), (1, 3, 1, 12, -1), (1, 1, 4, 15, -1), (1, 2, 3, 14, -1), (1, 3, 2, 13, -1), (1, 4, 1, 12, 

e: 1, t: 6, agent 4, result: (s = 7, a = :left, r = -1, sp = 6, t = 6)
e: 1, t: 5, agent 5, result: (s = 8, a = :left, r = -1, sp = 7, t = 5)
e: 1, t: 10, agent 1, result: (s = 21, a = :right, r = -10, sp = 22, t = 10)
Any[(1, 1, 1, 12, -1), (1, 1, 2, 13, -1), (1, 2, 1, 12, -1), (1, 1, 3, 14, -1), (1, 2, 2, 13, -1), (1, 3, 1, 12, -1), (1, 1, 4, 15, -1), (1, 2, 3, 14, -1), (1, 3, 2, 13, -1), (1, 4, 1, 12, -1), (1, 1, 5, 16, -1), (1, 2, 4, 15, -1), (1, 3, 3, 14, -1), (1, 4, 2, 11, -1), (1, 5, 1, 12, -1), (1, 1, 6, 17, -1), (1, 2, 5, 16, -1), (1, 3, 4, 15, -1), (1, 4, 3, 10, -1), (1, 5, 2, 11, -1), (1, 1, 7, 18, -1), (1, 2, 6, 17, -1), (1, 3, 5, 16, -1), (1, 4, 4, 9, -1), (1, 5, 3, 10, -1), (1, 1, 8, 19, -1), (1, 2, 7, 18, -1), (1, 3, 6, 17, -1), (1, 4, 5, 8, -1), (1, 5, 4, 9, -1), (1, 1, 9, 20, -1), (1, 2, 8, 19, -1), (1, 3, 7, 18, -1), (1, 4, 6, 7, -1), (1, 5, 5, 8, -1), (1, 1, 10, 21, -10)]setup agents
e: 1, t: 1, agent 1, result: (s = 12, a = :left, r = -1, sp = 11, t = 1)
e: 1, t: 2,

e: 1, t: 7, agent 3, result: (s = 6, a = :left, r = -1, sp = 5, t = 7)
e: 1, t: 6, agent 4, result: (s = 7, a = :left, r = -1, sp = 6, t = 6)
e: 1, t: 5, agent 5, result: (s = 16, a = :right, r = -1, sp = 17, t = 5)
e: 1, t: 10, agent 1, result: (s = 3, a = :left, r = -1, sp = 2, t = 10)
e: 1, t: 9, agent 2, result: (s = 4, a = :left, r = -1, sp = 3, t = 9)
e: 1, t: 8, agent 3, result: (s = 5, a = :left, r = -1, sp = 4, t = 8)
e: 1, t: 7, agent 4, result: (s = 6, a = :left, r = -1, sp = 5, t = 7)
e: 1, t: 6, agent 5, result: (s = 17, a = :right, r = -1, sp = 18, t = 6)
e: 1, t: 11, agent 1, result: (s = 2, a = :right, r = 10, sp = 1, t = 11)
Any[(1, 1, 1, 12, -1), (1, 1, 2, 11, -1), (1, 2, 1, 12, -1), (1, 1, 3, 10, -1), (1, 2, 2, 11, -1), (1, 3, 1, 12, -1), (1, 1, 4, 9, -1), (1, 2, 3, 10, -1), (1, 3, 2, 11, -1), (1, 4, 1, 12, -1), (1, 1, 5, 8, -1), (1, 2, 4, 9, -1), (1, 3, 3, 10, -1), (1, 4, 2, 11, -1), (1, 5, 1, 12, -1), (1, 1, 6, 7, -1), (1, 2, 5, 8, -1), (1, 3, 4, 9, -1), (1, 4, 3, 

e: 1, t: 8, agent 2, result: (s = 19, a = :right, r = -1, sp = 20, t = 8)
e: 1, t: 7, agent 3, result: (s = 6, a = :left, r = -1, sp = 5, t = 7)
e: 1, t: 6, agent 4, result: (s = 17, a = :right, r = -1, sp = 18, t = 6)
e: 1, t: 5, agent 5, result: (s = 16, a = :right, r = -1, sp = 17, t = 5)
e: 1, t: 10, agent 1, result: (s = 3, a = :left, r = -1, sp = 2, t = 10)
e: 1, t: 9, agent 2, result: (s = 20, a = :right, r = -1, sp = 21, t = 9)
e: 1, t: 8, agent 3, result: (s = 5, a = :left, r = -1, sp = 4, t = 8)
e: 1, t: 7, agent 4, result: (s = 18, a = :right, r = -1, sp = 19, t = 7)
e: 1, t: 6, agent 5, result: (s = 17, a = :right, r = -1, sp = 18, t = 6)
e: 1, t: 11, agent 1, result: (s = 2, a = :right, r = 10, sp = 1, t = 11)
Any[(1, 1, 1, 12, -1), (1, 1, 2, 11, -1), (1, 2, 1, 12, -1), (1, 1, 3, 10, -1), (1, 2, 2, 13, -1), (1, 3, 1, 12, -1), (1, 1, 4, 9, -1), (1, 2, 3, 14, -1), (1, 3, 2, 11, -1), (1, 4, 1, 12, -1), (1, 1, 5, 8, -1), (1, 2, 4, 15, -1), (1, 3, 3, 10, -1), (1, 4, 2, 13, -1),

e: 1, t: 3, agent 5, result: (s = 14, a = :right, r = -1, sp = 15, t = 3)
e: 1, t: 8, agent 1, result: (s = 19, a = :right, r = -1, sp = 20, t = 8)
e: 1, t: 7, agent 2, result: (s = 6, a = :left, r = -1, sp = 5, t = 7)
e: 1, t: 6, agent 3, result: (s = 7, a = :left, r = -1, sp = 6, t = 6)
e: 1, t: 5, agent 4, result: (s = 16, a = :right, r = -1, sp = 17, t = 5)
e: 1, t: 4, agent 5, result: (s = 15, a = :right, r = -1, sp = 16, t = 4)
e: 1, t: 9, agent 1, result: (s = 20, a = :right, r = -1, sp = 21, t = 9)
e: 1, t: 8, agent 2, result: (s = 5, a = :left, r = -1, sp = 4, t = 8)
e: 1, t: 7, agent 3, result: (s = 6, a = :left, r = -1, sp = 5, t = 7)
e: 1, t: 6, agent 4, result: (s = 17, a = :right, r = -1, sp = 18, t = 6)
e: 1, t: 5, agent 5, result: (s = 16, a = :right, r = -1, sp = 17, t = 5)
e: 1, t: 10, agent 1, result: (s = 21, a = :right, r = -10, sp = 22, t = 10)
Any[(1, 1, 1, 12, -1), (1, 1, 2, 13, -1), (1, 2, 1, 12, -1), (1, 1, 3, 14, -1), (1, 2, 2, 11, -1), (1, 3, 1, 12, -1), (1,

e: 1, t: 11, agent 1, result: (s = 2, a = :right, r = 10, sp = 1, t = 11)
Any[(1, 1, 1, 12, -1), (1, 1, 2, 11, -1), (1, 2, 1, 12, -1), (1, 1, 3, 10, -1), (1, 2, 2, 13, -1), (1, 3, 1, 12, -1), (1, 1, 4, 9, -1), (1, 2, 3, 14, -1), (1, 3, 2, 13, -1), (1, 4, 1, 12, -1), (1, 1, 5, 8, -1), (1, 2, 4, 15, -1), (1, 3, 3, 14, -1), (1, 4, 2, 11, -1), (1, 5, 1, 12, -1), (1, 1, 6, 7, -1), (1, 2, 5, 16, -1), (1, 3, 4, 15, -1), (1, 4, 3, 10, -1), (1, 5, 2, 11, -1), (1, 1, 7, 6, -1), (1, 2, 6, 17, -1), (1, 3, 5, 16, -1), (1, 4, 4, 9, -1), (1, 5, 3, 10, -1), (1, 1, 8, 5, -1), (1, 2, 7, 18, -1), (1, 3, 6, 17, -1), (1, 4, 5, 8, -1), (1, 5, 4, 9, -1), (1, 1, 9, 4, -1), (1, 2, 8, 19, -1), (1, 3, 7, 18, -1), (1, 4, 6, 7, -1), (1, 5, 5, 8, -1), (1, 1, 10, 3, -1), (1, 2, 9, 20, -1), (1, 3, 8, 19, -1), (1, 4, 7, 6, -1), (1, 5, 6, 7, -1), (1, 1, 11, 2, 10)]setup agents
e: 1, t: 1, agent 1, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 1, t: 2, agent 1, result: (s = 13, a = :right, r = -1, sp = 14, t =

Any[(1, 1, 1, 12, -1), (1, 1, 2, 13, -1), (1, 2, 1, 12, -1), (1, 1, 3, 14, -1), (1, 2, 2, 11, -1), (1, 3, 1, 12, -1), (1, 1, 4, 15, -1), (1, 2, 3, 10, -1), (1, 3, 2, 11, -1), (1, 4, 1, 12, -1), (1, 1, 5, 16, -1), (1, 2, 4, 9, -1), (1, 3, 3, 10, -1), (1, 4, 2, 13, -1), (1, 5, 1, 12, -1), (1, 1, 6, 17, -1), (1, 2, 5, 8, -1), (1, 3, 4, 9, -1), (1, 4, 3, 14, -1), (1, 5, 2, 11, -1), (1, 1, 7, 18, -1), (1, 2, 6, 7, -1), (1, 3, 5, 8, -1), (1, 4, 4, 15, -1), (1, 5, 3, 10, -1), (1, 1, 8, 19, -1), (1, 2, 7, 6, -1), (1, 3, 6, 7, -1), (1, 4, 5, 16, -1), (1, 5, 4, 9, -1), (1, 1, 9, 20, -1), (1, 2, 8, 5, -1), (1, 3, 7, 6, -1), (1, 4, 6, 17, -1), (1, 5, 5, 8, -1), (1, 1, 10, 21, -10)]setup agents
e: 1, t: 1, agent 1, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 1, t: 2, agent 1, result: (s = 13, a = :right, r = -1, sp = 14, t = 2)
e: 1, t: 1, agent 2, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 1, t: 3, agent 1, result: (s = 14, a = :right, r = -1, sp = 15, t = 3)
e: 1, t: 2, a

Any[(1, 1, 1, 12, -1), (1, 1, 2, 13, -1), (1, 2, 1, 12, -1), (1, 1, 3, 14, -1), (1, 2, 2, 11, -1), (1, 3, 1, 12, -1), (1, 1, 4, 15, -1), (1, 2, 3, 10, -1), (1, 3, 2, 13, -1), (1, 4, 1, 12, -1), (1, 1, 5, 16, -1), (1, 2, 4, 9, -1), (1, 3, 3, 14, -1), (1, 4, 2, 11, -1), (1, 5, 1, 12, -1), (1, 1, 6, 17, -1), (1, 2, 5, 8, -1), (1, 3, 4, 15, -1), (1, 4, 3, 10, -1), (1, 5, 2, 13, -1), (1, 1, 7, 18, -1), (1, 2, 6, 7, -1), (1, 3, 5, 16, -1), (1, 4, 4, 9, -1), (1, 5, 3, 14, -1), (1, 1, 8, 19, -1), (1, 2, 7, 6, -1), (1, 3, 6, 17, -1), (1, 4, 5, 8, -1), (1, 5, 4, 15, -1), (1, 1, 9, 20, -1), (1, 2, 8, 5, -1), (1, 3, 7, 18, -1), (1, 4, 6, 7, -1), (1, 5, 5, 16, -1), (1, 1, 10, 21, -10)]setup agents
e: 1, t: 1, agent 1, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 1, t: 2, agent 1, result: (s = 13, a = :right, r = -1, sp = 14, t = 2)
e: 1, t: 1, agent 2, result: (s = 12, a = :left, r = -1, sp = 11, t = 1)
e: 1, t: 3, agent 1, result: (s = 14, a = :right, r = -1, sp = 15, t = 3)
e: 1, t: 2,

Any[(1, 1, 1, 12, -1), (1, 1, 2, 13, -1), (1, 2, 1, 12, -1), (1, 1, 3, 14, -1), (1, 2, 2, 13, -1), (1, 3, 1, 12, -1), (1, 1, 4, 15, -1), (1, 2, 3, 14, -1), (1, 3, 2, 11, -1), (1, 4, 1, 12, -1), (1, 1, 5, 16, -1), (1, 2, 4, 15, -1), (1, 3, 3, 10, -1), (1, 4, 2, 11, -1), (1, 5, 1, 12, -1), (1, 1, 6, 17, -1), (1, 2, 5, 16, -1), (1, 3, 4, 9, -1), (1, 4, 3, 10, -1), (1, 5, 2, 13, -1), (1, 1, 7, 18, -1), (1, 2, 6, 17, -1), (1, 3, 5, 8, -1), (1, 4, 4, 9, -1), (1, 5, 3, 14, -1), (1, 1, 8, 19, -1), (1, 2, 7, 18, -1), (1, 3, 6, 7, -1), (1, 4, 5, 8, -1), (1, 5, 4, 15, -1), (1, 1, 9, 20, -1), (1, 2, 8, 19, -1), (1, 3, 7, 6, -1), (1, 4, 6, 7, -1), (1, 5, 5, 16, -1), (1, 1, 10, 21, -10)]setup agents
e: 1, t: 1, agent 1, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 1, t: 2, agent 1, result: (s = 13, a = :right, r = -1, sp = 14, t = 2)
e: 1, t: 1, agent 2, result: (s = 12, a = :right, r = -1, sp = 13, t = 1)
e: 1, t: 3, agent 1, result: (s = 14, a = :right, r = -1, sp = 15, t = 3)
e: 1, t: 

Any[(1, 1, 1, 12, -1), (1, 1, 2, 11, -1), (1, 2, 1, 12, -1), (1, 1, 3, 10, -1), (1, 2, 2, 13, -1), (1, 3, 1, 12, -1), (1, 1, 4, 9, -1), (1, 2, 3, 14, -1), (1, 3, 2, 13, -1), (1, 4, 1, 12, -1), (1, 1, 5, 8, -1), (1, 2, 4, 15, -1), (1, 3, 3, 14, -1), (1, 4, 2, 13, -1), (1, 5, 1, 12, -1), (1, 1, 6, 7, -1), (1, 2, 5, 16, -1), (1, 3, 4, 15, -1), (1, 4, 3, 14, -1), (1, 5, 2, 11, -1), (1, 1, 7, 6, -1), (1, 2, 6, 17, -1), (1, 3, 5, 16, -1), (1, 4, 4, 15, -1), (1, 5, 3, 10, -1), (1, 1, 8, 5, -1), (1, 2, 7, 18, -1), (1, 3, 6, 17, -1), (1, 4, 5, 16, -1), (1, 5, 4, 9, -1), (1, 1, 9, 4, -1), (1, 2, 8, 19, -1), (1, 3, 7, 18, -1), (1, 4, 6, 17, -1), (1, 5, 5, 8, -1), (1, 1, 10, 3, -1), (1, 2, 9, 20, -1), (1, 3, 8, 19, -1), (1, 4, 7, 18, -1), (1, 5, 6, 7, -1), (1, 1, 11, 2, 10)]3855×6 DataFrame
│ Row  │ run   │ epoch │ agent │ time  │ state │ reward │
│      │ [90mInt64[39m │ [90mInt64[39m │ [90mInt64[39m │ [90mInt64[39m │ [90mInt64[39m │ [90mInt64[39m  │
├──────┼───────┼───────┼───────┼──

│ 134  │ 4     │ 1     │ 1     │ 6     │ 7     │ -1     │
│ 135  │ 4     │ 1     │ 2     │ 5     │ 8     │ -1     │
│ 136  │ 4     │ 1     │ 3     │ 4     │ 15    │ -1     │
│ 137  │ 4     │ 1     │ 4     │ 3     │ 10    │ -1     │
│ 138  │ 4     │ 1     │ 5     │ 2     │ 13    │ -1     │
│ 139  │ 4     │ 1     │ 1     │ 7     │ 6     │ -1     │
│ 140  │ 4     │ 1     │ 2     │ 6     │ 7     │ -1     │
│ 141  │ 4     │ 1     │ 3     │ 5     │ 16    │ -1     │
│ 142  │ 4     │ 1     │ 4     │ 4     │ 9     │ -1     │
│ 143  │ 4     │ 1     │ 5     │ 3     │ 14    │ -1     │
│ 144  │ 4     │ 1     │ 1     │ 8     │ 5     │ -1     │
│ 145  │ 4     │ 1     │ 2     │ 7     │ 6     │ -1     │
│ 146  │ 4     │ 1     │ 3     │ 6     │ 17    │ -1     │
│ 147  │ 4     │ 1     │ 4     │ 5     │ 8     │ -1     │
│ 148  │ 4     │ 1     │ 5     │ 4     │ 15    │ -1     │
│ 149  │ 4     │ 1     │ 1     │ 9     │ 4     │ -1     │
│ 150  │ 4     │ 1     │ 2     │ 8     │ 5     │ -1     │
│ 151  │ 4    

│ 346  │ 9     │ 1     │ 3     │ 6     │ 17    │ -1     │
│ 347  │ 9     │ 1     │ 4     │ 5     │ 8     │ -1     │
│ 348  │ 9     │ 1     │ 5     │ 4     │ 9     │ -1     │
│ 349  │ 9     │ 1     │ 1     │ 9     │ 20    │ -1     │
│ 350  │ 9     │ 1     │ 2     │ 8     │ 5     │ -1     │
│ 351  │ 9     │ 1     │ 3     │ 7     │ 18    │ -1     │
│ 352  │ 9     │ 1     │ 4     │ 6     │ 7     │ -1     │
│ 353  │ 9     │ 1     │ 5     │ 5     │ 8     │ -1     │
│ 354  │ 9     │ 1     │ 1     │ 10    │ 21    │ -10    │
│ 355  │ 10    │ 1     │ 1     │ 1     │ 12    │ -1     │
│ 356  │ 10    │ 1     │ 1     │ 2     │ 11    │ -1     │
│ 357  │ 10    │ 1     │ 2     │ 1     │ 12    │ -1     │
│ 358  │ 10    │ 1     │ 1     │ 3     │ 10    │ -1     │
│ 359  │ 10    │ 1     │ 2     │ 2     │ 11    │ -1     │
│ 360  │ 10    │ 1     │ 3     │ 1     │ 12    │ -1     │
│ 361  │ 10    │ 1     │ 1     │ 4     │ 9     │ -1     │
│ 362  │ 10    │ 1     │ 2     │ 3     │ 10    │ -1     │
│ 363  │ 10   

│ 549  │ 14    │ 1     │ 1     │ 10    │ 3     │ -1     │
│ 550  │ 14    │ 1     │ 2     │ 9     │ 4     │ -1     │
│ 551  │ 14    │ 1     │ 3     │ 8     │ 19    │ -1     │
│ 552  │ 14    │ 1     │ 4     │ 7     │ 6     │ -1     │
│ 553  │ 14    │ 1     │ 5     │ 6     │ 17    │ -1     │
│ 554  │ 14    │ 1     │ 1     │ 11    │ 2     │ 10     │
│ 555  │ 15    │ 1     │ 1     │ 1     │ 12    │ -1     │
│ 556  │ 15    │ 1     │ 1     │ 2     │ 11    │ -1     │
│ 557  │ 15    │ 1     │ 2     │ 1     │ 12    │ -1     │
│ 558  │ 15    │ 1     │ 1     │ 3     │ 10    │ -1     │
│ 559  │ 15    │ 1     │ 2     │ 2     │ 11    │ -1     │
│ 560  │ 15    │ 1     │ 3     │ 1     │ 12    │ -1     │
│ 561  │ 15    │ 1     │ 1     │ 4     │ 9     │ -1     │
│ 562  │ 15    │ 1     │ 2     │ 3     │ 10    │ -1     │
│ 563  │ 15    │ 1     │ 3     │ 2     │ 11    │ -1     │
│ 564  │ 15    │ 1     │ 4     │ 1     │ 12    │ -1     │
│ 565  │ 15    │ 1     │ 1     │ 5     │ 8     │ -1     │
│ 566  │ 15   

│ 757  │ 20    │ 1     │ 2     │ 3     │ 14    │ -1     │
│ 758  │ 20    │ 1     │ 3     │ 2     │ 13    │ -1     │
│ 759  │ 20    │ 1     │ 4     │ 1     │ 12    │ -1     │
│ 760  │ 20    │ 1     │ 1     │ 5     │ 16    │ -1     │
│ 761  │ 20    │ 1     │ 2     │ 4     │ 15    │ -1     │
│ 762  │ 20    │ 1     │ 3     │ 3     │ 14    │ -1     │
│ 763  │ 20    │ 1     │ 4     │ 2     │ 11    │ -1     │
│ 764  │ 20    │ 1     │ 5     │ 1     │ 12    │ -1     │
│ 765  │ 20    │ 1     │ 1     │ 6     │ 17    │ -1     │
│ 766  │ 20    │ 1     │ 2     │ 5     │ 16    │ -1     │
│ 767  │ 20    │ 1     │ 3     │ 4     │ 15    │ -1     │
│ 768  │ 20    │ 1     │ 4     │ 3     │ 10    │ -1     │
│ 769  │ 20    │ 1     │ 5     │ 2     │ 11    │ -1     │
│ 770  │ 20    │ 1     │ 1     │ 7     │ 18    │ -1     │
│ 771  │ 20    │ 1     │ 2     │ 6     │ 17    │ -1     │
│ 772  │ 20    │ 1     │ 3     │ 5     │ 16    │ -1     │
│ 773  │ 20    │ 1     │ 4     │ 4     │ 9     │ -1     │
│ 774  │ 20   

│ 966  │ 25    │ 1     │ 2     │ 8     │ 5     │ -1     │
│ 967  │ 25    │ 1     │ 3     │ 7     │ 6     │ -1     │
│ 968  │ 25    │ 1     │ 4     │ 6     │ 7     │ -1     │
│ 969  │ 25    │ 1     │ 5     │ 5     │ 8     │ -1     │
│ 970  │ 25    │ 1     │ 1     │ 10    │ 3     │ -1     │
│ 971  │ 25    │ 1     │ 2     │ 9     │ 4     │ -1     │
│ 972  │ 25    │ 1     │ 3     │ 8     │ 5     │ -1     │
│ 973  │ 25    │ 1     │ 4     │ 7     │ 6     │ -1     │
│ 974  │ 25    │ 1     │ 5     │ 6     │ 7     │ -1     │
│ 975  │ 25    │ 1     │ 1     │ 11    │ 2     │ 10     │
│ 976  │ 26    │ 1     │ 1     │ 1     │ 12    │ -1     │
│ 977  │ 26    │ 1     │ 1     │ 2     │ 13    │ -1     │
│ 978  │ 26    │ 1     │ 2     │ 1     │ 12    │ -1     │
│ 979  │ 26    │ 1     │ 1     │ 3     │ 14    │ -1     │
│ 980  │ 26    │ 1     │ 2     │ 2     │ 11    │ -1     │
│ 981  │ 26    │ 1     │ 3     │ 1     │ 12    │ -1     │
│ 982  │ 26    │ 1     │ 1     │ 4     │ 15    │ -1     │
│ 983  │ 26   

│ 1175 │ 31    │ 1     │ 4     │ 1     │ 12    │ -1     │
│ 1176 │ 31    │ 1     │ 1     │ 5     │ 16    │ -1     │
│ 1177 │ 31    │ 1     │ 2     │ 4     │ 9     │ -1     │
│ 1178 │ 31    │ 1     │ 3     │ 3     │ 10    │ -1     │
│ 1179 │ 31    │ 1     │ 4     │ 2     │ 11    │ -1     │
│ 1180 │ 31    │ 1     │ 5     │ 1     │ 12    │ -1     │
│ 1181 │ 31    │ 1     │ 1     │ 6     │ 17    │ -1     │
│ 1182 │ 31    │ 1     │ 2     │ 5     │ 8     │ -1     │
│ 1183 │ 31    │ 1     │ 3     │ 4     │ 9     │ -1     │
│ 1184 │ 31    │ 1     │ 4     │ 3     │ 10    │ -1     │
│ 1185 │ 31    │ 1     │ 5     │ 2     │ 13    │ -1     │
│ 1186 │ 31    │ 1     │ 1     │ 7     │ 18    │ -1     │
│ 1187 │ 31    │ 1     │ 2     │ 6     │ 7     │ -1     │
│ 1188 │ 31    │ 1     │ 3     │ 5     │ 8     │ -1     │
│ 1189 │ 31    │ 1     │ 4     │ 4     │ 9     │ -1     │
│ 1190 │ 31    │ 1     │ 5     │ 3     │ 14    │ -1     │
│ 1191 │ 31    │ 1     │ 1     │ 8     │ 19    │ -1     │
│ 1192 │ 31   

│ 1384 │ 36    │ 1     │ 4     │ 4     │ 15    │ -1     │
│ 1385 │ 36    │ 1     │ 5     │ 3     │ 14    │ -1     │
│ 1386 │ 36    │ 1     │ 1     │ 8     │ 5     │ -1     │
│ 1387 │ 36    │ 1     │ 2     │ 7     │ 18    │ -1     │
│ 1388 │ 36    │ 1     │ 3     │ 6     │ 7     │ -1     │
│ 1389 │ 36    │ 1     │ 4     │ 5     │ 16    │ -1     │
│ 1390 │ 36    │ 1     │ 5     │ 4     │ 15    │ -1     │
│ 1391 │ 36    │ 1     │ 1     │ 9     │ 4     │ -1     │
│ 1392 │ 36    │ 1     │ 2     │ 8     │ 19    │ -1     │
│ 1393 │ 36    │ 1     │ 3     │ 7     │ 6     │ -1     │
│ 1394 │ 36    │ 1     │ 4     │ 6     │ 17    │ -1     │
│ 1395 │ 36    │ 1     │ 5     │ 5     │ 16    │ -1     │
│ 1396 │ 36    │ 1     │ 1     │ 10    │ 3     │ -1     │
│ 1397 │ 36    │ 1     │ 2     │ 9     │ 20    │ -1     │
│ 1398 │ 36    │ 1     │ 3     │ 8     │ 5     │ -1     │
│ 1399 │ 36    │ 1     │ 4     │ 7     │ 18    │ -1     │
│ 1400 │ 36    │ 1     │ 5     │ 6     │ 17    │ -1     │
│ 1401 │ 36   

│ 1595 │ 42    │ 1     │ 1     │ 3     │ 14    │ -1     │
│ 1596 │ 42    │ 1     │ 2     │ 2     │ 11    │ -1     │
│ 1597 │ 42    │ 1     │ 3     │ 1     │ 12    │ -1     │
│ 1598 │ 42    │ 1     │ 1     │ 4     │ 15    │ -1     │
│ 1599 │ 42    │ 1     │ 2     │ 3     │ 10    │ -1     │
│ 1600 │ 42    │ 1     │ 3     │ 2     │ 11    │ -1     │
│ 1601 │ 42    │ 1     │ 4     │ 1     │ 12    │ -1     │
│ 1602 │ 42    │ 1     │ 1     │ 5     │ 16    │ -1     │
│ 1603 │ 42    │ 1     │ 2     │ 4     │ 9     │ -1     │
│ 1604 │ 42    │ 1     │ 3     │ 3     │ 10    │ -1     │
│ 1605 │ 42    │ 1     │ 4     │ 2     │ 11    │ -1     │
│ 1606 │ 42    │ 1     │ 5     │ 1     │ 12    │ -1     │
│ 1607 │ 42    │ 1     │ 1     │ 6     │ 17    │ -1     │
│ 1608 │ 42    │ 1     │ 2     │ 5     │ 8     │ -1     │
│ 1609 │ 42    │ 1     │ 3     │ 4     │ 9     │ -1     │
│ 1610 │ 42    │ 1     │ 4     │ 3     │ 10    │ -1     │
│ 1611 │ 42    │ 1     │ 5     │ 2     │ 11    │ -1     │
│ 1612 │ 42   

│ 1792 │ 47    │ 1     │ 1     │ 5     │ 8     │ -1     │
│ 1793 │ 47    │ 1     │ 2     │ 4     │ 9     │ -1     │
│ 1794 │ 47    │ 1     │ 3     │ 3     │ 10    │ -1     │
│ 1795 │ 47    │ 1     │ 4     │ 2     │ 11    │ -1     │
│ 1796 │ 47    │ 1     │ 5     │ 1     │ 12    │ -1     │
│ 1797 │ 47    │ 1     │ 1     │ 6     │ 7     │ -1     │
│ 1798 │ 47    │ 1     │ 2     │ 5     │ 8     │ -1     │
│ 1799 │ 47    │ 1     │ 3     │ 4     │ 9     │ -1     │
│ 1800 │ 47    │ 1     │ 4     │ 3     │ 10    │ -1     │
│ 1801 │ 47    │ 1     │ 5     │ 2     │ 13    │ -1     │
│ 1802 │ 47    │ 1     │ 1     │ 7     │ 6     │ -1     │
│ 1803 │ 47    │ 1     │ 2     │ 6     │ 7     │ -1     │
│ 1804 │ 47    │ 1     │ 3     │ 5     │ 8     │ -1     │
│ 1805 │ 47    │ 1     │ 4     │ 4     │ 9     │ -1     │
│ 1806 │ 47    │ 1     │ 5     │ 3     │ 14    │ -1     │
│ 1807 │ 47    │ 1     │ 1     │ 8     │ 5     │ -1     │
│ 1808 │ 47    │ 1     │ 2     │ 7     │ 6     │ -1     │
│ 1809 │ 47   

│ 2006 │ 52    │ 1     │ 5     │ 4     │ 9     │ -1     │
│ 2007 │ 52    │ 1     │ 1     │ 9     │ 20    │ -1     │
│ 2008 │ 52    │ 1     │ 2     │ 8     │ 19    │ -1     │
│ 2009 │ 52    │ 1     │ 3     │ 7     │ 6     │ -1     │
│ 2010 │ 52    │ 1     │ 4     │ 6     │ 7     │ -1     │
│ 2011 │ 52    │ 1     │ 5     │ 5     │ 8     │ -1     │
│ 2012 │ 52    │ 1     │ 1     │ 10    │ 21    │ -10    │
│ 2013 │ 53    │ 1     │ 1     │ 1     │ 12    │ -1     │
│ 2014 │ 53    │ 1     │ 1     │ 2     │ 11    │ -1     │
│ 2015 │ 53    │ 1     │ 2     │ 1     │ 12    │ -1     │
│ 2016 │ 53    │ 1     │ 1     │ 3     │ 10    │ -1     │
│ 2017 │ 53    │ 1     │ 2     │ 2     │ 11    │ -1     │
│ 2018 │ 53    │ 1     │ 3     │ 1     │ 12    │ -1     │
│ 2019 │ 53    │ 1     │ 1     │ 4     │ 9     │ -1     │
│ 2020 │ 53    │ 1     │ 2     │ 3     │ 10    │ -1     │
│ 2021 │ 53    │ 1     │ 3     │ 2     │ 11    │ -1     │
│ 2022 │ 53    │ 1     │ 4     │ 1     │ 12    │ -1     │
│ 2023 │ 53   

Excessive output truncated after 524289 bytes.

     │
│ 2224 │ 58    │ 1     │ 2     │ 5     │ 16    │ -1     │
│ 2225 │ 58    │ 1     │ 3     │ 4     │ 15    │ -1     │
│ 2226 │ 58    │ 1     │ 4     │ 3     │ 14    │ -1     │
│ 2227 │ 58    │ 1     │ 5     │ 2     │ 13    │ -1     │
│ 2228 │ 58    │ 1     │ 1     │ 7     │ 6     │ -1     │
│ 2229 │ 58    │ 1     │ 2     │ 6     │ 17    │ -1     │
│ 2230 │ 58    │ 1     │ 3     │ 5     │ 16    │ -1     │
│ 2231 │ 58    │ 1     │ 4     │ 4     │ 15    │ -1     │
│ 2232 │ 58    │ 1     │ 5     │ 3     │ 14    │ -1     │
│ 2233 │ 58    │ 1     │ 1     │ 8     │ 5     │ -1     │
│ 2234 │ 58    │ 1     │ 2     │ 7     │ 18    │ -1     │
│ 2235 │ 58    │ 1     │ 3     │ 6     │ 17    │ -1     │
│ 2236 │ 58    │ 1     │ 4     │ 5     │ 16    │ -1     │
│ 2237 │ 58    │ 1     │ 5     │ 4     │ 15    │ -1     │
│ 2238 │ 58    │ 1     │ 1     │ 9     │ 4     │ -1     │
│ 2239 │ 58    │ 1     │ 2     │ 8     │ 19    │ -1     │
│ 2240 │ 58    │ 1     │ 3     │ 7     │ 18    │ -1     │
│ 2241 

Unnamed: 0_level_0,run,epoch,agent,time,state,reward
Unnamed: 0_level_1,Int64,Int64,Int64,Int64,Int64,Int64
1,1,1,1,1,12,-1
2,1,1,1,2,13,-1
3,1,1,2,1,12,-1
4,1,1,1,3,14,-1
5,1,1,2,2,13,-1
6,1,1,3,1,12,-1
7,1,1,1,4,15,-1
8,1,1,2,3,14,-1
9,1,1,3,2,13,-1
10,1,1,4,1,12,-1


In [40]:

get_average_regret(df)





500×4 DataFrame
│ Row │ run   │ epoch │ agent │ Regret  │
│     │ [90mInt64[39m │ [90mInt64[39m │ [90mInt64[39m │ [90mFloat64[39m │
├─────┼───────┼───────┼───────┼─────────┤
│ 1   │ 1     │ 1     │ 1     │ 29.0    │
│ 2   │ 1     │ 1     │ 2     │ 18.0    │
│ 3   │ 1     │ 1     │ 3     │ 17.0    │
│ 4   │ 1     │ 1     │ 4     │ 16.0    │
│ 5   │ 1     │ 1     │ 5     │ 15.0    │
│ 6   │ 2     │ 1     │ 1     │ 11.0    │
│ 7   │ 2     │ 1     │ 2     │ 19.0    │
│ 8   │ 2     │ 1     │ 3     │ 18.0    │
│ 9   │ 2     │ 1     │ 4     │ 17.0    │
│ 10  │ 2     │ 1     │ 5     │ 16.0    │
│ 11  │ 3     │ 1     │ 1     │ 11.0    │
│ 12  │ 3     │ 1     │ 2     │ 19.0    │
│ 13  │ 3     │ 1     │ 3     │ 18.0    │
│ 14  │ 3     │ 1     │ 4     │ 17.0    │
│ 15  │ 3     │ 1     │ 5     │ 16.0    │
│ 16  │ 4     │ 1     │ 1     │ 11.0    │
│ 17  │ 4     │ 1     │ 2     │ 19.0    │
│ 18  │ 4     │ 1     │ 3     │ 18.0    │
│ 19  │ 4     │ 1     │ 4     │ 17.0    │
│ 20  │ 4     │ 1   

│ 361 │ 73    │ 1     │ 1     │ 11.0    │
│ 362 │ 73    │ 1     │ 2     │ 19.0    │
│ 363 │ 73    │ 1     │ 3     │ 18.0    │
│ 364 │ 73    │ 1     │ 4     │ 17.0    │
│ 365 │ 73    │ 1     │ 5     │ 16.0    │
│ 366 │ 74    │ 1     │ 1     │ 11.0    │
│ 367 │ 74    │ 1     │ 2     │ 19.0    │
│ 368 │ 74    │ 1     │ 3     │ 18.0    │
│ 369 │ 74    │ 1     │ 4     │ 17.0    │
│ 370 │ 74    │ 1     │ 5     │ 16.0    │
│ 371 │ 75    │ 1     │ 1     │ 29.0    │
│ 372 │ 75    │ 1     │ 2     │ 18.0    │
│ 373 │ 75    │ 1     │ 3     │ 17.0    │
│ 374 │ 75    │ 1     │ 4     │ 16.0    │
│ 375 │ 75    │ 1     │ 5     │ 15.0    │
│ 376 │ 76    │ 1     │ 1     │ 11.0    │
│ 377 │ 76    │ 1     │ 2     │ 19.0    │
│ 378 │ 76    │ 1     │ 3     │ 18.0    │
│ 379 │ 76    │ 1     │ 4     │ 17.0    │
│ 380 │ 76    │ 1     │ 5     │ 16.0    │
│ 381 │ 77    │ 1     │ 1     │ 29.0    │
│ 382 │ 77    │ 1     │ 2     │ 18.0    │
│ 383 │ 77    │ 1     │ 3     │ 17.0    │
│ 384 │ 77    │ 1     │ 4     │ 16

17.572

In [41]:

num_states
R = (num_states - 2) / 2
max_theta_state = 2
for ag in 1:num_agents
    max_s = by(df, [:epoch, :agent], :state => maximum)
    print("$ag max: $max_s")
end

1 max: 5×3 DataFrame
│ Row │ epoch │ agent │ state_maximum │
│     │ Int64 │ Int64 │ Int64         │
├─────┼───────┼───────┼───────────────┤
│ 1   │ 1     │ 1     │ 21            │
│ 2   │ 1     │ 2     │ 20            │
│ 3   │ 1     │ 3     │ 19            │
│ 4   │ 1     │ 4     │ 18            │
│ 5   │ 1     │ 5     │ 17            │2 max: 5×3 DataFrame
│ Row │ epoch │ agent │ state_maximum │
│     │ Int64 │ Int64 │ Int64         │
├─────┼───────┼───────┼───────────────┤
│ 1   │ 1     │ 1     │ 21            │
│ 2   │ 1     │ 2     │ 20            │
│ 3   │ 1     │ 3     │ 19            │
│ 4   │ 1     │ 4     │ 18            │
│ 5   │ 1     │ 5     │ 17            │3 max: 5×3 DataFrame
│ Row │ epoch │ agent │ state_maximum │
│     │ Int64 │ Int64 │ Int64         │
├─────┼───────┼───────┼───────────────┤
│ 1   │ 1     │ 1     │ 21            │
│ 2   │ 1     │ 2     │ 20            │
│ 3   │ 1     │ 3     │ 19            │
│ 4   │ 1     │ 4     │ 18            │
│ 5   │ 1     │ 5  