In [215]:
using POMDPs
using Random # for AbstractRNG
using POMDPModelTools

In [216]:
"""
    ChainMDP(len, p_success, discount)

A one-dimensional chain MDP with integer states and integer actions.

The action type parameter is `Int` because every method defined for this model
dispatches on `a::Int`: action `2` moves right, and any other value is treated
as a move to the left.

# Fields
- `len`: upper bound on the state index; transitions are clamped to `1:len`.
- `p_success`: probability that a move goes in the intended direction rather
  than the opposite one.
- `discount`: presumably the discount factor; no method in this file reads it
  yet (TODO confirm whether `POMDPs.discount` should be defined).
"""
struct ChainMDP <: MDP{Int, Int}
    len::Int
    p_success::Float64
    discount::Float64
end


In [217]:
"""
    POMDPs.generate_s(p::ChainMDP, s::Int, a::Int, rng::AbstractRNG)

Sample the successor of state `s` under action `a`.

Action `2` aims one cell to the right; every other action value aims one cell
to the left. Moves are clamped to `1:p.len`. With probability `p.p_success`
the intended neighbour is returned, otherwise the opposite neighbour.

Two transitions are deterministic and draw nothing from `rng`:
- action `2` taken from state `p.len - 2` always yields `p.len - 1`;
- action `1` taken from state `2` always yields `1`.
"""
function POMDPs.generate_s(p::ChainMDP, s::Int, a::Int, rng::AbstractRNG)
    moving_right = a == 2

    # Deterministic final step onto either end of the usable chain.
    if moving_right && s + 2 == p.len
        return p.len - 1
    elseif a == 1 && s == 2
        return 1
    end

    # Neighbouring cells, clamped to the chain bounds.
    right = min(s + 1, p.len)
    left = max(s - 1, 1)
    intended, slipped = moving_right ? (right, left) : (left, right)

    if rand(rng) < p.p_success
        return intended
    end
    return slipped
end


In [218]:
"""
    POMDPs.reward(p::ChainMDP, s::Int, a::Int)

Return the reward for being in state `s`; the action `a` is not consulted.

State `2` yields `-10`, state `p.len - 2` yields `10`, and every other state
yields `0`. The penalty check comes first, so it wins if both conditions hold.
"""
function POMDPs.reward(p::ChainMDP, s::Int, a::Int)
    s == 2 && return -10
    s + 2 == p.len && return 10
    return 0
end

In [219]:
"""
    POMDPs.initialstate_distribution(m::ChainMDP)

Return a deterministic distribution over the starting state.

The start is the midpoint of the chain, `m.len ÷ 2`, so it adapts to the chain
length instead of being fixed at `4`. For the default `len == 8` model this is
still state `4`. The result is clamped to at least `1` so very short chains
never produce an out-of-range state index.
"""
POMDPs.initialstate_distribution(m::ChainMDP) = Deterministic(max(m.len ÷ 2, 1))

In [220]:
"""
    POMDPs.isterminal(p::ChainMDP, s::Int)

Return `true` when `s` is one of the two absorbing states of the chain:
state `1` or state `p.len - 1`.
"""
function POMDPs.isterminal(p::ChainMDP, s::Int)
    return s == 1 || s + 1 == p.len
end

In [221]:
using POMDPSimulators
using POMDPPolicies

# Default problem: len = 8, p_success = 0.9, discount = 0.9.
ChainMDP() = ChainMDP(8, 0.9, 0.9)
m = ChainMDP()

# Constant policy: ignore the state and always choose action 2 (move right).
policy = FunctionPolicy(_ -> 2)

# Step through at most 10 transitions, printing state, action and reward for
# each one, followed by a blank separator line.
foreach(stepthrough(m, policy, "s,a,r", max_steps=10)) do (s, a, r)
    @show s a r
    render(m, (s, a, r))
    println()
end


s = 4
a = 2
r = 0

s = 5
a = 2
r = 0

s = 6
a = 2
r = 10



In [222]:
using POMDPSimulators
using POMDPPolicies

# Default problem: len = 8, p_success = 0.9, discount = 0.9.
ChainMDP() = ChainMDP(8, 0.9, 0.9)
m = ChainMDP()

# Constant policy: ignore the state and always choose action 1 (move left).
policy = FunctionPolicy(_ -> 1)

# Step through at most 10 transitions and print each one on a single line.
foreach(stepthrough(m, policy, "s,a,r", max_steps=10)) do (s, a, r)
    render(m, (s, a, r))
    println("s,a,r:($s,$a,$r)")
end


s,a,r:(4,1,0)
s,a,r:(3,1,0)
s,a,r:(2,1,-10)


In [226]:



# Run several independent rollouts of the same chain in lock-step: each round
# advances every unfinished agent by one step and prints what it observed.
n_agents = 5
n_states = 10

# policy that maps every input to a right action
policy = FunctionPolicy(s->2)

# Start every chain at its midpoint. Defined once (not on every loop iteration)
# and derived from the model itself, so it no longer depends on the global
# `n_states`. Integer division also avoids the InexactError that
# `Int64(len / 2)` raises for odd lengths.
POMDPs.initialstate_distribution(m::ChainMDP) = Deterministic(m.len ÷ 2)

# The model is an immutable struct, so all agents can share one instance.
m = ChainMDP(n_states + 2, 0.9, 0.9)

# One stateful step iterator per agent; the comprehension gives the vector a
# concrete element type instead of `Any`.
agents = [
    Iterators.Stateful(stepthrough(m, policy, "s,a,r", max_steps=10)) for _ in 1:n_agents
]

# At least one round always runs; stop once every iterator is exhausted.
while true
    for (i, agent) in enumerate(agents)
        if isempty(agent)
            println("agent $i is done")
            continue
        end
        res = popfirst!(agent)
        println("print agent $i result: $res")
    end
    all(isempty, agents) && break
end

print agent 1 result: (s = 6, a = 2, r = 0)
print agent 2 result: (s = 6, a = 2, r = 0)
print agent 3 result: (s = 6, a = 2, r = 0)
print agent 4 result: (s = 6, a = 2, r = 0)
print agent 5 result: (s = 6, a = 2, r = 0)
print agent 1 result: (s = 7, a = 2, r = 0)
print agent 2 result: (s = 7, a = 2, r = 0)
print agent 3 result: (s = 7, a = 2, r = 0)
print agent 4 result: (s = 7, a = 2, r = 0)
print agent 5 result: (s = 7, a = 2, r = 0)
print agent 1 result: (s = 8, a = 2, r = 0)
print agent 2 result: (s = 8, a = 2, r = 0)
print agent 3 result: (s = 8, a = 2, r = 0)
print agent 4 result: (s = 8, a = 2, r = 0)
print agent 5 result: (s = 6, a = 2, r = 0)
print agent 1 result: (s = 7, a = 2, r = 0)
print agent 2 result: (s = 9, a = 2, r = 0)
print agent 3 result: (s = 7, a = 2, r = 0)
print agent 4 result: (s = 9, a = 2, r = 0)
print agent 5 result: (s = 7, a = 2, r = 0)
print agent 1 result: (s = 8, a = 2, r = 0)
print agent 2 result: (s = 10, a = 2, r = 10)
print agent 3 result: (s = 8, 

In [198]:
?MDPSimIterator

search:

Couldn't find MDPSimIterator
Perhaps you meant iterator


No documentation found.

Binding `MDPSimIterator` does not exist.
