In [1]:
# activate project environment
# include these lines of code in any future scripts/notebooks
#---
import Pkg
if !haskey(Pkg.installed(), "AA228FinalProject")
    jenv = joinpath(dirname(@__FILE__()), ".") # this assumes the notebook is in the same dir
    # as the Project.toml file, which should be in top level dir of the project. 
    # Change accordingly if this is not the case.
    Pkg.activate(jenv)
end
Pkg.instantiate()
Pkg.build("Cairo")
#---

[32m[1m  Updating[22m[39m registry at `~/.julia/registries/General`
[32m[1m  Updating[22m[39m git-repo `https://github.com/JuliaRegistries/General.git`
[?25l[2K[?25h[32m[1m  Updating[22m[39m registry at `~/.julia/registries/JuliaPOMDP`
[32m[1m  Updating[22m[39m git-repo `https://github.com/JuliaPOMDP/Registry`
[?25l[2K[?25h[32m[1m  Building[22m[39m LibCURL ─→ `~/.julia/packages/LibCURL/OoXMv/deps/build.log`
[32m[1m  Building[22m[39m WinRPM ──→ `~/.julia/packages/WinRPM/Y9QdZ/deps/build.log`
[32m[1m  Building[22m[39m Homebrew → `~/.julia/packages/Homebrew/l8kUw/deps/build.log`
[32m[1m  Building[22m[39m Cairo ───→ `~/.julia/packages/Cairo/CXPG1/deps/build.log`


In [3]:
# import necessary packages
using AA228FinalProject
using TabularTDLearning
using POMDPs
using MCTS
using ARDESPOT
using POMCPOW
using POMDPModels
using POMDPPolicies
using BasicPOMCP
using POMDPPolicies
using BeliefUpdaters
using ParticleFilters
using POMDPSimulators
using Cairo
using Gtk
using Random
using Printf

In [105]:
sensor = Bumper()
config = 1 # 1,2, or 3

1

In [106]:
ds = DiscreteRoombaStateSpace(50, 50, 50)
v_steps = range(0.0, stop = 10.0, length = 10) 
om_steps = range(-1.0 * pi + 0.01, stop = 1.0 * pi, length = 200) 

# discrete states space, discrete action space
m_mcts = RoombaPOMDP(sensor=sensor, mdp=RoombaMDP(sspace=ds, aspace=[RoombaAct(v,om) for v in v_steps for om in om_steps], config=config));

# Cont states space, cont action space
m_pomcpow = RoombaPOMDP(sensor=sensor, mdp=RoombaMDP(config=config));
                                    
# Cont states space, discrete action space
m_pomcp = RoombaPOMDP(sensor=sensor, mdp=RoombaMDP(aspace=[RoombaAct(v,om) for v in v_steps for om in om_steps], config=config));

In [112]:
num_particles = 5000
resampler = BumperResampler(num_particles)

spf_mcts = SimpleParticleFilter(m_mcts, resampler)
spf_pomcpow = SimpleParticleFilter(m_pomcpow, resampler)
spf_pomcp = SimpleParticleFilter(m_pomcp, resampler)
spf_ardespot = SimpleParticleFilter(m_pomcp, resampler)


v_noise_coefficient = 2.0
om_noise_coefficient = 0.5

belief_updater_mcts = RoombaParticleFilter(spf_mcts, v_noise_coefficient, om_noise_coefficient);
belief_updater_pomcp = RoombaParticleFilter(spf_pomcp, v_noise_coefficient, om_noise_coefficient);
belief_updater_pomcpow = RoombaParticleFilter(spf_pomcpow, v_noise_coefficient, om_noise_coefficient);
belief_updater_ardespot = RoombaParticleFilter(spf_ardespot, v_noise_coefficient, om_noise_coefficient);


# POMCP SOLVER
pomcp_solver = POMCPSolver()
pomcp_policy = solve(pomcp_solver, m_pomcp);

# POMCPOW SOLVER
pomcpow_solver = POMCPOWSolver(criterion=MaxUCB(20.0))
pomcpow_policy = solve(pomcpow_solver, m_pomcpow);

# MCTS SOLVER
mcts_solver = MCTSSolver(n_iterations=50, depth=10, exploration_constant=5.0) # initializes the Solver type
mcts_policy = solve(mcts_solver, m_mcts);

# ARDESPOT SOLVER
ardespot_solver = DESPOTSolver(bounds=(-20.0, 0.0))
# ardespot_solver = DESPOTSolver(bounds=(DefaultPolicyLB(RandomSolver()), 0.0))
ardespot_policy = solve(ardespot_solver, m_pomcp);

In [114]:
# first seed the environment
Random.seed!(5)

# reset the policy
# p = ToEnd(0) # here, the argument sets the time-steps elapsed to 0

# run the simulation
c = @GtkCanvas()
win = GtkWindow(c, "Roomba Environment", 600, 600)
m = mcts_solver
policy = mcts_policy
belief_updater = belief_updater_ardespot
for (t, step) in enumerate(stepthrough(m, policy, belief_updater, max_steps=100))
    @guarded draw(c) do widget
        
        # the following lines render the room, the particles, and the roomba
        ctx = getgc(c)
        set_source_rgb(ctx,1,1,1)
        paint(ctx)
        render(ctx, m, step)
        
        # render some information that can help with debugging
        # here, we render the time-step, the state, and the observation
        move_to(ctx,300,400)
        show_text(ctx, @sprintf("t=%d, state=%s, o=%.3f",t,string(step.s),step.o))
    end
    show(c)
    sleep(0.1) # to slow down the simulation
end

MethodError: MethodError: no method matching stepthrough(::MCTSSolver, ::MCTSPlanner{RoombaPOMDP{Bumper,Bool},RoombaState,RoombaAct,MCTS.SolvedRolloutEstimator{RandomPolicy{MersenneTwister,RoombaPOMDP{Bumper,Bool},NothingUpdater},MersenneTwister},MersenneTwister}, ::RoombaParticleFilter; max_steps=100)
Closest candidates are:
  stepthrough(!Matched::MDP{S,A} where A, ::Policy, ::S) where S at /Users/sarahradz/.julia/packages/POMDPSimulators/xyfJM/src/stepthrough.jl:201 got unsupported keyword argument "max_steps"
  stepthrough(!Matched::MDP{S,A} where A, ::Policy, ::S, !Matched::Union{String, Symbol, Tuple}; kwargs...) where S at /Users/sarahradz/.julia/packages/POMDPSimulators/xyfJM/src/stepthrough.jl:201
  stepthrough(!Matched::POMDP, ::Policy, ::Any...; kwargs...) at /Users/sarahradz/.julia/packages/POMDPSimulators/xyfJM/src/stepthrough.jl:211
  ...

In [109]:
using Statistics

v_discrete_mean = []
v_discrete_std = []
num_exp = 10
for l = 10:10:100
    total_rewards = []
    v_steps = range(0.0, stop = 10.0, length = l) 
    om_steps = range(-1.0 * pi + 0.01, stop = 1.0 * pi, length = 100) 
    for exp = 1:num_exp
        println(string(exp))

        Random.seed!(exp)
        
        # Cont states space, discrete action space
        m_pomcp = RoombaPOMDP(sensor=sensor, mdp=RoombaMDP(aspace=[RoombaAct(v,om) for v in v_steps for om in om_steps], config=config));
        pomcp_solver = POMCPSolver()
        pomcp_policy = solve(pomcp_solver, m_pomcp);
                            
        m = m_pomcp
        p = pomcp_policy
        belief_updater = belief_updater_pomcp
        traj_rewards = sum([step.r for step in stepthrough(m,p,belief_updater, max_steps=100)])
        push!(total_rewards, traj_rewards)
    end

    push!(v_discrete_mean, mean(total_rewards))
    push!(v_discrete_std, std(total_rewards)/sqrt(num_exp))                        
end

1
1


InterruptException: InterruptException:

In [30]:
using Plots
plot(10:10:100, v_discrete_mean, yerror = v_discrete_std, xlabel="Discretized Velocity Steps", ylabel="Reward")

savefig("discrete_v")

In [24]:
v_discrete_mean

10-element Array{Any,1}:
  -5.379999999999999 
  -3.4200000000000004
   1.5000000000000007
  -6.319999999999999 
  -2.6599999999999993
 -10.040000000000001 
  -9.1               
   1.7599999999999998
  -1.8               
  -2.8799999999999986

In [110]:
using Statistics


total_steps = []
total_rewards = []
num_exp = 10
println(string(config))
for exp = 1:num_exp
    println(string(exp))

    Random.seed!(exp)

    
    # Cont states space, discrete action space
    """
    m_pomcp = RoombaPOMDP(sensor=sensor, mdp=RoombaMDP(aspace=[RoombaAct(v,om) for v in v_steps for om in om_steps], config=config));
    pomcp_solver = POMCPSolver()
    pomcp_policy = solve(pomcp_solver, m_pomcp);

    m = m_pomcp
    p = pomcp_policy
    belief_updater = belief_updater_pomcp
    """
    
    
                        
    num_steps = 0
    traj_rewards = 0
    for (t, step) in enumerate(stepthrough(m, p, belief_updater, max_steps=100))
        traj_rewards += step.r 
        num_steps = t
    end        
    println("num steps: ", string(num_steps), ", traj rewards: ",  string(traj_rewards))
    push!(total_steps, num_steps)
    push!(total_rewards, traj_rewards)
end


@printf(" ")
@printf("Mean Total Reward: %.3f, StdErr Total Reward: %.3f", mean(total_rewards), std(total_rewards)/sqrt(num_exp))

1
1
num steps: 30, traj rewards: 2.0000000000000053
2
num steps: 22, traj rewards: 3.799999999999999
3
num steps: 35, traj rewards: 1.500000000000007
4
num steps: 13, traj rewards: 6.699999999999999
5
num steps: 27, traj rewards: 3.3000000000000007
6
num steps: 29, traj rewards: 4.100000000000002
7
num steps: 36, traj rewards: 2.4000000000000075
8
num steps: 16, traj rewards: 6.3999999999999995
9
num steps: 36, traj rewards: 1.4000000000000075
10
num steps: 37, traj rewards: 1.3000000000000078
 Mean Total Reward: 3.290, StdErr Total Reward: 0.627

In [111]:
ave_steps = sum(total_steps) / num_exp


28.1