In [2]:
# activate project environment
# include these lines of code in any future scripts/notebooks
#---
import Pkg
if !haskey(Pkg.installed(), "AA228FinalProject")
    jenv = joinpath(dirname(@__FILE__()), ".") # this assumes the notebook is in the same dir
    # as the Project.toml file, which should be in top level dir of the project. 
    # Change accordingly if this is not the case.
    Pkg.activate(jenv)
end
Pkg.instantiate()
Pkg.build("Cairo")
#---

[32m[1m  Updating[22m[39m registry at `~/.julia/registries/General`
[32m[1m  Updating[22m[39m git-repo `https://github.com/JuliaRegistries/General.git`
[?25l[2K[?25h[32m[1m  Updating[22m[39m registry at `~/.julia/registries/JuliaPOMDP`
[32m[1m  Updating[22m[39m git-repo `https://github.com/JuliaPOMDP/Registry`
[?25l[2K[?25h[32m[1m  Building[22m[39m LibCURL ─→ `~/.julia/packages/LibCURL/OoXMv/deps/build.log`
[32m[1m  Building[22m[39m WinRPM ──→ `~/.julia/packages/WinRPM/Y9QdZ/deps/build.log`
[32m[1m  Building[22m[39m Homebrew → `~/.julia/packages/Homebrew/l8kUw/deps/build.log`
[32m[1m  Building[22m[39m Cairo ───→ `~/.julia/packages/Cairo/CXPG1/deps/build.log`


In [70]:
# import necessary packages
using AA228FinalProject
using TabularTDLearning
using POMDPs
using MCTS
using ARDESPOT
using POMCPOW
using POMDPModels
using POMDPPolicies
using BasicPOMCP
using POMDPPolicies
using BeliefUpdaters
using ParticleFilters
using POMDPSimulators
using Cairo
using Gtk
using Random
using Printf

┌ Info: Precompiling ARDESPOT [d96c9ae4-3372-47d5-8a88-316ae77be8cf]
└ @ Base loading.jl:1192
│ This may mean CPUTime [a9c8d775-2e2e-55fc-8582-045d282d599e] does not support precompilation but is imported by a module that does.
└ @ Base loading.jl:947


In [63]:
sensor = Bumper()
config = 3 # 1,2, or 3

3

In [71]:
ds = DiscreteRoombaStateSpace(50, 50, 50)
v_steps = range(0.0, stop = 10.0, length = 10) 
om_steps = range(-1.0 * pi + 0.01, stop = 1.0 * pi, length = 200) 

# discrete states space, discrete action space
m_mcts = RoombaPOMDP(sensor=sensor, mdp=RoombaMDP(sspace=ds, aspace=[RoombaAct(v,om) for v in v_steps for om in om_steps], config=config))

# Cont states space, cont action space
m_pomcpow = RoombaPOMDP(sensor=sensor, mdp=RoombaMDP(config=config))
                                    
# Cont states space, discrete action space
m_pomcp = RoombaPOMDP(sensor=sensor, mdp=RoombaMDP(aspace=[RoombaAct(v,om) for v in v_steps for om in om_steps], config=config))

RoombaPOMDP{Bumper,Bool}(Bumper(), RoombaMDP{ContinuousRoombaStateSpace,Array{RoombaAct,1}}
  v_max: Float64 10.0
  om_max: Float64 1.0
  dt: Float64 0.5
  contact_pen: Float64 -1.0
  time_pen: Float64 -0.1
  goal_reward: Float64 10.0
  stairs_penalty: Float64 -10.0
  config: Int64 3
  room: AA228FinalProject.Room
  sspace: ContinuousRoombaStateSpace ContinuousRoombaStateSpace()
  aspace: Array{RoombaAct}((2000,))
  _amap: Dict{RoombaAct,Int64}
)

In [72]:
num_particles = 2000
resampler = BumperResampler(num_particles)

spf = SimpleParticleFilter(m, resampler)

v_noise_coefficient = 2.0
om_noise_coefficient = 0.5

belief_updater = RoombaParticleFilter(spf, v_noise_coefficient, om_noise_coefficient);

# POMCP SOLVER
pomcp_solver = POMCPSolver()
pomcp_policy = solve(solver, m_pomcp);

# POMCPOW SOLVER
pomcpow_solver = POMCPOWSolver(criterion=MaxUCB(20.0))
pomcpow_policy = solve(solver, m_pomcpow)

# MCTS SOLVER
mcts_solver = MCTSSolver(n_iterations=50, depth=10, exploration_constant=5.0) # initializes the Solver type
mcts_policy = solve(solver, m_mcts)

# ARDESPOT SOLVER
ardespot_solver = DESPOTSolver(bounds=(DefaultPolicyLB(RandomSolver()), 0.0))
ardespot_policy = solve(solver, m_pomcp)

POMCPOWPlanner{RoombaPOMDP{Bumper,Bool},POMCPOW.POWNodeFilter,MaxUCB,RandomActionGenerator{MersenneTwister},BasicPOMCP.SolvedPORollout{RandomPolicy{MersenneTwister,RoombaPOMDP{Bumper,Bool},NothingUpdater},NothingUpdater,MersenneTwister},Int64,Float64,POMCPOWSolver}(POMCPOWSolver
  eps: Float64 0.01
  max_depth: Int64 9223372036854775807
  criterion: MaxUCB
  final_criterion: MaxQ MaxQ()
  tree_queries: Int64 1000
  max_time: Float64 Inf
  rng: MersenneTwister
  node_sr_belief_updater: POMCPOW.POWNodeFilter POMCPOW.POWNodeFilter()
  estimate_value: RolloutEstimator
  enable_action_pw: Bool true
  check_repeat_obs: Bool true
  check_repeat_act: Bool true
  tree_in_info: Bool false
  alpha_observation: Float64 0.5
  k_observation: Float64 10.0
  alpha_action: Float64 0.5
  k_action: Float64 10.0
  init_V: Float64 0.0
  init_N: Int64 0
  next_action: RandomActionGenerator{MersenneTwister}
  default_action: ExceptionRethrow ExceptionRethrow()
, RoombaPOMDP{Bumper,Bool}(Bumper(), RoombaMDP{C

In [74]:
# first seed the environment
Random.seed!(5)

# reset the policy
policy_in_use = ardespot_policy

# run the simulation
c = @GtkCanvas()
win = GtkWindow(c, "Roomba Environment", 600, 600)
for (t, step) in enumerate(stepthrough(m, policy_in_use, belief_updater, max_steps=100))
    @guarded draw(c) do widget
        
        # the following lines render the room, the particles, and the roomba
        ctx = getgc(c)
        set_source_rgb(ctx,1,1,1)
        paint(ctx)
        render(ctx, m, step)
        
        # render some information that can help with debugging
        # here, we render the time-step, the state, and the observation
        move_to(ctx,300,400)
        show_text(ctx, @sprintf("t=%d, state=%s, o=%.3f",t,string(step.s),step.o))
    end
    show(c)
    sleep(0.1) # to slow down the simulation
end