In [2]:
# activate project environment
# include these lines of code in any future scripts/notebooks
#---
import Pkg
if !haskey(Pkg.installed(), "AA228FinalProject")
    jenv = joinpath(dirname(@__FILE__()), ".") # this assumes the notebook is in the same dir
    # as the Project.toml file, which should be in top level dir of the project. 
    # Change accordingly if this is not the case.
    Pkg.activate(jenv)
end
Pkg.instantiate()
Pkg.build("Cairo")
#---

[32m[1m  Updating[22m[39m registry at `~/.julia/registries/General`
[32m[1m  Updating[22m[39m git-repo `https://github.com/JuliaRegistries/General.git`
[?25l[2K[?25h[32m[1m  Updating[22m[39m registry at `~/.julia/registries/JuliaPOMDP`
[32m[1m  Updating[22m[39m git-repo `https://github.com/JuliaPOMDP/Registry`
[?25l[2K[?25h[32m[1m  Building[22m[39m LibCURL ─→ `~/.julia/packages/LibCURL/OoXMv/deps/build.log`
[32m[1m  Building[22m[39m WinRPM ──→ `~/.julia/packages/WinRPM/Y9QdZ/deps/build.log`
[32m[1m  Building[22m[39m Homebrew → `~/.julia/packages/Homebrew/l8kUw/deps/build.log`
[32m[1m  Building[22m[39m Cairo ───→ `~/.julia/packages/Cairo/CXPG1/deps/build.log`


In [3]:
# import necessary packages
using AA228FinalProject
using TabularTDLearning
using POMDPs
using POMDPModels
using POMDPPolicies
using BasicPOMCP
using POMDPPolicies
using BeliefUpdaters
using ParticleFilters
using POMDPSimulators
using Cairo
using Gtk
using Random
using Printf

┌ Info: Recompiling stale cache file /Users/sarahradz/.julia/compiled/v1.0/POMDPModels/GHWgR.ji for POMDPModels [355abbd5-f08e-5560-ac9e-8b5f2592a0ca]
└ @ Base loading.jl:1190
┌ Info: Precompiling BasicPOMCP [d721219e-3fc6-5570-a8ef-e5402f47c49e]
└ @ Base loading.jl:1192
│ This may mean POMDPSimulators [e0d0a172-29c6-5d4e-96d0-f262df5d01fd] does not support precompilation but is imported by a module that does.
└ @ Base loading.jl:947
┌ Info: Precompiling CPUTime [a9c8d775-2e2e-55fc-8582-045d282d599e]
└ @ Base loading.jl:1192
│ This may mean Compat [34da2185-b29b-5c13-b0c7-acf172513d20] does not support precompilation but is imported by a module that does.
└ @ Base loading.jl:947
┌ Info: Precompiling MCTS [e12ccd36-dcad-5f33-8774-9175229e7b33]
└ @ Base loading.jl:1192
│ This may mean POMDPSimulators [e0d0a172-29c6-5d4e-96d0-f262df5d01fd] does not support precompilation but is imported by a module that does.
└ @ Base loading.jl:947
┌ Info: Precompiling D3Trees [e3df1716-f71e-5df9-9e2d-98

In [4]:
sensor = Bumper()
config = 3 # 1,2, or 3

3

In [5]:
ds = ContinuousRoombaStateSpace#DiscreteRoombaStateSpace(50, 50, 50)
v_steps = range(0.0, stop = 10.0, length = 10) 
om_steps = range(0.0, stop = 1.0, length = 10) 
as = AA228FinalProject.gen_amap([RoombaAct(v,om) for v in v_steps for om in om_steps])

m = RoombaPOMDP(sensor=sensor, mdp=RoombaMDP(sspace=ds, aspace=[RoombaAct(v,om) for v in v_steps for om in om_steps], config=config))

RoombaPOMDP{Bumper,Bool}(Bumper(), RoombaMDP{DataType,Array{RoombaAct,1}}
  v_max: Float64 10.0
  om_max: Float64 1.0
  dt: Float64 0.5
  contact_pen: Float64 -1.0
  time_pen: Float64 -0.1
  goal_reward: Float64 10.0
  stairs_penalty: Float64 -10.0
  config: Int64 3
  room: AA228FinalProject.Room
  sspace: ContinuousRoombaStateSpace <: Any
  aspace: Array{RoombaAct}((100,))
  _amap: Dict{RoombaAct,Int64}
)

In [12]:
num_particles = 2000
resampler = BumperResampler(num_particles)

spf = SimpleParticleFilter(m, resampler)

v_noise_coefficient = 2.0
om_noise_coefficient = 0.5

belief_updater = RoombaParticleFilter(spf, v_noise_coefficient, om_noise_coefficient);
solver = POMCPSolver()
planner = solve(solver, m);


In [13]:
# Define the policy to test
mutable struct ToEnd <: Policy
    ts::Int64 # to track the current time-step.
end

# extract goal for heuristic controller
goal_xy = get_goal_xy(m)


# define a new function that takes in the policy struct and current belief,
# and returns the desired action
function POMDPs.action(p::ToEnd, b::ParticleCollection{RoombaState})

    # for 50 steps, act randomly and quickly to learn surrounding info
    if p.ts < 50
        p.ts += 1
        return RoombaAct(5, rand()) 
    end
    p.ts += 1
    
    # after 50 steps of random learning, we follow a proportional controller 
    # to navigate directly to the goal, using the mean belief state
    
    # compute mean belief of a subset of particles
    s = mean(b)
    goal_x, goal_y = goal_xy
    x,y,th = s[1:3]
    ang_to_goal = atan(goal_y - y, goal_x - x)
    del_angle = wrap_to_pi(ang_to_goal - th)
    
    # apply proportional control to compute the turn-rate
    Kprop = 1.0
    om = Kprop * del_angle
    
    # always travel at some fixed velocity
    v = 5.0
    
    return RoombaAct(v, om)
end

In [None]:
# first seed the environment
Random.seed!(5)

# reset the policy
p = ToEnd(0) # here, the argument sets the time-steps elapsed to 0

# run the simulation
c = @GtkCanvas()
win = GtkWindow(c, "Roomba Environment", 600, 600)
for (t, step) in enumerate(stepthrough(m, p, belief_updater, max_steps=100))
    @guarded draw(c) do widget
        
        # the following lines render the room, the particles, and the roomba
        ctx = getgc(c)
        set_source_rgb(ctx,1,1,1)
        paint(ctx)
        render(ctx, m, step)
        
        # render some information that can help with debugging
        # here, we render the time-step, the state, and the observation
        move_to(ctx,300,400)
        show_text(ctx, @sprintf("t=%d, state=%s, o=%.3f",t,string(step.s),step.o))
    end
    show(c)
    sleep(0.1) # to slow down the simulation
end

#solver = QLearningSolver(m, learning_rate=0.1, n_episodes=5000, max_episode_length=50, eval_every=50, n_eval_traj=100)
#solver = FIBSolver()
solver = QMDPSolver(max_iterations=20, tolerance=1e-3) 
#fib_policy = solve(solver, m)
#rand_policy = RandomPolicy(m);
policy = solve(solver, m) # compute a pomdp policy

#policy = create_policy(solver, m)