In [1]:
using Plots
using ParticleFilters
using Distributions
using StaticArrays
using LinearAlgebra
using Random
using StatsBase
using Reel
using POMDPs
using POMDPSimulators
using POMDPPolicies
using POMDPModelTools
using GridInterpolations
using RLInterface

In [284]:
include("atan2.jl")
include("obs.jl")
include("polargrid.jl")
rng = MersenneTwister(2);


In [285]:
# random walk

POS_0 = [6.0, 60.0]
CRS_0 = 90 # target's course
HDG_0 = 90 # o/s heading
SPD_0 = 1 # 1 or 2
TGT_SPD = 1

#input is course in degrees and rng
#returns next course in degrees
function next_crs(crs,rng)
    if rand(rng) < .9
        return crs
    end
    crs = (crs + rand(rng,[-1,1])*30) % 360
    if crs < 0 crs += 360 end
    return crs
end

# state as tuple (x, y, crs, hdg) of target (hdg of o/s)
function f(state, control, rng)
    r, θ, crs, hdg, spd = state
    x = r*cos(π/180*θ)
    y = r*sin(π/180*θ)
    pos = [x + TGT_SPD*cos(π/180*crs) - spd*cos(π/180*hdg), y + 
        TGT_SPD*sin(π/180*crs) - spd*sin(π/180*hdg)]
    crs = next_crs(crs,rng)
    hdg = hdg + control[1]
    hdg = hdg % 360
    if hdg < 0
        hdg += 360
    end
    spd = control[2]
    r = sqrt(pos[1]^2 + pos[2]^2)
    θ = atan2(pos[1],pos[2])*180/π
    #if trunc(Int, atan2(pos[1],pos[2])*180/π) == 0
    #    @show pos
    #end
    if θ < 0 θ += 360 end
    return (r, θ, crs, hdg, spd)::NTuple{5, Real}
end


In [286]:
function r(s::NTuple{5,Real})
    range = s[1]
    if range > 150 return -1 end  # reward to not lose track of contact
    if range <= 5 return -10000 end  # collision avoidance
    return 2  # being in "sweet spot" maximizes reward
end

r (generic function with 1 method)

In [5]:
# implement POMDP here

mutable struct targetPOMDP <: POMDP{NTuple{5,Real}, Tuple{Float64}, Int64}
    p_walk::Float64
    discount::Float64
end
angles = [0, 30, 60, 90, 120, 150, 210, 240, 270, 300, 330]
#statespace = [(10.0*r,θ*30.0, crs*30.0, hdg*30.0, spd) for r in 0:30, θ in 0:11, crs in 1:12, hdg in 1:12, spd in 1:2]
statespace = thestates
actionspace = ((30,1), (0,1), (-30,1), (30, 2), (0,2), (-30,2))
action_index(a) = trunc(Int, 2*(a[1]/30+1) + a[2])
actions_ = ((-30,1), (-30, 2), (0, 1), (0, 2), (30, 1), (30, 2))

targetPOMDP() = targetPOMDP(0.9, 0.9)
POMDPs.actions(::targetPOMDP) = ((-30,1), (-30, 2), (0, 1), (0, 2), (30, 1), (30, 2))
POMDPs.actionindex(::targetPOMDP, a) = trunc(Int, 2*(a[1]/30+1) + a[2])
POMDPs.states(::targetPOMDP) = statespace
POMDPs.stateindex(::targetPOMDP, s::NTuple{5,Real}) = LinearIndices(statespace)[s[1]/10, s[2]/30, s[3]/30, s[4]/30, s[5]]
#POMDPs.stateindex(::targetPOMDP, s::NTuple{5,Int64}) = LinearIndices(statespace)[round(Int,s[1]/10)+1, round(Int,s[2]/30)+1,
#    (round(Int,s[3]/30)+1, round(Int,s[4]/30)+1, round(Int,s[5])+1]
POMDPs.observations(::targetPOMDP) = (0, 1, 2, 3)
POMDPs.obsindex(::targetPOMDP, o::Int64) = o + 1
POMDPs.initialobs(::targetPOMDP, s::NTuple{5,Real}, rng::AbstractRNG) = h(s,rng)
POMDPs.initialstate_distribution(::targetPOMDP) = ParticleCollection([(5, 60, 90, 90, 1) for i in 1:N])
POMDPs.initialstate(::targetPOMDP, rng::AbstractRNG) = (rand(rng,1:10), rand(rng, angles),
    rand(rng, angles), rand(rng, angles), rand(rng,1:2))
POMDPs.isterminal(::targetPOMDP, s) = s[1] >= 300
POMDPs.discount(::targetPOMDP) = 0.9

function POMDPs.gen(m::targetPOMDP, s, a, rng)
    return (sp=f(s,a,rng), r=r(s), o=h(s,rng))
end

In [251]:
N = 1000
pomdp = targetPOMDP(0.9, 0.9)
#initialstate_distribution(pomdp) = ParticleCollection([[5, 60, 90, 90, 1] for i in 1:N])


updater = SIRParticleFilter(pomdp, N);

function f2(x, u, rng)
    temp = [i for i in f(x, u, rng)]
    if temp[1] > 500
        temp[1] = 5
    end
    return temp
end


model = ParticleFilterModel{Vector{Float64}}(f2, g)
pfilter = SIRParticleFilter(model, N);


In [275]:

#θ = zeros(length(grid),6);
θ = [r(Tuple(ind2x(grid, j))) for j in 1:length(grid), i in 1:6]                    

112320×6 Array{Int64,2}:
 -10000  -10000  -10000  -10000  -10000  -10000
      1       1       1       1       1       1
      1       1       1       1       1       1
      1       1       1       1       1       1
      1       1       1       1       1       1
      1       1       1       1       1       1
      1       1       1       1       1       1
      1       1       1       1       1       1
      1       1       1       1       1       1
      1       1       1       1       1       1
     -1      -1      -1      -1      -1      -1
     -1      -1      -1      -1      -1      -1
     -1      -1      -1      -1      -1      -1
      ⋮                                       ⋮
     -1      -1      -1      -1      -1      -1
     -1      -1      -1      -1      -1      -1
     -1      -1      -1      -1      -1      -1
     -1      -1      -1      -1      -1      -1
     -1      -1      -1      -1      -1      -1
     -1      -1      -1      -1      -1      -1
     -1      -1

In [281]:
α = 0.5
γ = 0.95
ϵ = .2
x = [15, 60, 90, 90, 1];
b = ParticleCollection([[15, 60, 90, 90, 1] for i in 1:N]);


In [283]:
## particle filter


β = zeros(length(grid),6);

counter = 0
last = 0
total = 0
for i in 1:2500
    counter += 1
    ξ = weighted_grid_2(b)/1000
    
    # choose next action
    u = next_action([transpose(θ[:,i])*ξ for i in 1:size(θ)[2]], ϵ, rng)

 
    #observe new state and reward
    xp = f2(x, actions_[u], rng)
    y = h(x, rng)
    b = update(pfilter, b, actions_[u], y)
    
    ξ = weighted_grid_2(b)/1000
    β[:,u] = ξ
    
    rew = r(Tuple(xp))
    total += rew
    if counter %50 == 0
        @show xp, total
    end
    #update θ
    θ += α * (rew + γ * max2([transpose(θ[:,i])*ξ for i in 1:size(θ)[2]], rng) - last)*β

    # complete transition
    x = xp
    last = transpose(θ[:,u])*ξ
end


(xp, total) = (Real[145.5616292941981, 218.0411458496233, 240, 270, 2], -50)
(xp, total) = (Real[201.47551322079553, 213.92266651354458, 210, 90, 2], -100)
(xp, total) = (Real[296.8980373532726, 219.8326387256659, 210, 90, 1], -150)
(xp, total) = (Real[373.6640315548369, 220.3760059514208, 180, 60, 2], -200)
(xp, total) = (Real[487.8992010408213, 221.09911888041597, 180, 30, 1], -250)
(xp, total) = (Real[42.12577320625103, 154.73928897082666, 150, 210, 2], -50178)
(xp, total) = (Real[52.26877138525288, 151.8987056949083, 240, 210, 1], -50078)
(xp, total) = (Real[26.52716778699286, 196.13092408739965, 270, 270, 2], -49978)
(xp, total) = (Real[61.818937256883444, 210.56114200720455, 180, 240, 1], -49878)
(xp, total) = (Real[63.343144528162725, 191.82254733219688, 180, 30, 2], -49778)
(xp, total) = (Real[161.06618483990874, 186.63556382064047, 120, 90, 2], -49777)
(xp, total) = (Real[238.53390979715414, 175.0518052556404, 60, 330, 1], -49827)
(xp, total) = (Real[289.10142816090035, 156.22

In [279]:
θ

112320×6 Array{Float64,2}:
 -10000.0       -10000.0  -10000.0       -10000.0  -10000.0       -10000.0
      1.0            1.0       1.0            1.0       1.0            1.0
      1.0            1.0       1.0            1.0       1.0            1.0
      1.0            1.0       1.0            1.0       1.0            1.0
      1.0            1.0       0.997884       1.0       1.0            1.0
      1.0            1.0       0.999496       1.0       1.0            1.0
      1.0            1.0       1.0            1.0       1.0            1.0
      0.9998         1.0       0.999339       1.0       0.998681       1.0
      0.999045       1.0       0.998336       1.0       0.99798        1.0
      0.998373       1.0       0.996975       1.0       0.993843       1.0
     -1.00946       -1.0      -1.00561       -1.0      -1.01353       -1.0
     -1.00942       -1.0      -1.01627       -1.0      -1.02494       -1.0
     -1.0132        -1.0      -1.01274       -1.0      -1.02687       -1.

In [116]:
ϵ = .6

θ = zeros(length(grid),6);
β = weighted_grid_2(b)/1000
θ[:,3] += 5*β
θ[:,5] += 4*β
transpose(θ[:,3])*β


next_action([transpose(θ[:,i])*β for i in 1:size(θ)[2]], ϵ, rng)

3

In [None]:
test = zeros(5,3)
ℵ = [1, 2, 3, 4, 5]
test[:,2] = ℵ
test


In [239]:
180/π*atan2(180, 2)

0.6365935759634865

In [None]:
env = POMDPEnvironment(targetPOMDP())


function simulate(env::AbstractEnvironment, nsteps::Int = 100)
    done = false
    r_tot = 0.0
    step = 1
    o = reset!(env)
    while !done && step <= nsteps
        action = sample_action(env) # take random action 
        action
        obs, rew, done, info = step!(env, action)
        obs = trunc(Int,obs[1])
        #@show obs, rew, done, info
        r_tot += rew
        step += 1
    end
    return r_tot
end

@show simulate(env)
#ac = sample_action(env)
#step!(env, ac)