In [1]:
using Plots
using ParticleFilters
using Distributions
using StaticArrays
using LinearAlgebra
using Random
using StatsBase
using Reel
using POMDPs
using POMDPSimulators
using POMDPPolicies
using POMDPModelTools
using GridInterpolations
using RLInterface

In [284]:
# Pull in helper definitions from sibling files. Their contents are not visible
# here; presumably they supply `atan2`, the observation helpers and the polar
# grid utilities used further down — TODO confirm.
include("atan2.jl")
include("obs.jl")
include("polargrid.jl")
# Seeded generator so repeated runs draw the same random sequence.
rng = MersenneTwister(2);


In [285]:
# random walk
# Initial conditions and fixed parameters for the target/own-ship motion model.

POS_0 = [6.0, 60.0] # initial position — NOTE(review): frame/units not shown here, confirm
CRS_0 = 90 # target's course
HDG_0 = 90 # o/s heading
SPD_0 = 1 # 1 or 2
TGT_SPD = 1 # target displacement per step, read by `f`

"""
    next_crs(crs, rng)

Advance the target's course by one step of the random walk.

`crs` is the current course in degrees and `rng` is the random number generator
to draw from. With probability 0.9 the course is returned unchanged. Otherwise
it is turned by 30 degrees to one side or the other, reduced with `rem(_, 360)`,
and shifted up by 360 if the remainder is negative.
"""
function next_crs(crs, rng)
    # First draw: does the target hold its course this step?
    rand(rng) < .9 && return crs
    # Second draw: direction of the 30 degree turn.
    turned = rem(crs + 30 * rand(rng, [-1, 1]), 360)
    return turned < 0 ? turned + 360 : turned
end

"""
    f(state, control, rng)

Propagate the state by one time step and return the new state.

`state` is the 5-tuple `(r, θ, crs, hdg, spd)`: a polar position (`r`, with `θ`
in degrees), the target's course, and own-ship heading and speed (angles in
degrees). `control` is indexable; `control[1]` is added to the heading and
`control[2]` becomes the new speed.

The polar position is converted to Cartesian, moved by `TGT_SPD` along the
target's course and by `-spd` along the current heading, then converted back to
polar. The target's course is advanced with `next_crs`. Heading and bearing are
shifted up by 360 when negative.

NOTE(review): `atan2` is defined outside this block and is called as
`atan2(x, y)`; confirm that this matches the argument order it expects.
"""
function f(state, control, rng)
    range0, bearing0, tgt_crs, own_hdg, own_spd = state

    # Polar -> Cartesian for the current position.
    x0 = range0*cos(π/180*bearing0)
    y0 = range0*sin(π/180*bearing0)

    # Apply target motion, then subtract own-ship motion (old heading and speed).
    x1 = x0 + TGT_SPD*cos(π/180*tgt_crs) - own_spd*cos(π/180*own_hdg)
    y1 = y0 + TGT_SPD*sin(π/180*tgt_crs) - own_spd*sin(π/180*own_hdg)

    # The only random draw(s) in this function happen inside next_crs.
    new_crs = next_crs(tgt_crs, rng)

    # The control takes effect for the next step: turn, then wrap the heading.
    new_hdg = (own_hdg + control[1]) % 360
    new_hdg < 0 && (new_hdg += 360)
    new_spd = control[2]

    # Cartesian -> polar for the new position.
    new_range = sqrt(x1^2 + y1^2)
    new_bearing = atan2(x1, y1)*180/π
    new_bearing < 0 && (new_bearing += 360)

    return (new_range, new_bearing, new_crs, new_hdg, new_spd)::NTuple{5, Real}
end


In [432]:
"""
    r(s::NTuple{5,Real})

Reward for state `s`, determined only by its first component (the range).

Returns `-1` when the range is above 150 (contact is being lost), `-1000` when
it is 10 or less (collision avoidance), and `2` for anything in between.
"""
function r(s::NTuple{5,Real})
    dist = first(s)
    return dist > 150 ? -1 : (dist <= 10 ? -1000 : 2)
end

r (generic function with 1 method)

In [287]:
# Angles in degrees at 30 degree spacing.
# NOTE(review): 180 is absent from this list — confirm that is intentional.
angles = [0, 30, 60, 90, 120, 150, 210, 240, 270, 300, 330]

# `thestates` is not defined in the visible code; presumably it comes from one
# of the included files — TODO confirm.
statespace = thestates
# Each action is a (heading change, speed) pair, matching how `f` reads `control`.
actionspace = ((30,1), (0,1), (-30,1), (30, 2), (0,2), (-30,2))

"""
    action_index(a)

Map an action `a = (heading change, speed)` to an integer index computed as
`2*(a[1]/30 + 1) + a[2]`, truncated to `Int`. For the six actions listed in
`actions_` this is the action's 1-based position in that tuple.
"""
function action_index(a)
    turn_steps = a[1]/30 + 1    # -30, 0, 30  ->  0, 1, 2
    return trunc(Int, 2*turn_steps + a[2])
end

# Actions ordered so that `actions_[action_index(a)] == a`.
actions_ = ((-30,1), (-30, 2), (0, 1), (0, 2), (30, 1), (30, 2))



((-30, 1), (-30, 2), (0, 1), (0, 2), (30, 1), (30, 2))

In [362]:

# NOTE(review): `pomdp` is not defined anywhere in the visible code and `N` is
# only assigned further down the file; this cell presumably relies on notebook
# execution order — confirm before running top to bottom.
updater = SIRParticleFilter(pomdp, N);

"""
    f2(x, u, rng)

Call `f(x, u, rng)` and return the components of its result as a vector rather
than a tuple.
"""
function f2(x, u, rng)
    return [component for component in f(x, u, rng)]
end





In [435]:
# Running list of reward totals; the learning loop pushes one entry every 1000
# steps. Starts with a single 0 entry, which is included in any mean over it.
totals = [0]
#θ = zeros(length(grid),6);
# Weight matrix with one row per grid index and six columns (indexed by action
# elsewhere). Every column starts out as the reward `r` evaluated at
# `ind2x(grid, j)` for that row.
θ = [r(Tuple(ind2x(grid, j))) for j in 1:length(grid), i in 1:6];

In [439]:

N = 500 # number of particles
# Particle-filter model using `f2` as the dynamics. `g` is not defined in the
# visible code — presumably the observation weight function from an included
# file; TODO confirm.
model = ParticleFilterModel{Vector{Float64}}(f2, g)
pfilter = SIRParticleFilter(model, N);
α = 0.5 # step size multiplying the θ update
γ = 0.95 # factor applied to the next-step value in the θ update
ϵ = .3 # exploration parameter handed to `next_action`
# Initial state, laid out like the tuple `f` unpacks: (r, θ, crs, hdg, spd).
x = [20, 60, 90, 90, 1];
# Initial belief: N identical copies of the initial state.
b = ParticleCollection([[20, 60, 90, 90, 1] for i in 1:N]);


In [None]:
## Q-learning loop
# Runs 2,500,000 steps. Each step: pick an action from the current per-action
# values, simulate the true state forward, update the particle belief from an
# observation, and adjust the weight matrix θ. Every 1000 steps the running
# reward is logged, ϵ is re-derived from it, and the state and belief are reset.
# `next_action`, `max2`, `weighted_grid_2` and `h` are defined outside the
# visible code; what is said about them below is inferred from how they are
# called — TODO confirm.

# Same shape as θ. Column u is overwritten with ξ whenever action u is taken.
# NOTE(review): columns are never cleared, so the θ update below also touches
# columns of previously taken actions (using their older ξ) — confirm intended.
β = zeros(length(grid),6);

counter = 0
# Value subtracted in the θ update; set at the end of each step.
last = 0

total = 0
thet = []
# Belief summary: `weighted_grid_2` of the particle collection divided by N.
ξ = weighted_grid_2(b)/N
for i in 1:2500000
    counter += 1
    
    
    # choose next action
    # One value per column of θ: θ[:,j]' * ξ.
    u = next_action([transpose(θ[:,j])*ξ for j in 1:size(θ)[2]], ϵ, rng)

    #observe new state and reward
    xp = f2(x, actions_[u], rng)
    rew = r(Tuple(xp))
    y = h(xp, rng)
    b = update(pfilter, b, actions_[u], y)
    # NOTE(review): same expression as a few lines up; xp has not changed.
    rew = r(Tuple(xp))
       
    ξ = weighted_grid_2(b)/N
    β[:,u] = ξ
       
    total += rew
    # Only referenced by the commented-out debug print and alternative update.
    v = 10^3*sqrt(var(ξ))
    #if counter %50 == 0
    #    @show xp, total, v
    #end
    # Keep a reference to the current θ; the `θ += ...` below rebinds θ to a
    # new array, so `thet` keeps the pre-update values.
    if counter %1000 == 0
        thet = θ
    end
    
    #update θ
    # θ <- θ + α * (rew + γ * max2(per-action values at the new ξ) - last) * β
    θ += α * (rew + γ * max2([transpose(θ[:,j])*ξ for j in 1:size(θ)[2]], rng) 
        - last)*β
    
    #θ += α * (rew + (v - 1) + γ * max2([transpose(θ[:,i])*ξ for i in 1:size(θ)[2]], rng) 
    #    - last)*β

    # complete transition
    
    # Value of the action just taken, evaluated with the updated θ and new ξ.
    last = transpose(θ[:,u])*ξ
    if counter % 1000 == 0
        push!(totals, total)
        # Exploration rate clamped to [0, 1]; a lower reward total gives a larger ϵ.
        ϵ = max(min((20000 - 2*total)/80000, 1), 0)
        println("--------- CURRENT: ", total, " AVG: ", mean(totals), " EPS: ", 
            ϵ, " -----------")
        total = 0
        # Fresh random start: r in 20:35, θ in 0:360, course and heading on
        # multiples of 30, speed 1.
        # NOTE(review): 0:360 includes both endpoints — confirm intended.
        xp = [rand(rng, 20:35), rand(rng,0:360), rand(rng,0:11)*30, rand(rng,0:11)*30, 1];
        b = ParticleCollection([xp for i in 1:N]);
        # NOTE(review): ξ is not recomputed from the reset belief here, so the
        # next action choice still uses the ξ from before the reset — confirm.
        last = 0
    end
    x = xp
end


--------- CURRENT: 200 AVG: -7895.925 EPS: 0.245 -----------
--------- CURRENT: -595 AVG: -7717.8536585365855 EPS: 0.264875 -----------
--------- CURRENT: -6895 AVG: -7698.261904761905 EPS: 0.422375 -----------
--------- CURRENT: -6508 AVG: -7670.581395348837 EPS: 0.4127 -----------
--------- CURRENT: 467 AVG: -7485.636363636364 EPS: 0.238325 -----------
--------- CURRENT: -412 AVG: -7328.444444444444 EPS: 0.2603 -----------
--------- CURRENT: -8275 AVG: -7349.021739130435 EPS: 0.456875 -----------
--------- CURRENT: -130 AVG: -7195.425531914893 EPS: 0.25325 -----------
--------- CURRENT: -529 AVG: -7056.541666666667 EPS: 0.263225 -----------
--------- CURRENT: -742 AVG: -6927.673469387755 EPS: 0.26855 -----------
--------- CURRENT: -9454 AVG: -6978.2 EPS: 0.48635 -----------
--------- CURRENT: 458 AVG: -6832.392156862745 EPS: 0.23855 -----------
--------- CURRENT: 1502 AVG: -6672.115384615385 EPS: 0.21245 -----------
--------- CURRENT: -21067 AVG: -6943.7169811320755 EPS: 0.776675 ---

--------- CURRENT: 1196 AVG: -5601.071895424837 EPS: 0.2201 -----------
--------- CURRENT: -154 AVG: -5565.701298701299 EPS: 0.25385 -----------
--------- CURRENT: 695 AVG: -5525.309677419355 EPS: 0.232625 -----------
--------- CURRENT: -814 AVG: -5495.108974358975 EPS: 0.27035 -----------
--------- CURRENT: -9880 AVG: -5523.03821656051 EPS: 0.497 -----------
--------- CURRENT: -370 AVG: -5490.4240506329115 EPS: 0.25925 -----------
--------- CURRENT: 167 AVG: -5454.842767295598 EPS: 0.245825 -----------
--------- CURRENT: -466 AVG: -5423.6625 EPS: 0.26165 -----------
--------- CURRENT: -6781 AVG: -5432.093167701863 EPS: 0.419525 -----------
--------- CURRENT: -7465 AVG: -5444.641975308642 EPS: 0.436625 -----------
--------- CURRENT: 950 AVG: -5405.411042944786 EPS: 0.22625 -----------
--------- CURRENT: -14869 AVG: -5463.115853658536 EPS: 0.621725 -----------
--------- CURRENT: 335 AVG: -5427.975757575758 EPS: 0.241625 -----------
--------- CURRENT: -42874 AVG: -5653.55421686747 EPS: 1

--------- CURRENT: 698 AVG: -4961.547169811321 EPS: 0.23255 -----------
--------- CURRENT: -439 AVG: -4944.545112781955 EPS: 0.260975 -----------
--------- CURRENT: 494 AVG: -4924.176029962547 EPS: 0.23765 -----------
--------- CURRENT: 176 AVG: -4905.145522388059 EPS: 0.2456 -----------
--------- CURRENT: -763 AVG: -4889.74721189591 EPS: 0.269075 -----------
--------- CURRENT: 200 AVG: -4870.896296296296 EPS: 0.245 -----------
--------- CURRENT: -760 AVG: -4855.726937269373 EPS: 0.269 -----------
--------- CURRENT: -706 AVG: -4840.470588235294 EPS: 0.26765 -----------
--------- CURRENT: -27229 AVG: -4922.479853479854 EPS: 0.930725 -----------
--------- CURRENT: -6580 AVG: -4928.529197080292 EPS: 0.4145 -----------
--------- CURRENT: 128 AVG: -4910.141818181818 EPS: 0.2468 -----------
--------- CURRENT: -8098 AVG: -4921.692028985507 EPS: 0.45245 -----------
--------- CURRENT: -562 AVG: -4905.953068592058 EPS: 0.26405 -----------
--------- CURRENT: -346 AVG: -4889.550359712231 EPS: 0.25

--------- CURRENT: 611 AVG: -4397.39417989418 EPS: 0.234725 -----------
--------- CURRENT: -1546 AVG: -4389.870712401055 EPS: 0.28865 -----------
--------- CURRENT: -628 AVG: -4379.971052631579 EPS: 0.2657 -----------
--------- CURRENT: -14266 AVG: -4405.918635170604 EPS: 0.60665 -----------
--------- CURRENT: 326 AVG: -4393.531413612565 EPS: 0.24185 -----------
--------- CURRENT: -775 AVG: -4384.083550913838 EPS: 0.269375 -----------
--------- CURRENT: 299 AVG: -4371.888020833333 EPS: 0.242525 -----------
--------- CURRENT: -655 AVG: -4362.233766233766 EPS: 0.266375 -----------
--------- CURRENT: 1340 AVG: -4347.461139896373 EPS: 0.2165 -----------
--------- CURRENT: 1946 AVG: -4331.1989664082685 EPS: 0.20135 -----------
--------- CURRENT: -15199 AVG: -4359.208762886598 EPS: 0.629975 -----------
--------- CURRENT: -139 AVG: -4348.359897172237 EPS: 0.253475 -----------
--------- CURRENT: -415 AVG: -4338.274358974359 EPS: 0.260375 -----------
--------- CURRENT: -82 AVG: -4327.3887468030

--------- CURRENT: 575 AVG: -4003.3285714285716 EPS: 0.235625 -----------
--------- CURRENT: -589 AVG: -3996.3747454175154 EPS: 0.264725 -----------
--------- CURRENT: -517 AVG: -3989.3028455284552 EPS: 0.262925 -----------
--------- CURRENT: -12826 AVG: -4007.2271805273836 EPS: 0.57065 -----------
--------- CURRENT: 353 AVG: -3998.4008097165993 EPS: 0.241175 -----------
--------- CURRENT: -784 AVG: -3991.9070707070705 EPS: 0.2696 -----------
--------- CURRENT: -151 AVG: -3984.1633064516127 EPS: 0.253775 -----------
--------- CURRENT: -448 AVG: -3977.0482897384304 EPS: 0.2612 -----------
--------- CURRENT: -598 AVG: -3970.2630522088352 EPS: 0.26495 -----------
--------- CURRENT: -8146 AVG: -3978.6312625250503 EPS: 0.45365 -----------
--------- CURRENT: -733 AVG: -3972.14 EPS: 0.268325 -----------
--------- CURRENT: 56 AVG: -3964.0998003992017 EPS: 0.2486 -----------
--------- CURRENT: 245 AVG: -3955.7151394422312 EPS: 0.243875 -----------
--------- CURRENT: 1265 AVG: -3945.335984095427

--------- CURRENT: -24502 AVG: -3962.3388704318936 EPS: 0.86255 -----------
--------- CURRENT: 80 AVG: -3955.635157545605 EPS: 0.248 -----------
--------- CURRENT: 194 AVG: -3948.764900662252 EPS: 0.24515 -----------
--------- CURRENT: -784 AVG: -3943.5338842975207 EPS: 0.2696 -----------
--------- CURRENT: -160 AVG: -3937.290429042904 EPS: 0.254 -----------
--------- CURRENT: -709 AVG: -3931.971993410214 EPS: 0.267725 -----------
--------- CURRENT: -88 AVG: -3925.6496710526317 EPS: 0.2522 -----------
--------- CURRENT: -32179 AVG: -3972.0426929392447 EPS: 1.0 -----------
--------- CURRENT: -253 AVG: -3965.9459016393444 EPS: 0.256325 -----------
--------- CURRENT: -22954 AVG: -3997.0229132569557 EPS: 0.82385 -----------
--------- CURRENT: -1663 AVG: -3993.2091503267975 EPS: 0.291575 -----------
--------- CURRENT: -4306 AVG: -3993.7194127243065 EPS: 0.35765 -----------
--------- CURRENT: -15868 AVG: -4013.058631921824 EPS: 0.6467 -----------
--------- CURRENT: -307 AVG: -4007.0325203252

--------- CURRENT: -565 AVG: -4057.763305322129 EPS: 0.264125 -----------
--------- CURRENT: 275 AVG: -4051.7034965034964 EPS: 0.243125 -----------
--------- CURRENT: -292 AVG: -4046.4525139664806 EPS: 0.2573 -----------
--------- CURRENT: -7390 AVG: -4051.115760111576 EPS: 0.43475 -----------
--------- CURRENT: -8758 AVG: -4057.6713091922006 EPS: 0.46895 -----------
--------- CURRENT: -631 AVG: -4052.905424200278 EPS: 0.265775 -----------
--------- CURRENT: 1430 AVG: -4045.2902777777776 EPS: 0.21425 -----------
--------- CURRENT: 605 AVG: -4038.8404993065187 EPS: 0.234875 -----------
--------- CURRENT: -421 AVG: -4033.8296398891966 EPS: 0.260525 -----------
--------- CURRENT: -649 AVG: -4029.1479944674966 EPS: 0.266225 -----------
--------- CURRENT: 167 AVG: -4023.3522099447514 EPS: 0.245825 -----------
--------- CURRENT: -20857 AVG: -4046.5710344827585 EPS: 0.771425 -----------
--------- CURRENT: 749 AVG: -4039.965564738292 EPS: 0.231275 -----------
--------- CURRENT: -9139 AVG: -404

--------- CURRENT: -544 AVG: -3955.518787878788 EPS: 0.2636 -----------
--------- CURRENT: 605 AVG: -3949.997578692494 EPS: 0.234875 -----------
--------- CURRENT: -331 AVG: -3945.621523579202 EPS: 0.258275 -----------
--------- CURRENT: 35 AVG: -3940.814009661836 EPS: 0.249125 -----------
--------- CURRENT: -13240 AVG: -3952.031363088058 EPS: 0.581 -----------
--------- CURRENT: -358 AVG: -3947.701204819277 EPS: 0.25895 -----------
--------- CURRENT: -703 AVG: -3943.7966305655837 EPS: 0.267575 -----------
--------- CURRENT: -649 AVG: -3939.8365384615386 EPS: 0.266225 -----------
--------- CURRENT: -622 AVG: -3935.8535414165667 EPS: 0.26555 -----------
--------- CURRENT: -283 AVG: -3931.4736211031177 EPS: 0.257075 -----------
--------- CURRENT: -715 AVG: -3927.6215568862276 EPS: 0.267875 -----------
--------- CURRENT: -91 AVG: -3923.032296650718 EPS: 0.252275 -----------
--------- CURRENT: 275 AVG: -3918.016726403823 EPS: 0.243125 -----------
--------- CURRENT: -505 AVG: -3913.94391408

--------- CURRENT: -337 AVG: -3821.790598290598 EPS: 0.258425 -----------
--------- CURRENT: -583 AVG: -3818.334044823906 EPS: 0.264575 -----------
--------- CURRENT: -10267 AVG: -3825.208955223881 EPS: 0.506675 -----------
--------- CURRENT: -490 AVG: -3821.6570820021298 EPS: 0.26225 -----------
--------- CURRENT: 293 AVG: -3817.2797872340425 EPS: 0.242675 -----------
--------- CURRENT: 761 AVG: -3812.414452709883 EPS: 0.230975 -----------
--------- CURRENT: 770 AVG: -3807.5498938428873 EPS: 0.23075 -----------
--------- CURRENT: -283 AVG: -3803.8123011664898 EPS: 0.257075 -----------
--------- CURRENT: -3673 AVG: -3803.673728813559 EPS: 0.341825 -----------
--------- CURRENT: -19726 AVG: -3820.5227513227514 EPS: 0.74315 -----------
--------- CURRENT: 191 AVG: -3816.282241014799 EPS: 0.245225 -----------
--------- CURRENT: -4384 AVG: -3816.881731784583 EPS: 0.3596 -----------
--------- CURRENT: -820 AVG: -3813.7204641350213 EPS: 0.2705 -----------
--------- CURRENT: -229 AVG: -3809.94

--------- CURRENT: 434 AVG: -3755.663801337154 EPS: 0.23915 -----------
--------- CURRENT: -6670 AVG: -3758.4446564885498 EPS: 0.41675 -----------
--------- CURRENT: 1019 AVG: -3753.89037178265 EPS: 0.224525 -----------
--------- CURRENT: 1187 AVG: -3749.184761904762 EPS: 0.220325 -----------
--------- CURRENT: -307 AVG: -3745.909609895338 EPS: 0.257675 -----------
--------- CURRENT: -9586 AVG: -3751.46102661597 EPS: 0.48965 -----------
--------- CURRENT: -529 AVG: -3748.400759734093 EPS: 0.263225 -----------
--------- CURRENT: -166 AVG: -3745.001897533207 EPS: 0.25415 -----------
--------- CURRENT: -4633 AVG: -3745.8436018957345 EPS: 0.365825 -----------
--------- CURRENT: -223 AVG: -3742.507575757576 EPS: 0.255575 -----------
--------- CURRENT: -787 AVG: -3739.7114474929044 EPS: 0.269675 -----------
--------- CURRENT: -304 AVG: -3736.4640831758034 EPS: 0.2576 -----------
--------- CURRENT: -574 AVG: -3733.477809254013 EPS: 0.26435 -----------
--------- CURRENT: -274 AVG: -3730.214150

In [431]:
#thet

In [None]:
# Wrap the POMDP in an environment object. `targetPOMDP` is not defined in the
# visible code — presumably declared elsewhere; TODO confirm.
env = POMDPEnvironment(targetPOMDP())


"""
    simulate(env::AbstractEnvironment, nsteps::Int = 100)

Roll out a random policy in `env` and return the accumulated reward as a
`Float64`-initialised sum.

The environment is reset first. Then up to `nsteps` actions drawn with
`sample_action(env)` are applied via `step!`. The rollout stops early as soon as
`step!` reports `done`. With `nsteps <= 0` no step is taken and `0.0` is
returned.
"""
function simulate(env::AbstractEnvironment, nsteps::Int = 100)
    reset!(env)  # initial observation is not needed by a random policy
    r_tot = 0.0
    for _ in 1:nsteps
        # Observation and info are ignored; only reward and termination matter.
        _, rew, done, _ = step!(env, sample_action(env))
        r_tot += rew
        done && break
    end
    return r_tot
end

# Run one random-action rollout with the default step limit and display its
# total reward.
@show simulate(env)
#ac = sample_action(env)
#step!(env, ac)