# Actor-Critic method in the Mountain Car environment

In [14]:
using ReinforcementLearningBase, ReinforcementLearningEnvironments
using Flux
using Flux:params
using Plots; gr(); 
import StatsBase.sample, StatsBase.Weights

In [15]:
# Create the Mountain Car environment used by the agent constructed later in this notebook.
env = MountainCarEnv();

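First, define the `Brain`, which holds the hyperparameters, the replay memory, and the policy and value networks: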

In [16]:
"""
    Brain

Mutable container for the learner's state: discount factor, replay-memory
settings and contents, the policy and value networks, and their learning rates.
"""
mutable struct Brain
    β::Float64               # discount factor applied to the next-state value
    batch_size::Int          # number of transitions sampled per replay
    memory_size::Int         # maximum number of transitions kept in `memory`
    min_memory_size::Int     # `memory` must exceed this length before training
    memory::Vector{Tuple}    # replay buffer of transition tuples
    policy_net::Chain        # actor network
    value_net::Chain         # critic network
    ηₚ::Float64              # learning rate used for the policy network
    ηᵥ::Float64              # learning rate used for the value network
end

In [17]:
"""
    Brain(env; β = 0.99, ηₚ = 0.00001, ηᵥ = 0.001,
          batch_size = 64, memory_size = 50_000, min_memory_size = 1000)

Build a `Brain` whose networks are sized for `env`.

The input width of both networks is `length(env.state)`; the policy network
has one output per element of `action_space(env)` and ends in `softmax`, the
value network has a single output.

# Keywords
- `β`: discount factor.
- `ηₚ`, `ηᵥ`: learning rates for the policy and value networks.
- `batch_size`: number of transitions sampled per replay.
- `memory_size`: maximum number of transitions kept in the replay memory.
- `min_memory_size`: the memory must exceed this length before training starts.

# Throws
- `ArgumentError` if `batch_size` is not in `1:memory_size`.
"""
function Brain(env; β = 0.99, ηₚ = 0.00001, ηᵥ = 0.001,
               batch_size = 64, memory_size = 50_000, min_memory_size = 1000)
    0 < batch_size <= memory_size ||
        throw(ArgumentError("batch_size must be in 1:memory_size, got $batch_size"))

    n_inputs = length(env.state)
    n_actions = length(action_space(env))

    # NOTE(review): every Dense layer here uses `identity`, so the stack is a
    # linear map followed by softmax. Confirm this is intended (vs. e.g. `relu`).
    policy_net = Chain(Dense(n_inputs, 40, identity),
                       Dense(40, 40, identity),
                       Dense(40, n_actions, identity),
                       softmax)
    value_net = Chain(Dense(n_inputs, 128, relu),
                      Dense(128, 52, relu),
                      Dense(52, 1, identity))

    # Start with an empty, correctly typed replay memory.
    return Brain(β, batch_size, memory_size, min_memory_size, Tuple[],
                 policy_net, value_net, ηₚ, ηᵥ)
end


Brain

Next, define the `Agent`, which pairs an environment with a `Brain` and tracks per-episode statistics:

In [18]:
# An agent bundles an environment with the `Brain` acting in it, plus
# per-episode bookkeeping that `step!` and `run!` update.
mutable struct Agent
    env::AbstractEnv   # environment the agent interacts with
    brain::Brain       # networks, replay memory and hyperparameters
    position::Float64  # first component of the most recently observed state; -Inf between episodes
    reward::Float64    # reward accumulated so far in the current episode
end

In [19]:
"""
    Agent(env::AbstractEnv)

Create an agent for `env` with a freshly built `Brain`, an initial position of
`-Inf`, and zero accumulated reward.
"""
function Agent(env::AbstractEnv)
    brain = Brain(env)
    return Agent(env, brain, -Inf, 0.0)
end

Agent

Loss functions for the actor (policy network) and the critic (value network):

In [20]:
"""
    actor_loss(x, A, γ = 0.001; policy_net = agent.brain.policy_net)

Policy loss with an entropy bonus.

# Arguments
- `x`: batch of states, one sample per column.
- `A`: advantages with the same layout as the policy output (as built by
  `replay!`: the advantage sits in the row of the action taken, zeros elsewhere).
- `γ`: weight of the entropy bonus subtracted from the loss.

# Keywords
- `policy_net`: network mapping `x` to action probabilities. Defaults to the
  policy network of the global `agent`, which keeps the two-argument call used
  by `Flux.train!` working unchanged.

# Returns
The batch-averaged value of `-log(p) * A` minus `γ` times the batch-averaged
entropy of `p`.
"""
function actor_loss(x, A, γ = 0.001; policy_net = agent.brain.policy_net)
    p = policy_net(x)
    logp = log.(p .+ 1e-7)     # small offset keeps the log finite when p == 0
    # Samples are stored column-wise, so the batch size is the second dimension
    # (the first dimension is the number of actions).
    batch = size(A, 2)
    loss = -sum(logp .* A) / batch
    entropy = -sum(logp .* p) / batch
    return loss - γ * entropy
end

actor_loss (generic function with 2 methods)

In [21]:
"""
    critic_loss(x, y, ξ = 0.5; value_net = agent.brain.value_net)

Scaled mean-squared error between the value network's prediction for `x` and
the targets `y`.

- `ξ` scales the loss.
- `value_net` defaults to the value network of the global `agent`, which keeps
  the two-argument call used by `Flux.train!` working unchanged; pass it
  explicitly to evaluate a different network.
"""
critic_loss(x, y, ξ = 0.5; value_net = agent.brain.value_net) =
    ξ * Flux.mse(value_net(x), y)

critic_loss (generic function with 2 methods)

Now the learning functions: experience replay, memory management, and the forward pass:

In [22]:
"""
    replay!(agent::Agent)

Sample `batch_size` stored transitions without replacement, build the state,
advantage and target matrices (one sample per column), and run one
`Flux.train!` pass on the policy network and one on the value network.

Returns `nothing` without training when the memory holds fewer transitions
than one batch.
"""
function replay!(agent::Agent)
    brain = agent.brain
    # `sample(...; replace = false)` cannot draw more items than are stored,
    # so skip training until a full batch is available.
    length(brain.memory) < brain.batch_size && return nothing

    n_state = length(agent.env.state)
    n_actions = length(action_space(agent.env))
    x = zeros(Float32, n_state, brain.batch_size)
    A = zeros(Float32, n_actions, brain.batch_size)
    y = zeros(Float32, 1, brain.batch_size)

    batch = sample(brain.memory, brain.batch_size, replace = false)
    for (i, (s, a, r, s′, v, v′, terminal)) in enumerate(batch)
        # One-step return: no bootstrapping from the next state on terminal steps.
        R = terminal ? r : r + brain.β * v′
        x[:, i] .= s
        A[a, i] = R - v    # advantage goes in the row of the action taken; rest stays 0
        y[1, i] = R
    end

    # NOTE(review): `actor_loss` and `critic_loss` evaluate the networks of the
    # global `agent` by default, so this trains correctly only when `agent` is
    # that same object.
    # NOTE(review): a new `ADAM` is constructed on every call, so any state the
    # optimiser keeps internally is not carried over between replays. Consider
    # storing the optimisers alongside the networks.
    Flux.train!(actor_loss, params(brain.policy_net), [(x, A)], ADAM(brain.ηₚ))
    Flux.train!(critic_loss, params(brain.value_net), [(x, y)], ADAM(brain.ηᵥ))
end

"""
    remember!(brain::Brain, step::Tuple)

Append `step` to the replay memory. When the memory already holds exactly
`memory_size` entries, the oldest one is removed first. Returns the memory
vector (the result of `push!`).
"""
function remember!(brain::Brain, step::Tuple)
    buffer = brain.memory
    if length(buffer) == brain.memory_size
        deleteat!(buffer, 1)
    end
    return push!(buffer, step)
end

"""
    forward(brain::Brain, state)

Evaluate both networks of `brain` on `state`.

Returns `(probs, value)`, where `probs` is the output of the policy network and
`value` is the first element of the value network's output.
"""
function forward(brain::Brain, state)
    # Use the networks of the `brain` that was passed in rather than those of
    # a global `agent`, so the function works for any Brain.
    probs = brain.policy_net(state)
    value = brain.value_net(state)[1]
    return probs, value
end

forward (generic function with 1 method)

Finally, the control functions that step the environment and run whole episodes:

In [23]:
"""
    step!(agent::Agent, train::Bool)

Take one environment step: sample an action from the policy output, apply it,
record the transition in the replay memory, and update the agent's `position`
and accumulated `reward`. When `train` is true and the memory is longer than
`min_memory_size`, `replay!` is called afterwards.

Returns whether the environment reports termination after the step.
"""
function step!(agent::Agent, train::Bool)
    env, brain = agent.env, agent.brain

    # Act: snapshot the current state and draw an action index weighted by the policy output.
    obs = deepcopy(env.state)
    probs, value = forward(brain, obs)
    action = sample(1:length(action_space(env)), Weights(probs))
    env(action)

    # Observe the outcome of the action.
    r = deepcopy(reward(env))
    next_obs = deepcopy(state(env))
    done = deepcopy(is_terminated(env))
    _, next_value = forward(brain, next_obs)

    # Per-episode bookkeeping.
    agent.position = next_obs[1]
    agent.reward += r

    remember!(brain, (obs, action, r, next_obs, value, next_value, done))
    if train && length(brain.memory) > brain.min_memory_size
        replay!(agent)
    end
    return done
end

step! (generic function with 1 method)

In [24]:
"""
    run!(agent::Agent, episodes::Int; train = true, plotting = true, summary = true)

Step `agent` until `episodes` episodes have finished.

# Keywords
- `train`: forwarded to `step!`; enables replay training.
- `plotting`: call `plot` on the agent's environment before every step.
- `summary`: print the reward and running success rate after each episode.

# Returns
`(rewards, success_rates)`: two `Vector{Float64}` with one entry per finished
episode. An episode counts as a success when the last observed position
exceeds `0.5`.
"""
function run!(agent::Agent, episodes::Int; train::Bool = true,
            plotting::Bool = true, summary::Bool = true)
    rewards = Float64[]
    success_rates = Float64[]
    ep = 1
    successes = 0
    while ep ≤ episodes
        # Plot the agent's own environment rather than a global `env`.
        plotting && (plot(agent.env); sleep(0.0001))
        if step!(agent, train)
            reset!(agent.env)
            agent.position > 0.5 && (successes += 1)
            rate = successes / ep
            push!(rewards, agent.reward)
            push!(success_rates, rate)
            if summary
                println("episode $(ep) ends! Reward: $(agent.reward)")
                println("success rate: $(rate)")
            end
            ep += 1
            # Reset the per-episode bookkeeping for the next episode.
            agent.reward = 0.0
            agent.position = -Inf
        end
    end
    return rewards, success_rates
end

run! (generic function with 1 method)

In [25]:
# Build the agent on the environment created earlier. The global name `agent`
# is also read by the default arguments/bodies of the loss functions.
agent = Agent(env);

# Train for 2000 episodes without plotting; per-episode summaries are printed
# because `summary` defaults to true.
rewards, success_rates = run!(agent,2000; train = true, plotting = false);

episode 1 ends! Reward: -199.0
success rate: 0.0
episode 2 ends! Reward: -199.0
success rate: 0.0
episode 3 ends! Reward: -199.0
success rate: 0.0
episode 4 ends! Reward: -199.0
success rate: 0.0
episode 5 ends! Reward: -199.0
success rate: 0.0
episode 6 ends! Reward: -199.0
success rate: 0.0
episode 7 ends! Reward: -199.0
success rate: 0.0
episode 8 ends! Reward: -199.0
success rate: 0.0
episode 9 ends! Reward: -199.0
success rate: 0.0
episode 10 ends! Reward: -199.0
success rate: 0.0
episode 11 ends! Reward: -199.0
success rate: 0.0
episode 12 ends! Reward: -199.0
success rate: 0.0
episode 13 ends! Reward: -199.0
success rate: 0.0
episode 14 ends! Reward: -199.0
success rate: 0.0
episode 15 ends! Reward: -199.0
success rate: 0.0
episode 16 ends! Reward: -199.0
success rate: 0.0
episode 17 ends! Reward: -199.0
success rate: 0.0
episode 18 ends! Reward: -199.0
success rate: 0.0
episode 19 ends! Reward: -199.0
success rate: 0.0
episode 20 ends! Reward: -199.0
success rate: 0.0
episode 2

episode 164 ends! Reward: -199.0
success rate: 0.0
episode 165 ends! Reward: -199.0
success rate: 0.0
episode 166 ends! Reward: -199.0
success rate: 0.0
episode 167 ends! Reward: -199.0
success rate: 0.0
episode 168 ends! Reward: -199.0
success rate: 0.0
episode 169 ends! Reward: -199.0
success rate: 0.0
episode 170 ends! Reward: -199.0
success rate: 0.0
episode 171 ends! Reward: -199.0
success rate: 0.0
episode 172 ends! Reward: -199.0
success rate: 0.0
episode 173 ends! Reward: -199.0
success rate: 0.0
episode 174 ends! Reward: -199.0
success rate: 0.0
episode 175 ends! Reward: -199.0
success rate: 0.0
episode 176 ends! Reward: -199.0
success rate: 0.0
episode 177 ends! Reward: -199.0
success rate: 0.0
episode 178 ends! Reward: -199.0
success rate: 0.0
episode 179 ends! Reward: -199.0
success rate: 0.0
episode 180 ends! Reward: -199.0
success rate: 0.0
episode 181 ends! Reward: -199.0
success rate: 0.0
episode 182 ends! Reward: -199.0
success rate: 0.0
episode 183 ends! Reward: -199.

episode 325 ends! Reward: -199.0
success rate: 0.0
episode 326 ends! Reward: -199.0
success rate: 0.0
episode 327 ends! Reward: -199.0
success rate: 0.0
episode 328 ends! Reward: -199.0
success rate: 0.0
episode 329 ends! Reward: -199.0
success rate: 0.0
episode 330 ends! Reward: -199.0
success rate: 0.0
episode 331 ends! Reward: -199.0
success rate: 0.0
episode 332 ends! Reward: -199.0
success rate: 0.0
episode 333 ends! Reward: -199.0
success rate: 0.0
episode 334 ends! Reward: -199.0
success rate: 0.0
episode 335 ends! Reward: -199.0
success rate: 0.0
episode 336 ends! Reward: -199.0
success rate: 0.0
episode 337 ends! Reward: -199.0
success rate: 0.0
episode 338 ends! Reward: -199.0
success rate: 0.0
episode 339 ends! Reward: -199.0
success rate: 0.0
episode 340 ends! Reward: -199.0
success rate: 0.0
episode 341 ends! Reward: -199.0
success rate: 0.0
episode 342 ends! Reward: -199.0
success rate: 0.0
episode 343 ends! Reward: -199.0
success rate: 0.0
episode 344 ends! Reward: -199.

episode 486 ends! Reward: -199.0
success rate: 0.0
episode 487 ends! Reward: -199.0
success rate: 0.0
episode 488 ends! Reward: -199.0
success rate: 0.0
episode 489 ends! Reward: -199.0
success rate: 0.0
episode 490 ends! Reward: -199.0
success rate: 0.0
episode 491 ends! Reward: -199.0
success rate: 0.0
episode 492 ends! Reward: -199.0
success rate: 0.0
episode 493 ends! Reward: -199.0
success rate: 0.0
episode 494 ends! Reward: -199.0
success rate: 0.0
episode 495 ends! Reward: -199.0
success rate: 0.0
episode 496 ends! Reward: -199.0
success rate: 0.0
episode 497 ends! Reward: -199.0
success rate: 0.0
episode 498 ends! Reward: -199.0
success rate: 0.0
episode 499 ends! Reward: -199.0
success rate: 0.0
episode 500 ends! Reward: -199.0
success rate: 0.0
episode 501 ends! Reward: -199.0
success rate: 0.0
episode 502 ends! Reward: -199.0
success rate: 0.0
episode 503 ends! Reward: -199.0
success rate: 0.0
episode 504 ends! Reward: -199.0
success rate: 0.0
episode 505 ends! Reward: -199.

episode 647 ends! Reward: -199.0
success rate: 0.0
episode 648 ends! Reward: -199.0
success rate: 0.0
episode 649 ends! Reward: -199.0
success rate: 0.0
episode 650 ends! Reward: -199.0
success rate: 0.0
episode 651 ends! Reward: -199.0
success rate: 0.0
episode 652 ends! Reward: -199.0
success rate: 0.0
episode 653 ends! Reward: -199.0
success rate: 0.0
episode 654 ends! Reward: -199.0
success rate: 0.0
episode 655 ends! Reward: -199.0
success rate: 0.0
episode 656 ends! Reward: -199.0
success rate: 0.0
episode 657 ends! Reward: -199.0
success rate: 0.0
episode 658 ends! Reward: -199.0
success rate: 0.0
episode 659 ends! Reward: -199.0
success rate: 0.0
episode 660 ends! Reward: -199.0
success rate: 0.0
episode 661 ends! Reward: -199.0
success rate: 0.0
episode 662 ends! Reward: -199.0
success rate: 0.0
episode 663 ends! Reward: -199.0
success rate: 0.0
episode 664 ends! Reward: -199.0
success rate: 0.0
episode 665 ends! Reward: -199.0
success rate: 0.0
episode 666 ends! Reward: -199.

episode 808 ends! Reward: -199.0
success rate: 0.0
episode 809 ends! Reward: -199.0
success rate: 0.0
episode 810 ends! Reward: -199.0
success rate: 0.0
episode 811 ends! Reward: -199.0
success rate: 0.0
episode 812 ends! Reward: -199.0
success rate: 0.0
episode 813 ends! Reward: -199.0
success rate: 0.0
episode 814 ends! Reward: -199.0
success rate: 0.0
episode 815 ends! Reward: -199.0
success rate: 0.0
episode 816 ends! Reward: -199.0
success rate: 0.0
episode 817 ends! Reward: -199.0
success rate: 0.0
episode 818 ends! Reward: -199.0
success rate: 0.0
episode 819 ends! Reward: -199.0
success rate: 0.0
episode 820 ends! Reward: -199.0
success rate: 0.0
episode 821 ends! Reward: -199.0
success rate: 0.0
episode 822 ends! Reward: -199.0
success rate: 0.0
episode 823 ends! Reward: -199.0
success rate: 0.0
episode 824 ends! Reward: -199.0
success rate: 0.0
episode 825 ends! Reward: -199.0
success rate: 0.0
episode 826 ends! Reward: -199.0
success rate: 0.0
episode 827 ends! Reward: -199.

episode 969 ends! Reward: -199.0
success rate: 0.0
episode 970 ends! Reward: -199.0
success rate: 0.0
episode 971 ends! Reward: -199.0
success rate: 0.0
episode 972 ends! Reward: -199.0
success rate: 0.0
episode 973 ends! Reward: -199.0
success rate: 0.0
episode 974 ends! Reward: -199.0
success rate: 0.0
episode 975 ends! Reward: -199.0
success rate: 0.0
episode 976 ends! Reward: -199.0
success rate: 0.0
episode 977 ends! Reward: -199.0
success rate: 0.0
episode 978 ends! Reward: -199.0
success rate: 0.0
episode 979 ends! Reward: -199.0
success rate: 0.0
episode 980 ends! Reward: -199.0
success rate: 0.0
episode 981 ends! Reward: -199.0
success rate: 0.0
episode 982 ends! Reward: -199.0
success rate: 0.0
episode 983 ends! Reward: -199.0
success rate: 0.0
episode 984 ends! Reward: -199.0
success rate: 0.0
episode 985 ends! Reward: -199.0
success rate: 0.0
episode 986 ends! Reward: -199.0
success rate: 0.0
episode 987 ends! Reward: -199.0
success rate: 0.0
episode 988 ends! Reward: -199.

success rate: 0.0
episode 1128 ends! Reward: -199.0
success rate: 0.0
episode 1129 ends! Reward: -199.0
success rate: 0.0
episode 1130 ends! Reward: -199.0
success rate: 0.0
episode 1131 ends! Reward: -199.0
success rate: 0.0
episode 1132 ends! Reward: -199.0
success rate: 0.0
episode 1133 ends! Reward: -199.0
success rate: 0.0
episode 1134 ends! Reward: -199.0
success rate: 0.0
episode 1135 ends! Reward: -199.0
success rate: 0.0
episode 1136 ends! Reward: -199.0
success rate: 0.0
episode 1137 ends! Reward: -199.0
success rate: 0.0
episode 1138 ends! Reward: -199.0
success rate: 0.0
episode 1139 ends! Reward: -199.0
success rate: 0.0
episode 1140 ends! Reward: -199.0
success rate: 0.0
episode 1141 ends! Reward: -199.0
success rate: 0.0
episode 1142 ends! Reward: -199.0
success rate: 0.0
episode 1143 ends! Reward: -199.0
success rate: 0.0
episode 1144 ends! Reward: -199.0
success rate: 0.0
episode 1145 ends! Reward: -199.0
success rate: 0.0
episode 1146 ends! Reward: -199.0
success rate

success rate: 0.0
episode 1286 ends! Reward: -199.0
success rate: 0.0
episode 1287 ends! Reward: -199.0
success rate: 0.0
episode 1288 ends! Reward: -199.0
success rate: 0.0
episode 1289 ends! Reward: -199.0
success rate: 0.0
episode 1290 ends! Reward: -199.0
success rate: 0.0
episode 1291 ends! Reward: -199.0
success rate: 0.0
episode 1292 ends! Reward: -199.0
success rate: 0.0
episode 1293 ends! Reward: -199.0
success rate: 0.0
episode 1294 ends! Reward: -199.0
success rate: 0.0
episode 1295 ends! Reward: -199.0
success rate: 0.0
episode 1296 ends! Reward: -199.0
success rate: 0.0
episode 1297 ends! Reward: -199.0
success rate: 0.0
episode 1298 ends! Reward: -199.0
success rate: 0.0
episode 1299 ends! Reward: -199.0
success rate: 0.0
episode 1300 ends! Reward: -199.0
success rate: 0.0
episode 1301 ends! Reward: -199.0
success rate: 0.0
episode 1302 ends! Reward: -199.0
success rate: 0.0
episode 1303 ends! Reward: -199.0
success rate: 0.0
episode 1304 ends! Reward: -199.0
success rate

success rate: 0.0
episode 1444 ends! Reward: -199.0
success rate: 0.0
episode 1445 ends! Reward: -199.0
success rate: 0.0
episode 1446 ends! Reward: -199.0
success rate: 0.0
episode 1447 ends! Reward: -199.0
success rate: 0.0
episode 1448 ends! Reward: -199.0
success rate: 0.0
episode 1449 ends! Reward: -199.0
success rate: 0.0
episode 1450 ends! Reward: -199.0
success rate: 0.0
episode 1451 ends! Reward: -199.0
success rate: 0.0
episode 1452 ends! Reward: -199.0
success rate: 0.0
episode 1453 ends! Reward: -199.0
success rate: 0.0
episode 1454 ends! Reward: -199.0
success rate: 0.0
episode 1455 ends! Reward: -199.0
success rate: 0.0
episode 1456 ends! Reward: -199.0
success rate: 0.0
episode 1457 ends! Reward: -199.0
success rate: 0.0
episode 1458 ends! Reward: -199.0
success rate: 0.0
episode 1459 ends! Reward: -199.0
success rate: 0.0
episode 1460 ends! Reward: -199.0
success rate: 0.0
episode 1461 ends! Reward: -199.0
success rate: 0.0
episode 1462 ends! Reward: -199.0
success rate

success rate: 0.0
episode 1602 ends! Reward: -199.0
success rate: 0.0
episode 1603 ends! Reward: -199.0
success rate: 0.0
episode 1604 ends! Reward: -199.0
success rate: 0.0
episode 1605 ends! Reward: -199.0
success rate: 0.0
episode 1606 ends! Reward: -199.0
success rate: 0.0
episode 1607 ends! Reward: -199.0
success rate: 0.0
episode 1608 ends! Reward: -199.0
success rate: 0.0
episode 1609 ends! Reward: -199.0
success rate: 0.0
episode 1610 ends! Reward: -199.0
success rate: 0.0
episode 1611 ends! Reward: -199.0
success rate: 0.0
episode 1612 ends! Reward: -199.0
success rate: 0.0
episode 1613 ends! Reward: -199.0
success rate: 0.0
episode 1614 ends! Reward: -199.0
success rate: 0.0
episode 1615 ends! Reward: -199.0
success rate: 0.0
episode 1616 ends! Reward: -199.0
success rate: 0.0
episode 1617 ends! Reward: -199.0
success rate: 0.0
episode 1618 ends! Reward: -199.0
success rate: 0.0
episode 1619 ends! Reward: -199.0
success rate: 0.0
episode 1620 ends! Reward: -199.0
success rate

success rate: 0.0
episode 1760 ends! Reward: -199.0
success rate: 0.0
episode 1761 ends! Reward: -199.0
success rate: 0.0
episode 1762 ends! Reward: -199.0
success rate: 0.0
episode 1763 ends! Reward: -199.0
success rate: 0.0
episode 1764 ends! Reward: -199.0
success rate: 0.0
episode 1765 ends! Reward: -199.0
success rate: 0.0
episode 1766 ends! Reward: -199.0
success rate: 0.0
episode 1767 ends! Reward: -199.0
success rate: 0.0
episode 1768 ends! Reward: -199.0
success rate: 0.0
episode 1769 ends! Reward: -199.0
success rate: 0.0
episode 1770 ends! Reward: -199.0
success rate: 0.0
episode 1771 ends! Reward: -199.0
success rate: 0.0
episode 1772 ends! Reward: -199.0
success rate: 0.0
episode 1773 ends! Reward: -199.0
success rate: 0.0
episode 1774 ends! Reward: -199.0
success rate: 0.0
episode 1775 ends! Reward: -199.0
success rate: 0.0
episode 1776 ends! Reward: -199.0
success rate: 0.0
episode 1777 ends! Reward: -199.0
success rate: 0.0
episode 1778 ends! Reward: -199.0
success rate

episode 1918 ends! Reward: -199.0
success rate: 0.0
episode 1919 ends! Reward: -199.0
success rate: 0.0
episode 1920 ends! Reward: -199.0
success rate: 0.0
episode 1921 ends! Reward: -199.0
success rate: 0.0
episode 1922 ends! Reward: -199.0
success rate: 0.0
episode 1923 ends! Reward: -199.0
success rate: 0.0
episode 1924 ends! Reward: -199.0
success rate: 0.0
episode 1925 ends! Reward: -199.0
success rate: 0.0
episode 1926 ends! Reward: -199.0
success rate: 0.0
episode 1927 ends! Reward: -199.0
success rate: 0.0
episode 1928 ends! Reward: -199.0
success rate: 0.0
episode 1929 ends! Reward: -199.0
success rate: 0.0
episode 1930 ends! Reward: -199.0
success rate: 0.0
episode 1931 ends! Reward: -199.0
success rate: 0.0
episode 1932 ends! Reward: -199.0
success rate: 0.0
episode 1933 ends! Reward: -199.0
success rate: 0.0
episode 1934 ends! Reward: -199.0
success rate: 0.0
episode 1935 ends! Reward: -199.0
success rate: 0.0
episode 1936 ends! Reward: -199.0
success rate: 0.0
episode 1937