# Cliff Walking<br>
<font size= +1>
<p style="text-align: justify">
El problema _Cliff Walking_ consiste en que se tiene una cuadrícula por la cual te puedes desplazar en cuatro direcciones, una celda a la vez. Las acciones que puedes tomar son __"up"__, __"down"__, __"left"__, __"right"__. 
El objetivo es llegar, lo más rápido posible, a una celda especial que será la meta, partiendo de una celda inicial. Toda una sección de esta cuadrícula tiene una pérdida mucho más grande que las demás celdas, por lo que se evitará pasar por esa parte para llegar a la meta.
</p>


In [95]:
using Plots
using NBInclude
@nbinclude("algoritmos_reforzamiento.ipynb")

q_learning (generic function with 1 method)

In [2]:
"""
    CWs(x, y)

A cell of the Cliff Walking grid.

`x` is the coordinate changed by the `:up`/`:down` actions and bounded by the number of
rows of the grid; `y` is the coordinate changed by `:left`/`:right` and bounded by the
number of columns.
"""
struct CWs <: Estado
    x::Int64  # row index (1-based, grows when moving :down)
    y::Int64  # column index (1-based, grows when moving :right)
end

In [3]:
"""
    CWa(a)

An action of the Cliff Walking problem. `a` must be one of the symbols `:up`, `:down`,
`:left` or `:right`.

# Throws
- `ErrorException` if `a` is not one of the four valid symbols.
"""
struct CWa <: Accion
    # Concretely typed: the constructor only ever accepts one of four symbols, so leaving
    # the field untyped would just make every access go through `Any`.
    a::Symbol

    function CWa(a)
        a in (:up, :down, :left, :right) || error("acción inválida")
        return new(a)
    end
end

In [4]:
"""
    CW(rows, columns, goal, start, redSection, actions, states)

Description of a Cliff Walking problem.

# Fields
- `rows`, `columns`: size of the grid.
- `goal`: terminal cell.
- `start`: cell the agent starts in and is sent back to when it steps into the red section.
- `redSection`: `(rowMin, rowMax, colMin, colMax)`, the inclusive bounds of the
  high-penalty region.
- `actions`: all actions of the problem.
- `states`: all cells the agent can occupy.
"""
struct CW <: MDP
    rows::Int64
    columns::Int64
    goal::CWs
    start::CWs
    redSection::NTuple{4,Int64}
    actions::Vector{CWa}
    states::Vector{CWs}
end

In [67]:
"""
    step(mdp::CW, s::CWs, a::CWa)

Return the state reached by taking action `a` in state `s`.

If the cell the action points to lies inside `mdp.redSection`, the agent is sent back to
`mdp.start`. Otherwise the agent moves one cell in the requested direction; a move that
would leave the grid is clamped to the border (rows `1:mdp.rows`, columns
`1:mdp.columns`), so the state stays the same.
"""
function step(mdp::CW, s::CWs, a::CWa)
    rmin, rmax, cmin, cmax = mdp.redSection
    # The red-section test is done on the *unclamped* target cell.
    inred(x, y) = rmin <= x <= rmax && cmin <= y <= cmax

    if a.a == :up
        inred(s.x - 1, s.y) && return mdp.start
        # Rows are 1-based like columns: clamp at 1 (the previous lower bound of 0
        # produced a state outside the grid).
        return CWs(max(s.x - 1, 1), s.y)
    elseif a.a == :down
        inred(s.x + 1, s.y) && return mdp.start
        return CWs(min(s.x + 1, mdp.rows), s.y)
    elseif a.a == :left
        inred(s.x, s.y - 1) && return mdp.start
        return CWs(s.x, max(s.y - 1, 1))
    elseif a.a == :right
        inred(s.x, s.y + 1) && return mdp.start
        return CWs(s.x, min(s.y + 1, mdp.columns))
    end
end

step (generic function with 1 method)

In [68]:
"""
    terminal(modelo::CW, s::CWs)

Return `true` when `s` has the same coordinates as the goal cell of `modelo`.
"""
terminal(modelo::CW, s::CWs) = (s.x, s.y) == (modelo.goal.x, modelo.goal.y)

"""
    aLegales(mdp::CW, s::CWs)

Return the actions that keep the agent inside the grid when taken from `s`, in the order
`:left`, `:right`, `:up`, `:down`.
"""
function aLegales(mdp::CW, s::CWs)
    # Each candidate pairs the condition under which the move stays on the grid with the
    # action symbol it enables.
    candidates = (
        (s.y > 1, :left),
        (s.y < mdp.columns, :right),
        (s.x > 1, :up),
        (s.x < mdp.rows, :down),
    )
    return Any[CWa(sym) for (allowed, sym) in candidates if allowed]
end

aLegales (generic function with 1 method)

In [69]:
"""
    r(mdp::CW, s::CWs, a::CWa, s2::CWs)

Reward for taking action `a` in state `s`.

Returns `r(mdp, s)` when `s` is terminal, `-100` when the cell `a` points to lies inside
`mdp.redSection`, and `-1` otherwise. `s2` is not used.
"""
function r(mdp::CW, s::CWs, a::CWa, s2::CWs)
    terminal(mdp, s) && return r(mdp, s)

    # Cell the action points to, before any clamping to the grid.
    target = if a.a == :down
        (s.x + 1, s.y)
    elseif a.a == :up
        (s.x - 1, s.y)
    elseif a.a == :right
        (s.x, s.y + 1)
    elseif a.a == :left
        (s.x, s.y - 1)
    else
        nothing
    end
    target === nothing && return -1

    row, col = target
    rmin, rmax, cmin, cmax = mdp.redSection
    return (rmin <= row <= rmax && cmin <= col <= cmax) ? -100 : -1
end
    
"""
    r(mdp::CW, s::CWs)

State-only reward; always `1`. The four-argument method calls it for terminal states.
"""
r(mdp::CW, s::CWs) = 1

r (generic function with 2 methods)

In [97]:
"""
    figure_6_4(mdp::CW, episode_limit, ϵ, α, γ)

Run `expected_sarsa` on `mdp` from `mdp.start`, print the resulting policy as a grid of
characters and plot the accumulated steps against the episode number.

In the printed grid `G` marks the goal, `X` a cell of the red section, and `U`/`D`/`L`/`R`
the action the policy selects in that cell.

# Arguments
- `mdp`: the Cliff Walking problem.
- `episode_limit`, `ϵ`, `α`, `γ`: forwarded unchanged to `expected_sarsa`.

# Returns
The plot object produced by the final `title!` call.
"""
function figure_6_4(mdp::CW, episode_limit, ϵ, α, γ)
    # One zero-initialised entry per (state, legal action) pair. The table is typed
    # explicitly instead of being seeded with a random -- possibly illegal -- pair.
    q_value = Dict{Tuple{CWs,CWa},Float64}()
    for s in mdp.states, a in aLegales(mdp, s)
        q_value[(s, a)] = 0.0
    end

    π_, steps = expected_sarsa(mdp, q_value, mdp.start, ϵ, α, γ, episode_limit, false)

    rmin, rmax, cmin, cmax = mdp.redSection
    glyphs = Dict(:up => 'U', :down => 'D', :right => 'R', :left => 'L')

    optimal_policy = Vector{Char}[]
    for i in 1:mdp.rows
        row = Char[]
        for j in 1:mdp.columns
            s = CWs(i, j)
            if s == mdp.goal
                push!(row, 'G')
            elseif rmin <= i <= rmax && cmin <= j <= cmax
                # Red-section cells are not states, so the policy has no entry for them.
                push!(row, 'X')
            else
                push!(row, glyphs[π_[s].a])
            end
        end
        push!(optimal_policy, row)
    end

    println("Optimal policy is:")
    for row in optimal_policy
        println(row)
    end
    # NOTE: the former loop over `mdp.wind` was removed; `CW` has no `wind` field, so it
    # could only raise an error.

    steps_ = accumulate(+, steps)
    y = 1:length(steps)

    font = Plots.font("Helvetica", 18)
    pyplot(guidefont=font, xtickfont=font, ytickfont=font, legendfont=font)
    plot(steps_, y, label="line")
    xlabel!("Time steps")
    ylabel!("Episodes")
    return title!("steps vs episodios")
end


figure_6_4 (generic function with 2 methods)

In [71]:
# world height (number of rows)
HEIGHT = 4
# world width (number of columns)
WIDTH = 12
# probability for exploration
EPSILON = 0.1
# step size
ALPHA = 0.5
# gamma for Q-Learning and Expected Sarsa
GAMMA = 1.0

ACTIONS = [CWa(:up), CWa(:down), CWa(:left), CWa(:right)]

# start and goal cells, given as (row, column)
START = CWs(4, 1)
GOAL = CWs(4, 12)
RED_SECTION=(4,4,2,11) # (rowMin, rowMax, colMin, colMax): row 4 only, columns 2 through 11

# value passed as the episode limit to figure_6_4
episodes= 500


500

In [72]:
# Every grid cell outside the red section is a state; cells are listed row by row.
# The vector is typed up front so no Vector{Any} has to be converted by the CW constructor.
STATES = CWs[]
for i in 1:HEIGHT, j in 1:WIDTH
    in_red = RED_SECTION[1] <= i <= RED_SECTION[2] && RED_SECTION[3] <= j <= RED_SECTION[4]
    in_red || push!(STATES, CWs(i, j))
end

mdp = CW(HEIGHT, WIDTH, GOAL, START, RED_SECTION, ACTIONS, STATES)

CW(4, 12, CWs(4, 12), CWs(4, 1), (4, 4, 2, 11), CWa[CWa(:up), CWa(:down), CWa(:left), CWa(:right)], CWs[CWs(1, 1), CWs(1, 2), CWs(1, 3), CWs(1, 4), CWs(1, 5), CWs(1, 6), CWs(1, 7), CWs(1, 8), CWs(1, 9), CWs(1, 10)  …  CWs(3, 5), CWs(3, 6), CWs(3, 7), CWs(3, 8), CWs(3, 9), CWs(3, 10), CWs(3, 11), CWs(3, 12), CWs(4, 1), CWs(4, 12)])

In [96]:
# Run expected_sarsa on the cliff grid, then print the policy and plot steps vs. episodes.
figure_6_4(mdp, episodes,EPSILON,ALPHA,GAMMA)

KeyError: KeyError: key CWs(4, 2) not found