# MDP Para el juego Black Jack (21)

$ MDP = <S,A, \rho ,S_f,r > $

$ S = (d,sum,n)$ 

$ A = \{ fin, otra \} $

$ \rho = ...$ 

$ S_f =  \{ (0,0,0,1), (0,0,0,-1) \} $

$ r = $ 1 si gana, -1 si pierde y 0 en otro caso


## Definiciones principales del Modelo

In [1]:
abstract type MDP end
abstract type estado end
abstract type accion end

In [2]:
struct BlackJ_s <:estado
    d #dealer
    sum #suma del jugador
    n #numero de cartas
    as #indica si se puede sumar 10 a sum o no. (1,0)

    function BlackJ_s(d::Int,sum::Int,n::Int,as::Int)
        if (d > 10 || sum>22 || n>4 || as>1 )
            error("estado invalido")
        else
            return new(d,sum,n,as)
        end
    end
end


In [72]:
BlackJ_s(5,0,0,-1)

BlackJ_s(5, 0, 0, -1)

In [4]:
struct BlackJ_a <:accion
    a
    function BlackJ_a(a)
        if (a != :fin && a!= :otra)
            error("accion invalida")
        else
            new(a)
        end
    end
end

In [5]:
BlackJ_a(:otra)

BlackJ_a(:otra)

## Funciones del Modelo

In [81]:
#terminal
function terminal(s::BlackJ_s)
    (s.d==s.sum==s.n==0) && (s.as==1 || s.as==-1) ? true : false
end





#acciones legales
function aLegales(s::BlackJ_s)
    if terminal(s)
        return nothing
    end
    acciones = []
    if s.sum<21 && s.n<4
        push!(acciones,BlackJ_a(:otra))
    end
    push!(acciones,BlackJ_a(:fin))
    acciones
end




#probabilidad de Transicion
function ρ(s::BlackJ_s,a::BlackJ_a,s2::BlackJ_s)
    
    # si s2 es terminal
    if s2.d==s2.sum==s2.n==0 && s2.as==1
        return 0.5
    
    elseif s2.d==s2.sum==s2.n==0 && s2.as==-1
        return 0.3
    
    # si s2 no es terminal
    else
        
        if a.a == :otra # elige otra
            if s2.d ==s.d && s2.n==s.n+1
                if s2.sum-s.sum<9 && s2.sum-s.sum>0
                    return 1/13
                else if s2.sum-s.sum==10
                    return 4/10
                else
                    return 0
            else
                return 0
        else if a.a == :fin # elige fin
        
    end
        
end

            
            
            
#recompenza
function r(s::BlackJ_s)
    if s.d==s.sum==s.n==0 && s.as==1
        return 1
    elseif s.d==s.sum==s.n==0 && s.as==-1
        return -1
    end
    0
end


r (generic function with 1 method)

In [78]:
s1 = BlackJ_s(5,13,2,0)

println(typeof(s1))

println(terminal(s1))

show(aLegales(s1))

println()

println(r(s1))

println(ρ(s1, BlackJ_a(:fin), BlackJ_s(0,0,0,1)))

BlackJ_s
false
Any[BlackJ_a(:otra), BlackJ_a(:fin)]
0
0.2


## Generacion de estados

In [80]:
est=[BlackJ_s(1,1,1,0)]
pop!(est)

for k in 1:4 #num
    if k==1
        x=10 
    elseif k==2 
        x=20
    else
        x=22
    end
    for i in 1:10 #dealer
        for j in 1:x #sum
            for l in 0:1
                push!(est,BlackJ_s(i,j,k,l))
            end
        end
    end
end


est

1480-element Array{BlackJ_s,1}:
 BlackJ_s(1, 1, 1, 0)  
 BlackJ_s(1, 1, 1, 1)  
 BlackJ_s(1, 2, 1, 0)  
 BlackJ_s(1, 2, 1, 1)  
 BlackJ_s(1, 3, 1, 0)  
 BlackJ_s(1, 3, 1, 1)  
 BlackJ_s(1, 4, 1, 0)  
 BlackJ_s(1, 4, 1, 1)  
 BlackJ_s(1, 5, 1, 0)  
 BlackJ_s(1, 5, 1, 1)  
 BlackJ_s(1, 6, 1, 0)  
 BlackJ_s(1, 6, 1, 1)  
 BlackJ_s(1, 7, 1, 0)  
 ⋮                     
 BlackJ_s(10, 17, 4, 0)
 BlackJ_s(10, 17, 4, 1)
 BlackJ_s(10, 18, 4, 0)
 BlackJ_s(10, 18, 4, 1)
 BlackJ_s(10, 19, 4, 0)
 BlackJ_s(10, 19, 4, 1)
 BlackJ_s(10, 20, 4, 0)
 BlackJ_s(10, 20, 4, 1)
 BlackJ_s(10, 21, 4, 0)
 BlackJ_s(10, 21, 4, 1)
 BlackJ_s(10, 22, 4, 0)
 BlackJ_s(10, 22, 4, 1)

## Generacion de acciones

In [9]:
ac = [BlackJ_a(:fin), BlackJ_a(:otra)]
ac

2-element Array{BlackJ_a,1}:
 BlackJ_a(:fin) 
 BlackJ_a(:otra)

## Modelo MDP

In [10]:
struct BlackJ <: MDP
    estados
    acciones
    function BlackJ(estados::Array{BlackJ_s,1}, acciones::Array{BlackJ_a,1})
        new(estados,acciones)
    end
end

In [14]:
BJ = BlackJ(est,ac)

BlackJ(BlackJ_s[BlackJ_s(1, 1, 1, 0), BlackJ_s(1, 1, 1, 1), BlackJ_s(1, 2, 1, 0), BlackJ_s(1, 2, 1, 1), BlackJ_s(1, 3, 1, 0), BlackJ_s(1, 3, 1, 1), BlackJ_s(1, 4, 1, 0), BlackJ_s(1, 4, 1, 1), BlackJ_s(1, 5, 1, 0), BlackJ_s(1, 5, 1, 1)  …  BlackJ_s(10, 18, 4, 0), BlackJ_s(10, 18, 4, 1), BlackJ_s(10, 19, 4, 0), BlackJ_s(10, 19, 4, 1), BlackJ_s(10, 20, 4, 0), BlackJ_s(10, 20, 4, 1), BlackJ_s(10, 21, 4, 0), BlackJ_s(10, 21, 4, 1), BlackJ_s(10, 22, 4, 0), BlackJ_s(10, 22, 4, 1)], BlackJ_a[BlackJ_a(:fin), BlackJ_a(:otra)])

## Algoritmo de Programacion Dinamica

In [50]:
function policy_value(MDP,π, γ)
    
    v = Dict(si => 0 for si in MDP.estados)
    flag=true
    
    while flag
        flag=false
        for s in keys(v)
            temp=sum([ρ(s,π[s],sp)*(r(s)+γ*v[sp]) for sp in MDP.estados])
            if temp!=v[s]
                flag=true
            end
            v[s]=temp
        end
    end
    
    v
end

policy_value (generic function with 1 method)

In [70]:
function choice(a::Array)
    n = length(a)
    idx = rand(1:n)
    return a[idx]
end

choice([1,5,8,9])

9

In [55]:
function policy_iteration(MDP,γ)
    
    π = Dict(s => choice(aLegales(s)) for s in MDP.estados)
    
    optima=false
    
    
    while !optima
        v = policy_value(MDP,π,γ)
        optima=true
        
        for s in keys(v)
            for a in MDP.acciones
                temp=sum(ρ(s,a,s2)*(r(s)+γ*v[s2]) for s2 in keys(v))
                if temp < v[s] # menor o mayor?
                    optima=false
                    π[s]=a
                end
            end
        end
    end
    
    π
end

policy_iteration (generic function with 1 method)

In [57]:
π_opt= policy_iteration(BJ,0.7)

π_opt

Dict{BlackJ_s,BlackJ_a} with 1480 entries:
  BlackJ_s(5, 5, 4, 0)   => BlackJ_a(:fin)
  BlackJ_s(5, 8, 2, 0)   => BlackJ_a(:fin)
  BlackJ_s(3, 19, 2, 0)  => BlackJ_a(:otra)
  BlackJ_s(3, 12, 2, 1)  => BlackJ_a(:fin)
  BlackJ_s(5, 18, 3, 1)  => BlackJ_a(:fin)
  BlackJ_s(9, 8, 4, 0)   => BlackJ_a(:fin)
  BlackJ_s(6, 11, 3, 1)  => BlackJ_a(:fin)
  BlackJ_s(3, 8, 2, 1)   => BlackJ_a(:otra)
  BlackJ_s(6, 5, 4, 1)   => BlackJ_a(:fin)
  BlackJ_s(6, 8, 4, 0)   => BlackJ_a(:fin)
  BlackJ_s(1, 3, 1, 0)   => BlackJ_a(:otra)
  BlackJ_s(3, 21, 3, 0)  => BlackJ_a(:fin)
  BlackJ_s(4, 7, 2, 0)   => BlackJ_a(:otra)
  BlackJ_s(1, 17, 4, 0)  => BlackJ_a(:fin)
  BlackJ_s(10, 7, 1, 0)  => BlackJ_a(:fin)
  BlackJ_s(1, 22, 4, 0)  => BlackJ_a(:fin)
  BlackJ_s(7, 8, 2, 0)   => BlackJ_a(:otra)
  BlackJ_s(6, 12, 3, 0)  => BlackJ_a(:otra)
  BlackJ_s(7, 12, 3, 1)  => BlackJ_a(:otra)
  BlackJ_s(2, 19, 3, 1)  => BlackJ_a(:fin)
  BlackJ_s(10, 14, 4, 0) => BlackJ_a(:fin)
  BlackJ_s(4, 1, 2, 1)   => BlackJ_a(:fin)
  Bl