# Markov Decision Process

In [6]:
using DataFrames
using Random

### Let us define Monopoly game environment:

In [7]:
 Spaces = [
                ("Go", 0.0, Inf),
                ("Mediterranean Avenue", 2.0, 60.0),
                ("Community Chest", 0.0, Inf),
                ("Baltic Avenue", 4.0, 60.0),
                ("Income Tax", 0.0, Inf),
                ("Reading Railroad", 25.0, 200.0),
                ("Oriental Avenue", 6.0, 100.0),
                ("Chance", 0.0, Inf),
                ("Vermont Avenue", 6.0, 100.0),
                ("Connecticut Avenue", 8.0, 120.0),
                ("Jail", 0.0, Inf),
                ("St. Charles Place", 10.0, 140.0),
                ("Electric Company", 4.0 * 6.0, 150.0),
                ("States Avenue", 10.0, 140.0),
                ("Virginia Avenue", 12.0, 160.0),
                ("Pennsylvania Railroad", 25.0, 200.0),
                ("St. James Place", 14.0, 180.0),
                ("Community Chest", 0.0, Inf),
                ("Tennessee Avenue", 14.0, 180.0),
                ("New York Avenue", 16.0, 200.0),
                ("Free Parking", 0.0, Inf),
                ("Kentucky Avenue", 18.0, 220.0),
                ("Chance", 0.0, Inf),
                ("Indiana Avenue", 18.0, 220.0),
                ("Illinois Avenue", 20.0, 240.0),
                ("B & O Railroad", 25.0, 200.0),
                ("Atlantic Avenue", 22.0, 260.0),
                ("Ventnor Avenue", 22.0, 260.0),
                ("Water Works", 4.0 * 6.0, 150.0),
                ("Marvin Gardens", 24.0, 280.0),
                ("Go To Jail", 0.0, Inf),
                ("Pacific Avenue", 26.0, 300.0),
                ("North Carolina Avenue", 26.0, 300.0),
                ("Community Chest", 0.0, Inf),
                ("Pennsylvania Avenue", 28.0, 320.0),
                ("Short Line", 25.0, 200.0),
                ("Chance", 0.0, Inf),
                ("Park Place", 35.0, 350.0),
                ("Luxury Tax", 0.0, Inf),
                ("Boardwalk", 50.0, 400.0)];

### Example 1.  Monopoly as a Markov Chain

We will start with the Monopoly game defined as a simple Markov chain. Our goal is to find the stationary distribution of this process:

In [8]:
function transition_matrix(n=40)
    #basic transition matrix
    T = zeros(Float64,n,n)
    for i = 1:n
        for j = 2:12
            T[i, mod(i + j - 1,n)+1] = mod(min(j -1,13-j),7)/ 36
        end
    end
    #special matrix
    S = zeros(Float64,n,n)
    for i = 1:n
        if !in(i,(3,18,31,34))
            S[i,i] = 1.0
        elseif i == 31
            S[i,11] = 1.0
        else #community chest
            #advance to go
            S[i,1] = 1.0 / 16.0;
            #go to jail
            S[i,11] = 1.0 / 16.0;
            #stay put
            S[i,i] = 14.0 / 16.0;
        end
    end
    T * S
end

function solve(k,n=40)
    T = transition_matrix(n)
    s = hcat(1.0,zeros(Float64,1,n-1))
    s * T^k
end

solve (generic function with 2 methods)

In [9]:
#first as a simple Markov chain:
solve(100000,40)'


40×1 adjoint(::Matrix{Float64}) with eltype Float64:
 0.02718519653634749
 0.022595593909926576
 0.020088902487865122
 0.02337710167023832
 0.023084775209526613
 0.02297863014830041
 0.02301209610078735
 0.023063894117358544
 0.022929801058082913
 0.02284183819086106
 0.0546613299656088
 0.022917164921724145
 0.023774933942587107
 ⋮
 0.027030468446477315
 0.026967717778825596
 0.0
 0.02688541220906093
 0.026137642782802298
 0.022233994190789353
 0.024666218241423055
 0.023787980616430136
 0.02283673499352182
 0.021779991748910883
 0.02209821074932742
 0.022276335016028846

### Example 2.  Monopoly as a Markov Chain with Rewards

Now, let us add some rewards to the problem. With this simple modification, we will be able to compute the return on investment for every field in the game and discuss which one is the most profitable for a player: 

In [10]:
#add some rewards:
function summary(k = 100000, n=40)
    probs = solve(k,n)
    df = DataFrame(Space = String[], Prob = Float64[], Rent = Float64[], ROI = Float64[])
    for (i,prob) in enumerate(probs)
        rent = prob * Spaces[i][2]
        roi = rent /Spaces[i][3] 
        push!(df,(Spaces[i][1], prob, rent,roi * 100))
    end
    df
end

summary()

Unnamed: 0_level_0,Space,Prob,Rent,ROI
Unnamed: 0_level_1,String,Float64,Float64,Float64
1,Go,0.0271852,0.0,0.0
2,Mediterranean Avenue,0.0225956,0.0451912,0.0753186
3,Community Chest,0.0200889,0.0,0.0
4,Baltic Avenue,0.0233771,0.0935084,0.155847
5,Income Tax,0.0230848,0.0,0.0
6,Reading Railroad,0.0229786,0.574466,0.287233
7,Oriental Avenue,0.0230121,0.138073,0.138073
8,Chance,0.0230639,0.0,0.0
9,Vermont Avenue,0.0229298,0.137579,0.137579
10,Connecticut Avenue,0.0228418,0.182735,0.152279


### Example 3.  Monopoly as a Markov Decision Process

Finally, we will define Monopoly as a Markov Decision Process (MDP). We allow players to take actions, and as a result, we could evaluate different strategies. 

We will discuss three possible strategies:
- buy a property randomly (player 1)
- always buy a property (player 2)
- buy a property if its ROI exceeds threshold $\tau$ (player 3)

In [11]:
#Monopoly simulaton with simple strategies

function simulate_game(τ, n = 40, Spaces = Spaces)
    roi = summary()[!,:ROI]
    owned = zeros(Int,n)
    budget = 1500.0 * ones(3)
    position = ones(Int,3)
    seq = shuffle(1:3)
    while sum(budget.> 0.0) > 1
        for player in seq
            budget[player] ≤ 0.0 && continue
            roll = rand(2:12)
            position[player] =  mod(position[player] + roll - 1,n) + 1
            if position[player] == 31
                position[player] = 11
            elseif in(position[player],(3,18,34)) #community chest
                if rand() ≤ 2.0/16.0 
                    if rand() ≤ 0.5 #advance to go
                        position[player] = 1
                    else #go to jail
                        position[player] = 11
                    end
                end
            else
                owner = owned[position[player]] 
                (Spaces[position[player]][3] == Inf || owner == player) && continue
                if owner == 0
                    price = Spaces[position[player]][3]
                    price ≥ budget[player] && continue
                    #decision time
                    if player == 1  &&  rand() ≤ 0.5 
                        #random strategy (player 1)
                        owned[position[player]] = player
                        budget[player] -= price
                    elseif player == 2 
                        #always buy (player 2)
                        owned[position[player]] = player
                        budget[player] -= price
                    elseif player == 3 && roi[position[player]] > τ 
                        #buy only the best (player 3)
                        owned[position[player]] = player
                        budget[player] -= price
                    end  
                else
                    rent = Spaces[position[player]][2]
                    budget[player] -= rent
                    budget[owner] += rent
                end
                budget[player] ≤ 0.0 && replace!(owned, player => 0)
            end
        end
    end
    return budget
end

simulate_game (generic function with 3 methods)

In [12]:
simulate_game(0.15)

3-element Vector{Float64}:
  -9.0
  -1.0
 170.0

In [14]:
n = 10000
τ₁ = 0.15
res = zeros(3)
for k = 1:n
   res .+= (simulate_game(τ₁) .> 0.0)
end
res ./ n

3-element Vector{Float64}:
 0.3695
 0.2541
 0.3764