In [1]:
# This is a brief and by no means complete introduction to the POMDPs.jl package.
# MDPs are Markov decision processes, described by states S, actions A, and rewards R.
# POMDPs are partially-observable Markov decision processes. In POMDPs, states are uncertain, so we add observations O.
# Applications of POMDPs include collision avoidance systems, path planning, and long-term infrastructure maintenance.
# --------------------------------------------------------------------------------------------------------------------
# Julia v 0.6.0
# Packages used: POMDPs.jl (https://github.com/JuliaPOMDP/POMDPs.jl)
# Tutorials are available for MDPs (the grid world example - http://nbviewer.jupyter.org/github/sisl/POMDPs.jl/blob/master/examples/GridWorld.ipynb)
# and for POMDPs (the tiger problem - http://nbviewer.jupyter.org/github/sisl/POMDPs.jl/blob/master/examples/Tiger.ipynb)

In [2]:
using POMDPs

[1m[36mINFO: [39m[22m[36mRecompiling stale cache file /Users/gitanjali/.julia/lib/v0.6/SortingAlgorithms.ji for module SortingAlgorithms.
[39m[1m[36mINFO: [39m[22m[36mRecompiling stale cache file /Users/gitanjali/.julia/lib/v0.6/POMDPs.ji for module POMDPs.
[39m

In [3]:
# In this example, we will consider how to set up an MDP to calculate optimal decision-making 
# for a portfolio of buildings in earthquake country.
# This example is adapted from the grid world example linked above.

# Initialize an MDP type
type PortfolioMDP <: MDP{Int64, Int64} # MDP{StateType, ActionType}
end

# for simplicity, buildings can be damaged or undamaged

struct PortfolioState 
    b1::Int64 # damage state of Building 1
    b2::Int64 # damage state of Building 2
    done::Bool # terminal state 
end

# checks if the position of two states are the same
sequal(s1::PortfolioState, s2::PortfolioState) = s1.b1 == s2.b1 && s1.b1 == s2.b2

sequal (generic function with 1 method)

In [4]:
# Defining binary states - damaged or not damaged - and setting up an initial state constructor
PortfolioState(b1::Int64, b2::Int64) = PortfolioState(b1,b2, false)

# Defining actions - we can retrofit a building or repair a building, if it is damaged, or do nothing
# a1 - action associated with Building 1; a2 - action associated with Building 2
PortfolioAct(a1::Int64, a2::Int64) = PortfolioAct(a1,a2)

PortfolioAct (generic function with 1 method)

In [5]:
# Defining the building MDP type - we will use this as a data container
type Portfolio <: MDP{PortfolioState, PortfolioAct} # Note that our MDP is parametarized by the state and the action
    size::Int64 # number of buildings in the portfolio
    reward_states::Vector{PortfolioState} # the states in which agent recieves reward
    reward_values::Vector{Float64} # reward values for those states
    tprob::Float64 # probability of transitioning to the desired state
    gamma::Float64 # discount factor - this expresses how much we value future reward relative to present rewards (from 0 to 1)
end

# we use key worded arguments so we can change any of the values we pass in 
function Portfolio(size = 2,
                    reward_states::Vector{PortfolioState}=[PortfolioState(0,0), PortfolioState(0,1), PortfolioState(1,0), PortfolioState(1,1)], # reward states
                    reward_values::Vector{Float64}=rv = [10000,4000,6000,-100000], # reward values
                    tprob::Float64=0.5, # tprob
                    gamma::Float64=0.5) # discount factor
    return Portfolio(size, rs, rv, tp, gamma)
end

Portfolio

In [6]:
# we can now create a Portfolio mdp instance like this:
mdp = Portfolio()
mdp.reward_states # mdp contains all the default values from the constructor

4-element Array{PortfolioState,1}:
 PortfolioState(0, 0, false)
 PortfolioState(0, 1, false)
 PortfolioState(1, 0, false)
 PortfolioState(1, 1, false)

In [7]:
function POMDPs.states(mdp::Portfolio)
    s = PortfolioState[] # initialize an array of PortfolioStates
    # loop over all our states, remember there are two binary variables:
    # done (d)
    for d = 0:1, b1 = 0:1, b2 = 0:1
        push!(s, PortfolioState(b1,b2,d))
    end
    return s
end;

In [8]:
mdp = Portfolio()
state_space = states(mdp);
state_space[1]

PortfolioState(0, 0, false)

In [9]:
POMDPs.actions(mdp::Portfolio) = [0, 1, 2];

In [10]:
function POMDPs.transition(mdp::Portfolio, state::PortfolioState, action::Int64)
    a = action
    x = state.x
    y = state.y
    
    if state.done
        return SparseCat([GridWorldState(b1, b2, true)], [1.0])
    elseif state in mdp.reward_states
        return SparseCat([GridWorldState(b1, b2, true)], [1.0])
    end
    
    prob = 0.7
    
    return prob
end
    

In [11]:
function POMDPs.reward(mdp::Portfolio, state::PortfolioState, action::Int64, statep::PortfolioState)
    if state.done
        return 0.0
    end
    r = 0.0
    n = length(mdp.reward_states)
    for i = 1:n
        if sequal(state, mdp.reward_states[i])
            r += mdp.reward_values[i]
        end
    end
    return r
end;

In [12]:
POMDPs.n_states(mdp::Portfolio) = 2*mdp.size
POMDPs.n_actions(mdp::Portfolio) = 3
POMDPs.discount(mdp::Portfolio) = mdp.gamma;

In [None]:
# install support tools we'll use for simulation
#POMDPs.add("POMDPToolbox")
#Pkg.update("POMDPToolbox")

#mdp = Portfolio()
#mdp.tprob=1.0
#sim(mdp, Portfolio(4,1), max_steps=10) do s
#    println("state is: $s")
#    a = :right
#    println("moving $a")
#    return a
#end;

In [13]:
# first let's load the value iteration module
POMDPs.add("DiscreteValueIteration")
using DiscreteValueIteration

# initialize the problem
mdp = Portfolio()

# initialize the solver
# max_iterations: maximum number of iterations value iteration runs for (default is 100)
# belres: the value of Bellman residual used in the solver (defualt is 1e-3)
solver = ValueIterationSolver(max_iterations=100, belres=1e-3)

# initialize the policy by passing in your problem
policy = ValueIterationPolicy(mdp) 

# solve for an optimal policy
# if verbose=false, the text output will be supressed (false by default)
solve(solver, mdp, policy, verbose=true);

Package already installed


[1m[36mINFO: [39m[22m[36mCloning DiscreteValueIteration from https://github.com/JuliaPOMDP/DiscreteValueIteration.jl
[39m[1m[36mINFO: [39m[22m[36mPrecompiling module POMDPToolbox.
[39m

LoadError: [91mMethodError: no method matching ordered_vector(::#PortfolioAct, ::POMDPToolbox.##51#52{Portfolio}, ::Array{Int64,1}, ::Int64, ::String)[0m
Closest candidates are:
  ordered_vector([91m::Type[39m, ::Function, ::Any, ::Any, ::Any) at /Users/gitanjali/.julia/v0.6/POMDPToolbox/src/model/ordered_spaces.jl:31
  ordered_vector([91m::Type[39m, ::Function, ::Any, ::Any, ::Any, [91m::Any[39m) at /Users/gitanjali/.julia/v0.6/POMDPToolbox/src/model/ordered_spaces.jl:31[39m