In [23]:
# 228 Final Project
# Brian Dobkowski and Bianca Jurewicz

# Initialize Packages
### Be sure to install these packages on your local computer first###

# import POMDPs.jl interface
using POMDPs
# POMDPModelTools has tools that help build the MDP definition
using POMDPModelTools
# POMDPPolicies provides functions to help define simple policies
using POMDPPolicies
# POMDPSimulators provide functions for running MDP simulations
using POMDPSimulators


In [27]:
# Define States
struct GridWorldState 
    x::Int64 # x position
    y::Int64 # y position
    done::Bool # are we in a terminal state?
end

# initial state constructor
GridWorldState(x::Int64, y::Int64) = GridWorldState(x,y,false)
# checks if the position of two states are the same
posequal(s1::GridWorldState, s2::GridWorldState) = s1.x == s2.x && s1.y == s2.y

posequal (generic function with 1 method)

In [28]:
# Define Actions
action = :up # can also be :down, :left, :right

:up

In [29]:
# Define Grid World MDP Type
mutable struct GridWorld <: MDP{GridWorldState, Symbol} # Note that our MDP is parametarized by the state and the action
    size_x::Int64 # x size of the grid
    size_y::Int64 # y size of the grid
    reward_states::Vector{GridWorldState} # the states in which agent recieves reward
    reward_values::Vector{Float64} # reward values for those states
    tprob::Float64 # probability of transitioning to the desired state
    discount_factor::Float64 # disocunt factor
end

In [15]:
# Create a Constructor for GridWorld
#we use key worded arguments so we can change any of the values we pass in 
function GridWorld(;sx::Int64=5, # size_x
                    sy::Int64=5, # size_y
                    rs::Vector{GridWorldState}=[GridWorldState(5,3), GridWorldState(3,4), GridWorldState(3,1), GridWorldState(4,3)], # reward states
                    rv::Vector{Float64}=rv = [10,-10,-10,-2], # reward values
                    tp::Float64=0.8, # tprob
                    discount_factor::Float64=0.9)
    return GridWorld(sx, sy, rs, rv, tp, discount_factor)
end


GridWorld

In [16]:
mdp = GridWorld()
mdp.reward_states # mdp contains all the defualt values from the constructor

LoadError: MethodError: no method matching var"#GridWorld#1"(::Int64, ::Int64, ::Vector{GridWorldState}, ::Vector{Int64}, ::Float64, ::Float64, ::Type{GridWorld})
[0mClosest candidates are:
[0m  var"#GridWorld#1"(::Int64, ::Int64, ::Vector{GridWorldState}, [91m::Vector{Float64}[39m, ::Float64, ::Float64, ::Type{GridWorld}) at In[15]:3

In [21]:
# Define State Space
function POMDPs.states(mdp::GridWorld)
    s = GridWorldState[] # initialize an array of GridWorldStates
    # loop over all our states, remeber there are two binary variables:
    # done (d)
    for d = 0:1, y = 1:mdp.size_y, x = 1:mdp.size_x
        push!(s, GridWorldState(x,y,d))
    end
    return s
end;

In [22]:
mdp = GridWorld()
state_space = states(mdp);
state_space[1]

LoadError: MethodError: no method matching var"#GridWorld#6"(::Int64, ::Int64, ::Vector{GridWorldState}, ::Vector{Int64}, ::Float64, ::Float64, ::Type{GridWorld})
[0mClosest candidates are:
[0m  var"#GridWorld#6"(::Int64, ::Int64, ::Vector{GridWorldState}, [91m::Vector{Float64}[39m, ::Float64, ::Float64, ::Type{GridWorld}) at In[18]:3

In [23]:
# Define Action Space
POMDPs.actions(mdp::GridWorld) = [:up, :down, :left, :right];