# Electric Vehicle Charge Scheduling MDP

### Dependencies

You will need to install POMDPs and POMDPToolbox locally before being able to run this notebook. This can be done by running the following in your local Julia 1.0 REPL: 
    - Pkg.add("POMDPs")
    - Pkg.add("POMDPModelTools")
    - Pkg.add("POMDPSimulators")

In [1]:
using POMDPs, POMDPModelTools, POMDPSimulators, Random, Plots, DiscreteValueIteration

### State Structure
Define state structure / make initial constructor

In [2]:
mutable struct evState
    p::Vector{Bool} # array of whether cars are present
    c::Vector{Int64} # array of charge in each car
    renew::Int64 # renewable energy level
    t::Int64 # time
    done::Bool # are we in a terminal state
end

# initial state constructor
evState(p,c,renew::Int64,t::Int64) = evState(p,c,renew,t,false)

evState

### MDP Structure
Define MDP structure with everything you would need / make initial constructor

In [3]:
struct evMDP <: MDP{evState,Vector{Bool}} 
    n::Int64 # number of cars
    T::Int64 # number of timesteps
    renew_levels::Int64 # number of renewable mixture levels, 0:renew_levels
    charge_levels::Int64 # number of charge levels, 0:charge_levels
    λ::Float64 # terminal reward weighting
    addRenewFunc # function handle for adding renewable energy, should be function of t (current time step),T (final time step)
end

# we use key worded arguments so we can change any of the values we pass in 
function evMDP(;n::Int64 = 3, # number of cars
                T::Int64 = 6, # number of timesteps
                renew_levels::Int64 = 3, # number of renewable mixture levels, 0:renew_levels
                charge_levels::Int64 = 3, # number of charge levels, 0:charge_levels
                λ::Float64 = 10.0, #energy reward weighting
                addRenewFunc = addZeroRenew) # function for adding reward 
    return evMDP(n, T, renew_levels, charge_levels, λ, addRenewFunc)
end


evMDP

### States
Define all possible states

In [4]:
# convert number to an array of numbers using requested base system, array length
function num2array(number,base,array_length)
    base==2 ? finalarray = zeros(Bool, array_length) : finalarray = zeros(Int64, array_length)
    idx=1
    while number > 0
        finalarray[idx] = rem(number,base)
        number = div(number,base)
        idx+=1
    end
    return finalarray
end

function POMDPs.states(mdp::evMDP)
    s = [] # initialize an array of GridWorldStates
    
    # add every possible state. This includes every possible combination of present/charge array
    
    for iP = 0:(2^mdp.n-1)
        present = num2array(iP,2,mdp.n)
        
        for iC = 0:((mdp.charge_levels+1)^mdp.n-1)
            charge = num2array(iC,(mdp.charge_levels+1),mdp.n)
            
            for rl=0:mdp.renew_levels, t=1:mdp.T
            
                # if in final time, make sure the done flag is set on
                t==mdp.T ? push!(s,evState(present, charge, rl, t, true)) : push!(s,evState(present, charge, rl, t)) 
            end
        end
    end
    return s
end


In [5]:
POMDPs.initialstate(mdp::evMDP, rng::AbstractRNG) = evState(zeros(Bool,mdp.n),zeros(Int64,mdp.n), mdp.renew_levels, 1)
POMDPs.initialstate(mdp::evMDP) = POMDPs.initialstate(mdp,MersenneTwister(1))

### Actions
Define all possible action vectors

In [6]:
function POMDPs.actions(mdp::evMDP)
    # initialize empty action space a
    a = []    
    # populate with all combinations of actions, ex [true, false, true, true]
    for iA=0:(2^mdp.n-1)
        push!(a,num2array(iA,2,mdp.n))
    end
    return a
end
    

### Reward Function
Define the reward function

In [78]:
function POMDPs.reward(mdp::evMDP, state::evState, action::Vector{Bool}, statep::evState)
    r = mdp.λ*statep.renew 
    if statep.done
        max_c = mdp.charge_levels
        for i in 1:length(state.p)
            if state.p[i] == true
                r -= exp((max_c-state.c[i])/max_c)
            end
        end
    end
    return r
end

### Transition Function
Define the next-state transition probabilities (this is the hard one)

In [8]:

# chance of a car appearing in any slot at time t given total timescale T
carAppearProb(t,T) = 1/(1+exp(-20(t-2)/T))

# function takes a state to generate different charge probabilities for, the previous presence array, the mdp, 
# and the probability of transitioning to 
function carAppearStates(baseState,prevP,mdp,probability)
    
    newCarIdxs = findall(baseState.p .!= prevP)
    
    # distribute probabilities uniformly over possible charge states
    probs = zeros(Int64,mdp.charge_levels^length(newCarIdxs)) .+ probability/(mdp.charge_levels^length(newCarIdxs))
    
    chargeCombs = [baseState.c[:]]
    for ind in newCarIdxs
        chargeCombsPrev = chargeCombs
        
        chargeCombs = []
        for charge in chargeCombsPrev, level in 0:(mdp.charge_levels-1)
            addition = charge[:]
            addition[ind] = level
            push!(chargeCombs,addition)
        end
    end
    
    state_vec = evState[]
    for charge in chargeCombs
        push!(state_vec,evState(baseState.p, charge, baseState.renew, baseState.t, baseState.done))
    end
    return state_vec, probs
end
    
    

carAppearStates (generic function with 1 method)

In [9]:
prevP = [true, false, true, false]

newP = [true, true, true, true]
newC = [3, 4, 4, 4]
baseState = evState(newP,newC,0,1,false)
test_mdp = evMDP(4,4,4,4,0.1)

states,probs = carAppearStates(baseState,prevP,test_mdp,1.0)
println(states)
println(probs)

MethodError: MethodError: no method matching evMDP(::Int64, ::Int64, ::Int64, ::Int64, ::Float64)
Closest candidates are:
  evMDP(::Int64, ::Int64, ::Int64, ::Int64, ::Float64, !Matched::Any) at In[3]:2
  evMDP(::Any, ::Any, ::Any, ::Any, ::Any, !Matched::Any) at In[3]:2

In [10]:
function getNextPs(p,car_prob)
    wherenocars = findall(iszero,p)
    p_next = [p]
    for ind in wherenocars
        p_next_prev = p_next
        p_next = []
        for p in p_next_prev
            push!(p_next,p)
            state_new = p[:]
            state_new[ind] = true
            push!(p_next,state_new)        
        end
    end
    
    
    # calculate probability
    num_initial_spaces = length(wherenocars)
    num_new_cars = []
    for p_check in p_next
        n_new_cars = count(p_check .!= p)
        push!(num_new_cars,n_new_cars)
    end
    probs = [(car_prob^n)*(1-car_prob)^(num_initial_spaces-n) for n in num_new_cars]
    
    return p_next, probs
    
end

getNextPs (generic function with 1 method)

In [11]:
p, pr = getNextPs([true, false, false],.75)
println(p)
println(pr)

Any[Bool[true, false, false], Bool[true, false, true], Bool[true, true, false], Bool[true, true, true]]
[0.0625, 0.1875, 0.1875, 0.5625]


In [65]:
addZeroRenew(t,T) = 0
addRenewFirstHalf(t,T) = 1*(t<=T/2)
addRenewSecondHalf(t,T) = 1*(t>T/2)

function POMDPs.transition(mdp::evMDP, state::evState, action::Vector{Bool})
    
    # deterministic transitions
    
    # time and teriminality
    t_next = state.t + 1
    t_next == mdp.T ? done_bool = true : done_bool = false
    
    # energy level
    renew_next = min(max(0,round.(Int8,state.renew + mdp.addRenewFunc(state.t,mdp.T) - length(findall(action))/mdp.n)),mdp.renew_levels)
    # delta_charge = 0.5/4 # Amount of energy lost per charge per car as a fraction of total charge level
    #renew_next = min(max(0,round.(Int8,state.renew + mdp.addRenewFunc(state.t,mdp.T) - length(findall(action))*delta_charge*mdp.renew_level),mdp.renew_levels))
    
    # charge in each car
    # c_next = round.(Int8, state.c + action*delta_charge*mdp.renew_levels)
    c_next = min.(mdp.charge_levels, state.c + action.*state.p) # increment by one if charge action taken when car present, and cap at max charge
    
    # probabilistic transitions
    
    # car presence
    appear_prob = carAppearProb(t_next,mdp.T)
    p_next, probs = getNextPs(state.p[:],appear_prob)
    
    # build next state array
    next_states = [evState(p_new,c_next,renew_next,t_next,done_bool) for p_new in p_next]
    # updated_states, updated_probs = [carAppearStates(baseState,state.p,mdp,probability) for baseState,probability in zip(next_states,probs)] ]
    new_states = []
    new_probs = Float64[]
    
    for i in 1:length(next_states)
        updated_state, updated_prob = carAppearStates(next_states[i],state.p,mdp,probs[i])
        push!(new_states,updated_state...)
        push!(new_probs,updated_prob...)
    end
    
    # new_states, new_probs = [s[:],p[:] for s,p in zip(updated_states,updated_probs)]
    # add section to incorporate different possible start charges

    return SparseCat(new_states,new_probs)
end

In [13]:
test_mdp = evMDP(4,4,4,4,0.1)


P = [true, false, true, false]
action = [true, true, true, true]
C = [3, 0, 4, 0]
baseState = evState(P,C,0,1,false)

POMDPs.transition(test_mdp,baseState,action)


MethodError: MethodError: no method matching evMDP(::Int64, ::Int64, ::Int64, ::Int64, ::Float64)
Closest candidates are:
  evMDP(::Int64, ::Int64, ::Int64, ::Int64, ::Float64, !Matched::Any) at In[3]:2
  evMDP(::Any, ::Any, ::Any, ::Any, ::Any, !Matched::Any) at In[3]:2

### Miscellaneous Functions
Define other functions that POMDPs.jl needs

In [14]:
POMDPs.n_states(mdp::evMDP) = 2^mdp.n*(mdp.charge_levels+1)^mdp.n*(mdp.renew_levels+1)*mdp.T
POMDPs.n_actions(mdp::evMDP) = 2^mdp.n
POMDPs.discount(mdp::evMDP) = 1.
POMDPs.isterminal(mdp::evMDP, s::evState) = s.done

In [15]:
# define state and action indexing
function indVal(base, a)
    ind = 1
    for i in 1:length(a)
        ind += a[i]*base^(i-1)
    end
    return ind
end    

function POMDPs.stateindex(mdp::evMDP, state::evState)
    indP = indVal(2,state.p)
    indC = indVal(1+mdp.charge_levels,state.c)
    indR = state.renew + 1
    indT = state.t
    maxP = 2^(mdp.n)
    maxC = (mdp.charge_levels + 1)^(mdp.n)
    maxR = mdp.renew_levels + 1
    maxT = mdp.T
    sInd = indP + (maxP*(indC-1)) + (maxC*maxP*(indR-1)) + (maxR*maxC*maxP*(indT-1))
    return sInd
end

function POMDPs.actionindex(mdp::evMDP, act::Vector{Bool})
    return indVal(2,act)
end

### Implement Solvers / Simulators

In [16]:
# initialize the problem
mdp = evMDP(n,renew_levels=4)
#=Defaults
n::Int64 = 3, # number of cars
T::Int64 = 6, # number of timesteps         
renew_levels::Int64 = 3, # number of renewable mixture levels, 0:renew_levels
charge_levels::Int64 = 3, # number of charge levels, 0:charge_levels
λ::Float64 = 10.0)
=#
@requirements_info ValueIterationSolver() mdp


solver = ValueIterationSolver(max_iterations=100, belres=1e-6, verbose=true) # initializes the Solver type
policy = solve(solver, mdp) # runs value iterations

# initialize the policy by passing in your problem
# policy = ValueIterationPolicy(mdp) 

# solve for an optimal policy
# if verbose=false, the text output will be supressed (false by default)
# solve(solver, mdp, policy, verbose=true);

UndefVarError: UndefVarError: n not defined

### Simulations

In [17]:
solve_evMDP(mdp) = solve(ValueIterationSolver(max_iterations=100, belres=1e-6, verbose=true), mdp)

function simulate_evMDP(mdp,policy;seed=1)
    history = simulate(HistoryRecorder(max_steps=100,rng = MersenneTwister(seed)), mdp, policy, POMDPs.initialstate(mdp))

    counter = 0
    ev = zeros(Int8,mdp.T-1,mdp.n)
    # look at what happened
    for (s, a, r) in eachstep(history, "(s, a, r)")
        counter +=1
        ev[counter,:] = s.c + s.p .- 1
        println("State was $s,")
        println("Reward was $r,")
        println("action $a was taken,")
    end
    return ev
end

simulate_evMDP (generic function with 1 method)

In [18]:
ev = simulate_evMDP(mdp,policy,seed=5)
heatmap(ev)

UndefVarError: UndefVarError: mdp not defined

## Example initializing, solving, simulating, and plotting

In [87]:
test_mdp = evMDP(λ=50.,T=7)
policy = solve_evMDP(test_mdp)

[Iteration 1   ] residual:        150 | iteration runtime:   4194.960 ms, (      4.19 s total)
[Iteration 2   ] residual:        150 | iteration runtime:   3973.028 ms, (      8.17 s total)
[Iteration 3   ] residual:        150 | iteration runtime:   3517.122 ms, (      11.7 s total)
[Iteration 4   ] residual:        150 | iteration runtime:   3479.505 ms, (      15.2 s total)
[Iteration 5   ] residual:        150 | iteration runtime:   3454.433 ms, (      18.6 s total)
[Iteration 6   ] residual:        147 | iteration runtime:   5017.243 ms, (      23.6 s total)
[Iteration 7   ] residual:          0 | iteration runtime:   4374.227 ms, (        28 s total)


ValueIterationPolicy{Float64}([-3.01083 -3.01083 … -3.01083 -3.01083; -3.00722 -3.00722 … -3.00722 -3.00722; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], [-3.01083, -3.00722, -3.00722, -3.00361, -3.00722, -3.00361, -3.00361, -3.0, -3.01083, -3.00722  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1  …  1, 1, 1, 1, 1, 1, 1, 1, 1, 1], Array{Bool,1}[[false, false, false], [true, false, false], [false, true, false], [true, true, false], [false, false, true], [true, false, true], [false, true, true], [true, true, true]], true, evMDP(3, 7, 3, 3, 50.0, addZeroRenew))

In [88]:
ev = simulate_evMDP(test_mdp,policy,seed=11)

heatmap(ev)
title!("Charge level in $(test_mdp.n)-car simulation with lambda = $(test_mdp.λ)")
yaxis!("Time step")
xaxis!("Car Number")
xticks!(1:test_mdp.n)
savefig("NoSunLambda50.png")

State was evState(Bool[false, false, false], [0, 0, 0], 3, 1, false),
Reward was 150.0,
action Bool[false, false, false] was taken,
State was evState(Bool[false, true, true], [0, 0, 2], 3, 2, false),
Reward was 150.0,
action Bool[false, true, false] was taken,
State was evState(Bool[true, true, true], [1, 1, 2], 3, 3, false),
Reward was 150.0,
action Bool[true, false, false] was taken,
State was evState(Bool[true, true, true], [2, 1, 2], 3, 4, false),
Reward was 150.0,
action Bool[true, false, false] was taken,
State was evState(Bool[true, true, true], [3, 1, 2], 3, 5, false),
Reward was 150.0,
action Bool[false, true, false] was taken,
State was evState(Bool[true, true, true], [3, 2, 2], 3, 6, false),
Reward was 146.20877514982783,
action Bool[false, false, false] was taken,


In [81]:
test_mdp = evMDP(λ=1.,T=7)
policy = solve_evMDP(test_mdp)

[Iteration 1   ] residual:       8.15 | iteration runtime:   4219.489 ms, (      4.22 s total)
[Iteration 2   ] residual:       6.38 | iteration runtime:   3504.327 ms, (      7.72 s total)
[Iteration 3   ] residual:       5.61 | iteration runtime:   3521.447 ms, (      11.2 s total)
[Iteration 4   ] residual:       4.84 | iteration runtime:   3867.667 ms, (      15.1 s total)
[Iteration 5   ] residual:       4.29 | iteration runtime:   3543.263 ms, (      18.7 s total)
[Iteration 6   ] residual:       3.74 | iteration runtime:   3456.291 ms, (      22.1 s total)
[Iteration 7   ] residual:          0 | iteration runtime:   3518.146 ms, (      25.6 s total)


ValueIterationPolicy{Float64}([-3.01083 -3.01083 … -3.01083 -3.01083; -3.00722 -3.00722 … -3.00722 -3.00722; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], [-3.01083, -3.00722, -3.00722, -3.00361, -3.00722, -3.00361, -3.00361, -3.0, -3.01083, -3.00722  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1  …  1, 1, 1, 1, 1, 1, 1, 1, 1, 1], Array{Bool,1}[[false, false, false], [true, false, false], [false, true, false], [true, true, false], [false, false, true], [true, false, true], [false, true, true], [true, true, true]], true, evMDP(3, 7, 3, 3, 1.0, addZeroRenew))

In [82]:
ev = simulate_evMDP(test_mdp,policy,seed=11)

heatmap(ev)
title!("Charge level in $(test_mdp.n)-car simulation with lambda = $(test_mdp.λ)")
yaxis!("Time step")
xaxis!("Car Number")
xticks!(1:test_mdp.n)
savefig("NoSunLambda1.png")

State was evState(Bool[false, false, false], [0, 0, 0], 3, 1, false),
Reward was 3.0,
action Bool[false, false, false] was taken,
State was evState(Bool[false, true, true], [0, 0, 2], 3, 2, false),
Reward was 3.0,
action Bool[false, true, false] was taken,
State was evState(Bool[true, true, true], [1, 1, 2], 3, 3, false),
Reward was 3.0,
action Bool[true, false, false] was taken,
State was evState(Bool[true, true, true], [2, 1, 2], 3, 4, false),
Reward was 3.0,
action Bool[true, false, false] was taken,
State was evState(Bool[true, true, true], [3, 1, 2], 3, 5, false),
Reward was 3.0,
action Bool[false, true, false] was taken,
State was evState(Bool[true, true, true], [3, 2, 2], 3, 6, false),
Reward was -0.791224850172179,
action Bool[false, false, false] was taken,


In [83]:
test_mdp = evMDP(λ=0.01,T=7)
policy = solve_evMDP(test_mdp)

[Iteration 1   ] residual:       8.15 | iteration runtime:   4061.370 ms, (      4.06 s total)
[Iteration 2   ] residual:       6.06 | iteration runtime:   3725.500 ms, (      7.79 s total)
[Iteration 3   ] residual:       4.35 | iteration runtime:   3574.845 ms, (      11.4 s total)
[Iteration 4   ] residual:       3.42 | iteration runtime:   3570.414 ms, (      14.9 s total)
[Iteration 5   ] residual:       3.06 | iteration runtime:   7285.177 ms, (      22.2 s total)
[Iteration 6   ] residual:       3.03 | iteration runtime:   4116.010 ms, (      26.3 s total)
[Iteration 7   ] residual:          0 | iteration runtime:   3510.401 ms, (      29.8 s total)


ValueIterationPolicy{Float64}([-3.01083 -3.01083 … -3.01083 -3.01083; -3.00722 -3.00722 … -3.00722 -3.00722; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], [-3.01083, -3.00722, -3.00722, -3.00361, -3.00722, -3.00361, -3.00361, -3.0, -3.01083, -3.00722  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1  …  1, 1, 1, 1, 1, 1, 1, 1, 1, 1], Array{Bool,1}[[false, false, false], [true, false, false], [false, true, false], [true, true, false], [false, false, true], [true, false, true], [false, true, true], [true, true, true]], true, evMDP(3, 7, 3, 3, 0.01, addZeroRenew))

In [84]:
ev = simulate_evMDP(test_mdp,policy,seed=11)

heatmap(ev)
title!("Charge level in $(test_mdp.n)-car simulation with lambda = $(test_mdp.λ)")
yaxis!("Time step")
xaxis!("Car Number")
xticks!(1:test_mdp.n)
savefig("NoSunLambda0p1.png")

State was evState(Bool[false, false, false], [0, 0, 0], 3, 1, false),
Reward was 0.03,
action Bool[false, false, false] was taken,
State was evState(Bool[false, true, true], [0, 0, 2], 3, 2, false),
Reward was 0.03,
action Bool[false, true, false] was taken,
State was evState(Bool[true, true, true], [1, 1, 2], 3, 3, false),
Reward was 0.03,
action Bool[true, false, false] was taken,
State was evState(Bool[true, true, true], [2, 1, 2], 3, 4, false),
Reward was 0.03,
action Bool[false, true, false] was taken,
State was evState(Bool[true, true, true], [2, 2, 2], 3, 5, false),
Reward was 0.02,
action Bool[true, true, true] was taken,
State was evState(Bool[true, true, true], [3, 3, 3], 2, 6, false),
Reward was -2.98,
action Bool[false, false, false] was taken,


In [74]:
println(test_mdp)
state = evState(Bool[true, true, true], [1, 1, 2], 3, 5, false)
action = Bool[true, true, false]
POMDPs.transition(test_mdp,state,action)

evMDP(3, 7, 3, 3, 0.01, addZeroRenew)


SparseCat{Array{Any,1},Array{Float64,1}}(Any[evState(Bool[true, true, true], [2, 2, 2], 2, 6, false)], [1.0])

In [85]:
# addRenewFunc options: addZeroRenew (default), addRenewFirstHalf, addRenewSecondHalf
test_mdp = evMDP(λ=50.,T=7,addRenewFunc = addRenewFirstHalf)
policy = solve_evMDP(test_mdp)

[Iteration 1   ] residual:        150 | iteration runtime:   3512.654 ms, (      3.51 s total)
[Iteration 2   ] residual:        150 | iteration runtime:   3838.480 ms, (      7.35 s total)
[Iteration 3   ] residual:        150 | iteration runtime:   3898.921 ms, (      11.3 s total)
[Iteration 4   ] residual:        150 | iteration runtime:   5081.486 ms, (      16.3 s total)
[Iteration 5   ] residual:        150 | iteration runtime:   5538.656 ms, (      21.9 s total)
[Iteration 6   ] residual:        147 | iteration runtime:   6062.133 ms, (      27.9 s total)
[Iteration 7   ] residual:          0 | iteration runtime:   4985.911 ms, (      32.9 s total)


ValueIterationPolicy{Float64}([746.108 746.108 … 446.108 446.108; 745.744 746.191 … 445.744 446.191; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], [746.108, 746.191, 746.191, 745.761, 746.191, 745.761, 745.761, 745.261, 746.108, 746.556  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [1, 2, 3, 2, 5, 2, 3, 2, 1, 2  …  1, 1, 1, 1, 1, 1, 1, 1, 1, 1], Array{Bool,1}[[false, false, false], [true, false, false], [false, true, false], [true, true, false], [false, false, true], [true, false, true], [false, true, true], [true, true, true]], true, evMDP(3, 7, 3, 3, 50.0, addRenewFirstHalf))

In [86]:
ev = simulate_evMDP(test_mdp,policy,seed=11)

heatmap(ev)
title!("Charge level with lambda = $(test_mdp.λ), added renewables")
yaxis!("Time step")
xaxis!("Car Number")
xticks!(1:test_mdp.n)
savefig("SunnyFirstHalf.png")

State was evState(Bool[false, false, false], [0, 0, 0], 3, 1, false),
Reward was 150.0,
action Bool[false, false, false] was taken,
State was evState(Bool[false, true, true], [0, 0, 2], 3, 2, false),
Reward was 150.0,
action Bool[false, true, false] was taken,
State was evState(Bool[true, true, true], [1, 1, 2], 3, 3, false),
Reward was 150.0,
action Bool[true, true, true] was taken,
State was evState(Bool[true, true, true], [2, 2, 3], 3, 4, false),
Reward was 150.0,
action Bool[true, false, false] was taken,
State was evState(Bool[true, true, true], [3, 2, 3], 3, 5, false),
Reward was 150.0,
action Bool[false, true, false] was taken,
State was evState(Bool[true, true, true], [3, 3, 3], 3, 6, false),
Reward was 147.0,
action Bool[false, false, false] was taken,


In [77]:
length(POMDPs.states(test_mdp))

14336