# T-Maze Interactive Simulation

This notebook generates the expected reward landscapes of Fig. 10 (Sec. 7.2) for the CBFE and EFE agents.

In [None]:
using LinearAlgebra
using ForneyLab
using Plots
using ProgressMeter

# T-maze layout
# [2| |3]
#   | |
#   |1|
#   |4|

# Constrained Model

In [None]:
# Planning model: a ForneyLab factor graph that chains T steps of control, state and
# outcome variables, starting from a prior over the state x_t_min.
T = 2 # Number of steps unrolled in the planning model

fg_plan = FactorGraph()

# Per-step containers for the control (u), state (x) and outcome (y) variables
u = Vector{Variable}(undef, T)
x = Vector{Variable}(undef, T)
y = Vector{Variable}(undef, T)

# Categorical prior on the initial state; its 8-element parameter vector is fed in
# through the :D_t_min placeholder
@RV x_t_min ~ Categorical(placeholder(:D_t_min, dims=(8,)))

x_k_min = x_t_min # Parent state for the first transition
for k=1:T
    @RV u[k]
    # State transition from the previous state, parameterized by u[k]
    @RV x[k] ~ Transition(x_k_min, u[k])
    # Outcome y[k] depends on state x[k] through the 16x8 :A placeholder;
    # var_id=:A_*k presumably yields a distinct placeholder id per step — TODO confirm
    @RV y[k] ~ Transition(x[k], placeholder(:A, dims=(16,8), var_id=:A_*k))

    # u[k] is supplied as data: the k-th entry of :u, with dims (8,8)
    placeholder(u[k], :u, index=k, dims=(8,8))
    # Additional Categorical factor on y[k], parameterized by the k-th entry of :C
    # (presumably the goal prior over outcomes — TODO confirm)
    Categorical(y[k], placeholder(:C, dims=(16,), index=k, var_id=:C_*k))
    # Attaches a PointMassConstraint node to y[k]
    # NOTE(review): constraint semantics are defined outside this file — confirm
    PointMassConstraint(y[k])
    
    x_k_min = x[k] # Current state becomes the parent of the next transition
end
;

In [None]:
# Posterior factorization for the planning graph: one factor over the outcomes y
# (id :Y) and one over the states [x_t_min; x] (id :X)
q_plan_constrained = PosteriorFactorization(y, [x_t_min; x], ids=[:Y, :X])

# Build the message passing algorithm that targets y, with free energy evaluation
algo_plan_constrained = messagePassingAlgorithm(y, id=:PlanConstrained, free_energy=true)

# Generate the Julia source of the algorithm, then parse and evaluate it so the
# generated definitions become available in this session
code_plan_constrained = algorithmSourceCode(algo_plan_constrained, free_energy=true)
code_plan_constrained |> Meta.parse |> eval
;

# Slide Model

In [None]:
# Slide model: a single-step ForneyLab factor graph linking a prior state x_t_min to the
# next state x_t and to an outcome y_t that is clamped to data.
# (Presumably used by the agent's `slide` step to update the state belief — TODO confirm.)
fg_slide = FactorGraph()

# NOTE(review): these vectors are not referenced in this cell; they rebind the globals
# u, x, y created for the planning model — confirm whether later code relies on that
u = Vector{Variable}(undef, T)
x = Vector{Variable}(undef, T)
y = Vector{Variable}(undef, T)

# Categorical prior on the previous state, parameterized by the :D_t_min placeholder
@RV x_t_min ~ Categorical(placeholder(:D_t_min, dims=(8,)))
# Transition to the current state through the 8x8 :B_t placeholder
@RV x_t ~ Transition(x_t_min, placeholder(:B_t, dims=(8,8)))
# Outcome given the current state through the 16x8 :A placeholder
@RV y_t ~ Transition(x_t, placeholder(:A, dims=(16,8)))
# y_t is supplied as data through the 16-element :o_t placeholder
placeholder(y_t, :o_t, dims=(16,))
;

In [None]:
# Posterior factorization constructed from the slide graph
q_slide = PosteriorFactorization(fg_slide)

# Build the message passing algorithm that targets the current state x_t
algo_slide = messagePassingAlgorithm(x_t, id=:Slide)

# Generate the Julia source of the algorithm, then parse and evaluate it so the
# generated definitions become available in this session
code_slide = algorithmSourceCode(algo_slide)
code_slide |> Meta.parse |> eval
;

In [None]:
# println(code_slide) # Uncomment to inspect generated source code for slide step

# Results

In [None]:
# Protocol sizes
N = 2  # Moves performed in each simulation
S = 10 # Simulations per parameter setting

# Parameter grids; each (alphas[j], cs[k]) pair is passed to constructABCD below
alphas = 0.5:0.025:1.0
# Alternative grid that avoids α=1.0 for stability reasons:
# alphas = collect(0.5:0.05:0.999)
# push!(alphas, 0.99)
cs = 0.0:0.1:2.0
(J, K) = (length(alphas), length(cs)) # Grid dimensions

# Load the project definitions, in the original order
for file in ("environment.jl", "agent.jl", "update_rules.jl", "helpers.jl")
    include(file)
end
;

## Constrained Agent

In [None]:
# Simulate the constrained agent on every (alphas[j], cs[k]) grid point.
# R_con[j,k] holds the final reward of each of the S simulations;
# P_con[j,k] holds the action sequence executed in each of the S simulations.
R_con = Matrix{Vector{Float64}}(undef, J, K)
P_con = Matrix{Vector{Vector{Int64}}}(undef, J, K)
@showprogress for j in 1:J
    for k in 1:K
        (A, B, C, D) = constructABCD(alphas[j], cs[k])

        # Typed empty accumulators (an untyped `[]` is a Vector{Any} that must be
        # converted on assignment)
        R_con[j,k] = Float64[]
        P_con[j,k] = Vector{Int64}[]
        for s in 1:S
            (execute, observe) = initializeWorld(A, B, C, D) # Let there be a world
            (plan, _, act, slide) = initializeAgent(A, B, C, D) # Let there be a constrained agent

            # Step through the experimental protocol
            r_t = 0.0 # Reward; overwritten by every observation
            a = Vector{Int64}(undef, N) # Executed actions, one per move
            for t in 1:N
                F_t = plan()
                a[t] = act(F_t)
                execute(a[t])
                (o_t, r_t) = observe()
                slide(a[t], o_t)
            end

            push!(R_con[j,k], r_t) # Reward observed after the final (N-th) move
            push!(P_con[j,k], a) # Store executed actions
        end
    end
end
;

In [None]:
# Plot the reward landscape of the constrained agent and write it to disk
p = plotReward(alphas, cs, R_con, dpi=300)
# annotateActions(p, alphas, cs, P_con) # Uncomment to annotate figure with performed actions
mkpath("figures") # Make sure the output directory exists before saving
savefig("figures/constrained_reward.png")

## EFE Agent

In [None]:
# Simulate the EFE agent on every (alphas[j], cs[k]) grid point.
# R_efe[j,k] holds the final reward of each of the S simulations;
# P_efe[j,k] holds the action sequence executed in each of the S simulations.
R_efe = Matrix{Vector{Float64}}(undef, J, K)
P_efe = Matrix{Vector{Vector{Int64}}}(undef, J, K)
@showprogress for j in 1:J
    for k in 1:K
        (A, B, C, D) = constructABCD(alphas[j], cs[k])

        # Typed empty accumulators (an untyped `[]` is a Vector{Any} that must be
        # converted on assignment)
        R_efe[j,k] = Float64[]
        P_efe[j,k] = Vector{Int64}[]
        for s in 1:S
            (execute, observe) = initializeWorld(A, B, C, D) # Let there be a world
            (_, plan, act, slide) = initializeAgent(A, B, C, D) # Let there be an EFE agent

            # Step through the experimental protocol
            r_t = 0.0 # Reward; overwritten by every observation
            a = Vector{Int64}(undef, N) # Executed actions, one per move
            for t in 1:N
                G_t = plan()
                a[t] = act(G_t)
                execute(a[t])
                (o_t, r_t) = observe()
                slide(a[t], o_t)
            end

            push!(R_efe[j,k], r_t) # Reward observed after the final (N-th) move
            push!(P_efe[j,k], a) # Store executed actions
        end
    end
end
;

In [None]:
# Plot the reward landscape of the EFE agent and write it to disk.
# dpi=300 matches the resolution used for the constrained-agent figure.
p = plotReward(alphas, cs, R_efe, dpi=300)
# annotateActions(p, alphas, cs, P_efe) # Uncomment to annotate figure with performed actions
mkpath("figures") # Make sure the output directory exists before saving
savefig("figures/efe_reward.png")