In [1]:
import numpy as np
from TMDP import TMDP
from River_swim import River

from algorithms import *
from model_functions import *

import matplotlib.pyplot as plt
import sympy as sp

#np.set_printoptions(precision=4)
import math
from utils import *



Current seed for result reproducibility: 78700542058381360565023510925848458289


In [2]:
# Symbolic teleport bound and its first/second order partial derivatives
# with respect to alpha and tau_prime.
(alpha, policy_adv, tau_prime, tau, model_adv, gamma,
 d_inf_model, d_inf_policy, d_exp_model, d_exp_policy, delta_U) = sp.symbols(
    'alpha policy_adv tau_prime tau model_adv gamma '
    'd_inf_model d_inf_policy d_exp_model d_exp_policy delta_U'
)

# (tau - tau_prime) appears in the linear term and throughout the quadratic form.
_delta_tau = tau - tau_prime

# Terms linear in alpha and (tau - tau_prime), scaled by 1/(1 - gamma).
_gain_term = (alpha * policy_adv + _delta_tau * model_adv) / (1 - gamma)

# NOTE(review): this is the only term built on (tau + tau_prime) instead of
# (tau - tau_prime) -- confirm the sign against the derivation of the bound.
_shift_term = (gamma * (tau + tau_prime) * d_exp_model) / (1 - gamma)

# Quadratic form in alpha and (tau - tau_prime) with its common prefactor.
# Only the last summand carries an extra gamma factor.
_quad_coeff = gamma * delta_U / (2 * (1 - gamma)**2)
_quad_form = (
    alpha**2 * d_exp_policy * d_inf_policy
    + alpha * _delta_tau * d_exp_policy * d_inf_model
    + alpha * _delta_tau * d_exp_model * d_inf_policy
    + gamma * _delta_tau**2 * d_exp_model * d_inf_model
)

teleport_bound = _gain_term - _shift_term - _quad_coeff * _quad_form

# First order partial derivatives
b_par_alpha = teleport_bound.diff(alpha)
b_par_tau_prime = teleport_bound.diff(tau_prime)

# Second order partial derivatives, each obtained by differentiating a first
# order derivative once more (both mixed orders are kept as separate names).
b_par_alpha_2 = b_par_alpha.diff(alpha)
b_par_alpha_tau_prime = b_par_alpha.diff(tau_prime)
b_par_tau_prime_alpha = b_par_tau_prime.diff(alpha)
b_par_tau_prime_2 = b_par_tau_prime.diff(tau_prime)



In [3]:
# Solve each first order condition for its own variable while the other
# variable is pinned to a fixed value. sp.solve returns a list of solutions;
# only the first entry of each list is printed here.
alpha_tau = sp.solve(b_par_alpha.subs({tau_prime: tau}), alpha)        # tau_prime = tau
alpha_0 = sp.solve(b_par_alpha.subs({tau_prime: 0}), alpha)            # tau_prime = 0
tau_prime_0 = sp.solve(b_par_tau_prime.subs({alpha: 0}), tau_prime)    # alpha = 0
tau_prime_1 = sp.solve(b_par_tau_prime.subs({alpha: 1}), tau_prime)    # alpha = 1

# Print the first solution of each case, simplified and then factored,
# in the order: alpha_tau, alpha_0, tau_prime_0, tau_prime_1.
for _solutions in (alpha_tau, alpha_0, tau_prime_0, tau_prime_1):
    print(sp.factor(sp.simplify(_solutions[0])))

-policy_adv*(gamma - 1)/(d_exp_policy*d_inf_policy*delta_U*gamma)
-(d_exp_model*d_inf_policy*delta_U*gamma*tau + d_exp_policy*d_inf_model*delta_U*gamma*tau + 2*gamma*policy_adv - 2*policy_adv)/(2*d_exp_policy*d_inf_policy*delta_U*gamma)
(d_exp_model*d_inf_model*delta_U*gamma**2*tau + d_exp_model*gamma**2 - d_exp_model*gamma + gamma*model_adv - model_adv)/(d_exp_model*d_inf_model*delta_U*gamma**2)
(2*d_exp_model*d_inf_model*delta_U*gamma**2*tau + d_exp_model*d_inf_policy*delta_U*gamma + 2*d_exp_model*gamma**2 - 2*d_exp_model*gamma + d_exp_policy*d_inf_model*delta_U*gamma + 2*gamma*model_adv - 2*model_adv)/(2*d_exp_model*d_inf_model*delta_U*gamma**2)


In [4]:
# Evaluate the teleport bound at each stationary point found above by
# substituting the solved variable together with the pinned one.
b_alpha_tau = teleport_bound.subs({alpha: alpha_tau[0], tau_prime: tau})      # alpha = alpha_tau, tau_prime = tau
b_alpha_0 = teleport_bound.subs({alpha: alpha_0[0], tau_prime: 0})            # alpha = alpha_0, tau_prime = 0
b_tau_prime_0 = teleport_bound.subs({alpha: 0, tau_prime: tau_prime_0[0]})    # alpha = 0, tau_prime = tau_prime_0
b_tau_prime_1 = teleport_bound.subs({alpha: 1, tau_prime: tau_prime_1[0]})    # alpha = 1, tau_prime = tau_prime_1

# b_alpha_tau and b_tau_prime_0 are printed as-is; the other two are
# simplified and factored before printing.
print(b_alpha_tau)
print(b_alpha_0.simplify().factor())

# b_tau_prime_0 is set apart from its neighbours by blank lines.
print("\n")
print(b_tau_prime_0)
print("\n")

print(b_tau_prime_1.simplify().factor())

-2*d_exp_model*gamma*tau/(1 - gamma) + policy_adv**2/(2*d_exp_policy*d_inf_policy*delta_U*gamma)
(d_exp_model**2*d_inf_policy**2*delta_U**2*gamma**2*tau**2 - 4*d_exp_model*d_exp_policy*d_inf_model*d_inf_policy*delta_U**2*gamma**3*tau**2 + 2*d_exp_model*d_exp_policy*d_inf_model*d_inf_policy*delta_U**2*gamma**2*tau**2 + 8*d_exp_model*d_exp_policy*d_inf_policy*delta_U*gamma**3*tau - 8*d_exp_model*d_exp_policy*d_inf_policy*delta_U*gamma**2*tau + 4*d_exp_model*d_inf_policy*delta_U*gamma**2*policy_adv*tau - 4*d_exp_model*d_inf_policy*delta_U*gamma*policy_adv*tau + d_exp_policy**2*d_inf_model**2*delta_U**2*gamma**2*tau**2 + 4*d_exp_policy*d_inf_model*delta_U*gamma**2*policy_adv*tau - 4*d_exp_policy*d_inf_model*delta_U*gamma*policy_adv*tau - 8*d_exp_policy*d_inf_policy*delta_U*gamma**2*model_adv*tau + 8*d_exp_policy*d_inf_policy*delta_U*gamma*model_adv*tau + 4*gamma**2*policy_adv**2 - 8*gamma*policy_adv**2 + 4*policy_adv**2)/(8*d_exp_policy*d_inf_policy*delta_U*gamma*(gamma - 1)**2)


-d_exp_m