In [1]:
# Bibliotecas
import sys, os, pandas as pd, numpy as np
from os.path import exists

# Ambiente
sys.path.append('../ppgsi_mdp_risk')
from ppgsi_mdp_risk.env.SimpleMDP import SimpleMDP
from ppgsi_mdp_risk.models.ExponentialUtility_RSPI import ExponentialUtility_RSPI
from ppgsi_mdp_risk.models.ExponentialUtility_RSVI import ExponentialUtility_RSVI
from ppgsi_mdp_risk.models.CVAR_PECVaR import CVAR_PECVaR

# Utils
import ppgsi_mdp_risk.rl_utils.NBPlotting as nbp
import ppgsi_mdp_risk.rl_utils.NBManipulation as nbm
import ppgsi_mdp_risk.rl_utils.NBEvaluations as nbe

# Configuration Region
from ppgsi_mdp_risk.function import FunctionFactory
from ppgsi_mdp_risk.curves import EquivalentCostCurve

# Reload
%load_ext autoreload
%autoreload 2

## Parâmetros

In [2]:
# --- Environment -------------------------------------------------------------
# One state and one action; epsilon is forwarded to the RSVI runs further down.
states, actions = 1, 1
epsilon = 1e-6

# Cost (C): action `a` costs `a + 1`; `c` is the cost of action 0.
C = {a: a + 1 for a in range(actions)}
c = C[0]

# Probability (p)
p = 0.5

# Build the MDP and its transition probabilities.
s_mdp = SimpleMDP(
    num_states=states,
    num_actions=actions,
    _fixed_probability=p,
    _float_probability=0.1,
)
T = s_mdp.build_transition_probabilities()

# --- Processing flags --------------------------------------------------------
config_reduzida = False
run_cvar = True

# --- Model settings ----------------------------------------------------------
discount_factor = 0.999

# Probability grid: np.arange(rini_p, rend_p, step_p), each value rounded to 2 decimals.
rini_p, rend_p, step_p = 0.05, 1, 0.05
range_x = [np.round(prob, 2) for prob in np.arange(rini_p, rend_p, step_p)]

# --- Configuration region ----------------------------------------------------
# One equivalent-cost curve per function name; only the exponential one is built here.
nm_exponential_function = 'ExponentialFunction'
_ff = FunctionFactory.FunctionFactory()
_exp = _ff.get_function(nm_exponential_function)
_ecc = {
    nm_exponential_function: EquivalentCostCurve.EquivalentCostCurve(
        _exp,
        nm_exponential_function,
        rini_p=rini_p,
        rend_p=rend_p,
        step_p=step_p,
    ),
}

# 1. Teste Sanidade

## 1.1. Exponential Utility Function

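$$
V(s) = \frac{e^{\lambda c} \times p}{(1 - e^{\lambda c} \times (1-p))}
$$

onde $c$ é o custo da ação (`c = C[0]`), $p$ é a probabilidade definida nos parâmetros e $\lambda$ é o parâmetro da utilidade exponencial (`vl_lambda`). A expressão é o ponto fixo de $V(s) = e^{\lambda c}\,[\,p + (1-p)\,V(s)\,]$ e só está definida quando $e^{\lambda c}(1-p) \neq 1$.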

In [3]:
# Call nbe.run_driving_license with the ExponentialUtility_RSVI model once per
# lambda and store each returned object under its lambda value.
range_lambda = [-0.9, -0.5, -0.1, 0.1, 0.5]
EUF_RSVI = {}

for vl_lambda in range_lambda:
    # '\r' keeps the progress message on a single, overwritten line.
    print(f'Processando: Lambda [{vl_lambda}]', end='\r')
    EUF_RSVI[vl_lambda] = nbe.run_driving_license(
        s_mdp, T, C, actions,
        model=ExponentialUtility_RSVI,
        model_name='EUF_RSVI',
        vl_lambda=vl_lambda,
        epsilon=epsilon,
        _log=True,
    )

display(EUF_RSVI.keys())

# Tabulate the results (one row per lambda in the recorded output).
df_EUF_RSVI = nbm.build_dataframe_driver_license(EUF_RSVI)
display(df_EUF_RSVI)

Número de Iterações: 11......
Número de Iterações: 13......
Número de Iterações: 16......
Número de Iterações: 21......
Número de Iterações: 63......


dict_keys([-0.9, -0.5, -0.1, 0.1, 0.5])

Unnamed: 0,0,sG
Policy -0.9,0,0
Policy -0.5,0,0
Policy -0.1,0,0
Policy 0.1,0,0
Policy 0.5,0,0


In [4]:
# Compare, for a single lambda, the value returned by the configuration-region
# function with the last value stored by the RSVI run.
vl_lambda = 0.1

# NOTE(review): _policy_value[0][0] is presumably the value history for
# state 0 / action 0 -- confirm against the model implementation.
policy_value_algorithm = EUF_RSVI[vl_lambda]._policy_value[0][0]
policy_value_configuration_region = _exp.get_value_function(vl_lambda, p, c)

policy_value_algorithm_last = policy_value_algorithm[-1]

print(f'Policy Value w/ Configuration Region Method: {policy_value_configuration_region}')
print(f'Policy Value w/ Algorithm: {policy_value_algorithm_last}')

print(f'Diferença: {policy_value_algorithm_last - policy_value_configuration_region}')

Policy Value w/ Configuration Region Method: 1.2350637014377652
Policy Value w/ Algorithm: 1.235062786115401
Diferença: -9.153223641256147e-07


# 2. Teste Sanidade - CVaR

In [45]:
# Run the CVAR_PECVaR model on the same MDP.
# NOTE(review): discount_factor is hard-coded to 0.99 here, while the
# parameters cell defines discount_factor = 0.999 -- confirm this is intended.
_cvar_test = nbe.run_driving_license(
    s_mdp, T, C, actions,
    model=CVAR_PECVaR,
    model_name='CVAR_PECVaR',
    alpha=0.99,
    discount_factor=0.99,
    max_iter=100,
    _log=True,
)

Número de Iterações: 1......


In [46]:
# Show the number of actions set in the parameters cell.
actions

1

In [47]:
# Inspect the CVaR0 attribute of the CVaR run (a dict keyed by 0 and 'sG' in the recorded output).
_cvar_test.CVaR0

{0: 63.39676587267702, 'sG': 62.762798213950255}

In [48]:
# Inspect the V attribute of the CVaR run (presumably the per-state value function -- confirm).
_cvar_test.V

{0: 1.9801980198019802, 'sG': 0.0}

In [49]:
# Inspect the run's iteration counter; the recorded value (100) equals the max_iter passed to the run.
_cvar_test._iteration

100

In [50]:
# Inspect the PI attribute of the CVaR run (presumably the policy, state -> action -- confirm).
_cvar_test.PI

{0: 0, 'sG': 0}