In [1]:
# Bibliotecas
import sys, os, pandas as pd, numpy as np
from os.path import exists
sys.path.append('../')

# Ambiente
from env.DrivingLicense import DrivingLicense

# Modelos
from models.Neutral_VI import Neutral_VI
from models.Neutral_PI import Neutral_PI

from models.ExponentialUtility_RSPI import ExponentialUtility_RSPI
from models.ExponentialUtility_RSVI import ExponentialUtility_RSVI

from models.PieceLinear_RSPI import PieceLinear_RSPI
from models.PieceLinear_RSVI import PieceLinear_RSVI

# Utils
import rl_utils.NBPlotting as nbp
import rl_utils.NBManipulation as nbm
import rl_utils.NBEvaluations as nbe

%load_ext autoreload
%autoreload 2

# Driving License

This scenario describes a candidate who wants to obtain his 
driving license and has two choices: take lessons or take the 
practical exam. The more lessons he takes, the greater 
his chance of passing the practical exam. The candidate 
wants to minimize the cost of obtaining the driving license. The 
question for this problem is: how many hours of lessons should he 
take before taking the practical exam? 

The candidate 
pays a cost of 2 for each practical exam, and each hour of 
lessons costs 1. The candidate can take at most 4 hours of 
lessons before each practical exam and can accumulate 
at most 10 hours of experience. 

The chance of passing 
the practical exam depends on the previously 
accumulated experience ($x$) and the number of lessons 
taken now ($y$). The probability of passing 
the practical exam is $p(x, y) = 0.08x + 0.04y$.


To model this scenario, we use a GD-RSMDP with 11 states 
and 5 actions. The states {0, 1, ..., 10} record the 
number of hours accumulated before the current lessons, and an additional 
goal state sG represents that the candidate has been approved. 
The actions {0, 1, ..., 4} give the number of lessons to take 
before each practical exam. 

In [2]:
# Environment configuration
states = 11
actions = 5
# Passed as `epsilon` to every solver run in the cells below.
epsilon = 0.0001

dl = DrivingLicense(states, actions)
T = dl.build_transition_probabilities()
# Cost per action (action = number of lessons before the exam): 2 + action.
C = {0: 2, 1: 3, 2: 4, 3: 5, 4: 6}
# Alternative cost table with every cost doubled:
# C = {0: 4, 1: 6, 2: 8, 3: 10, 4: 12}

# When True, the parameter sweeps below use their short lists.
config_reduzida = False

# Processing switches: each flag guards one of the solver cells below.
run_neutral_vi = run_neutral_pi = False
run_EUF_RSVI = run_EUF_RSPI = run_PWL_RSVI = True

# Model configuration (only passed to the Neutral_VI / Neutral_PI runs below).
discount_factor = 0.999

# Value Iteration

In [3]:
if run_neutral_vi:
    # Solve the problem with the risk-neutral value-iteration model.
    rdl = nbe.run_driving_license(
        dl, T, C, actions, Neutral_VI, 'Neutral-VI',
        discount_factor=discount_factor,
        epsilon=epsilon,
        _log=False,
    )
    # Show the result's `V` attribute, then its `PI` attribute.
    for result_attr in ('V', 'PI'):
        display(getattr(rdl, result_attr))

# Policy Iteration

In [4]:
if run_neutral_pi:
    # Solve the problem with the risk-neutral policy-iteration model.
    rdl = nbe.run_driving_license(
        dl, T, C, actions, Neutral_PI, 'Neutral-PI',
        discount_factor=discount_factor,
        epsilon=epsilon,
        _log=False,
    )
    # Show the result's `V` attribute, then its `PI` attribute.
    for result_attr in ('V', 'PI'):
        display(getattr(rdl, result_attr))

# Value Iteration - Exponential Utility Function for RS-MDP

In [5]:
if run_EUF_RSVI:
    # Lambda values to sweep; the short list is used when `config_reduzida` is set.
    # NOTE: `range_lambda` is also read by the policy-evaluation cell further down.
    range_lambda = (
        [-0.2, 0.5]
        if config_reduzida
        else [-0.9, -0.8, -0.7, -0.6, -0.5, -0.4, -0.3, -0.2, -0.15, -0.1, -0.05,
              0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55,
              0.6, 0.65, 0.7, 0.8, 0.9, 1, 2]
    )

    # One ExponentialUtility_RSVI run per lambda, keyed by the lambda value.
    EUF_RSVI = {}
    for vl_lambda in range_lambda:
        # '\r' keeps the progress message on a single, overwritten line.
        print(f'Processando: Lambda [{vl_lambda}]', end='\r')
        EUF_RSVI[vl_lambda] = nbe.run_driving_license(
            dl, T, C, actions,
            model=ExponentialUtility_RSVI,
            model_name='EUF_RSVI',
            vl_lambda=vl_lambda,
            epsilon=epsilon,
            _log=False,
        )

    display(EUF_RSVI.keys())

    # Tabulate the results (one "Policy <lambda>" row per run in the saved output).
    df_EUF_RSVI = nbm.build_dataframe_driver_license(EUF_RSVI)
    display(df_EUF_RSVI)

Processando: Lambda [2]9]]]

  bellman = self.EF._utility(C, self._lambda) * sum(TV)


dict_keys([-0.9, -0.8, -0.7, -0.6, -0.5, -0.4, -0.3, -0.2, -0.15, -0.1, -0.05, 0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.8, 0.9, 1, 2])

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,sG
Policy -0.9,1,0,0,0,0,0,0,0,0,0,0,0
Policy -0.8,1,0,0,0,0,0,0,0,0,0,0,0
Policy -0.7,1,0,0,0,0,0,0,0,0,0,0,0
Policy -0.6,2,0,0,0,0,0,0,0,0,0,0,0
Policy -0.5,2,0,0,0,0,0,0,0,0,0,0,0
Policy -0.4,2,1,0,0,0,0,0,0,0,0,0,0
Policy -0.3,3,1,0,0,0,0,0,0,0,0,0,0
Policy -0.2,3,2,1,0,0,0,0,0,0,0,0,0
Policy -0.15,4,2,1,0,0,0,0,0,0,0,0,0
Policy -0.1,4,3,2,1,0,0,0,0,0,0,0,0


# Policy Iteration - Exponential Utility Function for RS-MDP

In [6]:
if run_EUF_RSPI:
    # Forwarded unchanged as the `explogsum` keyword of every run below.
    explogsum = False

    # Lambda values to sweep; the short list is used when `config_reduzida` is set.
    # NOTE: this reassigns `range_lambda`, which the policy-evaluation cell
    # further down reads.
    range_lambda = (
        [-0.2, -0.1, 0.2]
        if config_reduzida
        else [-0.9, -0.8, -0.7, -0.6, -0.5, -0.4, -0.3, -0.2, -0.15, -0.1, -0.05,
              0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55,
              0.6, 0.65, 0.7, 0.8, 0.9, 1, 2]
    )

    # One ExponentialUtility_RSPI run per lambda, keyed by the lambda value.
    EUF_RSPI = {}
    for vl_lambda in range_lambda:
        # '\r' keeps the progress message on a single, overwritten line.
        print(f'Processando: Lambda [{vl_lambda}]', end='\r')
        EUF_RSPI[vl_lambda] = nbe.run_driving_license(
            dl, T, C, actions,
            model=ExponentialUtility_RSPI,
            model_name='EUF_RSPI',
            vl_lambda=vl_lambda,
            epsilon=epsilon,
            explogsum=explogsum,
            _log=False,
        )

    display(EUF_RSPI.keys())

    # Tabulate the results (one "Policy <lambda>" row per run in the saved output).
    df_EUF_RSPI = nbm.build_dataframe_driver_license(EUF_RSPI)
    display(df_EUF_RSPI)

Processando: Lambda [2]9]]]

dict_keys([-0.9, -0.8, -0.7, -0.6, -0.5, -0.4, -0.3, -0.2, -0.15, -0.1, -0.05, 0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.8, 0.9, 1, 2])

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,sG
Policy -0.9,1,0,0,0,0,0,0,0,0,0,0,0
Policy -0.8,1,0,0,0,0,0,0,0,0,0,0,0
Policy -0.7,1,0,0,0,0,0,0,0,0,0,0,0
Policy -0.6,2,0,0,0,0,0,0,0,0,0,0,0
Policy -0.5,2,0,0,0,0,0,0,0,0,0,0,0
Policy -0.4,2,1,0,0,0,0,0,0,0,0,0,0
Policy -0.3,3,1,0,0,0,0,0,0,0,0,0,0
Policy -0.2,3,2,1,0,0,0,0,0,0,0,0,0
Policy -0.15,4,2,1,0,0,0,0,0,0,0,0,0
Policy -0.1,4,3,2,1,0,0,0,0,0,0,0,0


# PieceLinear - RSVI (Risk Sensitive Value Iteration)

In [7]:
if run_PWL_RSVI:
    # Values of k to sweep; the short list is used when `config_reduzida` is set.
    # NOTE: `range_k`, `gamma` and `range_alpha` are also read by the
    # "Piece-wise Linear" policy-evaluation cell further down.
    if config_reduzida:
        range_k = [-0.9, 0.9]
    else:
        range_k = [-0.9, -0.875, -0.85, -0.825, -0.8, -0.7, -0.6, -0.5, -0.4, -0.35, -0.3, -0.2, -0.1, 0.0,
                   0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.925, 0.95, 0.975, 0.99]

    gamma = 1
    # One alpha per k: alpha = 1 / (1 + |k|), rounded to two decimals.
    range_alpha = [np.round(1/(1+abs(k)), 2) for k in range_k]

    # One PieceLinear_RSVI run per (k, alpha) pair, keyed by (k, gamma, alpha).
    PWL_RSVI = {}
    for k_value, alpha_value in zip(range_k, range_alpha):
        # '\r' keeps the progress message on a single, overwritten line.
        print(f'Processando: K [{k_value}] Alpha [{alpha_value}]', end='\r')
        PWL_RSVI[(k_value, gamma, alpha_value)] = \
            nbe.run_driving_license(dl, T, C, actions, model=PieceLinear_RSVI, model_name='PL_RSVI',
            gamma=gamma, k=k_value, alpha=alpha_value, epsilon=epsilon, _log=False)

    display(PWL_RSVI.keys())

    # Tabulate the results (one "Policy (k, gamma, alpha)" row per run in the
    # saved output).
    df_PWL_RSVI = nbm.build_dataframe_driver_license(PWL_RSVI)
    display(df_PWL_RSVI)

Processando: K [0.99] Alpha [0.5]1]]

dict_keys([(-0.9, 1, 0.53), (-0.875, 1, 0.53), (-0.85, 1, 0.54), (-0.825, 1, 0.55), (-0.8, 1, 0.56), (-0.7, 1, 0.59), (-0.6, 1, 0.62), (-0.5, 1, 0.67), (-0.4, 1, 0.71), (-0.35, 1, 0.74), (-0.3, 1, 0.77), (-0.2, 1, 0.83), (-0.1, 1, 0.91), (0.0, 1, 1.0), (0.1, 1, 0.91), (0.2, 1, 0.83), (0.3, 1, 0.77), (0.4, 1, 0.71), (0.5, 1, 0.67), (0.6, 1, 0.62), (0.7, 1, 0.59), (0.8, 1, 0.56), (0.9, 1, 0.53), (0.925, 1, 0.52), (0.95, 1, 0.51), (0.975, 1, 0.51), (0.99, 1, 0.5)])

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,sG
"Policy (-0.9, 1, 0.53)",1,0,0,0,0,0,0,0,0,0,0,0
"Policy (-0.875, 1, 0.53)",1,0,0,0,0,0,0,0,0,0,0,0
"Policy (-0.85, 1, 0.54)",2,0,0,0,0,0,0,0,0,0,0,0
"Policy (-0.825, 1, 0.55)",2,0,0,0,0,0,0,0,0,0,0,0
"Policy (-0.8, 1, 0.56)",2,1,0,0,0,0,0,0,0,0,0,0
"Policy (-0.7, 1, 0.59)",2,1,0,0,0,0,0,0,0,0,0,0
"Policy (-0.6, 1, 0.62)",3,1,0,0,0,0,0,0,0,0,0,0
"Policy (-0.5, 1, 0.67)",3,2,1,0,0,0,0,0,0,0,0,0
"Policy (-0.4, 1, 0.71)",3,2,1,0,0,0,0,0,0,0,0,0
"Policy (-0.35, 1, 0.74)",4,2,1,0,0,0,0,0,0,0,0,0


# Comparando Políticas

![image.png](attachment:image.png)

In [8]:
# Hand-written candidate policies: for each policy, a mapping
# state -> action (number of lessons to take), including the goal state 'sG'.
policies = {
    'Policy 1': {0: 4, 1: 4, 2: 3, 3: 2, 4: 1, 5: 0, 6: 0, 7: 0, 8: 0, 9: 0, 10: 0, 'sG': 0},
    'Policy 2': {0: 4, 1: 4, 2: 4, 3: 3, 4: 2, 5: 1, 6: 0, 7: 0, 8: 0, 9: 0, 10: 0, 'sG': 0},
    'Policy 3': {0: 4, 1: 4, 2: 4, 3: 4, 4: 3, 5: 2, 6: 1, 7: 0, 8: 0, 9: 0, 10: 0, 'sG': 0},
    'Policy 4': {0: 4, 1: 4, 2: 4, 3: 4, 4: 4, 5: 3, 6: 2, 7: 1, 8: 0, 9: 0, 10: 0, 'sG': 0},
    'Policy 5': {0: 4, 1: 4, 2: 4, 3: 4, 4: 4, 5: 4, 6: 3, 7: 2, 8: 1, 9: 0, 10: 0, 'sG': 0},
    'Policy 6': {0: 4, 1: 4, 2: 4, 3: 4, 4: 4, 5: 4, 6: 4, 7: 3, 8: 2, 9: 1, 10: 0, 'sG': 0},
    'Policy 7': {0: 4, 1: 4, 2: 4, 3: 4, 4: 4, 5: 4, 6: 4, 7: 4, 8: 3, 9: 2, 10: 1, 'sG': 0},
    'Policy 8': {0: 4, 1: 4, 2: 4, 3: 4, 4: 4, 5: 4, 6: 4, 7: 4, 8: 4, 9: 3, 10: 0, 'sG': 0},
    'Policy 9': {0: 4, 1: 4, 2: 4, 3: 4, 4: 4, 5: 4, 6: 4, 7: 4, 8: 4, 9: 4, 10: 1, 'sG': 0},
    'Policy 10': {0: 4, 1: 4, 2: 4, 3: 4, 4: 4, 5: 4, 6: 4, 7: 4, 8: 4, 9: 4, 10: 2, 'sG': 0},
    'Policy 11': {0: 4, 1: 4, 2: 4, 3: 4, 4: 4, 5: 4, 6: 4, 7: 4, 8: 4, 9: 4, 10: 3, 'sG': 0},
    'Policy 12': {0: 4, 1: 4, 2: 4, 3: 4, 4: 4, 5: 4, 6: 4, 7: 4, 8: 4, 9: 4, 10: 4, 'sG': 0},
    'Policy 13': {0: 4, 1: 4, 2: 4, 3: 4, 4: 4, 5: 4, 6: 4, 7: 3, 8: 2, 9: 1, 10: 4, 'sG': 0}
}

# One row per policy, one column per state. The frame is built in a single
# call: `DataFrame.append` was removed in pandas 2.0, and growing a frame
# row by row copies it on every iteration.
df_policies = pd.DataFrame.from_dict(policies, orient='index')

display(df_policies)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,sG
Policy 1,4,4,3,2,1,0,0,0,0,0,0,0
Policy 2,4,4,4,3,2,1,0,0,0,0,0,0
Policy 3,4,4,4,4,3,2,1,0,0,0,0,0
Policy 4,4,4,4,4,4,3,2,1,0,0,0,0
Policy 5,4,4,4,4,4,4,3,2,1,0,0,0
Policy 6,4,4,4,4,4,4,4,3,2,1,0,0
Policy 7,4,4,4,4,4,4,4,4,3,2,1,0
Policy 8,4,4,4,4,4,4,4,4,4,3,0,0
Policy 9,4,4,4,4,4,4,4,4,4,4,1,0
Policy 10,4,4,4,4,4,4,4,4,4,4,2,0


## Exponential Utility

In [10]:
# Evaluate every hand-written policy for each value in `range_lambda`.
# `range_lambda` is whatever the last executed exponential-utility sweep cell
# assigned, so this cell needs one of those cells to have run first.
# NOTE(review): the saved output of this cell is
#   TypeError: get_PEXP() got an unexpected keyword argument 'model_name'
# so the keyword arguments forwarded here do not match what
# nbe.run_policies_evaluation passes on to get_PEXP; the helper is not
# visible in this notebook — confirm against rl_utils.NBEvaluations.
df_policies_EUF_RSVI = nbe.run_policies_evaluation(
    nbe.run_driving_license,
    policies,
    range_lambda,
    normalize=True,
    model=ExponentialUtility_RSVI,
    model_name='EUF_RSVI',
    epsilon=epsilon,
    _log=False,
)

display(df_policies_EUF_RSVI)
nbp.plot_policy_value(policies, df_policies_EUF_RSVI, 'Lambda')

Policy 1 | -0.9...

TypeError: get_PEXP() got an unexpected keyword argument 'model_name'

## Piece-wise Linear

In [None]:
# Evaluate every hand-written policy for each k in `range_k`.
# Requires `range_k`, `gamma` and `range_alpha` from the PieceLinear RSVI
# sweep cell (they only exist when `run_PWL_RSVI` is True).
# The helper is called through the `nbe` module and with the solver runner as
# first argument, matching the executed exponential-utility call above; the
# bare name `run_policies_evaluation` is never defined or imported in this
# notebook.
# NOTE(review): the remaining keyword arguments are unchanged and unverified —
# confirm them against rl_utils.NBEvaluations.run_policies_evaluation.
df_policies_PL_RSVI = nbe.run_policies_evaluation(nbe.run_driving_license, policies, range_k, normalize=False, model=PieceLinear_RSVI, model_name='PL_RSVI',
            gamma=gamma, range_alpha=range_alpha, epsilon=epsilon, _log=False)

display(df_policies_PL_RSVI)
nbp.plot_policy_value(policies, df_policies_PL_RSVI, 'K')

In [None]:
# Re-draw the piecewise-linear policy-value plot with two extra positional
# arguments (30, 300).
# NOTE(review): their meaning is defined by nbp.plot_policy_value, which is
# not visible here — presumably plot limits; confirm.
nbp.plot_policy_value(policies, df_policies_PL_RSVI, 'K', 30, 300)

In [None]:
# Re-draw the piecewise-linear policy-value plot with two extra positional
# arguments (30, 150).
# NOTE(review): their meaning is defined by nbp.plot_policy_value, which is
# not visible here — presumably plot limits; confirm.
nbp.plot_policy_value(policies, df_policies_PL_RSVI, 'K', 30, 150)

# Comparando Valores K e Lambda

## Exponential versus Linear

In [None]:
# Setup for the exponential-versus-linear comparison.
# Labels for the two utility families and for their parameters.
met1, met2 = 'EXP', 'LIN'
model1, model2 = 'Lambda', 'K'

# Solver results compared against each other (both dicts come from the sweep
# cells above, so those cells must have run).
# NOTE(review): policies1 holds the policy-iteration results (EUF_RSPI) while
# `model` is the value-iteration class — presumably intentional; confirm.
policies1 = EUF_RSPI
policies2 = PWL_RSVI
model = ExponentialUtility_RSVI
f = nbe.get_PEXP

In [None]:
# Run the comparison with the settings chosen in the setup cell above
# (f, policies1, policies2, model, met1, met2); the helper returns four values.
(
    P_VALUE_EXP,
    P_VALUE_EXP_LIN,
    P_VALUE_EXP_LIN_MAX,
    K_MAX,
) = nbe.comparing_policy_value(
    dl, T, C, epsilon, f, policies1, policies2, model, met1, met2
)

In [None]:
# Turn the comparison results into dataframes for the plotting cell below.
(
    df_VALUE_EXP,
    df_VALUE_EXP_LIN,
    df_K_MAX,
    df_EXP_LIN,
) = nbm.build_dataframes(
    P_VALUE_EXP, P_VALUE_EXP_LIN_MAX, K_MAX, met1, met2, model1, model2
)
df_VALUE_EXP_ALL_K = nbm.build_dataframe_all_point(
    P_VALUE_EXP_LIN, model1, model2, met1
)

In [None]:
# Plots for the exponential-versus-linear comparison, built from the
# dataframes of the previous cell.
# NOTE: plt1..plt4 are reassigned by the "Linear versus Exponential" plotting
# cell further down.
plt1 = nbp.g_comparing_values(df_EXP_LIN, model1, met1, met2)
plt2 = nbp.g_comparing_dif_values(df_EXP_LIN, model1, met1, met2)
# The third argument is the column/metric name "<model2>_MAX" ("K_MAX" here).
plt3 = nbp.g_comparing_max_metric(df_EXP_LIN, model1, f'{model2}_MAX')
plt4 = nbp.g_all_points_max_metric(df_VALUE_EXP_ALL_K, model1, model2)

## Linear versus Exponential

In [None]:
# Setup for the linear-versus-exponential comparison (roles swapped with
# respect to the previous section). This overwrites met1/met2, model1/model2,
# policies1/policies2, model and f.
met1, met2 = 'LIN', 'EXP'
model1, model2 = 'K', 'Lambda'

# Solver results compared against each other (both dicts come from the sweep
# cells above, so those cells must have run).
policies1 = PWL_RSVI
policies2 = EUF_RSPI
model = PieceLinear_RSVI
f = nbe.get_PLIN

In [None]:
# Run the comparison with the settings chosen in the setup cell above
# (f, policies1, policies2, model, met1, met2); the helper returns four values.
(
    P_VALUE_LIN,
    P_VALUE_LIN_EXP,
    P_VALUE_LIN_EXP_MAX,
    LAMBDA_MAX,
) = nbe.comparing_policy_value(
    dl, T, C, epsilon, f, policies1, policies2, model, met1, met2
)

In [None]:
# Turn the comparison results into dataframes for the plotting cell below.
(
    df_VALUE_LIN,
    df_VALUE_LIN_EXP,
    df_LAMBDA_MAX,
    df_LIN_EXP,
) = nbm.build_dataframes(
    P_VALUE_LIN, P_VALUE_LIN_EXP_MAX, LAMBDA_MAX, met1, met2, model1, model2
)
df_VALUE_LIN_ALL_LAMBDA = nbm.build_dataframe_all_point(
    P_VALUE_LIN_EXP, model1, model2, met1
)

In [None]:
# Plots for the linear-versus-exponential comparison, built from the
# dataframes of the previous cell.
# NOTE: this reassigns plt1..plt4 from the "Exponential versus Linear" section.
plt1 = nbp.g_comparing_values(df_LIN_EXP, model1, met1, met2)
plt2 = nbp.g_comparing_dif_values(df_LIN_EXP, model1, met1, met2)
# The third argument is the column/metric name "<model2>_MAX" ("Lambda_MAX" here).
plt3 = nbp.g_comparing_max_metric(df_LIN_EXP, model1, f'{model2}_MAX')
plt4 = nbp.g_all_points_max_metric(df_VALUE_LIN_ALL_LAMBDA, model1, model2)

## Comparando MAX Lambda versus MAX K

In [None]:
# Compare the results of both sections: needs df_EXP_LIN (from "Exponential
# versus Linear") and df_LIN_EXP (from "Linear versus Exponential"), so both
# sections must have run.
g5 = nbp.g_comparing_max_metrics(df_EXP_LIN, df_LIN_EXP, 'Lambda', 'K')
# Show both dataframes via the nbm helper.
nbm.multi_column_df_display([df_EXP_LIN, df_LIN_EXP])