In [1]:
import pandas as pd
import numpy as np
import os

# Atravessando o Rio

In [2]:
class Rio:
    """Value iteration over a small grid MDP whose states are labelled 'A'..'J'.

    Transition probabilities are read from ``parameters/rio_transicoes.xlsx``
    (one 10x10 matrix per action); rewards, the initial value estimate, the
    discount factor and the stopping tolerance are supplied by the caller.

    Attributes:
        mapa: dict of state label -> cell label (see ``define_mapa``). Its key
            order defines the row/column order of every transition matrix.
        prob_t: raw spreadsheet contents as a DataFrame.
        recompensa: dict of state label -> reward.
        acoes: list of action labels iterated by ``step``.
        T_N, T_S, T_L, T_O: per-action transition DataFrames, indexed
            [current state, next state].
        V0: private copy of the initial value estimate (used by ``reset``).
        V: current value estimate, dict of state label -> value.
        discount_factor: discount factor applied to the expected next value.
        epsilon: tolerance used by ``run_converge``.
    """

    # (row window, column window) of each action's 10x10 matrix inside the
    # spreadsheet, as positional ``iloc`` slices.
    _T_SLICES = {
        'N': (slice(5, 15), slice(2, 12)),
        'S': (slice(17, 27), slice(2, 12)),
        'L': (slice(5, 15), slice(14, 24)),
        'O': (slice(17, 27), slice(14, 24)),
    }

    def __init__(self, V0, R, disc_factor, epsilon, LOG=False):
        """Load the transition matrices and store the problem parameters.

        Args:
            V0: dict of state label -> initial value.
            R: dict of state label -> reward.
            disc_factor: discount factor used in the Bellman backup.
            epsilon: tolerance; iteration stops once the max-norm change is
                no longer greater than ``2 * epsilon``.
            LOG: when True, ``step`` and ``run_converge`` print debug traces.
        """
        print('Inicializando Rio!')
        self.mapa = self.define_mapa()

        self.prob_t = pd.read_excel(os.path.join('parameters',
                                                 'rio_transicoes.xlsx'))
        self.recompensa = R

        self.acoes = ['N', 'S', 'L', 'O']

        self.T_N = self.define_prob_transicao('N')
        self.T_S = self.define_prob_transicao('S')
        self.T_L = self.define_prob_transicao('L')
        self.T_O = self.define_prob_transicao('O')

        # Copy so that later changes to the caller's dict cannot alter the
        # reset baseline or the running estimate.
        self.V0 = dict(V0)
        self.V = dict(V0)
        self.discount_factor = disc_factor
        self.epsilon = epsilon

        self._log = LOG

    def __repr__(self):
        """Render the current values as ``state[value] | `` cells.

        Values are rounded to 4 decimals and a line break is inserted after
        state 'E'.
        """
        res = ''
        for estado, valor in self.V.items():
            res += f'{estado}[{np.round(valor, 4)}] | '
            if estado == 'E':
                res += '\n'

        return res

    def reset(self):
        """Restore the value estimate to a fresh copy of the initial values."""
        self.V = dict(self.V0)

    def define_mapa(self):
        """Return the state -> cell-label mapping.

        Only the keys (and their order) are used by this class. The labels
        presumably mean: 'X' plain cell, 's0' start, 'RIO' river, 'G' goal --
        TODO confirm against the problem statement.
        """
        return {'A': 'X', 'B': 'X', 'C': 'X', 'D': 'X', 'E': 'X',
                'F': 's0', 'G': 'RIO', 'H': 'RIO', 'I': 'RIO', 'J': 'G'}

    def get_T(self, a, s):
        """Return the transition row for taking action ``a`` in state ``s``.

        Returns:
            numpy array of next-state probabilities, ordered like
            ``self.mapa`` keys.

        Raises:
            ValueError: if ``a`` is not one of 'N', 'S', 'L', 'O'.
        """
        if a not in self._T_SLICES:
            raise ValueError(
                f'Unknown action {a!r}; expected one of {list(self._T_SLICES)}')
        return getattr(self, f'T_{a}').loc[str(s), :].values

    def define_prob_transicao(self, a):
        """Extract the transition matrix of action ``a`` from the spreadsheet.

        Returns:
            DataFrame whose index (current state) and columns (next state)
            are both labelled with the keys of ``self.mapa``.

        Raises:
            ValueError: if ``a`` is not one of 'N', 'S', 'L', 'O'.
        """
        try:
            linhas, colunas = self._T_SLICES[a]
        except KeyError:
            raise ValueError(
                f'Unknown action {a!r}; expected one of {list(self._T_SLICES)}'
            ) from None

        # Copy the window so relabelling never touches ``self.prob_t``.
        T = self.prob_t.iloc[linhas, colunas].copy()
        T.index = list(self.mapa.keys())
        T.columns = list(self.mapa.keys())

        return T

    def get_recompensa(self, s, a):
        """Return the reward of state ``s``; the action ``a`` is ignored."""
        return self.recompensa[s]

    def run_converge(self, max_iter=None):
        """Repeat ``step`` until the value estimate stops changing.

        Iteration stops after the first sweep whose max-norm change
        ``max |V_k+1 - V_k|`` is not greater than ``2 * self.epsilon``.

        Args:
            max_iter: optional cap on the number of sweeps. ``None`` (the
                default) means no cap, which can loop forever if the values
                never settle.

        Returns:
            Number of sweeps performed (always at least 1).
        """
        qtd_iteracoes = 0

        while True:
            V_k = list(self.V.values())
            self.step()
            V_k1 = list(self.V.values())

            delta = np.max(np.abs(np.array(V_k1) - np.array(V_k)))

            if self._log:
                print('V_k:', V_k)
                print('V_k+1:', V_k1)
                print('|| V_k+1 - V_k || inf:', delta)
            qtd_iteracoes += 1

            # Written as "not (delta > ...)" so a NaN delta also terminates,
            # exactly as the original loop condition did.
            if not (delta > 2 * self.epsilon):
                break
            if max_iter is not None and qtd_iteracoes >= max_iter:
                break

        return qtd_iteracoes

    def step(self):
        """Perform one synchronous Bellman backup over every state.

        For each state S the new value is
        ``max_a [ R(S) + discount_factor * sum_s' T_a(S, s') * V(s') ]``,
        computed from the previous estimate and then swapped in as ``self.V``.

        Returns:
            True, always.
        """
        # Transition rows are ordered like ``self.mapa``; build the value
        # vector in that same order rather than trusting V's insertion order.
        valores = [self.V[estado] for estado in self.mapa]

        V_t = {}
        for S in self.V.keys():
            bellman_res = []
            for a in self.acoes:
                recompensa = self.get_recompensa(S, a)
                T = self.get_T(a, S)
                esperado = (T * valores).sum()

                if self._log:
                    print(f'Debug [{S}]/[{a}]:')
                    print('R:', recompensa)
                    print('DF:', self.discount_factor)
                    print('T:', T)
                    print('V:', valores)
                    print('Sum:', esperado)
                    print('---')

                bellman_res.append(recompensa + self.discount_factor * esperado)

            V_t[S] = np.max(bellman_res)

        self.V = V_t
        return True

$$\gamma = 1$$

In [3]:
# Initial value estimate: zero for every state 'A'..'J'.
V0 = {estado: 0 for estado in 'ABCDEFGHIJ'}
# Reward of -1 in every state except 'J', which has reward 0.
R = {estado: (0 if estado == 'J' else -1) for estado in 'ABCDEFGHIJ'}
# Undiscounted case (gamma = 1).
disc_factor = 1
epsilon = 0.001

In [4]:
# Build the solver for the undiscounted problem defined in the previous cell.
r = Rio(V0=V0, R=R, disc_factor=disc_factor, epsilon=epsilon)

Inicializando Rio!


In [5]:
# Run a single Bellman backup from the initial values, then display the
# resulting estimate (rendered by Rio.__repr__).
r.step()
r

A[-1] | B[-1] | C[-1] | D[-1] | E[-1] | 
F[-1] | G[-1.0] | H[-1.0] | I[-1.0] | J[0] | 

In [6]:
# Discard the single step taken above so the iteration count starts from V0.
r.reset()
# Iterate until the max-norm change is no longer greater than 2 * epsilon.
iteracoes = r.run_converge()
print('Quantidade de iterações realizadas:', iteracoes)
# Display the converged state values.
r

Quantidade de iterações realizadas: 9


A[-5.0] | B[-4.0] | C[-3.0] | D[-2.0] | E[-1.0] | 
F[-6.0] | G[-6.0] | H[-5.5] | I[-4.0] | J[0.0] | 

$$\gamma \neq 1$$

In [7]:
# Same rewards as the undiscounted case (-1 everywhere, 0 in 'J'), now with
# a discount factor of 0.9.
V0 = dict.fromkeys('ABCDEFGHIJ', 0)
R = {**dict.fromkeys('ABCDEFGHI', -1), 'J': 0}
disc_factor = 0.9
epsilon = 0.001

r1 = Rio(V0=V0, R=R, disc_factor=disc_factor, epsilon=epsilon)

Inicializando Rio!


In [8]:
# Run a single Bellman backup from the initial values, then display the
# resulting estimate (rendered by Rio.__repr__).
r1.step()
r1

A[-1.0] | B[-1.0] | C[-1.0] | D[-1.0] | E[-1.0] | 
F[-1.0] | G[-1.0] | H[-1.0] | I[-1.0] | J[0.0] | 

In [9]:
# Discard the single step taken above so the iteration count starts from V0.
r1.reset()
# Iterate until the max-norm change is no longer greater than 2 * epsilon.
iteracoes = r1.run_converge()
print('Quantidade de iterações realizadas:', iteracoes)
# Display the converged state values.
r1

Quantidade de iterações realizadas: 9


A[-4.0951] | B[-3.439] | C[-2.71] | D[-1.9] | E[-1.0] | 
F[-4.6856] | G[-4.6561] | H[-4.328] | I[-3.1085] | J[0.0] | 

---

In [10]:
# Alternative reward design: 0 everywhere and +1 only in 'J', with a
# discount factor of 0.9.
V0 = dict.fromkeys('ABCDEFGHIJ', 0)
R = {**dict.fromkeys('ABCDEFGHI', 0), 'J': 1}
disc_factor = 0.9
epsilon = 0.001

r2 = Rio(V0=V0, R=R, disc_factor=disc_factor, epsilon=epsilon)

Inicializando Rio!


In [11]:
# Run a single Bellman backup from the initial values, then display the
# resulting estimate (rendered by Rio.__repr__).
r2.step()
r2

A[0.0] | B[0.0] | C[0.0] | D[0.0] | E[0.0] | 
F[0.0] | G[0.0] | H[0.0] | I[0.0] | J[1.0] | 

In [12]:
# Discard the single step taken above so the iteration count starts from V0.
r2.reset()
# Iterate until the max-norm change is no longer greater than 2 * epsilon.
iteracoes = r2.run_converge()
print('Quantidade de iterações realizadas:', iteracoes)
# Display the converged state values.
r2

Quantidade de iterações realizadas: 9


A[0.5905] | B[0.6561] | C[0.729] | D[0.81] | E[0.9] | 
F[0.5314] | G[0.5344] | H[0.5672] | I[0.6891] | J[1.0] | 

---

In [16]:
# Same +1-in-'J' reward design, now with a much smaller discount factor (0.3).
V0 = dict.fromkeys('ABCDEFGHIJ', 0)
R = {**dict.fromkeys('ABCDEFGHI', 0), 'J': 1}
disc_factor = 0.3
epsilon = 0.001

# Debug logging explicitly disabled.
r3 = Rio(V0=V0, R=R, disc_factor=disc_factor, epsilon=epsilon, LOG=False)

Inicializando Rio!


In [17]:
# Run a single Bellman backup from the initial values, then display the
# resulting estimate (rendered by Rio.__repr__).
r3.step()
r3

A[0.0] | B[0.0] | C[0.0] | D[0.0] | E[0.0] | 
F[0.0] | G[0.0] | H[0.0] | I[0.0] | J[1.0] | 

In [18]:
# Discard the single step taken above so the iteration count starts from V0.
r3.reset()
# Iterate until the max-norm change is no longer greater than 2 * epsilon.
iteracoes = r3.run_converge()
print('Quantidade de iterações realizadas:', iteracoes)
# Display the converged state values.
r3

Quantidade de iterações realizadas: 7


A[0.0024] | B[0.0081] | C[0.027] | D[0.09] | E[0.3] | 
F[0.0011] | G[0.0035] | H[0.0227] | I[0.1502] | J[1.0] | 