# Probabilidade

## Markov decision processes (MDPs)

Este notebook demonstra a implementação do algoritmo de iteração de valor aplicado a Processos de Decisão de Markov (MDPs).

In [1]:
# Importando as classes
from mdp import MDP, GridMDP, sequential_decision_environment, value_iteration

In [2]:
# Visualizando o código da classe
%psource MDP

In [3]:
# Transition matrix as a nested dictionary:
# state -> action available in that state -> next state -> probability.
t = {
    "A": {
        "X": {"A": 0.3, "B": 0.7},
        "Y": {"A": 1.0},
    },
    "B": {
        "X": {"End": 0.8, "B": 0.2},
        "Y": {"A": 1.0},
    },
    # No actions are defined for "End".
    "End": {},
}

# State in which the process starts.
init = "A"

# States listed as terminal.
terminals = ["End"]

# Reward associated with each state.
rewards = {"A": 5, "B": -10, "End": 100}

In [4]:
# CustomMDP class
class CustomMDP(MDP):
    """MDP whose transition model is supplied as a nested dictionary.

    The transition matrix maps
    ``state -> action -> next state -> probability``.
    """

    def __init__(self, transition_matrix, rewards, terminals, init, gamma=.9):
        """Build the MDP from an explicit transition matrix.

        Args:
            transition_matrix: nested dict
                ``{state: {action: {next_state: probability}}}``.
            rewards: mapping stored as ``self.reward``.
            terminals: terminal states, forwarded to ``MDP.__init__``.
            init: initial state, forwarded to ``MDP.__init__``.
            gamma: discount factor, forwarded to ``MDP.__init__``.
        """
        # Collect every action offered by at least one state. The action
        # names are the keys of each per-state dictionary; the keys of the
        # outer dictionary are state names and must not end up in the list.
        actlist = set()
        for state_actions in transition_matrix.values():
            actlist.update(state_actions)
        actlist = list(actlist)

        MDP.__init__(self, init, actlist, terminals=terminals, gamma=gamma)
        self.t = transition_matrix
        self.reward = rewards
        # Register every state that appears as a key of the matrix.
        for state in self.t:
            self.states.add(state)

    def T(self, state, action):
        """Return the possible outcomes of taking ``action`` in ``state``.

        Returns:
            A list of ``(new_state, probability)`` tuples read from the
            transition matrix.

        Raises:
            KeyError: if ``state`` or ``action`` is missing from the matrix.
        """
        # NOTE(review): value_iteration_instru in this notebook unpacks the
        # result of T as (probability, state), whereas the pairs returned here
        # are (state, probability) - confirm which order the MDP base class
        # expects before using this class with value iteration.
        return list(self.t[state][action].items())

In [5]:
# Finally, instantiate the class with the parameters of our MDP defined above.
our_mdp = CustomMDP(
    transition_matrix=t,
    rewards=rewards,
    terminals=terminals,
    init=init,
    gamma=.9,
)

In [6]:
%psource GridMDP

In [7]:
sequential_decision_environment

<mdp.GridMDP at 0x7fcd3828bf70>

In [8]:
%psource value_iteration

In [9]:
value_iteration(sequential_decision_environment)

{(0, 1): 0.3984432178350045,
 (1, 2): 0.649585681261095,
 (2, 1): 0.48644001739269643,
 (0, 0): 0.2962883154554812,
 (3, 1): -1.0,
 (2, 0): 0.3447542300124158,
 (3, 0): 0.12987274656746342,
 (0, 2): 0.5093943765842497,
 (2, 2): 0.7953620878466678,
 (1, 0): 0.25386699846479516,
 (3, 2): 1.0}

### Visualização Para a Iteração de Valor


In [10]:
def value_iteration_instru(mdp, iterations=20):
    """Run a fixed number of value-iteration sweeps and record each step.

    Args:
        mdp: object exposing ``states``, ``R(state)``, ``T(state, action)``
            (an iterable of ``(probability, next_state)`` pairs),
            ``actions(state)`` and ``gamma``.
        iterations: number of sweeps to perform.

    Returns:
        A list with one utility dictionary per sweep. Each entry is the
        utility estimate as it stood *before* that sweep's update, so the
        first entry is all zeros.
    """
    history = []
    current = dict.fromkeys(mdp.states, 0)
    reward, transition, discount = mdp.R, mdp.T, mdp.gamma
    for _ in range(iterations):
        # Freeze the previous estimate so every state in this sweep is
        # updated from the same snapshot.
        previous = dict(current)
        for state in mdp.states:
            current[state] = reward(state) + discount * max(
                sum(prob * previous[nxt] for (prob, nxt) in transition(state, action))
                for action in mdp.actions(state)
            )
        history.append(previous)
    return history

In [11]:
%matplotlib inline
import matplotlib.pyplot as plt
from collections import defaultdict
import time

def make_plot_grid_step_function(columns, row, U_over_time):
    """Build the single-argument plotting function used with ipywidgets.

    The ipywidgets interactive helper drives a function through one
    parameter, so the remaining inputs are captured in a closure here.

    Args:
        columns: number of grid columns to draw.
        row: number of grid rows to draw (the singular name is kept so that
            existing keyword callers keep working).
        U_over_time: sequence of mappings, one per iteration, keyed by
            ``(column, row)`` tuples.

    Returns:
        A function ``plot_grid_step(iteration)`` that draws the values stored
        in ``U_over_time[iteration]``.
    """
    # Bind the row count from the argument so the closure does not depend on
    # a notebook-level ``rows`` variable existing when the plot is drawn.
    n_rows = row

    def plot_grid_step(iteration):
        """Draw the values recorded at index ``iteration`` as a colored grid."""
        utilities = U_over_time[iteration]
        # Cells missing from the mapping are drawn with value 0.
        grid = [[utilities.get((x, y), 0) for x in range(columns)]
                for y in range(n_rows)]
        # Reverse so that grid row 0 ends up last in the list given to imshow.
        grid.reverse()
        image = plt.imshow(grid, cmap=plt.cm.bwr, interpolation='nearest')

        plt.axis('off')
        image.axes.get_xaxis().set_visible(False)
        image.axes.get_yaxis().set_visible(False)

        # Write each cell's value, formatted with two decimals, at its center.
        for y, grid_row in enumerate(grid):
            for x, value in enumerate(grid_row):
                image.axes.text(x, y, "{0:.2f}".format(value), va='center', ha='center')

        plt.show()

    return plot_grid_step

def make_visualize(slider):
    """Create the timer/animation callback bound to ``slider``.

    Args:
        slider: object exposing ``min``, ``max`` and a writable ``value``.

    Returns:
        A function ``visualize_callback(Visualize, time_step)`` that, when
        ``Visualize`` is exactly ``True``, moves the slider through every
        integer from ``slider.min`` to ``slider.max`` inclusive, sleeping
        ``float(time_step)`` seconds after each move.
    """

    def visualize_callback(Visualize, time_step):
        """Animate the slider when ``Visualize`` is exactly ``True``."""
        # Anything other than the literal True leaves the slider untouched.
        if Visualize is not True:
            return
        first, last = slider.min, slider.max
        for position in range(first, last + 1):
            slider.value = position
            time.sleep(float(time_step))

    return visualize_callback
    

In [12]:
# Grid dimensions passed to the plotting helper.
columns, rows = 4, 3
# Utilities recorded at each step of the instrumented value iteration.
U_over_time = value_iteration_instru(sequential_decision_environment)

In [13]:
# Bind the grid size and the recorded utilities into a one-argument callback.
plot_grid_step = make_plot_grid_step_function(
    columns=columns,
    row=rows,
    U_over_time=U_over_time,
)

### Clique no botão Visualize

In [15]:
import ipywidgets as widgets
from IPython.display import display

# Slider choosing which recorded iteration to plot. The initial value is set
# to the slider's minimum so it lies inside the [min, max] range.
iteration_slider = widgets.IntSlider(min=1, max=15, step=1, value=1)
w = widgets.interactive(plot_grid_step, iteration=iteration_slider)
display(w)

visualize_callback = make_visualize(iteration_slider)

# Toggle that triggers the animation, plus a selector for the delay (in
# seconds, given as strings) applied after each slider step.
visualize_button = widgets.ToggleButton(description="Visualize", value=False)
time_select = widgets.ToggleButtons(description='Extra Delay:', options=['0', '0.1', '0.2', '0.5', '0.7', '1.0'])
a = widgets.interactive(visualize_callback, Visualize=visualize_button, time_step=time_select)
display(a)

interactive(children=(IntSlider(value=1, description='iteration', max=15, min=1), Output()), _dom_classes=('wi…

interactive(children=(ToggleButton(value=False, description='Visualize'), ToggleButtons(description='Extra Del…