# QLearning : Robot saliendo de casa

## Iniciación de parámetros

### Matriz de aprendizaje

In [1]:
import numpy as np

gamma = 0.7

Q = np.zeros((6,6))


### Diagrama de recompensas

In [2]:
import networkx as nx

R = nx.DiGraph()

for node in range(6):
    R.add_node(node)

R.add_edge(0,4,weight=0)
R.add_edge(4,0,weight=0)

R.add_edge(1,3,weight=0)
R.add_edge(3,1,weight=0)

R.add_edge(1,5,weight=100)
R.add_edge(5,1,weight=0)

R.add_edge(2,3,weight=0)
R.add_edge(3,2,weight=0)

R.add_edge(3,4,weight=0)
R.add_edge(4,3,weight=0)

R.add_edge(4,5,weight=100)
R.add_edge(5,4,weight=0)

R.add_edge(5,5,weight=100)


In [3]:
for e in R.edges():
    print(str(e) + " <=> " + str(R[e[0]][e[1]]['weight']))

(0, 4) <=> 0
(1, 3) <=> 0
(1, 5) <=> 100
(2, 3) <=> 0
(3, 1) <=> 0
(3, 2) <=> 0
(3, 4) <=> 0
(4, 0) <=> 0
(4, 3) <=> 0
(4, 5) <=> 100
(5, 1) <=> 0
(5, 4) <=> 0
(5, 5) <=> 100


In [4]:

def get_outgoing_edges(graph, node):
    destination_node = []
    weight = []
    for e in graph[node]:
        destination_node.append(e)
        weight.append(graph[node][e]['weight'])
    return destination_node, weight


## QLearning

In [5]:
def updateQMatrix(s,a):
    nodes, weight_out = get_outgoing_edges(R, a)
    maximo_q = max(weight_out)
    Q[s][a] = R[s][a]['weight'] + gamma*maximo_q


### Q-Learning algorithm

In [6]:
import random

def learning_episode():
    current_state = random.randint(0, len(R)-1)
    
    learning_path = [current_state]
    while current_state != 5:
        adjacent_nodes, w = get_outgoing_edges(R, current_state)
        a = random.choice(adjacent_nodes)
        
        updateQMatrix(current_state, a)
        
        current_state = a
        learning_path.append(current_state)
    return learning_path


def q_learning_process(tries):
    for i in range(tries):
        learning_path = learning_episode()
        print("Try " + str(i) + " " + str(learning_path))


q_learning_process(100)



Try 0 [2, 3, 2, 3, 4, 0, 4, 5]
Try 1 [3, 1, 5]
Try 2 [3, 2, 3, 2, 3, 2, 3, 4, 0, 4, 5]
Try 3 [0, 4, 3, 2, 3, 1, 5]
Try 4 [1, 3, 1, 5]
Try 5 [5]
Try 6 [0, 4, 3, 1, 5]
Try 7 [2, 3, 1, 5]
Try 8 [0, 4, 0, 4, 0, 4, 0, 4, 5]
Try 9 [4, 5]
Try 10 [1, 5]
Try 11 [4, 5]
Try 12 [2, 3, 1, 5]
Try 13 [0, 4, 5]
Try 14 [4, 0, 4, 3, 2, 3, 1, 3, 4, 5]
Try 15 [1, 5]
Try 16 [4, 0, 4, 5]
Try 17 [3, 1, 5]
Try 18 [2, 3, 2, 3, 1, 5]
Try 19 [3, 4, 0, 4, 5]
Try 20 [2, 3, 4, 5]
Try 21 [0, 4, 3, 2, 3, 4, 0, 4, 3, 2, 3, 1, 3, 1, 5]
Try 22 [2, 3, 1, 3, 2, 3, 1, 3, 4, 3, 4, 0, 4, 5]
Try 23 [1, 5]
Try 24 [1, 3, 2, 3, 1, 3, 2, 3, 4, 3, 4, 5]
Try 25 [0, 4, 0, 4, 3, 4, 0, 4, 5]
Try 26 [3, 2, 3, 2, 3, 4, 0, 4, 5]
Try 27 [5]
Try 28 [2, 3, 4, 0, 4, 3, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 3, 2, 3, 1, 5]
Try 29 [1, 3, 1, 5]
Try 30 [1, 3, 1, 3, 2, 3, 4, 0, 4, 3, 1, 5]
Try 31 [2, 3, 4, 5]
Try 32 [2, 3, 4, 0, 4, 0, 4, 5]
Try 33 [2, 3, 4, 3, 2, 3, 2, 3, 4, 0, 4, 0, 4, 3, 1, 3, 1, 3, 2, 3, 2, 3, 2, 3, 4, 3, 2, 3, 1, 5]
Try 34 [3

In [7]:
Q

array([[  0.,   0.,   0.,   0.,  70.,   0.],
       [  0.,   0.,   0.,   0.,   0., 170.],
       [  0.,   0.,   0.,   0.,   0.,   0.],
       [  0.,  70.,   0.,   0.,  70.,   0.],
       [  0.,   0.,   0.,   0.,   0., 170.],
       [  0.,   0.,   0.,   0.,   0.,   0.]])

### Testing QLearning algorithm results

In [8]:

def get_out_home(start_room):
    path = [start_room]
    current_room = start_room
    while current_room != 5:
        adjacent_nodes, w = get_outgoing_edges(R, current_room)
        q_values = np.array([ Q[current_room][node] for node in adjacent_nodes ])

        next_room = adjacent_nodes[np.argmax(q_values)]

        path.append(next_room)
        current_room = next_room
    return path


In [20]:
from tkinter import *
from random import *

master = Tk()

canvas_width = 500
canvas_height = 300


def setup():
    global w
    w = Canvas(master,
               width=canvas_width,
               height=canvas_height)
    w.pack()


def draw_house():

    # Draw outer walls
    w.create_line(10, 10, 100, 10, fill="#000000")  # a
    w.create_line(10, 10, 10, 130, fill="#000000")  # b
    w.create_line(10, 130, 20, 130, fill="#000000")  # c
    w.create_line(40, 130, 100, 130, fill="#000000")  # d
    w.create_line(100, 130, 100, 110, fill="#000000")  # e
    w.create_line(100, 110, 200, 110, fill="#000000")  # f
    w.create_line(200, 110, 200, 40, fill="#000000")  # g
    w.create_line(120, 40, 200, 40, fill="#000000")  # h
    w.create_line(100, 10, 100, 40, fill="#000000")  # i

    # Draw interior walls
    w.create_line(10, 70, 40, 70, fill="#000000")  # j
    w.create_line(60, 70, 80, 70, fill="#000000")  # k
    w.create_line(70, 10, 70, 100, fill="#000000")  # l
    w.create_line(130, 40, 130, 80, fill="#000000")  # m
    w.create_line(100, 70, 130, 70, fill="#000000")  # n
    w.create_line(70, 120, 70, 130, fill="#000000")  # o
    w.create_line(130, 100, 130, 110, fill="#000000")  # p

    # Number rooms
    w.create_text(40,40,fill="black", font="Arial 8 bold",
                  text="0")
    w.create_text(90, 50, fill="black", font="Arial 8 bold",
                  text="1")
    w.create_text(160, 70, fill="black", font="Arial 8 bold",
                  text="2")
    w.create_text(100, 90, fill="black", font="Arial 8 bold",
                  text="3")
    w.create_text(40, 100, fill="black", font="Arial 8 bold",
                  text="4")
    w.create_text(160, 30, fill="black", font="Arial 8 bold",
                  text="5")


def draw_move(a,b):
    cero = 50, 40
    cuatro = 50, 110
    uno = 100, 60
    tres = 90, 110
    dos = 150, 80
    cinco_up = 120, 30
    cinco_down = 20, 140
    
    color = "#ff0000"

    if a == 0 and b == 4:
        return w.create_line(cero, cuatro, fill=color, arrow=LAST)
    elif a == 4 and b == 0:
        return w.create_line(cuatro, cero, fill=color, arrow=LAST)
    elif a == 4 and b == 3:
        return w.create_line(cuatro, tres, fill=color, arrow=LAST)
    elif a == 3 and b == 4:
        return w.create_line(tres, cuatro, fill=color, arrow=LAST)
    elif a == 1 and b == 3:
        return w.create_line(uno, tres, fill=color, arrow=LAST)
    elif a == 3 and b == 1:
        return w.create_line(tres, uno, fill=color, arrow=LAST)
    elif a == 3 and b == 2:
        return w.create_line(tres, dos, fill=color, arrow=LAST)
    elif a == 2 and b == 3:
        return w.create_line(dos, tres, fill=color, arrow=LAST)
    elif a == 1 and b == 5:
        return w.create_line(uno, cinco_up, fill=color, arrow=LAST)
    elif a == 5 and b == 1:
        return w.create_line(cinco_up, uno, fill=color, arrow=LAST)
    elif a == 4 and b == 5:
        return w.create_line(cuatro, cinco_down, fill=color, arrow=LAST)
    elif a == 5 and b == 4:
        return w.create_line(cinco_down, cuatro, fill=color, arrow=LAST)

def draw_start_room(room):
    cero = 50, 40
    cuatro = 50, 110
    uno = 100, 60
    tres = 90, 110
    dos = 150, 80
    cinco = 120, 30
    
    diam = 10
    color = "#ffff00"
    
    if room == 0:
        return w.create_oval(cero, cero[0] + diam, cero[1] + diam, fill=color)
    elif room == 1:
        return w.create_oval(uno, uno[0] + diam, uno[1] + diam, fill=color)
    elif room == 2:
        return w.create_oval(dos, dos[0] + diam, dos[1] + diam, fill=color)
    elif room == 3:
        return w.create_oval(tres, tres[0] + diam, tres[1] + diam, fill=color)
    elif room == 4:
        return w.create_oval(cuatro, cuatro[0] + diam, cuatro[1] + diam, fill=color)
    elif room == 5:
        return w.create_oval(cinco, cinco[0] + diam, cinco[1] + diam, fill=color)
    


def place_robot():
    def event_button(start_room):
        path = get_out_home(start_room)
        draw_path(path)
    
    b0 = Button(master, text="0")
    b0.pack()
    b0.bind('<Button-1>', lambda event : event_button(0))
    
    b1 = Button(master, text="1")
    b1.pack()
    b1.bind('<Button-1>', lambda event : event_button(1))
    
    b2 = Button(master, text="2")
    b2.pack()
    b2.bind('<Button-1>', lambda event : event_button(2))
    
    b3 = Button(master, text="3")
    b3.pack()
    b3.bind('<Button-1>', lambda event : event_button(3))
    
    b4 = Button(master, text="4")
    b4.pack()
    b4.bind('<Button-1>', lambda event : event_button(4))
    
    b5 = Button(master, text="5")
    b5.pack()
    b5.bind('<Button-1>', lambda event : event_button(5))


def draw_path(path):
    global step
    step = 0
    
    def draw_path_aux():
        global step
        global start_room_id
        if step == 0:
            start_room_id = draw_start_room(path[0])
        if step < len(path):
            a = path[step - 1]
            b = path[step]
            w.after(1000, w.delete, draw_move(a, b))
            step +=1
        if step == len(path):
            w.delete(start_room_id)
        w.after(1000, draw_path_aux)
    draw_path_aux()


setup()
draw_house()
place_robot()
mainloop()
