In [1]:
from random import randint, choice, uniform
from typing import AsyncIterator
import numpy as np
from numpy.core.fromnumeric import argmax

In [2]:
# Probability of taking a random action instead of the greedy one (epsilon-greedy).
EPSILON = 0.1
# Discount factor applied to the best Q-value of the next state.
GAMMA = 0.6
# Learning rate (called "beta" in Wawrzyński's notation).
ALPHA = 0.1

In [3]:
def generate_lab(n, holes):
    """Build an ``n`` x ``n`` labyrinth grid with randomly placed holes.

    Cells hold ``0`` (free) or ``1`` (hole). The top-left cell ``(0, 0)`` and
    the bottom-right cell ``(n-1, n-1)`` are never turned into holes.

    Args:
        n: Side length of the square grid.
        holes: Number of distinct hole cells to place.

    Returns:
        A list of ``n`` independent rows, each a list of ``n`` ints (0 or 1).

    Raises:
        ValueError: If ``holes`` is larger than the number of cells that are
            allowed to become holes (rejection sampling could never finish).
    """
    lab = [[0] * n for _ in range(n)]

    # The two corner cells are reserved; they coincide when n == 1.
    reserved = {(0, 0), (n - 1, n - 1)}
    capacity = n * n - len(reserved) if n > 0 else 0
    if holes > capacity:
        raise ValueError(
            "cannot place {} holes in a {}x{} labyrinth (at most {})".format(
                holes, n, n, capacity
            )
        )

    made_holes = 0
    while made_holes < holes:
        x, y = randint(0, n - 1), randint(0, n - 1)
        # Re-draw when the cell is reserved or already a hole.
        if (x, y) in reserved or lab[x][y] == 1:
            continue
        lab[x][y] = 1
        made_holes += 1
    return lab

In [4]:
def deep_first_search(lab) -> bool: 
    """Placeholder: ignores ``lab`` and always returns ``True``.

    NOTE(review): judging by the name, this is presumably meant to become a
    depth-first-search check on the labyrinth; it is not implemented yet —
    confirm the intended behavior before relying on the result.
    """
    return True

In [5]:
def save_lab_to_file(lab, file_name):
    """Write the labyrinth to ``file_name`` as text, one grid row per line.

    Each cell is written as ``str(cell)`` followed by a single space, so every
    line ends with a trailing space before its newline. An existing file is
    overwritten.

    Args:
        lab: Iterable of rows, each an iterable of cell values.
        file_name: Path of the text file to create.
    """
    with open(file_name, 'w') as out_file:
        for cells in lab:
            rendered = "".join(str(cell) + " " for cell in cells)
            out_file.write(rendered + '\n')

In [6]:
def read_lab_from_file(file_name):
    """Load a labyrinth from a whitespace-separated text file.

    Every line of the file becomes one row; every whitespace-separated token
    on the line is converted with ``int``. A blank line yields an empty row.

    Args:
        file_name: Path of the text file to read.

    Returns:
        A list of rows, each a list of ints.

    Raises:
        ValueError: If a token cannot be parsed as an integer.
    """
    with open(file_name, 'r') as data:
        # Tokenise each line exactly once instead of re-splitting per element.
        return [
            [int(token) for token in line.split()]
            for line in data.read().splitlines()
        ]

In [9]:
class Musk_Taxi:
    """Grid-world taxi environment on a square labyrinth.

    The taxi starts in cell ``(x=0, y=0)`` and has to reach
    ``(x=rows-1, y=rows-1)``. ``x`` is the first index into ``lab`` and ``y``
    the second one; ``lab[x][y] == 1`` marks a hole.

    Actions: ``0`` = down (x+1), ``1`` = up (x-1), ``2`` = left (y-1),
    ``3`` = right (y+1).
    """

    # Rewards handed out by make_action.
    GOAL_REWARD = 20
    STEP_REWARD = -1
    BLOCKED_REWARD = -10  # leaving the grid or driving into a hole

    # action id -> (dx, dy)
    _ACTION_DELTAS = {0: (1, 0), 1: (-1, 0), 2: (0, -1), 3: (0, 1)}

    def __init__(self, rows, holes, lab=None):
        """Create the environment with the taxi in the start cell.

        Args:
            rows: Side length of the square labyrinth.
            holes: Stored on the instance; not otherwise used by this class.
            lab: Optional ready-made grid (list of rows of 0/1). When omitted
                the grid is loaded from ``'saved_lab.txt'``. A random grid can
                be supplied with ``lab=generate_lab(rows, holes)``.
        """
        self.rows = rows
        self.holes = holes
        self.lab = read_lab_from_file('saved_lab.txt') if lab is None else lab
        self.x = 0
        self.y = 0
        self.x_done = self.rows - 1
        self.y_done = self.rows - 1
        self.done = False
        # Moves available from the start cell; make_action does not refresh this.
        self.available_moves = self.check_available_moves()

    def check_available_moves(self):
        """Return the free neighbouring cells as a list of ``[y, x]`` pairs.

        A neighbour qualifies when it lies inside the grid and its ``lab``
        value equals ``0``. Note the ``[y, x]`` order of each pair.
        """
        available_moves = []
        for dy, dx in zip([-1, 1, 0, 0], [0, 0, -1, 1]):
            new_x, new_y = self.x + dx, self.y + dy
            inside = -1 < new_x < self.rows and -1 < new_y < self.rows
            if inside and self.lab[new_x][new_y] == 0:
                available_moves.append([new_y, new_x])
        return available_moves

    def make_action(self, action):
        """Apply ``action`` and return the reward for it.

        * Moving off the grid: taxi stays put, reward ``-10``, episode goes on.
        * Moving into a hole: taxi stays put, reward ``-10``, ``done`` is set.
        * Reaching the goal cell: reward ``20``, ``done`` is set.
        * Any other move: reward ``-1``.

        Args:
            action: ``0`` down, ``1`` up, ``2`` left, ``3`` right.

        Returns:
            The integer reward.

        Raises:
            ValueError: If ``action`` is not one of the four known actions.
        """
        try:
            dx, dy = self._ACTION_DELTAS[action]
        except KeyError:
            raise ValueError(
                "unknown action {!r}; expected 0, 1, 2 or 3".format(action)
            ) from None

        new_x, new_y = self.x + dx, self.y + dy
        if not (0 <= new_x < self.rows and 0 <= new_y < self.rows):
            # Bumped into the outer wall: penalised, but not terminal.
            return self.BLOCKED_REWARD
        if self.lab[new_x][new_y] == 1:
            # Drove into a hole: the position is left unchanged, episode ends.
            self.done = True
            return self.BLOCKED_REWARD

        self.x, self.y = new_x, new_y
        if self.is_done():
            self.done = True
            return self.GOAL_REWARD
        return self.STEP_REWARD

    def is_done(self):
        """Return ``True`` when the taxi stands on the goal cell, else ``False``."""
        return self.y == self.y_done and self.x == self.x_done

    def no_of_states(self):
        """Return the number of grid cells (``rows ** 2``)."""
        return self.rows ** 2

    def get_state(self):
        """Return the current cell as a single index ``x * rows + y``."""
        state = self.x * self.rows + self.y
        return state

    def q_table(self):
        """Return a fresh zero-filled Q-table of shape ``(no_of_states(), 4)``."""
        return np.zeros((self.no_of_states(), 4))

In [10]:
class Random_car:
    """Baseline driver that moves a taxi by randomly chosen available moves."""

    def __init__(self, musk_taxi=None):
        """Wrap ``musk_taxi``; build a fresh ``Musk_Taxi(5, 3)`` when omitted.

        A ``None`` sentinel is used so the default taxi is created per
        instance rather than once when the class is defined (which would make
        every default-constructed car share, and mutate, the same taxi).
        """
        self.taxi = Musk_Taxi(5, 3) if musk_taxi is None else musk_taxi

    def steps(self):
        """Drive randomly until the taxi is on the goal cell.

        Returns:
            The number of moves that were made.

        Raises:
            IndexError: If the taxi has no available move (``choice`` on an
                empty list).
        """
        steps = 0
        while not self.taxi.is_done():
            move = choice(self.taxi.check_available_moves())
            # check_available_moves yields [y, x] pairs.
            self.taxi.y, self.taxi.x = move
            steps += 1
        return steps

In [18]:
# Tabular Q-learning with an epsilon-greedy behaviour policy.
N_EPISODES = 10000
LOG_EVERY = 20  # record the action sequence of every LOG_EVERY-th episode

taxi = Musk_Taxi(5, 5)
q_table = np.zeros((taxi.no_of_states(), 4))
interval_steps = []
for i in range(N_EPISODES):
    record_episode = i % LOG_EVERY == 0
    steps = []
    # A new environment object resets the taxi to the start cell.
    taxi = Musk_Taxi(5, 5)
    while not taxi.done:
        state = taxi.get_state()
        if uniform(0, 1) < EPSILON:
            action = randint(0, 3)
        else:
            action = np.argmax(q_table[state])
        if record_episode:
            steps.append(action)
        reward = taxi.make_action(action)
        new_state = taxi.get_state()
        new_state_max = np.max(q_table[new_state])
        # A terminal transition (goal or hole) has no future value to bootstrap from.
        target = reward if taxi.done else reward + GAMMA * new_state_max
        # Q(s, a) <- Q(s, a) + ALPHA * (target - Q(s, a)).
        # The previous expression subtracted Q(s, a) twice, which made the
        # values converge to half of the TD target.
        q_table[state, action] += ALPHA * (target - q_table[state, action])
    if record_episode:
        interval_steps.append(steps)

In [19]:
# Print the recorded action sequences, one sampled episode per line.
for episode_actions in interval_steps:
    print(episode_actions)

[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3]
[0, 1, 0, 0, 1, 0, 3, 2, 0, 1, 3, 0, 3, 2, 2, 1, 1, 0, 0, 3, 3, 1, 3, 1, 1, 1, 2]
[0, 0, 0, 3, 2, 3, 3, 0, 3, 3]
[3]
[0, 0, 3, 0, 3, 0, 3, 2, 3, 3]
[0, 0, 3, 3, 0, 0, 3, 3]
[0, 0, 3, 0, 3, 0, 3, 3]
[0, 0, 3, 0, 3, 0, 3, 3]
[0, 0, 3, 0, 3, 0, 3, 3]
[0, 0, 3, 0, 3, 0, 3, 3]
[0, 0, 3, 0, 3, 0, 3, 3]
[0, 0, 0, 3, 3, 0, 3, 3]
[0, 0, 3, 0, 3, 0, 3, 3]
[0, 0, 3, 3, 0, 0, 3, 3]
[0, 0, 3, 0, 3, 0, 3, 3]
[0, 0, 3, 0, 3, 0, 3, 3]
[0, 0, 3, 0, 3, 0, 3, 3]
[0, 0, 3, 0, 3, 0, 3, 3]
[0, 0, 3, 0, 3, 0, 3, 3]
[0, 0, 3, 0, 3, 0, 3, 3]
[0, 0, 3, 0, 3, 0, 3, 3]
[0, 0, 3, 0, 3, 0, 3, 3]
[0, 0, 1, 0, 3, 0, 3, 0, 3, 3]
[0, 0, 3, 0, 3, 0, 3, 3]
[0, 0, 3, 0, 3, 0, 3, 3]
[0, 0, 3, 0, 3, 0, 3, 3]
[0, 0, 3, 0, 3, 0, 3, 3]
[0, 0, 3, 0, 3, 0, 3, 3]
[2, 0, 0, 3, 0, 3, 0, 3, 3]
[0, 0, 3, 0, 3, 0, 3, 3]
[0, 0, 3, 0, 3, 0, 3, 3]
[0, 0, 3, 3, 0, 0, 3, 3]
[0, 0, 3, 0, 3, 0, 3, 3]
[0, 0, 3, 0, 3, 0, 3, 3]
[0, 0, 2, 3, 0, 3, 0, 3, 3]
[0, 0, 3, 0, 3, 0, 3, 3]
[0, 0, 3, 0, 3, 0, 3, 1]
[0,

In [20]:
# One more episode on a fresh taxi, printing the labyrinth first.
# Note: this run still explores with probability EPSILON and keeps updating q_table.
taxi = Musk_Taxi(5, 5)
for row in taxi.lab:
    print(row)
steps = []
while not taxi.done:
    state = taxi.get_state()
    if uniform(0, 1) < EPSILON:
        action = randint(0, 3)
    else:
        action = np.argmax(q_table[state])
    reward = taxi.make_action(action)
    new_state = taxi.get_state()
    new_state_max = np.max(q_table[new_state])
    # A terminal transition (goal or hole) has no future value to bootstrap from.
    target = reward if taxi.done else reward + GAMMA * new_state_max
    # Q(s, a) <- Q(s, a) + ALPHA * (target - Q(s, a)).
    # The previous expression subtracted Q(s, a) twice.
    q_table[state, action] += ALPHA * (target - q_table[state, action])
    steps.append(action)

[0, 1, 1, 0, 0]
[0, 1, 0, 0, 0]
[0, 0, 0, 0, 1]
[0, 0, 0, 1, 0]
[0, 1, 0, 0, 0]


In [21]:
# Show the actions taken in the run above (0 = down, 1 = up, 2 = left, 3 = right).
print(steps)

[0, 2, 0, 0, 3, 3, 0, 3, 3]
