In [4]:
import tkinter as tk
import numpy as np
import random
from tqdm import tqdm
import time

def generate_grid(size):
    """Build a random ``size`` x ``size`` reward grid.

    Every cell starts at 0.0.  Each cell is then independently turned into
    a wall (-10) with probability 0.3, and finally one uniformly chosen
    cell is overwritten with the goal reward (100).

    Returns:
        A float ``numpy.ndarray`` of shape ``(size, size)``.
    """
    grid = np.zeros((size, size))
    # Row-major sweep with exactly one random draw per cell.
    for row, col in np.ndindex(size, size):
        if random.random() < 0.3:
            grid[row, col] = -10
    # Pick the goal location at random (it may overwrite a wall).
    goal_row = random.randint(0, size - 1)
    goal_col = random.randint(0, size - 1)
    grid[goal_row, goal_col] = 100
    return grid

def plot_grid(canvas, grid, robot_pos, cell_size=50):
    """Redraw the whole grid on ``canvas`` and highlight the robot's cell.

    Colours: red for the robot, blue for walls (-10), green for the goal
    (100) and white for every other value.

    Args:
        canvas: Object exposing ``delete``, ``create_rectangle`` and
            ``update`` (the notebook passes a ``tkinter.Canvas``).
        grid: 2-D array-like of cell rewards.
        robot_pos: ``(row, col)`` of the robot, as any sequence, or
            ``None`` to draw no robot.
        cell_size: Side length of one cell in pixels.  The default of 50
            matches the canvas dimensions used elsewhere in the notebook.

    Raises:
        ValueError: If ``grid`` is not two-dimensional.
    """
    cells = np.asarray(grid)
    if cells.ndim != 2:
        raise ValueError("grid must be two-dimensional")
    n_rows, n_cols = cells.shape
    # Normalise to a tuple so that a list or array position still matches (i, j).
    robot_cell = None if robot_pos is None else tuple(robot_pos)

    canvas.delete("all")
    for i in range(n_rows):
        for j in range(n_cols):
            x0, y0 = j * cell_size, i * cell_size
            x1, y1 = x0 + cell_size, y0 + cell_size
            color = "white"
            if (i, j) == robot_cell:  # Robot position
                color = "red"
            elif cells[i, j] == -10:  # Wall
                color = "blue"
            elif cells[i, j] == 100:  # Exit
                color = "green"
            canvas.create_rectangle(x0, y0, x1, y1, fill=color)
    # Flush the drawing immediately; the caller is not inside a Tk main loop.
    canvas.update()

def choose_action(state, epsilon, Q):
    """Pick an action for ``state`` with an epsilon-greedy policy.

    Args:
        state: ``(row, col)`` position used to index the Q-table.
        epsilon: Probability of exploring with a uniformly random action.
        Q: Q-table indexed as ``Q[row, col, action]``.

    Returns:
        The chosen action index as a plain ``int``.
    """
    action_values = Q[state[0], state[1]]
    if random.random() < epsilon:
        # Explore: sample over the actions the table actually holds rather
        # than a hard-coded list of eight.
        return random.randrange(len(action_values))
    # Exploit: greedy action (ties resolve to the lowest index).
    return int(np.argmax(action_values))

def get_new_state(state, action, size):
    """Return the cell reached from ``state`` by ``action`` on a ``size`` x ``size`` grid.

    Action codes: 0 up, 1 down, 2 left, 3 right, 4 up-left, 5 up-right,
    6 down-left, 7 down-right.  A move that would leave the grid, or an
    unknown action code, leaves the position unchanged.  Only the grid
    bounds are checked here; cell contents are never consulted.
    """
    # (row delta, column delta) for action codes 0..7, in order.
    offsets = (
        (-1, 0),
        (1, 0),
        (0, -1),
        (0, 1),
        (-1, -1),
        (-1, 1),
        (1, -1),
        (1, 1),
    )
    row, col = state

    def inside(r, c):
        return 0 <= r < size and 0 <= c < size

    d_row, d_col = 0, 0
    for code, offset in enumerate(offsets):
        if action == code:
            d_row, d_col = offset
            break

    target = (row + d_row, col + d_col)
    if inside(*target):
        return target
    # Blocked move: stay put.  An in-grid position is returned as a fresh
    # tuple; an out-of-grid input is handed back untouched.
    if inside(row, col):
        return (row, col)
    return state


def Q_learning(canvas, epsilon, gamma, alpha, episodes, size,grid):
    """Train a tabular Q-function on ``grid`` while animating the robot on ``canvas``.

    Every episode starts at (0, 0) and runs until a cell with reward 100 is
    entered.  Each episode works on its own copy of the grid in which a
    visited 0-reward cell is rewritten to -1, so stepping on it again
    within the same episode is penalised.

    Args:
        canvas: Drawing surface handed to ``plot_grid`` after every step.
        epsilon: Exploration probability passed to ``choose_action``.
        gamma: Discount factor applied to the best next-state value.
        alpha: Learning rate of the update.
        episodes: Number of training episodes.
        size: Side length of the grid.
        grid: Reward grid; it is copied per episode and never modified.

    Returns:
        The learned Q-table, shape ``(size, size, 8)``.
    """
    q_table = np.zeros((size, size, 8))
    for _ in tqdm(range(episodes)):
        position = (0, 0)
        episode_grid = np.copy(grid)
        goal_reached = False
        while not goal_reached:  # Until the objective is reached
            action = choose_action(position, epsilon, q_table)
            successor = get_new_state(position, action, size)
            row, col = successor
            reward = episode_grid[row][col]
            if reward == 0:
                # Mark the free cell as visited for the rest of this episode.
                episode_grid[row][col] = -1
            td_target = reward + gamma * np.max(q_table[row, col])
            q_table[position[0], position[1], action] += alpha * (
                td_target - q_table[position[0], position[1], action]
            )
            position = successor
            # The display always shows the pristine grid, not the episode copy.
            plot_grid(canvas, grid, position)
            goal_reached = reward == 100
    return q_table







In [43]:
def generate_grid(size, wall_prob=0.3):
    """Build a random ``size`` x ``size`` reward grid with a step cost.

    Free cells hold -1, walls hold -10 and exactly one cell holds the goal
    reward 100.

    Args:
        size: Side length of the square grid; must be at least 1.
        wall_prob: Probability that any given cell becomes a wall.  The
            default of 0.3 reproduces the previously hard-coded value.

    Returns:
        An integer ``numpy.ndarray`` of shape ``(size, size)``.

    Raises:
        ValueError: If ``size`` is smaller than 1.
    """
    if size < 1:
        raise ValueError("size must be a positive integer")
    grid = np.full((size, size), -1)
    # One draw per cell in row-major order, so seeded runs stay reproducible.
    for i in range(size):
        for j in range(size):
            if random.random() < wall_prob:
                grid[i, j] = -10
    # Pick the location of the goal at random (it may overwrite a wall).
    x = random.randint(0, size - 1)
    y = random.randint(0, size - 1)
    grid[x, y] = 100
    return grid

In [44]:
def plot_grid1(grid):
    """Show ``grid`` in its own Tk window and block until the window is closed.

    Colours follow the same convention as ``plot_grid``: blue for walls
    (-10), green for the goal (100) and white for every other value, so
    free cells stored as 0 or as -1 both render white.

    Args:
        grid: 2-D sequence of cell rewards, indexed as ``grid[i][j]``.
    """
    cell_width = 50
    cell_height = 50

    rows = len(grid)
    cols = len(grid[0]) if rows else 0

    root = tk.Tk()
    root.title("Grid Representation")

    canvas = tk.Canvas(root, width=cols * cell_width, height=rows * cell_height)
    canvas.pack()

    for i in range(rows):
        for j in range(cols):
            x0, y0 = j * cell_width, i * cell_height
            x1, y1 = x0 + cell_width, y0 + cell_height
            value = grid[i][j]
            if value == -10:
                color = 'blue'
            elif value == 100:
                color = 'green'
            else:
                # Any non-wall, non-goal value is a free cell.
                color = 'white'
            canvas.create_rectangle(x0, y0, x1, y1, fill=color, outline='black')

    root.mainloop()

In [45]:
# Q-learning hyperparameters and grid size used by the cells below.
epsilon = 0.1  # probability of picking a random action in choose_action
gamma = 0.9  # discount factor applied to the best next-state Q-value
alpha = 0.1  # learning rate of the Q-table update
episodes = 1000  # number of training episodes
size = 20  # side length of the square grid

In [46]:
# Generate the grid, then display it (plot_grid1 blocks until its window is closed)
grid = generate_grid(size)
plot_grid1(grid)

In [52]:
# Display the current grid again in a standalone window.
plot_grid1(grid)

In [47]:
# Initialisation de la fenêtre tkinter
root = tk.Tk()
root.title("Q-Learning Robot")

# Création du canevas
canvas = tk.Canvas(root, width=size * 50, height=size * 50)
canvas.pack()

# Apprentissage Q-Learning
Matrice_Q=Q_learning(canvas, epsilon, gamma, alpha, episodes, size,grid)

# Lancement de la boucle principale tkinter
root.mainloop()

 11%|█         | 107/1000 [14:46<2:03:18,  8.29s/it]


KeyboardInterrupt: 

: 

In [33]:
# Dump the raw reward grid as text.
print(grid)

[[  0.   0.   0. -10. -10.   0. -10. -10.   0.   0. -10.   0. -10.   0.
    0.   0. -10. -10.   0. -10.]
 [-10.   0. -10.   0.   0.   0.   0.   0.   0. -10. -10.   0.   0. -10.
  -10.   0. -10. -10.   0.   0.]
 [-10.   0.   0. -10. -10.   0. -10.   0. -10. -10.   0.   0. -10.   0.
  -10.   0.   0.   0.   0.   0.]
 [  0.   0.   0.   0.   0. -10.   0.   0.   0.   0. -10.   0. -10.   0.
    0.   0. -10.   0.   0. -10.]
 [  0.   0. -10.   0.   0.   0.   0.   0.   0.   0. -10. -10.   0.   0.
    0.   0. -10. -10. -10.   0.]
 [  0.   0.   0.   0. -10.   0.   0.   0. -10.   0. -10.   0.   0.   0.
    0. -10.   0.   0.   0. -10.]
 [  0. -10.   0.   0.   0.   0.   0.   0. -10.   0. -10.   0. -10.   0.
  -10.   0. -10.   0.   0. -10.]
 [  0.   0.   0.   0. -10. -10. -10.   0. -10.   0.   0.   0. -10.   0.
    0. -10. -10.   0.   0. -10.]
 [  0.   0. -10. -10.   0.   0.   0.   0. -10.   0.   0.   0.   0.   0.
  -10. -10.   0. -10.   0.   0.]
 [  0.   0.   0.   0. -10.   0. -10.   0.   0.   0. -10

In [35]:
# Dump the learned Q-table (indexed as [row, col, action]) as text.
print(Matrice_Q)

[[[ 3.58788141 -5.76703351  4.00409755 ...  4.50580789  3.39927275
    6.87165763]
  [ 4.49151655  2.59257828  2.83623097 ...  2.2521479  -4.99988255
   -5.44944833]
  [ 5.92166485 -5.82638309  3.03000834 ...  3.83525763  0.44372808
    9.59411554]
  ...
  [-1.         -1.         -1.         ... -1.         -1.
    0.        ]
  [-0.58519851 -0.63916272 -1.         ... -0.58367986 -1.
   -0.62150605]
  [-1.         -0.32185359 -0.22467621 ... -1.         -0.22456359
   -1.        ]]

 [[ 5.517551   -1.         -1.42668476 ... -0.35895547 -1.
   -0.27829489]
  [ 7.67611337  0.92118222 -5.78757459 ...  4.97078597 -7.11777673
    3.02099579]
  [-0.27130091  0.41158899  5.59612922 ... -1.26117965 -0.02880391
   -1.00238172]
  ...
  [-1.         -0.216658   -1.9        ... -0.109      -0.109
   -0.109     ]
  [-0.4000995  -0.48186172 -1.9        ... -1.90981    -0.40918859
   -0.4806451 ]
  [-3.465739   -0.47387512 -0.47385658 ... -0.4900995  -0.48192109
   -0.4900995 ]]

 [[-1.         -0

In [39]:
# Check the grid dimensions as (rows, cols).
grid.shape

(20, 20)

In [35]:
def find_optimal_path(Q, start=(4, 0), grid=None, goal_reward=100):
    """Follow the greedy policy of ``Q`` from ``start`` and return the visited cells.

    The walk stops as soon as it would revisit a cell, which guarantees
    termination.  When ``grid`` is supplied it also stops on the first cell
    whose reward equals ``goal_reward``; without it the walk continues past
    the goal until a cycle is detected.

    Args:
        Q: Q-table indexed as ``Q[row, col, action]``; its first dimension
            gives the grid size, so no global grid is needed.
        start: ``(row, col)`` to start from.  Defaults to (4, 0).
        grid: Optional reward grid, indexed as ``grid[row][col]``, used
            only to detect the goal.
        goal_reward: Reward value that marks the goal cell.

    Returns:
        List of ``(row, col)`` tuples in visiting order, starting with ``start``.
    """
    size = Q.shape[0]
    optimal_path = []
    visited = set()  # O(1) membership test instead of scanning the list
    pos = tuple(start)
    while pos not in visited:
        optimal_path.append(pos)
        visited.add(pos)
        if grid is not None and grid[pos[0]][pos[1]] == goal_reward:
            break
        action = np.argmax(Q[pos[0], pos[1]])
        pos = get_new_state(pos, action, size)
    return optimal_path
        
        


In [9]:
# Extract the greedy path from the learned Q-table.
optimal_path=find_optimal_path(Matrice_Q)

In [33]:
def plot_grid_with_path(grid, optimal_path):
    """Show ``grid`` with ``optimal_path`` overlaid and block until the window is closed.

    Cells are coloured with the same convention as ``plot_grid``: blue for
    walls (-10), green for the goal (100) and white for every other value.
    Each path cell gets a red disc half the size of a cell, centred in it.

    Args:
        grid: 2-D sequence of cell rewards, indexed as ``grid[i][j]``.
        optimal_path: Iterable of ``(row, col)`` positions; ``None`` is
            treated as an empty path.
    """
    cell_width = 50
    cell_height = 50

    rows = len(grid)
    cols = len(grid[0]) if rows else 0
    if optimal_path is None:
        optimal_path = ()

    root = tk.Tk()
    root.title("Grid Representation")

    canvas = tk.Canvas(root, width=cols * cell_width, height=rows * cell_height)
    canvas.pack()

    # Draw the grid first
    for i in range(rows):
        for j in range(cols):
            x0, y0 = j * cell_width, i * cell_height
            x1, y1 = x0 + cell_width, y0 + cell_height
            value = grid[i][j]
            if value == -10:
                color = 'blue'
            elif value == 100:
                color = 'green'
            else:
                # Any non-wall, non-goal value is a free cell.
                color = 'white'
            canvas.create_rectangle(x0, y0, x1, y1, fill=color, outline='black')

    # Then draw the path on top as red discs
    for pos in optimal_path:
        x0, y0 = pos[1] * cell_width + cell_width / 4, pos[0] * cell_height + cell_height / 4
        x1, y1 = x0 + cell_width / 2, y0 + cell_height / 2
        canvas.create_oval(x0, y0, x1, y1, fill='red')

    root.mainloop()


In [10]:
# Display the grid with the extracted path drawn on top.
plot_grid_with_path(grid,optimal_path)

In [19]:
# Switch to a larger 30 x 30 grid: regenerate it and display it.
size=30
grid = generate_grid(size)
plot_grid1(grid)

In [23]:
# Initialisation de la fenêtre tkinter
root = tk.Tk()
root.title("Q-Learning Robot")

# Création du canevas
canvas = tk.Canvas(root, width=size * 50, height=size * 50)
canvas.pack()

# Apprentissage Q-Learning
Matrice_Q=Q_learning(canvas, epsilon, gamma, alpha, episodes, size,grid)

# Lancement de la boucle principale tkinter
root.mainloop()

100%|██████████| 1000/1000 [30:40<00:00,  1.84s/it] 


In [24]:
# Extract the greedy path from the newly learned Q-table.
optimal_path=find_optimal_path(Matrice_Q)

In [27]:
# Display the grid with the extracted path drawn on top.
plot_grid_with_path(grid,optimal_path)

In [36]:
def Q_learning(canvas, epsilon, gamma, alpha, episodes, size, grid, start=(4, 0), render_every=1):
    """Train a tabular Q-function on ``grid``, optionally animating the robot.

    Every episode starts at ``start`` and runs until a cell with reward 100
    is entered.  Rewards are read directly from ``grid``, which is never
    modified.

    Args:
        canvas: Drawing surface handed to ``plot_grid``, or ``None`` to
            train without any rendering.
        epsilon: Exploration probability passed to ``choose_action``.
        gamma: Discount factor applied to the best next-state value.
        alpha: Learning rate of the update.
        episodes: Number of training episodes.
        size: Side length of the grid.
        grid: Reward grid, indexed as ``grid[row][col]``.
        start: ``(row, col)`` where each episode begins.  Defaults to (4, 0).
        render_every: Redraw the canvas every this many steps, and always
            on the final step of an episode.  The default of 1 redraws on
            every step; 0 or ``None`` disables rendering.

    Returns:
        The learned Q-table, shape ``(size, size, 8)``.
    """
    Q = np.zeros((size, size, 8))
    start = tuple(start)
    for _ in tqdm(range(episodes)):
        state = start
        reward = 0
        step = 0
        while reward != 100:  # Until the objective is reached
            action = choose_action(state, epsilon, Q)
            new_state = get_new_state(state, action, size)
            reward = grid[new_state[0]][new_state[1]]
            if reward == 100:
                # Entering the goal ends the episode, so nothing follows it.
                best_future_reward = 0.0
            else:
                best_future_reward = np.max(Q[new_state[0], new_state[1]])
            Q[state[0], state[1], action] += alpha * (
                reward + gamma * best_future_reward - Q[state[0], state[1], action]
            )
            state = new_state
            step += 1
            # A full canvas redraw is by far the most expensive part of a
            # step, so it can be throttled or switched off.
            if canvas is not None and render_every and (
                step % render_every == 0 or reward == 100
            ):
                plot_grid(canvas, grid, state)
    return Q

In [37]:
# Hand-written 10 x 10 grid: -1 = free cell, -10 = wall, 100 = goal.
# The goal sits at row 4, column 9.
grid=[[-1,-1,-1,-1,-1,-10,-1,-1,-1,-1],
      [-1,-1,-10,-10,-1,-10,-1,-1,-1,-1],
      [-1,-1,-1,-10,-1,-10,-1,-10,-1,-1],
      [-1,-1,-1,-10,-1,-1,-1,-10,-1,-1],
      [-1,-1,-1,-10,-10,-1,-1,-10,-1,100],
      [-1,-1,-1,-1,-10,-1,-1,-10,-1,-1],
      [-1,-10,-1,-1,-10,-1,-1,-10,-10,-10],
      [-1,-10,-10,-10,-10,-1,-1,-1,-1,-1],
      [-1,-1,-1,-1,-1,-1,-10,-1,-1,-1],
      [-1,-1,-1,-1,-1,-1,-10,-1,-1,-1]]
# Convert to an array so that plot_grid can index it as grid[i, j].
grid = np.array(grid)

In [38]:
# Display the hand-written grid in a standalone window.
plot_grid1(grid)

In [39]:
# Side length matching the 10 x 10 hand-written grid.
size=10

In [40]:
# Initialisation de la fenêtre tkinter
root = tk.Tk()
root.title("Q-Learning Robot")

# Création du canevas
canvas = tk.Canvas(root, width=size * 50, height=size * 50)
canvas.pack()

# Apprentissage Q-Learning
Matrice_Q=Q_learning(canvas, epsilon, gamma, alpha, episodes, size,grid)

# Lancement de la boucle principale tkinter
root.mainloop()

100%|██████████| 1000/1000 [01:45<00:00,  9.45it/s]


In [41]:
# Extract the greedy path from the Q-table learned on the hand-written grid.
optimal_path=find_optimal_path(Matrice_Q)

In [42]:
# Display the grid with the extracted path drawn on top.
plot_grid_with_path(grid,optimal_path)

Affichage de la grille