In [None]:
import numpy as np
import plotly.graph_objects as go
import random

# Reproducibility: exploration is driven by the stdlib `random` module, so seed
# it once here. Without a seed, every Restart & Run All trains a different
# Q-table and plots a different "best" path.
SEED = 42
random.seed(SEED)

# Hyperparameters
alpha = 0.1            # learning rate used in the Q-value update
gamma = 0.9            # discount factor applied to the next state's best Q-value
epsilon = 0.3          # initial probability of picking a random action
epsilon_decay = 0.995  # multiplicative decay applied to epsilon after each episode
min_epsilon = 0.01     # lower bound for epsilon
iterations = 5000      # number of training episodes

# Grid size: valid coordinates are 0 .. GRID_SIZE - 1 on each of the three axes
GRID_SIZE = 20

# Start and goal cells
start = (0, 0, 0)
goal = (GRID_SIZE - 1, GRID_SIZE - 1, GRID_SIZE - 1)

# Action set: (dx, dy, dz)
actions = [
    (1, 0, 0), (-1, 0, 0),  # right / left
    (0, 1, 0), (0, -1, 0),  # forward / backward
    (0, 0, 1), (0, 0, -1)   # up / down
]

# Q-table: maps a state tuple to an array holding one value per action.
# Rows are created lazily by get_q.
Q = {}

def get_q(state):
    """Return the action-value array stored for ``state``.

    A row of zeros (one entry per action) is inserted into ``Q`` the first time
    a state is requested. The returned array is the stored object itself, so
    writing into it updates the table.
    """
    try:
        return Q[state]
    except KeyError:
        row = Q[state] = np.zeros(len(actions))
        return row

def is_valid(pos):
    """Return True when every coordinate of ``pos`` lies in ``[0, GRID_SIZE)``."""
    for coord in pos:
        if not (0 <= coord < GRID_SIZE):
            return False
    return True

def step(pos, action_idx):
    """Apply action ``action_idx`` to ``pos`` and return ``(new_position, reward)``.

    Rewards:
      * -1   when the move would leave the grid (the position is unchanged),
      * 10   when the move lands on ``goal``,
      * -0.1 for any other move.
    """
    dx, dy, dz = actions[action_idx]
    candidate = (pos[0] + dx, pos[1] + dy, pos[2] + dz)

    if is_valid(candidate):
        reward = 10 if candidate == goal else -0.1
        return candidate, reward

    # Out-of-bounds attempt: stay in place and pay the penalty
    return pos, -1

# Best-path bookkeeping: shortest goal-reaching trajectory found so far
best_path = []
shortest_length = float('inf')

for episode in range(iterations):
    state = start
    path = [state]
    total_reward = 0

    # Run until the goal is reached; the cap on the path length is a safety
    # net against episodes that would otherwise never terminate.
    while state != goal and len(path) <= GRID_SIZE**3:
        q_row = get_q(state)

        # Epsilon-greedy action selection
        explore = random.random() < epsilon
        if explore:
            action_idx = random.randint(0, len(actions) - 1)
        else:
            action_idx = np.argmax(q_row)

        next_state, reward = step(state, action_idx)
        total_reward += reward

        # Q-learning update: move the estimate toward
        # reward + gamma * (best value available from the next state)
        td_target = reward + gamma * np.max(get_q(next_state))
        q_row[action_idx] += alpha * (td_target - q_row[action_idx])

        state = next_state
        path.append(state)

    # Keep this trajectory if it reached the goal in fewer steps than any before
    reached_goal = state == goal
    if reached_goal and len(path) < shortest_length:
        best_path, shortest_length = path, len(path)

    # Exploration decay, bounded below by min_epsilon
    epsilon = max(min_epsilon, epsilon * epsilon_decay)

# --- Plotly rendering ---
# best_path stays empty when no episode ever reached the goal; unpacking
# zip(*[]) into three names would then raise ValueError. Plot an empty path in
# that case so the start and goal markers are still shown.
if best_path:
    x, y, z = zip(*best_path)
else:
    print("No episode reached the goal: only the start and goal markers are shown.")
    x, y, z = (), (), ()


fig = go.Figure()

# Path trace
fig.add_trace(go.Scatter3d(
    x=x, y=y, z=z,
    mode='lines+markers',
    line=dict(color='blue', width=5),
    marker=dict(size=4),
    name='Best path'
))

# Start (green)
fig.add_trace(go.Scatter3d(
    x=[start[0]], y=[start[1]], z=[start[2]],
    mode='markers',
    marker=dict(size=8, color='green'),
    name='Start'
))

# Goal (red)
fig.add_trace(go.Scatter3d(
    x=[goal[0]], y=[goal[1]], z=[goal[2]],
    mode='markers',
    marker=dict(size=8, color='red'),
    name='Goal'
))

fig.update_layout(
    title='Best Path in 3D Grid (Q-Learning)',
    scene=dict(
        xaxis=dict(nticks=5, range=[0, GRID_SIZE]),
        yaxis=dict(nticks=5, range=[0, GRID_SIZE]),
        zaxis=dict(nticks=5, range=[0, GRID_SIZE]),
    ),
    margin=dict(l=0, r=0, b=0, t=30)
)

fig.show()


In [None]:
import numpy as np
import plotly.graph_objects as go
import random

# Reproducibility: both the agent's exploration and the target's random walk
# draw from the stdlib `random` module, so seed it once here. Without a seed,
# every Restart & Run All produces different trajectories.
SEED = 42
random.seed(SEED)

# Hyperparameters
alpha = 0.2            # learning rate used in the Q-value update
gamma = 0.9            # discount factor applied to the next state's best Q-value
epsilon = 0.3          # initial probability of picking a random action
epsilon_decay = 0.995  # multiplicative decay applied to epsilon after each episode
min_epsilon = 0.01     # lower bound for epsilon
iterations = 3000      # number of training episodes

# Grid size: valid coordinates are 0 .. GRID_SIZE - 1 on each of the three axes
GRID_SIZE = 20
# Largest Manhattan distance between two cells of the grid (opposite corners);
# used to normalise the distance-based reward in agent_step
MAX_DISTANCE = 3 * (GRID_SIZE - 1)

# Initial cells of the agent and of the moving target
start_agent = (0, 0, 0)
start_target = (GRID_SIZE - 1, GRID_SIZE - 1, GRID_SIZE - 1)

# Action set: (dx, dy, dz)
actions = [
    (1, 0, 0), (-1, 0, 0),  # right / left
    (0, 1, 0), (0, -1, 0),  # forward / backward
    (0, 0, 1), (0, 0, -1)   # up / down
]

# Q-table: maps an (agent position + target position) tuple to an array
# holding one value per action. Rows are created lazily by get_q.
Q = {}

def distance_to_goal(pos, goal):
    """Return the Manhattan (L1) distance between ``pos`` and ``goal``.

    Only the first three coordinates of each point are used.
    """
    return sum(abs(pos[axis] - goal[axis]) for axis in range(3))

def get_q(state, goal):
    """Return the action-value array for the pair (agent ``state``, target ``goal``).

    The table key is the concatenation of both positions. A row of zeros (one
    entry per action) is inserted into ``Q`` on first access, and the returned
    array is the stored object itself, so writing into it updates the table.
    """
    key = tuple(state + goal)
    try:
        return Q[key]
    except KeyError:
        row = Q[key] = np.zeros(len(actions))
        return row

def is_valid(pos):
    """Return True when every coordinate of ``pos`` lies in ``[0, GRID_SIZE)``."""
    for coord in pos:
        if not (0 <= coord < GRID_SIZE):
            return False
    return True

def agent_step(pos, action_idx):
    """Move the agent by action ``action_idx`` and return ``(new_position, reward)``.

    ``goal`` is not a parameter: it is read from module scope, where the
    training loop keeps it bound to the target's current cell.

    Rewards:
      * -1 when the move would leave the grid (the position is unchanged),
      * 10 when the move lands on ``goal``,
      * otherwise -0.1 plus a bonus between 0 and 0.5 that grows as the
        Manhattan distance to ``goal`` shrinks.
    """
    dx, dy, dz = actions[action_idx]
    candidate = (pos[0] + dx, pos[1] + dy, pos[2] + dz)

    if not is_valid(candidate):
        return pos, -1

    if candidate == goal:
        return candidate, 10

    # Fraction of the maximum distance already covered (clamped at 0 below)
    closeness = (MAX_DISTANCE - distance_to_goal(candidate, goal)) / MAX_DISTANCE
    return candidate, -0.1 + max(0, closeness) * 0.5

def target_step(pos):
    """Move the target by one randomly drawn action.

    The action index is drawn with ``random.randint``; if the resulting cell is
    outside the grid the target stays where it is.
    """
    dx, dy, dz = actions[random.randint(0, len(actions) - 1)]
    candidate = (pos[0] + dx, pos[1] + dy, pos[2] + dz)
    return candidate if is_valid(candidate) else pos

# Best-path bookkeeping: shortest trajectory that ended on the target, together
# with the target's own trajectory during that episode
best_path = []
saved_target_path = []
shortest_length = float('inf')

for episode in range(iterations):
    state = start_agent
    goal = start_target  # module-level on purpose: agent_step reads `goal`
    path = [state]
    target_path = [goal]
    total_reward = 0

    while state != goal:
        # Epsilon-greedy action selection
        if random.random() < epsilon:
            action_idx = random.randint(0, len(actions) - 1)
        else:
            action_idx = np.argmax(get_q(state, goal))

        next_state, reward = agent_step(state, action_idx)

        # agent_step pays the +10 capture reward when the agent lands on the
        # target's *current* cell. The target used to move afterwards anyway,
        # so a rewarded capture usually did not end the episode and the update
        # below bootstrapped from a non-terminal state. Keep the target in
        # place on capture so the rewarded transition is the terminal one.
        # (If the target itself wanders onto the agent, the episode also ends,
        # with only the ordinary step reward.)
        if next_state == goal:
            next_goal = goal
        else:
            next_goal = target_step(goal)
        total_reward += reward

        # Q-learning update. Rows whose agent and target cells coincide are
        # never written (the loop exits first), so they stay at zero and a
        # terminal transition bootstraps from 0.
        old_value = get_q(state, goal)[action_idx]
        next_max = np.max(get_q(next_state, next_goal))
        get_q(state, goal)[action_idx] = old_value + alpha * (reward + gamma * next_max - old_value)

        state = next_state
        goal = next_goal
        path.append(state)
        target_path.append(goal)

        # Safety net against episodes that never terminate
        if len(path) > GRID_SIZE**3:
            break

    # Keep this episode if it ended on the target in fewer steps than any before
    if state == goal and len(path) < shortest_length:
        best_path = path
        saved_target_path = target_path
        shortest_length = len(path)

    # Exploration decay, bounded below by min_epsilon
    epsilon = max(min_epsilon, epsilon * epsilon_decay)

## Plot
# Separate position lists for the agent and the target.
# best_path stays empty when no episode ended with the agent on the target; the
# zip(*...) unpacking and the [0] / [-1] indexing below would then raise.
# Fall back to the two start cells so this figure (and any later cell reusing
# x_agent / x_target) still renders.
if not best_path:
    print("Aucun épisode n'a atteint la cible : seules les positions de départ sont affichées.")
agent_positions = best_path or [start_agent]
target_positions = saved_target_path or [start_target]

# Unpack the positions into per-axis coordinate tuples
x_agent, y_agent, z_agent = zip(*agent_positions)
x_target, y_target, z_target = zip(*target_positions)

fig = go.Figure()

# Agent trajectory
fig.add_trace(go.Scatter3d(
    x=x_agent, y=y_agent, z=z_agent,
    mode='lines+markers',
    name='Agent',
    line=dict(color='blue', width=4),
    marker=dict(size=3)
))

# Target trajectory
fig.add_trace(go.Scatter3d(
    x=x_target, y=y_target, z=z_target,
    mode='lines+markers',
    name='Cible (nourriture)',
    line=dict(color='orange', width=2, dash='dot'),
    marker=dict(size=3)
))

# Agent start and end points
fig.add_trace(go.Scatter3d(
    x=[x_agent[0]], y=[y_agent[0]], z=[z_agent[0]],
    mode='markers',
    name='Départ Agent',
    marker=dict(color='green', size=6, symbol='circle')
))
fig.add_trace(go.Scatter3d(
    x=[x_agent[-1]], y=[y_agent[-1]], z=[z_agent[-1]],
    mode='markers',
    name='Arrivée Agent',
    marker=dict(color='red', size=6, symbol='circle')
))

# Target start and end points
fig.add_trace(go.Scatter3d(
    x=[x_target[0]], y=[y_target[0]], z=[z_target[0]],
    mode='markers',
    name='Départ Cible',
    marker=dict(color='green', size=6, symbol='diamond')
))
fig.add_trace(go.Scatter3d(
    x=[x_target[-1]], y=[y_target[-1]], z=[z_target[-1]],
    mode='markers',
    name='Arrivée Cible',
    marker=dict(color='red', size=6, symbol='diamond')
))

fig.update_layout(
    title='Trajectoire Agent et Cible dans l\'environnement 3D',
    scene=dict(
        xaxis_title='X',
        yaxis_title='Y',
        zaxis_title='Z',
    ),
    legend=dict(x=0, y=1)
)

fig.show()



In [None]:
# Animated replay of the agent and target trajectories, one frame per step.
# Relies on x_agent .. z_target produced by the plotting cell above.

def _path_trace(xs, ys, zs, name, **line_style):
    """Build a lines+markers 3-D trace for one trajectory."""
    return go.Scatter3d(x=xs, y=ys, z=zs,
                        mode='lines+markers', name=name,
                        line=dict(**line_style),
                        marker=dict(size=3))


def _endpoint_marker(px, py, pz, name, color, symbol):
    """Build a single-point marker trace (start or end of a trajectory)."""
    return go.Scatter3d(x=[px], y=[py], z=[pz],
                        mode='markers', name=name,
                        marker=dict(color=color, size=6, symbol=symbol))


def _agent_trace(xs, ys, zs):
    """Agent trajectory trace with the agent's line style."""
    return _path_trace(xs, ys, zs, 'Agent', color='blue', width=4)


def _target_trace(xs, ys, zs):
    """Target trajectory trace with the target's line style."""
    return _path_trace(xs, ys, zs, 'Cible (nourriture)',
                       color='orange', width=2, dash='dot')


# One frame per step; frame i shows both trajectories up to and including step i
frames = [
    go.Frame(
        data=[
            _agent_trace(x_agent[:i+1], y_agent[:i+1], z_agent[:i+1]),
            _target_trace(x_target[:i+1], y_target[:i+1], z_target[:i+1]),
        ],
        name=str(i)
    )
    for i in range(len(x_agent))
]

# Base figure: the two animated traces start with the first position only,
# followed by four static start/end markers
base_traces = [
    _agent_trace([x_agent[0]], [y_agent[0]], [z_agent[0]]),
    _target_trace([x_target[0]], [y_target[0]], [z_target[0]]),
    # Agent start / end
    _endpoint_marker(x_agent[0], y_agent[0], z_agent[0], 'Départ Agent', 'green', 'circle'),
    _endpoint_marker(x_agent[-1], y_agent[-1], z_agent[-1], 'Arrivée Agent', 'red', 'circle'),
    # Target start / end
    _endpoint_marker(x_target[0], y_target[0], z_target[0], 'Départ Cible', 'green', 'diamond'),
    _endpoint_marker(x_target[-1], y_target[-1], z_target[-1], 'Arrivée Cible', 'red', 'diamond'),
]

# Play / Pause buttons
play_button = dict(
    label='Play',
    method='animate',
    args=[None, {"frame": {"duration": 100, "redraw": True},
                 "fromcurrent": True, "transition": {"duration": 0}}]
)
pause_button = dict(
    label='Pause',
    method='animate',
    args=[[None], {"frame": {"duration": 0, "redraw": False},
                   "mode": "immediate",
                   "transition": {"duration": 0}}]
)
button_menu = dict(
    type='buttons',
    showactive=False,
    y=1.15,
    x=1.05,
    xanchor='right',
    yanchor='top',
    buttons=[play_button, pause_button]
)

# Slider with one step per frame; each step jumps straight to the frame of the same name
slider_steps = [
    dict(method='animate',
         args=[[str(k)], {"frame": {"duration": 0, "redraw": True},
                          "mode": "immediate",
                          "transition": {"duration": 0}}],
         label=str(k))
    for k in range(len(x_agent))
]
step_slider = dict(
    steps=slider_steps,
    transition=dict(duration=0),
    x=0.1, y=0,
    currentvalue=dict(font=dict(size=12), prefix="Étape : ", visible=True, xanchor='right'),
    len=0.9
)

fig = go.Figure(
    data=base_traces,
    layout=go.Layout(
        height=900,
        title='Trajectoire Agent et Cible dans l\'environnement 3D',
        scene=dict(xaxis_title='X', yaxis_title='Y', zaxis_title='Z'),
        updatemenus=[button_menu],
        sliders=[step_slider]
    ),
    frames=frames
)

fig.show()
