## Agent and Basic Target


In [None]:
import numpy as np
import plotly.graph_objects as go
import random

# --- Environment ---
# Edge length of the cubic grid; valid coordinates are 0 .. GRID_SIZE - 1.
GRID_SIZE = 20

# The agent starts in one corner and has to reach the opposite corner.
start = (0,) * 3
goal = (GRID_SIZE - 1,) * 3

# Available moves as (dx, dy, dz) offsets: one unit along a single axis.
actions = [
    (1, 0, 0),   # +x (right)
    (-1, 0, 0),  # -x (left)
    (0, 1, 0),   # +y (forward)
    (0, -1, 0),  # -y (backward)
    (0, 0, 1),   # +z (up)
    (0, 0, -1),  # -z (down)
]

# --- Q-learning hyperparameters ---
alpha = 0.1            # learning rate used in the Q-value update
gamma = 0.9            # discount factor applied to the next state's best value
epsilon = 0.3          # initial probability of picking a random action
epsilon_decay = 0.995  # multiplicative decay applied to epsilon per episode
min_epsilon = 0.01     # lower bound for epsilon
iterations = 5000      # number of training episodes

# Q-table: maps a state to an array holding one value per action.
Q = {}

def get_q(state):
    """Return the action-value array stored for `state`.

    A state that is not yet in the global Q-table is registered with a
    zero-filled array (one entry per action). The stored array itself is
    returned, so writing into it updates the table.
    """
    try:
        return Q[state]
    except KeyError:
        Q[state] = np.zeros(len(actions))
        return Q[state]

def is_valid(pos):
    """Return True when every coordinate of `pos` satisfies 0 <= c < GRID_SIZE."""
    for coord in pos:
        if not 0 <= coord < GRID_SIZE:
            return False
    return True

def step(pos, action_idx):
    """Apply action `action_idx` to `pos` and return (new_position, reward).

    A move that would leave the grid keeps the agent in place and costs -1.
    Reaching the global `goal` yields 10; any other move costs -0.1.
    """
    move = actions[action_idx]
    candidate = (pos[0] + move[0], pos[1] + move[1], pos[2] + move[2])

    if not is_valid(candidate):
        # Out-of-bounds attempt: stay where we are and pay the penalty.
        return pos, -1

    reward = 10 if candidate == goal else -0.1
    return candidate, reward

# Shortest successful trajectory found so far, and its length.
best_path = []
shortest_length = float('inf')

for episode in range(iterations):
    state = start
    path = [state]
    total_reward = 0

    while state != goal:
        # Epsilon-greedy policy: exploit the best known action unless the
        # random draw falls below epsilon, in which case explore.
        if random.random() >= epsilon:
            action_idx = np.argmax(get_q(state))
        else:
            action_idx = random.randrange(len(actions))

        next_state, reward = step(state, action_idx)
        total_reward += reward

        # Temporal-difference update of Q(state, action).
        q_row = get_q(state)
        td_target = reward + gamma * np.max(get_q(next_state))
        q_row[action_idx] += alpha * (td_target - q_row[action_idx])

        state = next_state
        path.append(state)

        # Safety cap so that a single episode cannot loop forever.
        if len(path) > GRID_SIZE**3:
            break

    # Keep the episode only if it reached the goal with a shorter path.
    if state == goal and len(path) < shortest_length:
        best_path, shortest_length = path, len(path)

    # Decay the exploration rate, never going below min_epsilon.
    epsilon = max(min_epsilon, epsilon * epsilon_decay)

# --- Plotly rendering ---
# Split the best path into one coordinate sequence per axis.
x, y, z = zip(*best_path)

fig = go.Figure()

# Trajectory of the best path.
fig.add_trace(go.Scatter3d(
    x=x, y=y, z=z,
    name='Best path',
    mode='lines+markers',
    marker={'size': 4},
    line={'color': 'blue', 'width': 5},
))

# Start (green) and goal (red) markers.
for label, point, colour in (('Start', start, 'green'), ('Goal', goal, 'red')):
    fig.add_trace(go.Scatter3d(
        x=[point[0]], y=[point[1]], z=[point[2]],
        name=label,
        mode='markers',
        marker={'size': 8, 'color': colour},
    ))

fig.update_layout(
    title='Best Path in 3D Grid (Q-Learning)',
    # Identical tick count and range on the three axes.
    scene={f'{axis}axis': {'nticks': 5, 'range': [0, GRID_SIZE]} for axis in 'xyz'},
    margin={'l': 0, 'r': 0, 'b': 0, 't': 30},
)

fig.show()


## Graph

In [None]:
def display_graph(saved_agent_path, saved_target_path):
    """Show the agent and target trajectories in a static 3D Plotly figure.

    Args:
        saved_agent_path: Non-empty sequence of (x, y, z) agent positions.
        saved_target_path: Non-empty sequence of (x, y, z) target positions.

    Raises:
        ValueError: If either path is empty, e.g. because no successful
            episode was recorded during training.
    """
    if len(saved_agent_path) == 0 or len(saved_target_path) == 0:
        # Without this guard the tuple unpacking below fails with an
        # unhelpful "not enough values to unpack" message.
        raise ValueError(
            "display_graph received an empty path; nothing to plot "
            "(was any successful episode recorded?)"
        )

    x_agent, y_agent, z_agent = zip(*saved_agent_path)
    x_target, y_target, z_target = zip(*saved_target_path)

    fig = go.Figure()

    # Agent trajectory.
    fig.add_trace(go.Scatter3d(
        x=x_agent, y=y_agent, z=z_agent,
        mode='lines+markers',
        name='Agent',
        line=dict(color='blue', width=4),
        marker=dict(size=3)
    ))

    # Target trajectory.
    fig.add_trace(go.Scatter3d(
        x=x_target, y=y_target, z=z_target,
        mode='lines+markers',
        name='Cible (nourriture)',
        line=dict(color='orange', width=2, dash='dot'),
        marker=dict(size=3)
    ))

    # Start (green) / end (red) markers: circles for the agent, diamonds for
    # the target. The order matches the legend order of the original figure.
    endpoint_markers = (
        ('Départ Agent', x_agent[0], y_agent[0], z_agent[0], 'green', 'circle'),
        ('Arrivée Agent', x_agent[-1], y_agent[-1], z_agent[-1], 'red', 'circle'),
        ('Départ Cible', x_target[0], y_target[0], z_target[0], 'green', 'diamond'),
        ('Arrivée Cible', x_target[-1], y_target[-1], z_target[-1], 'red', 'diamond'),
    )
    for label, px, py, pz, colour, symbol in endpoint_markers:
        fig.add_trace(go.Scatter3d(
            x=[px], y=[py], z=[pz],
            mode='markers',
            name=label,
            marker=dict(color=colour, size=6, symbol=symbol)
        ))

    fig.update_layout(
        height=600,
        title='Trajectoire Agent et Cible dans l\'environnement 3D',
        scene=dict(
            xaxis_title='X',
            yaxis_title='Y',
            zaxis_title='Z',
        ),
        legend=dict(x=0, y=1)
    )

    fig.show()

In [None]:
def display_graph_2d(saved_agent_path, saved_target_path, grid_size, template='plotly_white'):
    """Show the agent and target trajectories in a static 2D Plotly figure.

    Args:
        saved_agent_path: Non-empty sequence of (x, y) agent positions.
        saved_target_path: Non-empty sequence of (x, y) target positions.
        grid_size: Spacing of the axis ticks; the axis ranges are snapped
            outwards to multiples of this value.
        template: Name of the Plotly layout template to apply. The original
            code passed `template` twice ('plotly_dark' and 'plotly_white'),
            which Python rejects as a repeated keyword argument.

    Raises:
        ValueError: If either path is empty, e.g. because no successful
            episode was recorded during training.
    """
    if len(saved_agent_path) == 0 or len(saved_target_path) == 0:
        raise ValueError(
            "display_graph_2d received an empty path; nothing to plot "
            "(was any successful episode recorded?)"
        )

    x_agent, y_agent = zip(*saved_agent_path)
    x_target, y_target = zip(*saved_target_path)

    # Axis bounds: round the extreme coordinates of both paths down / up to
    # the enclosing multiples of grid_size.
    all_x = x_agent + x_target
    all_y = y_agent + y_target
    min_x = grid_size * (min(all_x) // grid_size)
    max_x = grid_size * ((max(all_x) // grid_size) + 1)
    min_y = grid_size * (min(all_y) // grid_size)
    max_y = grid_size * ((max(all_y) // grid_size) + 1)

    fig = go.Figure()

    # Agent trajectory.
    fig.add_trace(go.Scatter(
        x=x_agent, y=y_agent,
        mode='lines+markers',
        name='Agent',
        line=dict(color='blue', width=3),
        marker=dict(size=6)
    ))

    # Target trajectory.
    fig.add_trace(go.Scatter(
        x=x_target, y=y_target,
        mode='lines+markers',
        name='Cible (nourriture)',
        line=dict(color='orange', width=2, dash='dot'),
        marker=dict(size=6)
    ))

    # Start (green) / end (red) markers: circles for the agent, diamonds for
    # the target.
    endpoint_markers = (
        ('Départ Agent', x_agent[0], y_agent[0], 'green', 'circle'),
        ('Arrivée Agent', x_agent[-1], y_agent[-1], 'red', 'circle'),
        ('Départ Cible', x_target[0], y_target[0], 'green', 'diamond'),
        ('Arrivée Cible', x_target[-1], y_target[-1], 'red', 'diamond'),
    )
    for label, px, py, colour, symbol in endpoint_markers:
        fig.add_trace(go.Scatter(
            x=[px], y=[py],
            mode='markers',
            name=label,
            marker=dict(color=colour, size=10, symbol=symbol)
        ))

    fig.update_layout(
        height=1600,
        template=template,
        title='Trajectoire Agent et Cible dans l\'environnement 2D',
        xaxis=dict(
            title='X',
            tickmode='linear',
            dtick=grid_size,
            range=[min_x, max_x],
            showgrid=True,
            gridwidth=1,
            gridcolor='lightgrey',
            scaleanchor='y'  # lock the x scale to the y axis (1:1 aspect ratio)
        ),
        yaxis=dict(
            title='Y',
            tickmode='linear',
            dtick=grid_size,
            range=[min_y, max_y],
            showgrid=True,
            gridwidth=1,
            gridcolor='lightgrey'
        ),
        legend=dict(x=0.02, y=0.98)
    )

    fig.show()


In [None]:
def display_animation(saved_agent_path, saved_target_path):
    """Show an animated 3D Plotly figure replaying the agent/target chase.

    One frame is built per agent step; frame ``i`` draws both trajectories
    up to and including step ``i``. The figure has Play/Pause buttons and a
    slider with one entry per frame.

    Args:
        saved_agent_path: Non-empty sequence of (x, y, z) agent positions.
        saved_target_path: Non-empty sequence of (x, y, z) target positions.

    Raises:
        ValueError: If either path is empty, e.g. because no successful
            episode was recorded during training.
    """
    if len(saved_agent_path) == 0 or len(saved_target_path) == 0:
        # Without this guard the tuple unpacking below fails with an
        # unhelpful "not enough values to unpack" message.
        raise ValueError(
            "display_animation received an empty path; nothing to animate "
            "(was any successful episode recorded?)"
        )

    x_agent, y_agent, z_agent = zip(*saved_agent_path)
    x_target, y_target, z_target = zip(*saved_target_path)
    n_steps = len(x_agent)

    # Each frame only redefines the two trajectory traces (the first two
    # traces of the figure).
    frames = [
        go.Frame(
            data=[
                go.Scatter3d(x=x_agent[:i+1], y=y_agent[:i+1], z=z_agent[:i+1],
                             mode='lines+markers', name='Agent',
                             line=dict(color='blue', width=4),
                             marker=dict(size=3)),
                go.Scatter3d(x=x_target[:i+1], y=y_target[:i+1], z=z_target[:i+1],
                             mode='lines+markers', name='Cible (nourriture)',
                             line=dict(color='orange', width=2, dash='dot'),
                             marker=dict(size=3))
            ],
            name=str(i)
        )
        for i in range(n_steps)
    ]

    # Start (green) / end (red) markers: circles for the agent, diamonds for
    # the target.
    endpoint_markers = (
        ('Départ Agent', x_agent[0], y_agent[0], z_agent[0], 'green', 'circle'),
        ('Arrivée Agent', x_agent[-1], y_agent[-1], z_agent[-1], 'red', 'circle'),
        ('Départ Cible', x_target[0], y_target[0], z_target[0], 'green', 'diamond'),
        ('Arrivée Cible', x_target[-1], y_target[-1], z_target[-1], 'red', 'diamond'),
    )

    # Base figure: trajectories reduced to their first position, followed by
    # the four endpoint markers.
    fig = go.Figure(
        data=[
            go.Scatter3d(x=[x_agent[0]], y=[y_agent[0]], z=[z_agent[0]],
                         mode='lines+markers', name='Agent',
                         line=dict(color='blue', width=4),
                         marker=dict(size=3)),
            go.Scatter3d(x=[x_target[0]], y=[y_target[0]], z=[z_target[0]],
                         mode='lines+markers', name='Cible (nourriture)',
                         line=dict(color='orange', width=2, dash='dot'),
                         marker=dict(size=3)),
        ] + [
            go.Scatter3d(x=[px], y=[py], z=[pz],
                         mode='markers', name=label,
                         marker=dict(color=colour, size=6, symbol=symbol))
            for label, px, py, pz, colour, symbol in endpoint_markers
        ],
        layout=go.Layout(
            height= 900,
            title='Trajectoire Agent et Cible dans l\'environnement 3D',
            scene=dict(xaxis_title='X', yaxis_title='Y', zaxis_title='Z'),
            updatemenus=[dict(
                type='buttons',
                showactive=False,
                y=1.15,
                x=1.05,
                xanchor='right',
                yanchor='top',
                buttons=[dict(label='Play',
                              method='animate',
                              args=[None, {"frame": {"duration": 100, "redraw": True},
                                           "fromcurrent": True, "transition": {"duration": 0}}]),
                         dict(label='Pause',
                              method='animate',
                              args=[[None], {"frame": {"duration": 0, "redraw": False},
                                             "mode": "immediate",
                                             "transition": {"duration": 0}}])]
            )],
            sliders=[dict(
                # One slider step per frame, addressed by the frame's name.
                steps=[dict(method='animate',
                            args=[[str(k)], {"frame": {"duration": 0, "redraw": True},
                                             "mode": "immediate",
                                             "transition": {"duration": 0}}],
                            label=str(k)) for k in range(n_steps)],
                transition=dict(duration=0),
                x=0.1, y=0, currentvalue=dict(font=dict(size=12), prefix="Étape : ", visible=True, xanchor='right'),
                len=0.9
            )]
        ),
        frames=frames
    )

    fig.show()


In [None]:
def display_animation_2d(saved_agent_path, saved_target_path):
    """Show an animated 2D Plotly figure replaying the agent/target chase.

    One frame is built per agent step; frame ``i`` draws both trajectories
    up to and including step ``i``. The figure has Play/Pause buttons and a
    slider with one entry per frame.

    Args:
        saved_agent_path: Non-empty sequence of (x, y) agent positions.
        saved_target_path: Non-empty sequence of (x, y) target positions.

    Raises:
        ValueError: If either path is empty, e.g. because no successful
            episode was recorded during training.
    """
    if len(saved_agent_path) == 0 or len(saved_target_path) == 0:
        # Without this guard the tuple unpacking below fails with an
        # unhelpful "not enough values to unpack" message.
        raise ValueError(
            "display_animation_2d received an empty path; nothing to animate "
            "(was any successful episode recorded?)"
        )

    x_agent, y_agent = zip(*saved_agent_path)
    x_target, y_target = zip(*saved_target_path)
    n_steps = len(x_agent)

    # Each frame only redefines the two trajectory traces (the first two
    # traces of the figure).
    frames = [
        go.Frame(
            data=[
                go.Scatter(x=x_agent[:i+1], y=y_agent[:i+1],
                           mode='lines+markers', name='Agent',
                           line=dict(color='blue', width=4),
                           marker=dict(size=6)),
                go.Scatter(x=x_target[:i+1], y=y_target[:i+1],
                           mode='lines+markers', name='Cible (nourriture)',
                           line=dict(color='orange', width=2, dash='dot'),
                           marker=dict(size=6))
            ],
            name=str(i)
        )
        for i in range(n_steps)
    ]

    # Start (green) / end (red) markers: circles for the agent, diamonds for
    # the target.
    endpoint_markers = (
        ('Départ Agent', x_agent[0], y_agent[0], 'green', 'circle'),
        ('Arrivée Agent', x_agent[-1], y_agent[-1], 'red', 'circle'),
        ('Départ Cible', x_target[0], y_target[0], 'green', 'diamond'),
        ('Arrivée Cible', x_target[-1], y_target[-1], 'red', 'diamond'),
    )

    # Base figure: trajectories reduced to their first position, followed by
    # the four endpoint markers.
    fig = go.Figure(
        data=[
            go.Scatter(x=[x_agent[0]], y=[y_agent[0]],
                       mode='lines+markers', name='Agent',
                       line=dict(color='blue', width=4),
                       marker=dict(size=6)),
            go.Scatter(x=[x_target[0]], y=[y_target[0]],
                       mode='lines+markers', name='Cible (nourriture)',
                       line=dict(color='orange', width=2, dash='dot'),
                       marker=dict(size=6)),
        ] + [
            go.Scatter(x=[px], y=[py],
                       mode='markers', name=label,
                       marker=dict(color=colour, size=10, symbol=symbol))
            for label, px, py, colour, symbol in endpoint_markers
        ],
        layout=go.Layout(
            height=1200,
            title='Trajectoire Agent et Cible dans l\'environnement 2D',
            xaxis_title='X',
            yaxis_title='Y',
            xaxis=dict(
                tickmode='linear',
                showgrid=True,  # draw the grid
                gridwidth=1,  # grid line width
                gridcolor='LightGrey',  # grid line colour
            ),
            yaxis=dict(
                tickmode='linear',
                showgrid=True,  # draw the grid
                gridwidth=1,  # grid line width
                gridcolor='LightGrey',  # grid line colour
            ),
            updatemenus=[dict(
                type='buttons',
                showactive=False,
                y=1.15,
                x=1.05,
                xanchor='right',
                yanchor='top',
                buttons=[dict(label='Play',
                              method='animate',
                              args=[None, {"frame": {"duration": 100, "redraw": True},
                                           "fromcurrent": True, "transition": {"duration": 0}}]),
                         dict(label='Pause',
                              method='animate',
                              args=[[None], {"frame": {"duration": 0, "redraw": False},
                                             "mode": "immediate",
                                             "transition": {"duration": 0}}])],
            )],
            sliders=[dict(
                # One slider step per frame, addressed by the frame's name.
                steps=[dict(method='animate',
                            args=[[str(k)], {"frame": {"duration": 0, "redraw": True},
                                             "mode": "immediate",
                                             "transition": {"duration": 0}}],
                            label=str(k)) for k in range(n_steps)],
                transition=dict(duration=0),
                x=0.1, y=0, currentvalue=dict(font=dict(size=12), prefix="Étape : ", visible=True, xanchor='right'),
                len=0.9
            )]
        ),
        frames=frames
    )

    fig.show()


## Agent and Moving Target

In [None]:
import numpy as np
import plotly.graph_objects as go
import random

# Q-learning hyperparameters
alpha = 0.2            # learning rate used in the Q-value update
gamma = 0.9            # discount factor applied to the next state's best value
epsilon = 0.3          # initial probability of picking a random action
epsilon_decay = 0.995  # multiplicative decay applied to epsilon per episode
min_epsilon = 0.01     # lower bound for epsilon
iterations = 3000      # number of training episodes

# Grid size (edge length of the cube) and the largest Manhattan distance
# between two cells of that grid.
GRID_SIZE = 20
MAX_DISTANCE = 3 * (GRID_SIZE - 1)

# Start positions of the agent and of the target: centred in x/z, on opposite
# y faces. Integer division keeps the coordinates integral; with `/` they
# were floats (9.0 instead of 9).
start_agent = (GRID_SIZE // 2 - 1, 0, GRID_SIZE // 2 - 1)
start_target = (GRID_SIZE // 2 - 1, GRID_SIZE - 1, GRID_SIZE // 2 - 1)

# Action set: (dx, dy, dz)
actions = [
    (1, 0, 0), (-1, 0, 0),  # right / left
    (0, 1, 0), (0, -1, 0),  # forward / backward
    (0, 0, 1), (0, 0, -1)   # up / down
]

# Q-table: maps a state key to an array holding one value per action.
Q = {}

def distance_to_goal(pos, goal):
    """Return the Manhattan distance between two 3D positions."""
    return sum(abs(pos[axis] - goal[axis]) for axis in range(3))

def get_q(state, goal):
    """Return the action-value array for the pair (agent state, goal).

    The Q-table key is the concatenation of `state` and `goal`. A key that is
    not yet in the global Q-table is registered with a zero-filled array (one
    entry per action). The stored array itself is returned, so writing into
    it updates the table.
    """
    key = tuple(state + goal)
    row = Q.get(key)
    if row is None:
        row = Q[key] = np.zeros(len(actions))
    return row

def is_valid(pos):
    """Return True when all coordinates of `pos` are within [0, GRID_SIZE)."""
    inside = (0 <= axis_value < GRID_SIZE for axis_value in pos)
    return all(inside)

def agent_step(pos, action_idx):
    """Move the agent with action `action_idx`; return (new_position, reward).

    The target position is read from the module-level `goal`.

    Rewards:
        -1   the move would leave the grid (the agent stays in place);
        10   the move lands on the target;
        otherwise -0.1 plus a bonus of up to 0.5 that grows as the Manhattan
        distance to the target shrinks.
    """
    move = actions[action_idx]
    new_pos = (pos[0] + move[0], pos[1] + move[1], pos[2] + move[2])

    if not is_valid(new_pos):
        return pos, -1
    if new_pos == goal:
        return new_pos, 10

    # Closeness in [0, 1]: 1 next to the target, 0 at MAX_DISTANCE.
    remaining = distance_to_goal(new_pos, goal)
    closeness = max(0, (MAX_DISTANCE - remaining) / MAX_DISTANCE)
    return new_pos, -0.1 + closeness * 0.5

def target_step(pos):
    """Move the target one cell in a uniformly random direction.

    If the chosen move would leave the grid, the target stays at `pos`.
    """
    move = actions[random.randrange(len(actions))]
    moved = (pos[0] + move[0], pos[1] + move[1], pos[2] + move[2])
    return moved if is_valid(moved) else pos

# Shortest successful chase found so far (agent and target trajectories).
saved_agent_path = []
saved_target_path = []
shortest_length = float('inf')

for episode in range(iterations):
    state = start_agent
    goal = start_target
    path = [state]
    target_path = [goal]
    total_reward = 0

    while state != goal:
        # Epsilon-greedy action selection.
        if random.random() < epsilon:
            action_idx = random.randint(0, len(actions) - 1)
        else:
            action_idx = np.argmax(get_q(state, goal))

        next_state, reward = agent_step(state, action_idx)

        # Detect the capture BEFORE the target moves. Previously the target
        # always moved afterwards, so an agent that had just stepped onto the
        # target (and received the reward of 10) saw it step away and the
        # episode carried on as if nothing had happened.
        if next_state == goal:
            next_goal = goal
        else:
            next_goal = target_step(goal)
        total_reward += reward

        # Q-learning update on the joint (agent position, target position)
        # state. On capture the next key is never updated, so its values stay
        # at zero and the update target reduces to the reward.
        old_value = get_q(state, goal)[action_idx]
        next_max = np.max(get_q(next_state, next_goal))
        get_q(state, goal)[action_idx] = old_value + alpha * (reward + gamma * next_max - old_value)

        state = next_state
        goal = next_goal
        path.append(state)
        target_path.append(goal)

        # Safety cap so that a single episode cannot loop forever.
        if len(path) > GRID_SIZE**3:
            break

    # Keep the episode only if the target was caught with a shorter path.
    if state == goal and len(path) < shortest_length:
        saved_agent_path = path
        saved_target_path = target_path
        shortest_length = len(path)

    # Decay the exploration rate, never going below min_epsilon.
    epsilon = max(min_epsilon, epsilon * epsilon_decay)


display_graph(saved_agent_path, saved_target_path)

In [None]:
# Replay the saved chase as an animation.
display_animation(
    saved_agent_path=saved_agent_path,
    saved_target_path=saved_target_path,
)

## Distance based state

In [None]:
import numpy as np
import random

# --- Environment ---
# Edge length of the cubic grid and the largest Manhattan distance between
# two of its cells.
GRID_SIZE = 20
MAX_DISTANCE = 3 * (GRID_SIZE - 1)

# Agent and target start in opposite corners of the grid.
start_agent = (0,) * 3
start_target = (GRID_SIZE - 1,) * 3

# Available moves as (dx, dy, dz) offsets: one unit along a single axis.
actions = [
    (1, 0, 0),   # +x (right)
    (-1, 0, 0),  # -x (left)
    (0, 1, 0),   # +y (forward)
    (0, -1, 0),  # -y (backward)
    (0, 0, 1),   # +z (up)
    (0, 0, -1),  # -z (down)
]

# --- Q-learning hyperparameters ---
alpha = 0.2            # learning rate used in the Q-value update
gamma = 0.9            # discount factor applied to the next state's best value
epsilon = 0.3          # initial probability of picking a random action
epsilon_decay = 0.995  # multiplicative decay applied to epsilon per episode
min_epsilon = 0.01     # lower bound for epsilon
iterations = 3000      # number of training episodes

# Q-table: maps a state to an array holding one value per action.
Q = {}

def distances_to_goal(pos, goal):
    """Return the signed per-axis offsets (dx, dy, dz) from `goal` to `pos`."""
    return tuple(pos[axis] - goal[axis] for axis in range(3))

def distance_to_goal(pos, goal):
    """Return the Manhattan distance between `pos` and `goal`."""
    offsets = distances_to_goal(pos, goal)
    return abs(offsets[0]) + abs(offsets[1]) + abs(offsets[2])

def get_q(state):
    """Return the action-value array stored for `state`.

    Unknown states are added to the global Q-table with a zero-filled array
    (one entry per action) before being returned. The stored array itself is
    returned, so writing into it updates the table.
    """
    if state in Q:
        return Q[state]
    Q[state] = np.zeros(len(actions))
    return Q[state]

def is_valid(pos):
    """Return True unless some coordinate of `pos` falls outside [0, GRID_SIZE)."""
    return not any(not (0 <= coord < GRID_SIZE) for coord in pos)

def agent_step(pos, action_idx):
    """Move the agent; return (new_position, distance_to_target, reward).

    The target position is read from the module-level `goal`. The second
    element is the Manhattan distance between the returned position and the
    target.

    Rewards:
        -1   the move would leave the grid (the agent stays in place);
        10   the move lands on the target;
        otherwise -0.1 plus a bonus of up to 0.5 that grows as the distance
        to the target shrinks.
    """
    move = actions[action_idx]
    new_pos = (pos[0] + move[0], pos[1] + move[1], pos[2] + move[2])

    if not is_valid(new_pos):
        return pos, distance_to_goal(pos, goal), -1

    remaining = distance_to_goal(new_pos, goal)
    if new_pos == goal:
        return new_pos, remaining, 10

    # Closeness in [0, 1]: 1 next to the target, 0 at MAX_DISTANCE.
    closeness = max(0, (MAX_DISTANCE - remaining) / MAX_DISTANCE)
    return new_pos, remaining, -0.1 + closeness * 0.5

def target_step(pos):
    """Move the target one cell in a uniformly random direction.

    If the chosen move would leave the grid, the target stays at `pos`.
    """
    shift = random.choice(actions)
    candidate = (pos[0] + shift[0], pos[1] + shift[1], pos[2] + shift[2])
    if is_valid(candidate):
        return candidate
    return pos

# Shortest successful chase found so far (agent and target trajectories).
saved_agent_path = []
saved_target_path = []
shortest_length = float('inf')

for episode in range(iterations):
    agent = start_agent
    goal = start_target

    # NOTE(review): the first state is the (dx, dy, dz) offset tuple, whereas
    # agent_step returns the scalar Manhattan distance for every later state.
    # Confirm which representation is intended and make both consistent.
    state = distances_to_goal(agent, goal)
    total_reward = 0

    path = [agent]
    target_path = [goal]

    while agent != goal:
        # Epsilon-greedy action selection.
        if random.random() < epsilon:
            action_idx = random.randint(0, len(actions) - 1)
        else:
            action_idx = np.argmax(get_q(state))

        next_position, next_state, reward = agent_step(agent, action_idx)

        # Detect the capture BEFORE the target moves. Previously the target
        # always moved afterwards, so an agent that had just stepped onto the
        # target (and received the reward of 10) saw it step away and the
        # episode carried on as if nothing had happened.
        if next_position == goal:
            next_goal = goal
        else:
            next_goal = target_step(goal)
        total_reward += reward

        # Q-learning update.
        old_value = get_q(state)[action_idx]
        next_max = np.max(get_q(next_state))
        get_q(state)[action_idx] = old_value + alpha * (reward + gamma * next_max - old_value)

        agent = next_position
        goal = next_goal
        state = next_state
        path.append(agent)
        target_path.append(goal)

        # Safety cap so that a single episode cannot loop forever.
        if len(path) > GRID_SIZE**3:
            break

    # Keep the episode only if the target was caught with a shorter path.
    if agent == goal and len(path) < shortest_length:
        saved_agent_path = path
        saved_target_path = target_path
        shortest_length = len(path)

    # Decay the exploration rate, never going below min_epsilon.
    epsilon = max(min_epsilon, epsilon * epsilon_decay)

## Plot

# Show the best recorded chase.
display_graph(saved_agent_path, saved_target_path)


In [None]:
# Replay the saved chase as an animation.
display_animation(
    saved_agent_path=saved_agent_path,
    saved_target_path=saved_target_path,
)

## Penalize the agent if it does not reduce the distance

In [None]:
import numpy as np
import random

# Q-learning hyperparameters
alpha = 0.2            # learning rate used in the Q-value update
gamma = 0.9            # discount factor applied to the next state's best value
epsilon = 0.3          # initial probability of picking a random action
epsilon_decay = 0.995  # multiplicative decay applied to epsilon per episode
min_epsilon = 0.01     # lower bound for epsilon
iterations = 3000      # number of training episodes

# Grid: edge length of the cube and the largest Manhattan distance between
# two of its cells.
GRID_SIZE = 20
MAX_DISTANCE = 3 * (GRID_SIZE - 1)

# Agent and target start in opposite corners of the grid.
last = GRID_SIZE - 1
start_agent = (0, 0, 0)
start_target = (last, last, last)
del last  # helper only; keep the module namespace unchanged

# Moves as (dx, dy, dz): right, left, forward, backward, up, down.
actions = [
    (1, 0, 0),
    (-1, 0, 0),
    (0, 1, 0),
    (0, -1, 0),
    (0, 0, 1),
    (0, 0, -1),
]

# Q-table: maps a state to an array holding one value per action.
Q = {}

def distances_to_goal(pos, goal):
    """Return the signed offsets (dx, dy, dz) of `pos` relative to `goal`."""
    px, py, pz = pos[0], pos[1], pos[2]
    gx, gy, gz = goal[0], goal[1], goal[2]
    return px - gx, py - gy, pz - gz

def distance_to_goal(pos, goal):
    """Return the Manhattan distance between `pos` and `goal`."""
    off_x, off_y, off_z = distances_to_goal(pos, goal)
    total = abs(off_x)
    total += abs(off_y)
    total += abs(off_z)
    return total

def get_q(state):
    """Return the action-value array stored for `state`.

    A state seen for the first time is inserted into the global Q-table with
    a zero-filled array (one entry per action). The stored array itself is
    returned, so writing into it updates the table.
    """
    values = Q.get(state)
    if values is None:
        values = Q[state] = np.zeros(len(actions))
    return values

def is_valid(pos):
    """Return True when every coordinate of `pos` satisfies 0 <= c < GRID_SIZE."""
    for value in pos:
        if not (0 <= value < GRID_SIZE):
            return False
    return True

def agent_step(agent, action_idx, goal, path):
    """Move the agent; return (new_position, distance_to_target, reward).

    Args:
        agent: Current agent position (x, y, z).
        action_idx: Index into the global `actions` list.
        goal: Current target position.
        path: Positions visited so far in this episode; its last 10 entries
            are used to penalise revisits.

    Rewards:
        -1   the move would leave the grid (the agent stays in place);
        10   the move lands on the target;
        -0.2 * (k + 1) when the new cell is among the last 10 visited
             positions, k being its index in that window (0 = oldest entry);
        otherwise -0.1 plus a bonus of up to 0.5 that grows as the distance
        to the target shrinks.
    """
    move = actions[action_idx]
    new_agent_pos = (agent[0] + move[0], agent[1] + move[1], agent[2] + move[2])

    if not is_valid(new_agent_pos):
        return agent, distance_to_goal(agent, goal), -1

    remaining = distance_to_goal(new_agent_pos, goal)
    if new_agent_pos == goal:
        return new_agent_pos, remaining, 10

    # Revisiting a recently visited cell is penalised; entries nearer the
    # end of the window (more recent visits) cost more.
    recent = path[-10:]
    if new_agent_pos in recent:
        return new_agent_pos, remaining, -0.2 * (recent.index(new_agent_pos) + 1)

    closeness = max(0, (MAX_DISTANCE - remaining) / MAX_DISTANCE)
    return new_agent_pos, remaining, -0.1 + closeness * 0.5

def target_step(pos):
    """Move the target one cell in a uniformly random direction.

    If the chosen move would leave the grid, the target stays at `pos`.
    """
    step_x, step_y, step_z = actions[random.randrange(len(actions))]
    candidate = (pos[0] + step_x, pos[1] + step_y, pos[2] + step_z)
    return candidate if is_valid(candidate) else pos

# Shortest successful chase found so far (agent and target trajectories).
saved_agent_path = []
saved_target_path = []
shortest_length = float('inf')

for episode in range(iterations):
    agent = start_agent
    goal = start_target

    # NOTE(review): the first state is the (dx, dy, dz) offset tuple, whereas
    # agent_step returns the scalar Manhattan distance for every later state.
    # Confirm which representation is intended and make both consistent.
    state = distances_to_goal(agent, goal)
    total_reward = 0

    path = [agent]
    target_path = [goal]

    while agent != goal:
        # Epsilon-greedy action selection.
        if random.random() < epsilon:
            action_idx = random.randint(0, len(actions) - 1)
        else:
            action_idx = np.argmax(get_q(state))

        next_position, next_state, reward = agent_step(agent, action_idx, goal, path)

        # Detect the capture BEFORE the target moves. Previously the target
        # always moved afterwards, so an agent that had just stepped onto the
        # target (and received the reward of 10) saw it step away and the
        # episode carried on as if nothing had happened.
        if next_position == goal:
            next_goal = goal
        else:
            next_goal = target_step(goal)
        total_reward += reward

        # Q-learning update.
        old_value = get_q(state)[action_idx]
        next_max = np.max(get_q(next_state))
        get_q(state)[action_idx] = old_value + alpha * (reward + gamma * next_max - old_value)

        agent = next_position
        goal = next_goal
        state = next_state
        path.append(agent)
        target_path.append(goal)

        # Safety cap so that a single episode cannot loop forever.
        if len(path) > GRID_SIZE**3:
            break

    # Keep the episode only if the target was caught with a shorter path.
    if agent == goal and len(path) < shortest_length:
        saved_agent_path = path
        saved_target_path = target_path
        shortest_length = len(path)

    # Decay the exploration rate, never going below min_epsilon.
    epsilon = max(min_epsilon, epsilon * epsilon_decay)


## Plot
display_graph(saved_agent_path, saved_target_path)


In [None]:
# Replay the saved chase as an animation.
display_animation(
    saved_agent_path=saved_agent_path,
    saved_target_path=saved_target_path,
)

## Chemotaxis

In [None]:
import numpy as np
import random

# --- Environment ---
# Edge length of the cubic grid and the largest Manhattan distance between
# two of its cells.
GRID_SIZE = 20
MAX_DISTANCE = 3 * (GRID_SIZE - 1)

# Agent and target start in opposite corners of the grid.
start_agent = (0,) * 3
start_target = (GRID_SIZE - 1,) * 3

# Available moves as (dx, dy, dz) offsets: one unit along a single axis.
actions = [
    (1, 0, 0),   # +x (right)
    (-1, 0, 0),  # -x (left)
    (0, 1, 0),   # +y (forward)
    (0, -1, 0),  # -y (backward)
    (0, 0, 1),   # +z (up)
    (0, 0, -1),  # -z (down)
]

# --- Q-learning hyperparameters ---
alpha = 0.2           # learning rate used in the Q-value update
gamma = 0.9           # discount factor applied to the next state's best value
epsilon = 0.3         # initial probability of picking a random action
epsilon_decay = 0.99  # multiplicative decay applied to epsilon per episode
min_epsilon = 0.01    # lower bound for epsilon
iterations = 3000     # number of training episodes

# Q-table: maps a state to an array holding one value per action.
Q = {}

def distances_to_goal(pos, goal):
    """Return the signed per-axis offsets (dx, dy, dz) from `goal` to `pos`."""
    return (
        pos[0] - goal[0],
        pos[1] - goal[1],
        pos[2] - goal[2],
    )

def distance_to_goal(pos, goal):
    """Return the Manhattan distance between `pos` and `goal`."""
    offsets = distances_to_goal(pos, goal)
    return abs(offsets[0]) + abs(offsets[1]) + abs(offsets[2])

def get_concentration(agent, goal):
    """Return the concentration sensed at `agent` for a source at `goal`.

    The value falls linearly with the Manhattan distance: 1.0 on the source,
    clamped at 0.0 from MAX_DISTANCE onwards.
    """
    falloff = distance_to_goal(agent, goal) / MAX_DISTANCE
    return max(0.0, 1.0 - falloff)

def bucket(value, step=0.1):
    """Discretise `value` down to a multiple of `step`, rounded to 2 decimals.

    Used to turn continuous readings into hashable Q-table keys.
    """
    whole_steps = np.floor(value / step)
    return round(whole_steps * step, 2)

def get_q(state):
    """Return the action-value array stored for `state`.

    A state that is not yet in the global Q-table is registered with a
    zero-filled array (one entry per action). The stored array itself is
    returned, so writing into it updates the table.
    """
    try:
        row = Q[state]
    except KeyError:
        row = np.zeros(len(actions))
        Q[state] = row
    return row

def is_valid(pos):
    """Return True when all coordinates of `pos` are within [0, GRID_SIZE)."""
    return all(map(lambda coord: 0 <= coord < GRID_SIZE, pos))

def agent_step(agent, action_idx, goal, prev_concentration):
    """Move the agent and measure the concentration change.

    Args:
        agent: Current agent position (x, y, z).
        action_idx: Index into the global `actions` list.
        goal: Current target (source) position.
        prev_concentration: Reference concentration the new reading is
            compared against.

    Returns:
        (new_position, new_concentration, gradient, reward) where
        - a move leaving the grid keeps the agent in place and returns
          (agent, prev_concentration, 0.0, -1);
        - landing on the target returns (new_position, 1.0, 1.0, 10);
        - otherwise gradient = new_concentration - prev_concentration and
          reward = gradient * 5.0 - 0.05.
    """
    move = actions[action_idx]
    new_agent_pos = (agent[0] + move[0], agent[1] + move[1], agent[2] + move[2])

    if not is_valid(new_agent_pos):
        return agent, prev_concentration, 0.0, -1
    if new_agent_pos == goal:
        return new_agent_pos, 1.0, 1.0, 10

    new_concentration = get_concentration(new_agent_pos, goal)
    gradient = new_concentration - prev_concentration
    # Reward climbing the gradient, minus a small constant cost per step.
    return new_agent_pos, new_concentration, gradient, gradient * 5.0 - 0.05

def target_step(pos):
    """Move the target one cell in a uniformly random direction.

    If the chosen move would leave the grid, the target stays at `pos`.
    """
    move = actions[random.randrange(len(actions))]
    moved = (pos[0] + move[0], pos[1] + move[1], pos[2] + move[2])
    return moved if is_valid(moved) else pos

# Shortest successful chase found so far (agent and target trajectories).
saved_agent_path = []
saved_target_path = []
shortest_length = float('inf')

for episode in range(iterations):
    agent = start_agent
    goal = start_target

    current_concentration = get_concentration(agent, goal)
    previous_concentration = current_concentration  # no earlier reading yet
    # State = (bucketed concentration, bucketed gradient of the last move).
    state = (bucket(current_concentration), bucket(0.0))
    total_reward = 0

    path = [agent]
    target_path = [goal]

    while agent != goal:
        # Epsilon-greedy action selection.
        if random.random() < epsilon:
            action_idx = random.randint(0, len(actions) - 1)
        else:
            action_idx = np.argmax(get_q(state))

        # The gradient must be measured against the latest reading, i.e. the
        # concentration at the agent's current cell. Passing
        # previous_concentration lagged one extra step behind: the gradient
        # then spanned two moves, and a blocked move rolled the reading back
        # to an older value.
        next_position, new_concentration, gradient, reward = agent_step(
            agent, action_idx, goal, current_concentration)
        next_state = (bucket(new_concentration), bucket(gradient))
        total_reward += reward

        # Detect the capture BEFORE the target moves. Previously the target
        # always moved afterwards, so an agent that had just stepped onto the
        # target (and received the reward of 10) saw it step away and the
        # episode carried on as if nothing had happened.
        if next_position != goal:
            goal = target_step(goal)
        agent = next_position

        # Q-learning update.
        old_value = get_q(state)[action_idx]
        next_max = np.max(get_q(next_state))
        get_q(state)[action_idx] = old_value + alpha * (reward + gamma * next_max - old_value)

        state = next_state
        previous_concentration = current_concentration
        current_concentration = new_concentration

        path.append(agent)
        target_path.append(goal)

        # Safety cap so that a single episode cannot loop forever.
        if len(path) > GRID_SIZE**3:
            break


    # Keep the episode only if the target was caught with a shorter path.
    if agent == goal and len(path) < shortest_length:
        saved_agent_path = path
        saved_target_path = target_path
        shortest_length = len(path)

    # Decay the exploration rate, never going below min_epsilon.
    epsilon = max(min_epsilon, epsilon * epsilon_decay)


## Plot
display_graph(saved_agent_path, saved_target_path)


In [None]:
# Replay the saved chase as an animation.
display_animation(
    saved_agent_path=saved_agent_path,
    saved_target_path=saved_target_path,
)

## 2D Chemotaxis

In [None]:
import numpy as np
import random
import plotly.graph_objects as go

# --- Environment ---
# Edge length of the square grid and the largest Manhattan distance between
# two of its cells.
GRID_SIZE = 60
MAX_DISTANCE = 2 * (GRID_SIZE - 1)

# Start positions of the agent and of the target.
start_agent = (10, 10)
start_target = (50, 50)

# Available moves as (dx, dy) offsets: one unit along a single axis.
actions = [
    (1, 0),   # +x (right)
    (-1, 0),  # -x (left)
    (0, 1),   # +y (forward)
    (0, -1),  # -y (backward)
]

# --- Q-learning hyperparameters ---
alpha = 0.2           # learning rate used in the Q-value update
gamma = 0.9           # discount factor applied to the next state's best value
epsilon = 0.3         # initial probability of picking a random action
epsilon_decay = 0.99  # multiplicative decay applied to epsilon per episode
min_epsilon = 0.01    # lower bound for epsilon
iterations = 500      # number of training episodes

# Q-table: maps a state to an array holding one value per action.
Q = {}

def distances_to_goal(pos, goal):
    """Return the signed per-axis offsets (dx, dy) from `goal` to `pos`."""
    return tuple(pos[axis] - goal[axis] for axis in range(2))

def distance_to_goal(pos, goal):
    """Return the Manhattan distance between `pos` and `goal`."""
    offsets = distances_to_goal(pos, goal)
    return abs(offsets[0]) + abs(offsets[1])

def get_concentration(agent, goal):
    """Return the concentration sensed at `agent` for a source at `goal`.

    The value falls linearly with the Manhattan distance: 1.0 on the source,
    clamped at 0.0 from MAX_DISTANCE onwards.
    """
    remaining = distance_to_goal(agent, goal)
    level = 1.0 - remaining / MAX_DISTANCE
    return max(0.0, level)

def bucket(value, step=0.1):
    """Discretise `value` down to a multiple of `step`, rounded to 2 decimals.

    Used to turn continuous readings into hashable Q-table keys.
    """
    floored = np.floor(value / step) * step
    return round(floored, 2)

def get_q(state):
    """Return the action-value array stored for `state`.

    Unknown states are added to the global Q-table with a zero-filled array
    (one entry per action) before being returned. The stored array itself is
    returned, so writing into it updates the table.
    """
    if state in Q:
        return Q[state]
    fresh = np.zeros(len(actions))
    Q[state] = fresh
    return fresh

def is_valid(pos):
    """Return True when every coordinate of `pos` satisfies 0 <= c < GRID_SIZE."""
    for coord in pos:
        if not 0 <= coord < GRID_SIZE:
            return False
    return True

def agent_step(agent, action_idx, goal, prev_concentration):
    """Move the agent on the 2D grid and measure the concentration change.

    Args:
        agent: Current agent position (x, y).
        action_idx: Index into the global `actions` list.
        goal: Current target (source) position.
        prev_concentration: Reference concentration the new reading is
            compared against.

    Returns:
        (new_position, new_concentration, gradient, reward) where
        - a move leaving the grid keeps the agent in place and returns
          (agent, prev_concentration, 0.0, -1);
        - landing on the target returns (new_position, 1.0, 1.0, 10);
        - otherwise gradient = new_concentration - prev_concentration and
          reward = gradient * 5.0 - 0.05.
    """
    move = actions[action_idx]
    new_agent_pos = (agent[0] + move[0], agent[1] + move[1])

    if not is_valid(new_agent_pos):
        return agent, prev_concentration, 0.0, -1
    if new_agent_pos == goal:
        return new_agent_pos, 1.0, 1.0, 10

    new_concentration = get_concentration(new_agent_pos, goal)
    gradient = new_concentration - prev_concentration
    # Reward climbing the gradient, minus a small constant cost per step.
    return new_agent_pos, new_concentration, gradient, gradient * 5.0 - 0.05

def target_step(pos):
    """Move the target one cell in a uniformly random direction.

    If the chosen move would leave the grid, the target stays at `pos`.
    """
    move = actions[random.randrange(len(actions))]
    moved = (pos[0] + move[0], pos[1] + move[1])
    return moved if is_valid(moved) else pos

# Shortest successful chase found so far (agent and target trajectories).
saved_agent_path = []
saved_target_path = []
shortest_length = float('inf')

for episode in range(iterations):
    agent = start_agent
    goal = start_target

    current_concentration = get_concentration(agent, goal)
    previous_concentration = current_concentration  # no earlier reading yet
    # State = (bucketed concentration, bucketed gradient of the last move).
    state = (bucket(current_concentration), bucket(0.0))
    total_reward = 0

    path = [agent]
    target_path = [goal]

    while agent != goal:
        # Epsilon-greedy action selection.
        if random.random() < epsilon:
            action_idx = random.randint(0, len(actions) - 1)
        else:
            action_idx = np.argmax(get_q(state))

        # The gradient must be measured against the latest reading, i.e. the
        # concentration at the agent's current cell. Passing
        # previous_concentration lagged one extra step behind: the gradient
        # then spanned two moves, and a blocked move rolled the reading back
        # to an older value.
        next_position, new_concentration, gradient, reward = agent_step(
            agent, action_idx, goal, current_concentration)
        next_state = (bucket(new_concentration), bucket(gradient))
        total_reward += reward

        # Detect the capture BEFORE the target moves. Previously the target
        # always moved afterwards, so an agent that had just stepped onto the
        # target (and received the reward of 10) saw it step away and the
        # episode carried on as if nothing had happened.
        if next_position != goal:
            goal = target_step(goal)
        agent = next_position

        # Q-learning update.
        old_value = get_q(state)[action_idx]
        next_max = np.max(get_q(next_state))
        get_q(state)[action_idx] = old_value + alpha * (reward + gamma * next_max - old_value)

        state = next_state
        previous_concentration = current_concentration
        current_concentration = new_concentration

        path.append(agent)
        target_path.append(goal)

        # Safety cap so that a single episode cannot loop forever.
        # NOTE(review): the grid is 2D here, so GRID_SIZE**3 looks copied
        # from the 3D cells; GRID_SIZE**2 may be what was meant - confirm
        # before lowering it, since a smaller cap ends more episodes early.
        if len(path) > GRID_SIZE**3:
            break

    if episode % 100 == 0:
        print(f"Epoch : {episode}")

    # Keep the episode only if the target was caught with a shorter path.
    if agent == goal and len(path) < shortest_length:
        saved_agent_path = path
        saved_target_path = target_path
        shortest_length = len(path)
        print(f"New shortest path length: {shortest_length} on episode: {episode}")

    # Decay the exploration rate, never going below min_epsilon.
    epsilon = max(min_epsilon, epsilon * epsilon_decay)



## Plot
display_graph_2d(saved_agent_path, saved_target_path, GRID_SIZE)

In [None]:
# Replay the saved 2D chase as an animation.
display_animation_2d(
    saved_agent_path=saved_agent_path,
    saved_target_path=saved_target_path,
)