In [1]:
%load_ext autoreload
%autoreload 2

import env
import agent
import reward
import simulate

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
import argparse
import time
from typing import Tuple, Optional, Dict

from env import MazeEnv
from agent import MyAgent


def simulation_config(config_path: str, new_agent: bool = True) -> Tuple[MazeEnv, Optional[MyAgent], Dict]:
    """
    Build the maze environment, and on request an agent, from a JSON settings file.

    Args:
        config_path (str): Location of the JSON configuration file.
        new_agent (bool): When True (the default) a MyAgent is created;
            when False no agent is built.

    Returns:
        Tuple[MazeEnv, Optional[MyAgent], Dict]: The environment, the agent
        (None when new_agent is False) and the parsed configuration.
    """
    with open(config_path, 'r') as handle:
        settings = json.load(handle)

    # Settings forwarded to MazeEnv under the same keyword name. A key that is
    # missing from the file is forwarded as None (dict.get default).
    forwarded_keys = (
        'walls_proportion',       # Walls proportion in the grid
        'num_dynamic_obstacles',  # Number of dynamic obstacles
        'num_agents',             # Number of agents
        'communication_range',    # Maximum distance for agent communications
        'max_lidar_dist_main',    # Maximum distance for main LIDAR scan
        'max_lidar_dist_second',  # Maximum distance for secondary LIDAR scan
        'max_episode_steps',      # Number of steps before episode termination
        'render_mode',
        'seed',                   # Seed for reproducibility
    )
    env_kwargs = {key: settings.get(key) for key in forwarded_keys}

    # The grid size lives under 'grid_size' in the file but is passed as 'size'.
    maze = MazeEnv(size=settings.get('grid_size'), **env_kwargs)

    # The environment is built first, then (optionally) the agent.
    if new_agent:
        created_agent = MyAgent(num_agents=settings.get('num_agents'))
    else:
        created_agent = None

    return maze, created_agent, settings


def plot_cumulated_rewards(rewards: list, interval: int = 100):
    """
    Draw the per-episode reward curve, save it as a PNG and display it.

    Episodes are numbered from 1 on the x-axis. The figure is written to
    'reward_curve_per_episode.png' in the current working directory before
    being shown.

    Args:
        rewards (list): Total reward obtained in each episode, in order.
        interval (int): Spacing between x-axis ticks, in episodes (default 100).
    """
    line_style = {'color': 'blue', 'marker': 'o', 'linestyle': '-'}
    episode_numbers = range(1, len(rewards) + 1)

    plt.figure(figsize=(10, 6))
    plt.plot(episode_numbers, rewards, **line_style)
    plt.title('Total Cumulated Rewards per Episode')
    plt.xlabel('Episodes')

    # One tick every `interval` episodes, starting at episode 1.
    tick_positions = range(1, len(rewards) + 1, interval)
    plt.xticks(tick_positions)

    plt.ylabel('Cumulated Rewards')
    plt.grid(True)

    # Save before showing so the file is written from the populated figure.
    plt.savefig('reward_curve_per_episode.png', dpi=300)
    plt.show()

def train(config_path: str) -> MyAgent:
    """
    Run the (currently debug-truncated) training loop on the configured environment.

    The loop returns right after the first environment step so the resulting
    state can be inspected by the caller. Everything below that early return
    (reward accumulation, policy update, progress display) is not executed.

    Errors are no longer swallowed silently: an interruption prints a short
    message, any other exception prints its traceback. In both cases the
    function still returns None instead of raising.

    Args:
        config_path (str): Path to the configuration JSON file.

    Returns:
        The state returned by the first ``env.step`` call, or None when the
        loop is never entered, is interrupted, or fails with an error.
        NOTE(review): the ``-> MyAgent`` annotation does not match what is
        returned today; reconcile it once the debug return is removed.
    """
    # Local import: only needed to report failures from the loop below.
    import traceback

    # Environment and agent configuration
    env, agent, config = simulation_config(config_path)
    max_episodes = config.get('max_episodes')

    # Metrics to follow the performance
    all_rewards = []  # NOTE(review): never appended to in this function.
    total_reward = 0
    episode_count = 0

    # Initial reset of the environment
    state, info = env.reset()
    time.sleep(1)

    try:
        while episode_count < max_episodes:
            # Determine agents actions (with debug output of the raw and
            # pre-processed states).
            print("STATE : ", state)
            prepro = agent.process_states(state)
            print("PREPRO : ", prepro)
            actions = agent.get_action(state)
            print(actions)

            # Execution of a simulation step
            state, rewards, terminated, truncated, info = env.step(actions)
            print(state)

            # NOTE(review): debug short-circuit. The caller receives the state
            # after a single step and the rest of this loop body is
            # unreachable. episode_count is never incremented, so removing
            # this return as-is would make the loop run forever.
            return state

            print(rewards)
            total_reward += np.sum(rewards)

            # Update agent policy
            agent.update_policy(actions, state, rewards)

            # Display of the step information
            print(f"\rEpisode {episode_count + 1}, Step {info['current_step']}, "
                  f"Reward: {total_reward:.2f}, "
                  f"Evacuated: {len(info['evacuated_agents'])}, "
                  f"Deactivated: {len(info['deactivated_agents'])}", end='')
    except KeyboardInterrupt:
        # A manual interrupt is an expected way to stop; report it.
        print("\nSimulation interrupted by the user")
    except Exception:
        # Previously a bare `except: pass` hid every failure. Keep the
        # non-raising behaviour but make the error visible.
        traceback.print_exc()

    return None


# Run the training entry point. As written, train() hands back the state from
# its first env.step() call, or None if no step completes.
state = train("config.json")

PREPRO :  [array([4, 4, 0, 2, 1, 1, 1, 2, 1, 4, 4]), array([-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1])]
action 0  [-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1]
[0, 0]
[[4. 4. 0. 0. 0. 0. 2. 1. 1. 1. 2. 1.]]


In [27]:
# Inspect the value currently bound to `state`.
print(state)

[[29. 29.  0.  0.  0.  0.  1.  3.  1.  1.  1.  3. 28. 29.  0.  0.  4.  1.
   1.  1.  1.  3. 29. 28.  1.  0.  1.  3.  1.  1.  1.  3. 28. 28.  1.  0.
   1.  3.  1.  3.  1.  1.]
 [28. 29.  0.  0.  1.  0.  4.  1.  1.  1.  1.  3. 29. 29.  0.  0.  1.  3.
   1.  1.  1.  3. 29. 28.  1.  0.  1.  3.  1.  1.  1.  3. 28. 28.  1.  0.
   1.  3.  1.  3.  1.  1.]
 [29. 28.  1.  0.  0.  1.  1.  3.  1.  1.  1.  3. 29. 29.  0.  0.  1.  3.
   1.  1.  1.  3. 28. 29.  0.  0.  4.  1.  1.  1.  1.  3. 28. 28.  1.  0.
   1.  3.  1.  3.  1.  1.]
 [28. 28.  1.  0.  1.  1.  1.  3.  1.  3.  1.  1. 29. 29.  0.  0.  1.  3.
   1.  1.  1.  3. 28. 29.  0.  0.  4.  1.  1.  1.  1.  3. 29. 28.  1.  0.
   1.  3.  1.  1.  1.  3.]]
