In [31]:
import sys
import os
sys.path.append('../')

In [32]:
from simulation.farm_env import FarmEnv
from simulation.env_wrapper import MultiAgentActionWrapper
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import CheckpointCallback

In [34]:
# --- Training configuration -------------------------------------------------
TRAINING_STEPS = 1_000_000  # total timesteps handed to model.learn()
MODEL_NAME = "ppo_marl_super_agent.zip"  # file name of the final super-agent model

# Output locations, relative to the notebook's working directory.
models_dir = "../models"
logs_dir = "../logs/marl_logs/"  # dedicated TensorBoard folder for the MARL runs

# Create both folders up front; exist_ok=True keeps this cell safe to re-run.
for _output_dir in (models_dir, logs_dir):
    os.makedirs(_output_dir, exist_ok=True)

In [35]:
# Step 1: build the underlying multi-agent farm environment.
# This base environment expects a dictionary of actions, so it is not
# given to the learning algorithm directly.
print("Creating the base Multi-Agent environment...")
base_env = FarmEnv()

Creating the base Multi-Agent environment...
Fetching real weather data for Lat: 30.901, Long: 75.8573 from Open-Meteo...
Sucessfully fetched and processed real weather data from Open-Meteo.


In [36]:
# Step 2: put MultiAgentActionWrapper around the base environment so that it
# looks simple to the agent. From here on PPO interacts only with wrapped_env,
# never with base_env directly.
print("Applying the Action Wrapper...")
wrapped_env = MultiAgentActionWrapper(base_env)

print("Environment is ready for MARL training.")

Applying the Action Wrapper...
Environment is ready for MARL training.


In [37]:
# Fixed seed so that PPO's own randomness (weight initialisation, action
# sampling) repeats across "Restart Kernel -> Run All".
# NOTE(review): the environment fetches weather data when it is created, so
# full run-to-run reproducibility also depends on that data being stable.
SEED = 42

# One PPO policy (the "super-agent") trained on the wrapped environment.
model_marl = PPO(
    "MlpPolicy",
    wrapped_env,
    verbose=1,                 # print the rollout/train tables while learning
    seed=SEED,                 # make the training run repeatable
    tensorboard_log=logs_dir,  # tell the model where to save the live logs
)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [38]:
# Intermediate snapshots are written under the log folder (not models_dir),
# in a dedicated "checkpoints" sub-directory.
checkpoint_dir = os.path.join(logs_dir, 'checkpoints')

checkpoint_callback = CheckpointCallback(
    save_freq=50000,           # how often (in callback steps) a snapshot is saved
    save_path=checkpoint_dir,
    name_prefix="marl_model",  # prefix used for every checkpoint file name
)

In [39]:
# Tell the reader how to watch the run live before the long call begins.
print(f"Starting powerful MARL training for {TRAINING_STEPS} timesteps...")
print("To view live training graphs, open a new terminal, activate your environment,")
print("navigate to the project folder, and run:")
print(f"tensorboard --logdir {os.path.abspath(logs_dir)}")

# Start the long training process.
# learn() hands back the model object; the trailing semicolon stops Jupyter
# from echoing its "<...PPO at 0x...>" repr underneath the training logs.
model_marl.learn(
    total_timesteps=TRAINING_STEPS,
    tb_log_name="PPO_MARL_SuperAgent",  # a specific name for this training run
    callback=checkpoint_callback,       # periodic checkpoints during training
);

Starting powerful MARL training for 1000000 timesteps...
To view live training graphs, open a new terminal, activate your environment,
navigate to the project folder, and run:
tensorboard --logdir /Users/aadarshraj/Documents/ai_project/digital_twin_farm/logs/marl_logs
Logging to ../logs/marl_logs/PPO_MARL_SuperAgent_13


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 90       |
|    ep_rew_mean     | 942      |
| time/              |          |
|    fps             | 5170     |
|    iterations      | 1        |
|    time_elapsed    | 0        |
|    total_timesteps | 2048     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 90           |
|    ep_rew_mean          | 906          |
| time/                   |              |
|    fps                  | 3598         |
|    iterations           | 2            |
|    time_elapsed         | 1            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0031017647 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.2         |
|    explained_variance   | 0.000596     |
|    learning_r

<stable_baselines3.ppo.ppo.PPO at 0x32575e230>

In [40]:
# Persist the fully trained policy into the models folder.
final_model_path = os.path.join(models_dir, MODEL_NAME)
model_marl.save(final_model_path)

# The banner has no placeholders, so it is a plain string rather than an
# f-string; the printed text is unchanged.
print("\n--- MARL Training Complete! ---")
print(f"Super-Agent model saved at: {final_model_path}")


--- MARL Training Complete! ---
Super-Agent model saved at: ../models/ppo_marl_super_agent.zip
