# Train BeeWorld with a TD3 model

### Preparation for Colab

In [1]:
# Colab-only step: mount Google Drive into the runtime's filesystem at
# /content/drive so later cells can reference paths under that mount point.
from google.colab import drive
drive.mount('/content/drive')

In [2]:
!pip install gymnasium
!pip install stable_baselines3, moviepy
!git clone https://github.com/alTeska/rl-bee-multimodal-sensing.git
!mv rl-bee-multimodal-sensing/bee.py ./

## Load and setup

In [3]:
import os
# Project-local helpers (not PyPI packages); presumably provided by the
# repository cloned in the preparation step — confirm they are on the path.
from train_model import custom_training
from utils import create_directory, set_device

# Resolve the compute device once via the project's set_device() helper.
# NOTE(review): the selection logic lives in utils and is not visible here.
DEVICE = set_device()



In [4]:
# Experiment configuration, grouped into four sections (setup / env / train /
# test). The whole mapping is handed to custom_training() further down.
config = dict(
    # Where the run is stored and whether to resume from a previous alias.
    setup=dict(
        path="models/bee-world-1000/",
        alias="round1",
        continue_training=False,
        old_alias="round1",
    ),
    # Environment and algorithm selection.
    env=dict(
        algorithm="TD3",
        gym_name="BeeWorld",
        render_mode="rgb_array",
        walls=[[[5.0, 0.0], [5.0, 5.0]]],
        goal_size=1.0,
        video=True,
    ),
    # Training-loop settings.
    train=dict(
        timesteps=500_000,
        max_episode_steps=1000,
        policy_kwargs=dict(net_arch=[200, 100], activation_fn="ReLU"),
        learning_rate=0.001,
        max_no_improvement_evals=100,
        eval_freq=1000,
    ),
    # Post-training evaluation settings.
    test=dict(prediction_steps=1000),
)


In [5]:

# Alternatively, load the config from a YAML file (requires `import yaml`):
# with open("config.yaml", "r") as file:
#     config = yaml.safe_load(file)

## Setup model and environment

In [6]:
# Assemble the TensorBoard log directory for this run from the mounted Drive
# root plus the run's path and alias taken from the config.
# NOTE(review): the training output reports "Logging to models/..." without
# the /content/drive/ prefix — confirm this is where the logs actually land.
tensorboard_path = os.path.join(
    "/content/drive/",
    config["setup"]["path"],
    config["setup"]["alias"],
    "logs/",
)
tensorboard_path

'/content/drive/models/bee-world/round1/logs/'

In [7]:
%load_ext tensorboard
%tensorboard --logdir '/content/drive/models/bee-world/round1/logs/' --port=80

ERROR: Failed to launch TensorBoard (exited with 1).
Contents of stderr:
Permission denied

In [8]:
# Launch training with the settings in `config`; progress and periodic
# evaluation tables are printed as cell output.
custom_training(config)

Logging to models/bee-world/round1/logs
Creating a new model
Using cpu device
Wrapping the env in a DummyVecEnv.
Eval num_timesteps=1000, episode_reward=739.90 +/- 466.68
Episode length: 254.00 +/- 261.35
Success rate: 90.00%
---------------------------------
| eval/              |          |
|    mean_ep_length  | 254      |
|    mean_reward     | 740      |
|    success_rate    | 0.9      |
| time/              |          |
|    total_timesteps | 1000     |
| train/             |          |
|    actor_loss      | -4.66    |
|    critic_loss     | 2.8e+03  |
|    learning_rate   | 0.001    |
|    n_updates       | 667      |
---------------------------------
New best mean reward!
Eval num_timesteps=2000, episode_reward=854.33 +/- 335.35
Episode length: 309.60 +/- 300.65
Success rate: 90.00%
---------------------------------
| eval/              |          |
|    mean_ep_length  | 310      |
|    mean_reward     | 854      |
|    success_rate    | 0.9      |
| time/              |     

## Train a bit more (NOT ADVISED: it is better to re-run the notebook for your analysis)

In [9]:

# Switch the existing config to continuation mode: the new run is saved under
# the "round2" alias and names "round1" as the previous alias.
# This mutates `config` in place, so re-running earlier cells afterwards
# requires re-creating the config first.
config["setup"].update(
    continue_training=True,
    alias="round2",
    old_alias="round1",
)

In [10]:
# Run training again with the updated config; with continue_training set, the
# output below reports "Loading existing model" instead of creating a new one.
custom_training(config)

Logging to models/bee-world/round2/logs


  logger.warn(f"Overriding environment {new_spec.id} already in registry.")


Loading existing model
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 236      |
|    ep_rew_mean     | 960      |
|    success_rate    | 1        |
| time/              |          |
|    episodes        | 4        |
|    fps             | 446      |
|    time_elapsed    | 0        |
|    total_timesteps | 2111     |
| train/             |          |
|    actor_loss      | -11.7    |
|    critic_loss     | 3.72e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 1274     |
---------------------------------
Eval num_timesteps=3000, episode_reward=672.48 +/- 383.39
Episode length: 610.50 +/- 406.45
Success rate: 90.00%
---------------------------------
| eval/              |          |
|    mean_ep_length  | 610      |
|    mean_reward     | 672      |
|    success_rate    | 0.9      |
| time/              |          |
|    total_timesteps | 3000     |
| train/             |          |
|    actor_loss      | -45      |
|    critic_lo