# Train BeeWorld with a TD3 model

### Preparation for Colab

In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

In [2]:
!pip install gymnasium
!pip install stable_baselines3, moviepy
!git clone https://github.com/alTeska/rl-bee-multimodal-sensing.git
!mv rl-bee-multimodal-sensing/bee.py ./

## Load and setup

In [3]:
import os

# Project-local helpers.
# NOTE(review): `utils` and `train` must be importable from the working directory;
# the setup cell only moves `bee.py` out of the cloned repository -- confirm.
from utils import create_directory, set_device
from train import new_model, load_existing_model, custom_training

# NOTE(review): DEVICE is not referenced again in the visible cells; presumably
# set_device() selects the compute device -- confirm against `utils`.
DEVICE = set_device()



In [4]:
# Experiment configuration, grouped into three sections:
#   setup - where the model lives and how the run is identified
#   train - training budget and network hyper-parameters
#   test  - evaluation settings
config = dict(
    setup=dict(
        path='models/bee-world/',
        gym_name='BeeWorld',
        algorithm='TD3',
        alias='round1',
        continue_training=False,
        old_alias='',
    ),
    train=dict(
        timesteps=2000,
        iter_max=10,
        policy_kwargs=dict(net_arch=[100, 100], activation_fn='ReLU'),
        learning_rate=0.001,
    ),
    test=dict(
        prediction_step=1000,
    ),
)

In [5]:

# Alternatively, load the config from a YAML file (requires `import yaml`):
# with open("config.yaml", "r") as file:
#     config = yaml.safe_load(file)

## Setup model and environment

In [6]:
# Build the TensorBoard log directory from the Drive mount point, the configured
# model path and the run alias.
# NOTE(review): the training output reports a relative log directory
# ("models/bee-world/round1/logs"), which differs from this Drive-prefixed path --
# confirm that both refer to the same location.
tensorboard_path = os.path.join(
    "/content/drive/",
    *(config['setup'][key] for key in ('path', 'alias')),
    "logs/",
)
tensorboard_path

'/content/drive/models/bee-world/round1/logs/'

In [7]:
%load_ext tensorboard
%tensorboard --logdir '/content/drive/models/bee-world/round1/logs/' --port=80

ERROR: Failed to launch TensorBoard (exited with 1).
Contents of stderr:
Permission denied

In [8]:
# Run training with the settings in `config`; per the log below, this creates a new
# model and writes logs under models/bee-world/round1/logs.
custom_training(config)

Creating a new model
Logging to models/bee-world/round1/logs
Using cpu device
Wrapping the env in a DummyVecEnv.
Eval num_timesteps=1000, episode_reward=570.73 +/- 513.31
Episode length: 559.20 +/- 397.44
Success rate: 60.00%
---------------------------------
| eval/              |          |
|    mean_ep_length  | 559      |
|    mean_reward     | 571      |
|    success_rate    | 0.6      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
New best mean reward!
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 477      |
|    ep_rew_mean     | 679      |
|    success_rate    | 0.75     |
| time/              |          |
|    episodes        | 4        |
|    fps             | 472      |
|    time_elapsed    | 4        |
|    total_timesteps | 1907     |
| train/             |          |
|    actor_loss      | -2.49    |
|    critic_loss     | 1.84e+03 |
|    learning_rate   | 0.001    |
|   

## Train a bit more (NOT ADVISED: it is better to re-run the notebook for your analysis)

In [9]:

# Switch the run to "continue training" mode: results go under the new alias
# 'round2', and 'round1' is recorded as the previous alias.
config['setup'].update(
    continue_training=True,
    alias='round2',
    old_alias='round1',
)

In [10]:
# Second training run with the updated config; per the log below, it loads the
# existing model and logs under models/bee-world/round2/logs.
custom_training(config)

Loading existing model
Logging to models/bee-world/round2/logs


  logger.warn(f"Overriding environment {new_spec.id} already in registry.")


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 319      |
|    ep_rew_mean     | 802      |
|    success_rate    | 0.875    |
| time/              |          |
|    episodes        | 8        |
|    fps             | 947      |
|    time_elapsed    | 0        |
|    total_timesteps | 2645     |
| train/             |          |
|    actor_loss      | -4.79    |
|    critic_loss     | 11.4     |
|    learning_rate   | 0.001    |
|    n_updates       | 2230     |
---------------------------------
Eval num_timesteps=3000, episode_reward=642.76 +/- 633.96
Episode length: 262.50 +/- 371.81
Success rate: 80.00%
---------------------------------
| eval/              |          |
|    mean_ep_length  | 262      |
|    mean_reward     | 643      |
|    success_rate    | 0.8      |
| time/              |          |
|    total_timesteps | 3000     |
| train/             |          |
|    actor_loss      | -14.3    |
|    critic_loss     | 6.02e+03 |
|  