# Run Bee World Environment

In [1]:
import gymnasium as gym
import matplotlib
import matplotlib.animation as animation
import matplotlib.pyplot as plt
import numpy as np
from IPython.display import HTML
from stable_baselines3 import TD3
from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise
from torch import nn

from bee import BeeWorld


In [2]:
def display_video(frames, framerate=30):
  """Renders a sequence of RGB frames as an inline HTML5 video.

  Args:
    frames (ndarray or sequence): Frames of shape (n_frames, height, width, 3).
      Must contain at least one frame.
    framerate (int): Playback frame rate in units of Hz.

  Returns:
    IPython `HTML` display object wrapping the encoded video.

  Raises:
    ValueError: If `frames` is empty.
  """
  if len(frames) == 0:
    raise ValueError('display_video() needs at least one frame.')

  height, width, _ = frames[0].shape
  dpi = 70
  orig_backend = matplotlib.get_backend()
  matplotlib.use('Agg')  # Switch to headless 'Agg' to inhibit figure rendering.
  try:
    fig, ax = plt.subplots(1, 1, figsize=(width / dpi, height / dpi), dpi=dpi)
  finally:
    # Always restore the caller's backend, even if figure creation fails.
    matplotlib.use(orig_backend)

  try:
    ax.set_axis_off()
    ax.set_aspect('equal')
    ax.set_position([0, 0, 1, 1])  # Let the image fill the whole figure.
    im = ax.imshow(frames[0])

    def update(frame):
      im.set_data(frame)
      return [im]

    interval = 1000 / framerate  # Milliseconds between consecutive frames.
    anim = animation.FuncAnimation(fig=fig, func=update, frames=frames,
                                   interval=interval, blit=True, repeat=False)
    return HTML(anim.to_html5_video())
  finally:
    # The video is fully encoded into the HTML string at this point, so the
    # figure is no longer needed; closing it avoids leaking one per call.
    plt.close(fig)

## Initialize Gym

In [3]:
# Add the custom BeeWorld class to gymnasium's registry under the id
# "BeeWorld", with max_episode_steps set to 3000.
gym.register(id="BeeWorld", entry_point=BeeWorld, max_episode_steps=3000)

# Request "rgb_array" rendering; the frames returned by env.render() are
# collected later and passed to display_video().
env = gym.make("BeeWorld", render_mode="rgb_array")

# Last expression of the cell, so the reset() result is shown as cell output.
env.reset()

({'vision': 0, 'smell': array([0.00433557])}, {})

## Initialize the RL model

In [None]:
# Exploration noise needs one component per action dimension, so read the
# dimensionality from the environment instead of relying on a leftover global.
n_actions = env.action_space.shape[-1]

# Temporally correlated exploration noise added to the policy's actions.
# (Uncorrelated Gaussian noise via NormalActionNoise is the usual alternative.)
action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=0.1)

policy_kwargs = {
    "net_arch": [64, 64],  # Specify the number of hidden units per layer
    "activation_fn": nn.ReLU,  # Specify the activation function
}

# "MultiInputPolicy" is used because the observation is a dict
# (the reset() output above shows 'vision' and 'smell' entries).
model = TD3(
    "MultiInputPolicy",
    env,
    action_noise=action_noise,
    verbose=1,
    policy_kwargs=policy_kwargs,
)
model.learn(total_timesteps=10000, log_interval=10)

# Fetch the environment as wrapped by the model and reset it, so `obs` holds
# the starting observation for the rollout cell below.
vec_env = model.get_env()
obs = vec_env.reset()

In [None]:
# NOTE(review): this cell repeats the previous training cell. Its first line
# had been truncated to `tionNoise(...)`, which raised NameError and left
# `action_noise` unbound; the full assignment is restored here. Running both
# cells trains a second model from scratch and rebinds `model`, `vec_env` and
# `obs` -- consider deleting one of the two.

# One noise component per action dimension.
n_actions = env.action_space.shape[-1]
action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=0.1)

policy_kwargs = {
    "net_arch": [64, 64],  # Specify the number of hidden units per layer
    "activation_fn": nn.ReLU,  # Specify the activation function
}

model = TD3(
    "MultiInputPolicy",
    env,
    action_noise=action_noise,
    verbose=1,
    policy_kwargs=policy_kwargs,
)
model.learn(total_timesteps=10000, log_interval=10)

# Reset the model's wrapped environment so `obs` is a fresh starting
# observation for the rollout cell below.
vec_env = model.get_env()
obs = vec_env.reset()

## Run the model 

In [6]:

frames = []

# Hard cap on the rollout length so the cell always terminates; 3000 mirrors
# the max_episode_steps value used when the environment was registered.
max_rollout_steps = 3000

for _ in range(max_rollout_steps):
    action, _states = model.predict(obs)
    obs, rewards, dones, info = vec_env.step(action)
    frames.append(env.render())
    # `dones` holds one flag per wrapped environment; stop once the episode
    # has ended instead of looping forever.
    # NOTE(review): vectorized envs presumably auto-reset on done, so the last
    # captured frame may already show the next episode's start -- confirm.
    if dones.any():
        break

env.close()
model.save("test")


In [7]:
# Play back the recorded rollout at 10 Hz (100 ms per frame); as the last
# expression of the cell, the returned HTML object is rendered inline.
display_video(frames, framerate=10)