In [1]:
!pip install pyflyt
!pip uninstall stable-baselines3 sb3_contrib -y
!pip install stable-baselines3 sb3_contrib
!pip install stable-baselines3[extra]

Collecting pyflyt
  Downloading PyFlyt-0.21.0-py3-none-any.whl.metadata (5.0 kB)
Collecting pybullet (from pyflyt)
  Downloading pybullet-3.2.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.8 kB)
Downloading PyFlyt-0.21.0-py3-none-any.whl (198 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m198.8/198.8 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading pybullet-3.2.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (103.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m103.2/103.2 MB[0m [31m16.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: pybullet, pyflyt
Successfully installed pybullet-3.2.6 pyflyt-0.21.0
Found existing installation: stable-baselines3 2.1.0
Uninstalling stable-baselines3-2.1.0:
  Successfully uninstalled stable-baselines3-2.1.0
[0mCollecting stable-baselines3
  Downloading stable_baselines3-2.3.2-py3-none-any.whl.metadata (5

In [2]:
# !rm -r *
# !rm -r sac_rocket_tensorboard custom_tensorboard checkpoints monitor_eval_logs.monitor.csv

In [3]:
from PyFlyt.core.drones import Rocket
import numpy as np

import gymnasium as gym
import PyFlyt.gym_envs

from stable_baselines3 import DQN, SAC
# from sb3_contrib import SAC
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.policies import ActorCriticPolicy
from stable_baselines3.common.vec_env import SubprocVecEnv, DummyVecEnv
from stable_baselines3.common.callbacks import CheckpointCallback, EvalCallback
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.callbacks import BaseCallback
import time
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter

pybullet build time: Nov 28 2023 23:45:17
2024-05-18 22:15:22.522346: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-18 22:15:22.522508: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-18 22:15:22.640386: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [4]:
class AggregateRewardLoggingCallback(BaseCallback):
    def __init__(self, log_dir='custom_tensorboard/',verbose=0, check_freq=1000):
        super(AggregateRewardLoggingCallback, self).__init__(verbose)
        self.best_mean_reward = -np.inf
        self.check_freq = check_freq
        self.log_dir = log_dir
        self.writer = SummaryWriter(log_dir=self.log_dir)
 
    def _on_step(self):
        if self.n_calls % self.check_freq == 0:
            all_rewards = []
            # Loop through all wrapped environments
            for env in self.training_env.envs:
                # Check and collect rewards from each environment
                if hasattr(env, 'get_episode_rewards'):
                    episode_rewards = env.get_episode_rewards()  # Use if method is available
                else:
                    episode_rewards = env.episode_rewards  # Direct attribute access if method is not available
                all_rewards.extend(episode_rewards)  # Collecting rewards from all environments
 
            # Compute overall mean reward across all environments
            mean_reward = np.mean(all_rewards)
            self.writer.add_scalar("Mean Episode Reward", mean_reward, self.num_timesteps)
            if mean_reward > self.best_mean_reward:
                self.best_mean_reward = mean_reward
                print(f"New best mean reward across all envs: {self.best_mean_reward}")
        return True
    def _on_training_end(self):
        self.writer.close()

In [14]:
def make_env():
    def _init():
        env = gym.make("PyFlyt/Rocket-Landing-v1")
        env = Monitor(env, "./monitor_train_logs")  # Monitor for tracking metrics
#         env = FrameSkip(env, skip=4)
#         env = HistoryWrapper(env, horizon=2)
        return env
    return _init

In [5]:
# Create 16 parallel training environments using SubprocVecEnv
# n_envs = 10
# train_env = DummyVecEnv([make_env() for _ in range(n_envs)])  # Parallel environments
train_env = gym.make("PyFlyt/Rocket-Landing-v1")
# Create the evaluation environment
eval_env = Monitor(gym.make("PyFlyt/Rocket-Landing-v1"), "./monitor_eval_logs")

# Define a CheckpointCallback for model saving
checkpoint_callback = CheckpointCallback(
    save_freq=1000,
    save_path="./checkpoints/",
    name_prefix="sac_model",
)

In [16]:
policy_kwargs = {
    "net_arch": [256, 256, 128, 64],  # Architecture for the policy network
    "activation_fn": nn.ReLU,  # Properly reference activation functions
}

model = SAC(
    "MlpPolicy",
    train_env,
    buffer_size=100000,
    learning_rate=0.0003,
    batch_size=256,
    tau=0.005,
    gamma=0.99,
    train_freq=64,
    gradient_steps=64,
    use_sde=True,
    use_sde_at_warmup=True,
    ent_coef='auto',
    tensorboard_log="./sac_rocket_tensorboard/",
    verbose=1,
    policy_kwargs=policy_kwargs
)
# model = SAC(
#     "MlpPolicy",
#     train_env,
#     learning_rate=0.0003,
#     batch_size=256,
#     tensorboard_log="./sac_rocket_tensorboard/",
#     verbose=1,
#     policy_kwargs=policy_kwargs
# )

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [6]:
model = SAC.load('/kaggle/working/sac_model_1000000_steps.zip', env=train_env)

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [16]:
new_lr = 0.00005
for param_group in model.actor.optimizer.param_groups:
    param_group['lr'] = new_lr

for param_group in model.critic.optimizer.param_groups:
    param_group['lr'] = new_lr

model.ent_coef_optimizer.param_groups[0]['lr'] = new_lr

In [None]:
model.learn(total_timesteps=1000000,
            callback=[checkpoint_callback, AggregateRewardLoggingCallback(check_freq=1000)])


[A                             [A
Logging to ./sac_rocket_tensorboard/SAC_2
argv[0]=
[A                             [A
argv[0]=
New best mean reward across all envs: -11233.758690691642
[A                             [A
argv[0]=
[A                             [A
argv[0]=
New best mean reward across all envs: -11223.348139479886
[A                             [A
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 572       |
|    ep_rew_mean     | -8.92e+03 |
| time/              |           |
|    episodes        | 4         |
|    fps             | 48        |
|    time_elapsed    | 47        |
|    total_timesteps | 2288      |
| train/             |           |
|    actor_loss      | 1.66e+03  |
|    critic_loss     | 4.23e+03  |
|    ent_coef        | 0.454     |
|    ent_coef_loss   | -0.0629   |
|    learning_rate   | 0.0003    |
|    n_updates       | 1299584   |
|    std             | 0.00456   |
-------------------------------

In [8]:
model.save("sac_rocket_landing_pretrained_v6")

In [1]:
def get_last_model(check_point_path="/kaggle/working/checkpoints"):
    import os
    return os.path.join(check_point_path, sorted(os.listdir(check_point_path),
       key=lambda x : int(x.split("_")[2]),
      reverse=True)[0])
def copy_last_model(check_point_path="/kaggle/working/checkpoints", dst="/kaggle/working/"):
    import shutil
    src = get_last_model(check_point_path)
    shutil.copy(src, dst)
    print(f"Copied the model {src.split('/')[-1]}")

In [2]:
copy_last_model()

Copied the model sac_model_1000000_steps.zip


In [None]:
import os
import shutil
os.listdir('/kaggle/working/sac_rocket_tensorboard/SAC_3')

In [None]:
shutil.copy('/kaggle/working/sac_rocket_tensorboard/SAC_3/events.out.tfevents.1716033260.ad79dafc9970.359.0',
            '/kaggle/working/')