# Shield Adapter

In [1]:
import os

import torch

import omnisafe
from omnisafe.adapter import ShieldAdapter
from omnisafe.utils.config import get_default_kwargs_yaml, Config

# Settings handed to the OmniSafe agent; the printed config below shows them
# merged into `cfgs.train_cfgs`.
train_terminal_cfgs = {
    "algo": "PPO",
    "env_id": "SafetyPointGoal1-v0",
    "parallel": 1,
    "total_steps": 1638400,
    "device": "cpu",
    "vector_env_nums": 64,
    "torch_threads": 16,
}

# The agent is built only to obtain the fully resolved configuration object.
agent = omnisafe.Agent(
    train_terminal_cfgs["algo"],
    train_terminal_cfgs["env_id"],
    train_terminal_cfgs=train_terminal_cfgs,
)
cfgs: Config = agent.cfgs

# Directory where collected datasets are written. Set the OMNISAFE_DATA_DIR
# environment variable to relocate it on another machine; when the variable is
# unset or empty the original location is used, so existing runs are unaffected.
data_dir = (
    os.environ.get("OMNISAFE_DATA_DIR")
    or "/home/juntao/workspace/my_omnisafe/experiments/data"
)


  from .autonotebook import tqdm as notebook_tqdm


Loading PPO.yaml from /home/juntao/workspace/my_omnisafe/omnisafe/utils/../configs/on-policy/PPO.yaml


In [2]:
# Show the resolved configuration followed by its Python type, one per line.
print(cfgs, type(cfgs), sep="\n")

# Arguments are passed positionally: the environment id, the configured
# `vector_env_nums`, the seed, and the full configuration object.
env = ShieldAdapter(
    train_terminal_cfgs["env_id"],
    cfgs.train_cfgs.vector_env_nums,
    cfgs.seed,
    cfgs,
)

{'seed': 0, 'train_cfgs': {'device': 'cpu', 'torch_threads': 16, 'vector_env_nums': 64, 'parallel': 1, 'total_steps': 1638400, 'algo': 'PPO', 'env_id': 'SafetyPointGoal1-v0', 'epochs': 50}, 'algo_cfgs': {'update_cycle': 32768, 'update_iters': 40, 'batch_size': 64, 'target_kl': 0.02, 'entropy_coef': 0.0, 'reward_normalize': True, 'cost_normalize': True, 'obs_normalize': True, 'kl_early_stop': True, 'use_max_grad_norm': True, 'max_grad_norm': 40.0, 'use_critic_norm': True, 'critic_norm_coef': 0.001, 'gamma': 0.99, 'cost_gamma': 0.99, 'lam': 0.95, 'lam_c': 0.95, 'clip': 0.2, 'adv_estimation_method': 'gae', 'standardized_rew_adv': True, 'standardized_cost_adv': True, 'penalty_coef': 0.0, 'use_cost': False}, 'logger_cfgs': {'use_wandb': False, 'wandb_project': 'omnisafe', 'use_tensorboard': True, 'save_model_freq': 100, 'log_dir': './runs', 'window_lens': 100}, 'model_cfgs': {'weight_initialization_mode': 'kaiming_uniform', 'actor_type': 'gaussian_learning', 'linear_lr_decay': True, 'explor

In [3]:
# Number of random samples to gather from the environment.
steps = 16_000_000

# Full path of the dataset file; the name encodes the env id and sample count.
os.makedirs(data_dir, exist_ok=True)
filename = os.path.join(data_dir, f"random_{cfgs.train_cfgs.env_id}_{steps}.pt")

# Collection took roughly 76 minutes in the recorded run, so reuse a dataset
# already written for this env id / sample count instead of recollecting and
# overwriting it. Delete the file to force a fresh collection.
# NOTE(review): on a cache hit `env.random_exploration` is not called; confirm
# that nothing later relies on side effects of that call.
if os.path.isfile(filename):
    data = torch.load(filename)
    print(f"Loaded cached random samples from {filename}")
else:
    data = env.random_exploration(steps=steps)
    torch.save(data, filename)

100%|██████████| 250000/250000 [1:16:26<00:00, 54.51it/s]  


Collected 16000000 random samples.


In [4]:
def flatten_vectors(vectors):
    """Swap the first two dimensions of ``vectors`` and merge them into one.

    An input of shape ``(A, B, *rest)`` yields an output of shape
    ``(A * B, *rest)`` in which all ``A`` entries for index ``0`` of the
    original second dimension come first, then those for index ``1``, and so on.

    Args:
        vectors: Object providing ``transpose`` and ``flatten`` with torch
            tensor semantics and at least two dimensions.
            Presumably laid out as (steps, envs, ...) -- TODO confirm.

    Returns:
        The reordered and flattened result.
    """
    swapped = vectors.transpose(0, 1)
    return swapped.flatten(start_dim=0, end_dim=1)

# Flatten every entry of the collected dataset with `flatten_vectors`.
flatten_data = {key: flatten_vectors(value) for key, value in data.items()}

# Print the resulting shape of each entry as a quick sanity check.
for k, v in flatten_data.items():
    print(f"{k} shape: {v.shape}")

obs shape: torch.Size([16000000, 60])
act shape: torch.Size([16000000, 2])
reward shape: torch.Size([16000000])
cost shape: torch.Size([16000000])
done shape: torch.Size([16000000])
next_obs shape: torch.Size([16000000, 60])
