In [2]:
%load_ext autoreload
%autoreload 2

from rl_games.torch_runner import Runner
import os
import yaml
import torch
import matplotlib.pyplot as plt
import gym
from IPython import display
import numpy as np
import onnx
import onnxruntime as ort
%matplotlib inline

import os
from hydra import initialize, initialize_config_module, initialize_config_dir, compose
from omegaconf import OmegaConf
# https://github.com/facebookresearch/hydra/blob/main/examples/jupyter_notebooks/compose_configs_in_notebook.ipynb

  if LooseVersion(module.__version__) < minver:
  other = LooseVersion(other)


In [3]:
# Sanity check: list the GPUs the NVIDIA driver reports on this machine.
!nvidia-smi -L

GPU 0: NVIDIA GeForce RTX 4090 (UUID: GPU-ab333812-0b68-185d-faad-8885aa7ed6e7)


In [4]:
# Compose the Hydra config from the local ./cfg directory. The relative path is
# resolved against the kernel's current working directory before being handed to Hydra.
abs_config_dir=os.path.abspath("./cfg")
with initialize_config_dir(version_base=None, config_dir=abs_config_dir):
    # `cfg` is the composed top-level config that the following cells read from.
    cfg=compose(config_name="config.yaml")

In [5]:
from isaacgymenvs.utils.reformat import omegaconf_to_dict, print_dict



def preprocess_train_config(cfg, config_dict):
    """
    Copy run-level settings from the top-level config into the rl_games train config.

    Doing this once here avoids repeating the same variable interpolations in every
    task-specific .yaml file.

    Args:
        cfg: top-level config. Read through attribute access (``rl_device``,
            ``sim_device``, ``pbt.enabled``, ``pbt.policy_idx``) and through
            ``cfg.get('full_experiment_name')``.
        config_dict: nested train config; ``config_dict['params']['config']`` must exist.
            ``config_dict['params']['network']['mlp']`` is optional.

    Returns:
        The same ``config_dict`` object, modified in place.
    """

    pbt_cfg = cfg.pbt
    train_cfg = config_dict['params']['config']

    # Keys are added in this order so the printed dict below keeps the same layout.
    train_cfg.update({
        'device': cfg.rl_device,
        'population_based_training': pbt_cfg.enabled,
        'pbt_idx': pbt_cfg.policy_idx if pbt_cfg.enabled else None,
        'full_experiment_name': cfg.get('full_experiment_name'),
    })

    print(f'Using rl_device: {cfg.rl_device}')
    print(f'Using sim_device: {cfg.sim_device}')
    print(train_cfg)

    # Optional MLP width scaling: any missing key along the way simply means
    # "nothing to scale", so the KeyError is intentionally ignored.
    try:
        mlp_cfg = config_dict['params']['network']['mlp']
        model_size_multiplier = mlp_cfg['model_size_multiplier']
        if model_size_multiplier != 1:
            layer_sizes = mlp_cfg['units']
            # Scale every layer width in place so the config keeps the same list object.
            for idx in range(len(layer_sizes)):
                layer_sizes[idx] = layer_sizes[idx] * model_size_multiplier
            print(
                f'Modified MLP units by x{model_size_multiplier} to {config_dict["params"]["network"]["mlp"]["units"]}'
            )
    except KeyError:
        pass

    return config_dict


# Convert the `train` sub-config with the isaacgymenvs helper, then let
# preprocess_train_config add the device / PBT / experiment-name entries.
rlg_config_dict = omegaconf_to_dict(cfg.train)
rlg_config_dict = preprocess_train_config(cfg, rlg_config_dict)

Using rl_device: cuda:0
Using sim_device: cuda:0
{'name': 'A1Terrain', 'full_experiment_name': None, 'env_name': 'rlgpu', 'ppo': True, 'mixed_precision': True, 'normalize_input': True, 'normalize_value': True, 'normalize_advantage': True, 'value_bootstrap': True, 'clip_actions': False, 'num_actors': 4096, 'reward_shaper': {'scale_value': 1.0}, 'gamma': 0.99, 'tau': 0.95, 'e_clip': 0.2, 'entropy_coef': 0.001, 'learning_rate': 0.0003, 'lr_schedule': 'adaptive', 'kl_threshold': 0.008, 'truncate_grads': True, 'grad_norm': 1.0, 'horizon_length': 24, 'minibatch_size': 16384, 'mini_epochs': 5, 'critic_coef': 2, 'clip_value': True, 'seq_length': 4, 'bounds_loss_coef': 0.0, 'max_epochs': 1500, 'save_best_after': 100, 'score_to_win': 20000, 'save_frequency': 50, 'print_stats': True, 'player': {'games_num': 100000}, 'device': 'cuda:0', 'population_based_training': False, 'pbt_idx': None}


In [6]:
# Create an rl_games Runner and load the processed train config into it.
runner = Runner()
runner.load(rlg_config_dict)

# agent = runner.create_player()


self.seed = 42


In [7]:
# NOTE(review): absolute, machine-specific path — adjust before running on another machine.
onnx_model_path = "/home/grl/repo/RobotsMakingRobots/legged_env/outputs/Biped/True/20240627_182618/biped.onnx"
# Load the exported policy as an ONNX model object so its graph can be inspected.
onnx_model = onnx.load(onnx_model_path)


In [8]:
# Display the graph inputs declared by the exported model (name, element type, shape).
onnx_model.graph.input

[name: "obs"
type {
  tensor_type {
    elem_type: 1
    shape {
      dim {
        dim_value: 1
      }
      dim {
        dim_value: 42
      }
    }
  }
}
]

In [9]:
# Smoke test: open an ONNX Runtime session on the exported policy and run it once
# on an all-zero float32 observation of shape (1, 42).
ort_model = ort.InferenceSession(onnx_model_path)

outputs = ort_model.run(None, {"obs": np.zeros((1, 42), dtype=np.float32)})
print(outputs)

[array([[-0.3042116 , -0.19681515,  0.37351635, -0.26178414,  0.16354656,
        -0.09075491,  0.13380232,  0.2533207 , -0.59685737, -0.20561233]],
      dtype=float32), array([[-3.3247182, -2.7401602, -2.6324568, -2.6378489, -2.7127957,
        -3.351093 , -2.763488 , -2.6127214, -2.631418 , -2.7043083]],
      dtype=float32), array([[0.3541862]], dtype=float32)]


In [11]:
# Display the third array returned by the session run above.
outputs[2]

array([[0.3541862]], dtype=float32)

In [23]:
# outputs[0] is used as the action mean; outputs[1] is exponentiated to obtain
# sigma, i.e. it is treated as a log standard deviation.
mu = outputs[0]
sigma = np.exp(outputs[1])
# Sampling is disabled below: the mean is used directly as the action.
# action = np.random.normal(mu, sigma)
action = mu


In [24]:
# Display the action mean computed in the previous cell.
mu

array([[-0.3042116 , -0.19681515,  0.37351635, -0.26178414,  0.16354656,
        -0.09075491,  0.13380232,  0.2533207 , -0.59685737, -0.20561233]],
      dtype=float32)

In [25]:
# Display sigma, i.e. exp() of the second session output.
sigma

array([[0.03598266, 0.06456   , 0.0719016 , 0.07151494, 0.06635105,
        0.03504603, 0.06307139, 0.07333469, 0.07197633, 0.0669166 ]],
      dtype=float32)

In [4]:
import numpy as np
from operator import itemgetter

# Names of the observation components, in the order they are listed here.
obs_names = [
    "linearVelocity",
    "angularVelocity",
    "projectedGravity",
    "commands",
    "dofPosition",
    "dofVelocity",
    "actions",
]


        
    

In [12]:
import onnx
import onnxruntime as ort
# NOTE(review): absolute, machine-specific path — adjust before running on another machine.
onnx_model_path = "/home/grl/repo/RobotsMakingRobots/legged_env/outputs/Biped/export/20240628_100823/policy.onnx"
# Module-level inference session; get_action() below reads this global.
ort_model = ort.InferenceSession(onnx_model_path)

def get_action(obs):
    """Run the module-level ``ort_model`` on one observation and return its first output.

    ``obs`` is converted to a float32 array and reshaped to a single row
    (``reshape(1, -1)``) before being fed to the session under the input name "obs".
    The session must yield exactly three outputs, unpacked here as mean,
    log-std and value; only the mean is returned.
    """
    batched_obs = np.asarray(obs, dtype=np.float32).reshape(1, -1)
    action_mean, _log_std, _value = ort_model.run(None, {"obs": batched_obs})
    return action_mean

In [13]:
# NOTE(review): `obs_buf` is not assigned by any cell above this one; the only visible
# assignment is in the cell further down that builds `obs_dict` — run that cell first.
get_action(obs_buf)

array([[-0.3311361 , -0.20155035,  0.44653955, -0.67294735,  0.34802103,
        -0.04197904,  0.14953172,  0.43407536, -0.25228912, -0.10573124]],
      dtype=float32)

In [7]:
import onnx
import onnxruntime as ort
import numpy as np
from operator import itemgetter


class Model:
    """Thin wrapper around an ONNX Runtime session for an exported policy."""

    def __init__(self, onnx_model_path) -> None:
        """Open an inference session for the model file at ``onnx_model_path``."""
        self.ort_model = ort.InferenceSession(onnx_model_path)

        # Observation component names exposed to callers; "linearVelocity" is
        # kept only as a commented-out entry and is therefore not part of the list.
        self.obs_names = [
            # "linearVelocity",
            "angularVelocity",
            "projectedGravity",
            "commands",
            "dofPosition",
            "dofVelocity",
            "actions",
        ]

    def get_action(self, obs):
        """Return the first session output for a single observation.

        ``obs`` is converted to a float32 array and reshaped to one row
        (``reshape(1, -1)``) before being fed under the input name "obs".
        The session must yield exactly three outputs, unpacked here as mean,
        log-std and value; only the mean is returned.
        """
        batched_obs = np.asarray(obs, dtype=np.float32).reshape(1, -1)
        action_mean, _log_std, _value = self.ort_model.run(None, {"obs": batched_obs})
        return action_mean
    
# NOTE(review): absolute, machine-specific path — adjust before running on another machine.
onnx_model_path = "/home/grl/repo/RobotsMakingRobots/legged_env/outputs/Biped/export/biped_c2r5h3/policy.onnx"
# Wrap the exported policy in the Model helper defined above.
model = Model(onnx_model_path)


# Placeholder observation values, keyed by component name. Entries that are not
# listed in model.obs_names (currently 'linearVelocity') are simply not used.
obs_dict =  {
    # 'linearVelocity': sensor_data['linearVelocity'], # DOES NOT HAVE THIS IN REAL
    'linearVelocity': np.zeros(3), # TODO: Fix this
    'angularVelocity':np.zeros(3), # TODO: Fix this
    'projectedGravity': np.array([0.0, 0.0, -1]),
    'commands': np.zeros(3), # TODO: Fix this
    "dofPosition": np.zeros(10),
    "dofVelocity": np.zeros(10),
    # "heightMap": 
    "actions": np.zeros(10), # TODO: Fix this
}
# Concatenate the components in the order given by model.obs_names.
# A list comprehension is used rather than itemgetter(*names)(obs_dict) because
# itemgetter returns a bare value (not a tuple) when given exactly one key, which
# np.concatenate cannot handle; the result is identical for two or more names.
obs_buf = np.concatenate([obs_dict[name] for name in model.obs_names], axis=-1)
print(obs_buf)
actions = model.get_action(obs_buf)
print(actions)

[ 0.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.]
[[ 0.05191529 -0.09791541  0.03827243 -0.32177296  0.16737801 -0.038475
   0.06746499 -0.1290952   0.3078883  -0.14461225]]
