In [1]:
from stable_baselines3 import DDPG

from rl_training.two_wheel_robot import TwoWheelRobot

env = TwoWheelRobot()


model = DDPG("MlpPolicy", env, verbose=1)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [2]:
obs = env.reset()
env.render()
model.learn(total_timesteps=1000)

for i in range(1000):
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, done, info = env.step(action)
    env.render()
    if done:
        break

Creating window glfw
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 55.8      |
|    ep_rew_mean     | -1.94e+03 |
| time/              |           |
|    episodes        | 4         |
|    fps             | 91        |
|    time_elapsed    | 2         |
|    total_timesteps | 223       |
| train/             |           |
|    actor_loss      | 53.7      |
|    critic_loss     | 215       |
|    learning_rate   | 0.001     |
|    n_updates       | 163       |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 37.4     |
|    ep_rew_mean     | -1.2e+03 |
| time/              |          |
|    episodes        | 8        |
|    fps             | 81       |
|    time_elapsed    | 3        |
|    total_timesteps | 299      |
| train/             |          |
|    actor_loss      | 62.1     |
|    critic_loss     | 127      |
|    learning_rate   | 0.001    |
|    n_updat

In [6]:
import torch

model.actor

Actor(
  (features_extractor): FlattenExtractor(
    (flatten): Flatten(start_dim=1, end_dim=-1)
  )
  (mu): Sequential(
    (0): Linear(in_features=45, out_features=400, bias=True)
    (1): ReLU()
    (2): Linear(in_features=400, out_features=300, bias=True)
    (3): ReLU()
    (4): Linear(in_features=300, out_features=2, bias=True)
    (5): Tanh()
  )
)

In [15]:
x = torch.randn(1, len(obs), requires_grad=False, device="cuda")

In [16]:
torch.onnx.export_to_pretty_string(model.actor, x, "actor_model.onnx")


'ModelProto {\n  producer_name: "pytorch"\n  domain: ""\n  doc_string: ""\n  graph:\n    GraphProto {\n      name: "torch-jit-export"\n      inputs: [{name: "obs", type:Tensor dtype: 1, Tensor dims: 1 45}]\n      outputs: [{name: "14", type:Tensor dtype: 1, Tensor dims: 1 2}]\n      value_infos: []\n      initializers: [TensorProto shape: [400 45],TensorProto shape: [400],TensorProto shape: [300 400],TensorProto shape: [300],TensorProto shape: [2 300],TensorProto shape: [2]]\n      nodes: [\n        Node {type: "Cast", inputs: [obs], outputs: [7], attributes: [{ name: \'to\', type: int, value: 1}]},\n        Node {type: "Flatten", inputs: [7], outputs: [8], attributes: [{ name: \'axis\', type: int, value: 1}]},\n        Node {type: "Gemm", inputs: [8,mu.0.weight,mu.0.bias], outputs: [9], attributes: [{ name: \'alpha\', type: float, value: 1},{ name: \'beta\', type: float, value: 1},{ name: \'transB\', type: int, value: 1}]},\n        Node {type: "Relu", inputs: [9], outputs: [10], attr

In [17]:
torch.onnx.export(model.actor, x, "actor_model.onnx")


In [18]:
import onnx


In [19]:
onnx_model = onnx.load("actor_model.onnx")  # load onnx model

In [5]:
import struct

struct.unpack_from('2f',onnx_model.graph.initializer[5].raw_data)



(-0.030470559373497963, 0.006223668809980154)

In [14]:
import numpy as np
with open("weight_definitions.h", "w") as header_file:
    for i, initializer in enumerate(onnx_model.graph.initializer):
        format_spec = f"{len(initializer.raw_data)//4}f"
#         with np.printoptions(floatmode='unique') as po:
#            float_data = np.array(struct.unpack_from(format_spec,initializer.raw_data))
            
        raw_def = f""" float {initializer.name.replace('.','_')}[] {{ 
        {np.array2string(float_data, separator=',',floatmode='unique', threshold=10000000,  )};
}};
        """
        header_file.write(raw_def)
        print(raw_def)
    

 float mu_0_weight[] { 
        [-0.030470559373497963, 0.006223668809980154];
};
        
 float mu_0_bias[] { 
        [-0.030470559373497963, 0.006223668809980154];
};
        
 float mu_2_weight[] { 
        [-0.030470559373497963, 0.006223668809980154];
};
        
 float mu_2_bias[] { 
        [-0.030470559373497963, 0.006223668809980154];
};
        
 float mu_4_weight[] { 
        [-0.030470559373497963, 0.006223668809980154];
};
        
 float mu_4_bias[] { 
        [-0.030470559373497963, 0.006223668809980154];
};
        


In [10]:
onnx_model.graph.node

[input: "obs"
output: "7"
name: "Cast_0"
op_type: "Cast"
attribute {
  name: "to"
  i: 1
  type: INT
}
, input: "7"
output: "8"
name: "Flatten_1"
op_type: "Flatten"
attribute {
  name: "axis"
  i: 1
  type: INT
}
, input: "8"
input: "mu.0.weight"
input: "mu.0.bias"
output: "9"
name: "Gemm_2"
op_type: "Gemm"
attribute {
  name: "alpha"
  f: 1.0
  type: FLOAT
}
attribute {
  name: "beta"
  f: 1.0
  type: FLOAT
}
attribute {
  name: "transB"
  i: 1
  type: INT
}
, input: "9"
output: "10"
name: "Relu_3"
op_type: "Relu"
, input: "10"
input: "mu.2.weight"
input: "mu.2.bias"
output: "11"
name: "Gemm_4"
op_type: "Gemm"
attribute {
  name: "alpha"
  f: 1.0
  type: FLOAT
}
attribute {
  name: "beta"
  f: 1.0
  type: FLOAT
}
attribute {
  name: "transB"
  i: 1
  type: INT
}
, input: "11"
output: "12"
name: "Relu_5"
op_type: "Relu"
, input: "12"
input: "mu.4.weight"
input: "mu.4.bias"
output: "13"
name: "Gemm_6"
op_type: "Gemm"
attribute {
  name: "alpha"
  f: 1.0
  type: FLOAT
}
attribute {
  nam

In [None]:
np.array2string()