In [1]:
import numpy as np
import torch as T
import mlflow
import os

from unitree_robot.train.environments import Go2Env, MujocoEnv
from unitree_robot.train.training import Trainer
from unitree_robot.train.experiments import StandUpExperiment



# ---------------------------------------------------------------------------
# Notebook configuration: every tunable value lives in this cell.
# ---------------------------------------------------------------------------

# Environment / reproducibility
MJCF_PATH = "./external/unitree_mj_models/go2/scene.xml"
SEED = 0
# Prefer the first CUDA device, but fall back to the CPU so the notebook still
# runs on machines without a usable GPU.
DEVICE = "cuda:0" if T.cuda.is_available() else "cpu"

# Reward shaping (passed to StandUpExperiment / Trainer in the next cell)
BODY_ANGLE_REWARD_SCALE = 1.0
BODY_HEIGHT_REWARD_SCALE = 1.0
ENERGY_REWARD_SCALE = 1.0
JOINT_LIMIT_REWARD_SCALE = 1.0  # currently unused: its argument is commented out in the experiment setup
REWARD_SCALING = 1.0
ENTROPY_COST = 8e-3
DISCOUNTING = .97

# Simulation / optimisation
SIM_FRAMES_PER_STEP = 5 # number of frames that are simulated between each 'decision step' by the network
TRAIN_EPOCHS = 5000
LEARNING_RATE = 3e-3

# Network and rollout layout
NETWORK_HIDDEN_SIZE = 16
NETWORK_LAYERS = 4
UNROLL_LENGTH = 128 # number of actions taken in the environment (in between these actions there are SIM_FRAMES_PER_STEP steps of simulation)
NUM_UNROLLS = 4 # number of full unrolls to collect training samples
MINIBATCH_SIZE = 32 # the sequence length that is trained on (UNROLL_LENGTH has to be divisible by this number)
TRAIN_BATCH_SIZE = 8 # the number of sequences that are used for a single training step (all batches are used in one epoch)

# Enforce the divisibility constraint documented above right here, instead of
# letting an invalid combination surface later inside the training loop.
if UNROLL_LENGTH % MINIBATCH_SIZE != 0:
    raise ValueError(
        f"UNROLL_LENGTH ({UNROLL_LENGTH}) must be divisible by MINIBATCH_SIZE ({MINIBATCH_SIZE})"
    )

EXPERIMENT_NAME = "standing_up"

In [2]:
# Seed every RNG in use *before* any object is constructed. Previously only
# NumPy was seeded, leaving PyTorch's global generator unseeded at this point.
# NOTE(review): presumably Trainer initialises network weights on construction,
# which is what makes seeding torch here matter — confirm against Trainer.
np.random.seed(seed=SEED)
T.manual_seed(SEED)

# Reward definition for the stand-up task, tracking the "base_link" body.
experiment = StandUpExperiment(
    body_name="base_link",
    body_angle_reward_scale=BODY_ANGLE_REWARD_SCALE,
    body_height_reward_scale=BODY_HEIGHT_REWARD_SCALE,
    energy_reward_scale=ENERGY_REWARD_SCALE,
    # joint_limit_reward_scale = JOINT_LIMIT_REWARD_SCALE
)

# Environment built from the MJCF scene; SIM_FRAMES_PER_STEP frames are
# simulated between two consecutive decision steps.
env = Go2Env(
    model_path=MJCF_PATH,
    sim_frames_per_step=SIM_FRAMES_PER_STEP,
)

# Trainer wiring the environment, the experiment and the optimisation
# hyperparameters from the configuration cell together.
trainer = Trainer(
    env=env,
    experiment=experiment,
    device=DEVICE,
    network_hidden_size=NETWORK_HIDDEN_SIZE,
    network_layers=NETWORK_LAYERS,
    learning_rate=LEARNING_RATE,
    reward_scaling=REWARD_SCALING,
    entropy_cost=ENTROPY_COST,
    discounting=DISCOUNTING,
)

Trainer: device set to gpu (cuda) !


In [3]:
# mlflow.set_experiment activates the named experiment and creates it when it
# does not exist yet, so the separate get_experiment_by_name/create_experiment
# step (a check-then-create race) is not needed.
mlflow.set_experiment(EXPERIMENT_NAME)

# Run the full training inside one MLflow run so that whatever gets logged
# during training is attached to it; the run is closed when the block exits,
# including on error.
# NOTE(review): the recorded execution of this cell stopped with a tensor shape
# mismatch (3 vs 24 at dimension 2). It is raised inside the training code,
# which is not visible from this notebook.
with mlflow.start_run():
    trainer.train(
        epochs=TRAIN_EPOCHS,
        unroll_length=UNROLL_LENGTH,
        num_unrolls=NUM_UNROLLS,
        minibatch_size=MINIBATCH_SIZE,
        train_batch_size=TRAIN_BATCH_SIZE,
        seed=SEED,
    )

training:   0%|                                                                                                                            | 0/5000 [00:04<?, ?it/s]


RuntimeError: The size of tensor a (3) must match the size of tensor b (24) at non-singleton dimension 2

---

---

# Visualization

In [1]:
# Requires the import, configuration and setup cells to have been run first
# (uses `np`, `env` and SIM_FRAMES_PER_STEP from them).
env.model.camera("main").pos = np.array([1, 1, 3])

# NOTE(review): `calc_angle` is neither defined nor imported anywhere in the
# visible notebook — it has to be provided before this cell can run.
# NOTE(review): this cell reads body "base" while the experiment is configured
# with body_name="base_link"; confirm which name the MJCF model actually uses.

# Drive the robot with randomly sampled actions and render every decision step
# until the loop is interrupted. try/finally alone guarantees the environment
# is closed; the former `except Exception as e: raise e` only re-raised and the
# loop counter was never read, so both were dropped.
try:
    while True:
        action = env.action_space.sample()
        # Same number of simulated frames per action as the environment uses
        # (was a hard-coded 5).
        env.do_simulation(ctrl=action, n_frames=SIM_FRAMES_PER_STEP)

        env.render(cam_offset=np.array([1, 1, 5]))

        # print(env.data.qfrc_actuator)
        # print(r.as_euler("xyz", degrees=True))
        print(calc_angle(env.data.body("base").xquat))
finally:
    env.close()

NameError: name 'np' is not defined

In [9]:
# `lookat` is only used by the commented-out call at the bottom of this cell.
from unitree_robot.train.environments import lookat

# Take the `xpos` of the "main" camera and shift it by -1 along z; the bare
# name on the last line makes the notebook display the resulting array.
target = np.add(env.data.cam("main").xpos, np.array([0, 0, -1]))
target

# lookat(env.data.cam("main").xpos, target)


<mujoco._structs.MjData at 0x2b5a63c05b0>

In [2]:
# Scratch cell: every line below is commented out, so nothing executes here.
# The snippets are leftover interactive probes of `env` (action space, qpos
# length, IMU quaternion sensor, body quaternions).
# NOTE(review): `env.data.("imu_quat")` below is not valid Python as written.

# env.action_space

# len(env.data.qpos)

# env.data.("imu_quat")


# q_rotate = quaternion.as_quat_array(env.data.sensor("imu_quat").data)
# quaternion.rotate_vectors(q_rotate, np.array([0,0,1]))
# (np.array([1, 0, 0]))




# env.get_sensor_state()

# env.data.xquat.shape