In [106]:
from lerobot.envs.utils import _load_module_from_path, _call_make_env, _normalize_hub_result

# Smoke test: load the local environment module, build one environment through
# the lerobot hub helpers, then reset it and take a single random step.

# Load your module
# (path is relative to the notebook's working directory)
module = _load_module_from_path("./inverted_pendulum_env.py")

# Test the make_env function
# Request a single, non-async environment.
result = _call_make_env(module, n_envs=1, use_async_envs=False)
# presumably converts the make_env result into a nested {suite: {index: env}}
# container, given how it is indexed below -- TODO confirm against lerobot
normalized = _normalize_hub_result(result)

# Verify it works
# Take the first suite key and the entry stored under key/index 0 in it.
suite_name = next(iter(normalized))
env = normalized[suite_name][0]
obs, info = env.reset()
# NOTE(review): render_mode is assigned after construction and after reset();
# whether the wrapped environment honours a late assignment is not visible
# here -- confirm.
env.render_mode = "human"
# `action` is kept as a notebook global: the manual stepping cell negates and
# replays it.
action = env.action_space.sample()
obs, reward, terminated, truncated, info = env.step(action)



In [None]:
# Pull the physical parameters used by the controller out of the simulator.
# Requires `env` from the environment-creation cell.
# Reaches through the first sub-environment to its unwrapped `model` attribute
# (presumably a mujoco.MjModel, since it is passed to mj_name2id below).
mjmodel = env.envs[0].unwrapped.model

import mujoco as mj
# Look up body ids by name.
# NOTE(review): per the MuJoCo docs mj_name2id returns -1 for an unknown name,
# and body_mass[-1] would then silently read the last body -- confirm that
# "cart" and "pole" exist in the model.
cart_id = mj.mj_name2id(mjmodel, mj.mjtObj.mjOBJ_BODY, "cart")
pole_id = mj.mj_name2id(mjmodel, mj.mjtObj.mjOBJ_BODY, "pole")

# Masses of the cart and pole bodies, read from the model.
m_c = mjmodel.body_mass[cart_id]
m_p = mjmodel.body_mass[pole_id]
# Hard-coded length and gravity constants used as defaults by f_t.
# NOTE(review): neither is read from the model -- confirm they match the pole
# geometry and the model's gravity setting.
l = 1
g = 9.81

import numpy as np
def f_t(ddx, x, dx, theta, dtheta,
        m_c=m_c, m_p=m_p, l=l, g=g,
        min_f=-3, max_f=3) -> float:
    """Compute the clipped force associated with a requested acceleration ``ddx``.

    Args:
        ddx: Free variable, the requested acceleration.
        x, dx: Position and velocity from the state. Accepted for a uniform
            call signature; neither is used in the computation.
        theta, dtheta: Angle and angular velocity from the state.
        m_c, m_p, l, g: Model parameters. The defaults are bound to the
            module-level values at the time this function is defined.
        min_f, max_f: Lower and upper bounds applied to the result.

    Returns:
        ``(m_c + m_p) * ddx - m_p * l * ddtheta * cos(theta)
        + m_p * l * dtheta**2 * sin(theta)`` clipped to ``[min_f, max_f]``,
        where ``ddtheta = (ddx * cos(theta) + g * sin(theta)) / l``.
    """
    s, c = np.sin(theta), np.cos(theta)

    # Angular acceleration implied by the requested ddx.
    pole_ang_acc = (ddx * c + g * s) / l

    # Three contributions, combined left to right as (first - second) + third.
    inertial_term = (m_c + m_p) * ddx
    coupling_term = m_p * l * pole_ang_acc * c
    centripetal_term = m_p * l * dtheta**2 * s
    force = inertial_term - coupling_term + centripetal_term

    return np.clip(force, min_f, max_f)


def ddx(x, dx):
    """Choose an acceleration that brings ``x`` back towards zero.

    Rules, checked in order:
      1. ``x`` and ``dx`` have opposite signs (already heading to zero): 0.
      2. ``dx`` is non-zero otherwise: ``-dx / 10.0`` (gradually reverse).
      3. Stationary but ``x`` is non-zero: magnitude 0.1 directed towards zero.
      4. Stationary at zero: 0.
    """
    heading_to_zero = dx * x < 0
    if heading_to_zero:
        # Velocity already points back at zero; do not accelerate.
        return 0
    if abs(dx) > 0:
        # Moving away from zero (or passing through it): bleed off velocity.
        return -dx / 10.0
    if abs(x) > 0:
        # At rest but displaced: nudge back towards zero.
        return -np.sign(x) * 0.1
    return 0


def policy(obs) -> float:
    """Map one observation to a force by chaining ``ddx`` and ``f_t``.

    ``obs`` must unpack into exactly four values, in the order
    ``(x, theta, dx, dtheta)``.
    """
    pos, angle, vel, ang_vel = obs
    # First choose the acceleration, then convert it into a clipped force.
    return f_t(ddx(pos, vel), pos, vel, angle, ang_vel)

In [112]:
# Run this as many times as you'd like
# Requires `env` and `action` from the environment-creation cell. Each run
# flips the sign of the previous action and applies it for one step, so the
# result depends on how many times this cell has already been run.
action = -action
print(action)
obs, reward, terminated, truncated, info = env.step(action)
print(obs, reward, terminated, truncated, info)

# Unpack the first row of the returned observation into notebook globals for
# inspection (same ordering that policy() expects).
x, theta, dx, dtheta = obs[0]

[[-1.434539]]
[[ 0.01028418  0.0535959  -0.39793246  0.9266252 ]] [1.] [False] [False] {'reward_survive': array([1]), '_reward_survive': array([ True])}


np.float64(0.010284175640354993)

In [102]:
# Close the environment when finished. Cells that call env.step() should not
# be run after this without re-creating `env`.
env.close()