In [1]:
import gymnasium as gym

In [2]:
# Create the BipedalWalker-v3 environment with its render mode set to "human".
env = gym.make(
    "BipedalWalker-v3",
    render_mode="human",
)

In [3]:
# Start a fresh episode, then apply one randomly sampled action so there is a
# sample observation to inspect.
env.reset()
# Gymnasium's step() returns (observation, reward, terminated, truncated, info);
# the names below follow that order so each variable holds what it says.
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())

## Breakdown of values

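- Obs
 - idx - 0 Hull Angle
 - idx - 1 Angular Velocity
 - idx - 2 Horizontal Speed
 - idx - 3 Vertical Speed
 - idx - 4, 5 - back revolute joint angle, speed (joints idx 0)
 - idx - 6, 7 - back lower leg angle, speed (joints idx 1)
 - idx - 8 - back leg contact with ground
 - idx - 9, 10 - front revolute joint angle, speed (joints idx 2)
 - idx - 11, 12 - front lower leg angle, speed (joints idx 3)
 - idx - 13 - front leg contact with ground
 - idx - 14 - 23 lidar readings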

 - Lidar Readings
    The lidar readings are 10 values representing the distance to a surface along 10 rays cast from the walker, sweeping from straight down toward the front, at the following angles (taking the direction directly below the walker as 0 radians):
    [0.0, 0.15, 0.3, 0.45, 0.6, 0.75, 0.9, 1.05, 1.2, 1.35] radians

    Each reading is a value between 0 and 1 that indicates the distance to the surface in that direction.

- Action
4 values representing the torque applied to each joint. The torque values are clipped to be between -1 and 1.

In [4]:
def obs_to_text(obs):
    """Describe an observation vector as newline-separated, human-readable text.

    The first 14 entries of ``obs`` are each printed next to a fixed label,
    and entries 14-23 are printed as numbered lidar readings alongside a fixed
    list of ray angles. Every number is formatted to two decimal places.

    Args:
        obs: Indexable, sliceable sequence of numbers. Entries 0-13 are read
            by index; entries 14-23 are read by slice.

    Returns:
        One string: a header line, then one line per labelled value, then one
        line per lidar reading, joined with newlines.
    """
    # Labels in index order: position k in this tuple describes obs[k].
    scalar_labels = (
        "Hull angle",
        "Angular velocity",
        "X velocity",
        "Y velocity",
        "Back revolute joint angle",
        "Back revolute joint speed",
        "Back lower leg angle",
        "Back lower leg speed",
        "Back leg ground contact flag",
        "Front revolute joint angle",
        "Front revolute joint speed",
        "Front lower leg angle",
        "Front lower leg speed",
        "Front leg ground contact flag",
    )

    lines = ["Observation from last step: "]
    lines.extend(
        f"{label}: {obs[position]:.2f}"
        for position, label in enumerate(scalar_labels)
    )

    # Lidars: ray angles in radians, paired positionally with the readings.
    # (Original note: "starting from the top of the hull" - the reference
    # direction cannot be confirmed from this function alone.)
    ray_angles = (0.0, 0.15, 0.3, 0.45, 0.6, 0.75, 0.9, 1.05, 1.2, 1.35)
    for number, (angle, distance) in enumerate(zip(ray_angles, obs[14:24]), start=1):
        lines.append(f"Lidar {number} ({angle:.2f} rad): {distance:.2f}")

    return "\n".join(lines)
    

## Work on getting observations with additional descriptions to help the LLM understand the environment better

- If no increase in reward in {x} actions, then reset the environment and tell the LLM to try a different approach
    - This will help the LLM to not get stuck in position thinking it is a good thing
- If the walker falls, then reset the environment and tell the LLM to try a different approach
    - This will help the LLM to not keep repeating the same mistake
- If the walker is tilting in a certain direction based on the lidar readings, tell the LLM that it is falling in x direction and to try to correct it
- If little to no leg movement, tell the LLM to try to move the legs more
- If the walker is moving backwards or not moving at all, tell the LLM to try to move forward
- If the walker is moving forward tell it that it is moving forward and to keep doing so

array([ 0.01213455,  0.03359081, -0.0020611 , -0.01071129,  0.4334355 ,
        0.36703855,  0.07950807, -1.0001649 ,  1.        ,  0.33565775,
        0.2973047 ,  0.08417094, -0.70090467,  1.        ,  0.44704577,
        0.45212266,  0.46794587,  0.49647093,  0.5416534 ,  0.610978  ,
        0.7191741 ,  0.89845616,  1.        ,  1.        ], dtype=float32)

In [7]:
# Display the text rendering of the observation captured by the earlier env.step call.
obs_to_text(obs)

'Observation from last step: \nHull angle: 0.01\nAngular velocity: 0.03\nX velocity: -0.00\nY velocity: -0.01\nBack revolute joint angle: 0.43\nBack revolute joint speed: 0.37\nBack lower leg angle: 0.08\nBack lower leg speed: -1.00\nBack leg ground contact flag: 1.00\nFront revolute joint angle: 0.34\nFront revolute joint speed: 0.30\nFront lower leg angle: 0.08\nFront lower leg speed: -0.70\nFront leg ground contact flag: 1.00\nLidar 1 (0.00 rad): 0.45\nLidar 2 (0.15 rad): 0.45\nLidar 3 (0.30 rad): 0.47\nLidar 4 (0.45 rad): 0.50\nLidar 5 (0.60 rad): 0.54\nLidar 6 (0.75 rad): 0.61\nLidar 7 (0.90 rad): 0.72\nLidar 8 (1.05 rad): 0.90\nLidar 9 (1.20 rad): 1.00\nLidar 10 (1.35 rad): 1.00'

In [8]:
# Done with the environment created by gym.make; close it.
env.close()

In [None]:
def reward_to_text(reward):
    """Format the reward from the last step as a one-line sentence.

    Args:
        reward: Number to report; rendered with two decimal places.

    Returns:
        The sentence string containing the formatted reward.
    """
    formatted_reward = format(reward, ".2f")
    return "The reward from the last step was: " + formatted_reward

In [None]:
def text_to_action(text):
    """
        Convert an LLM reply into the 4-element action list for the environment.

        Expected form of the reply:
            Move Back revolute joint {value}, Back lower leg {value}, Front revolute joint {value}, Front lower leg {value}

        Each comma-separated segment supplies one value: its last
        whitespace-separated token, parsed with float(). Parsing tolerates
        missing or extra whitespace around commas, surrounding whitespace and
        newlines, a trailing comma, and a sentence-ending period after a value.

        Args:
            text: The reply string produced by the LLM.

        Returns:
            A list of 4 floats in segment order. If fewer than 4 segments are
            present, the remaining positions stay at 0.0.

        Raises:
            ValueError: If the reply has no segments, has more than 4
                segments, or a segment does not end in a parseable number.
    """
    n_values = 4

    # Split on the bare comma and strip each piece, so "a 1,b 2" and
    # "a 1 , b 2" are segmented the same way as "a 1, b 2".
    segments = [segment.strip() for segment in text.split(",")]
    # A trailing comma produces empty segments at the end; drop only those so
    # the positions of the real segments are unaffected.
    while segments and not segments[-1]:
        segments.pop()

    if not segments:
        raise ValueError(f"No action values found in LLM output: {text!r}")
    if len(segments) > n_values:
        raise ValueError(
            f"Expected at most {n_values} action values, got {len(segments)}: {text!r}"
        )

    action = [0.0] * n_values
    for i, segment in enumerate(segments):
        tokens = segment.split()
        # Strip a trailing period so "... leg 0.5." still parses; an empty
        # segment yields "" and is reported by the float() failure below.
        raw_value = tokens[-1].rstrip(".") if tokens else ""
        try:
            action[i] = float(raw_value)
        except ValueError as exc:
            raise ValueError(
                f"Could not parse an action value from segment {i + 1} ({segment!r})"
            ) from exc
    return action