In [4]:
import gymnasium as gym
import numpy as np
import pandas as pd

# Get prompts

In [50]:
def obs_to_text(obs):
    starter = "Observation from last step: "
    hull_angle_speed = f"Hull angle: {obs[0]:.2f}"
    angular_velocity = f"Angular velocity: {obs[1]:.2f}"
    x_velocity = f"X velocity: {obs[2]:.2f}"
    y_velocity = f"Y velocity: {obs[3]:.2f}"
    back_revolute_joint_angle = f"Back revolute joint angle: {obs[4]:.2f}"
    back_revolute_joint_speed = f"Back revolute joint speed: {obs[5]:.2f}"
    back_lower_leg_angle = f"Back lower leg angle: {obs[6]:.2f}"
    back_lower_leg_speed = f"Back lower leg speed: {obs[7]:.2f}"
    back_leg_ground_contact_flag = f"Back leg ground contact flag: {obs[8]:.2f}"
    front_revolute_joint_angle = f"Front revolute joint angle: {obs[9]:.2f}"
    front_revolute_joint_speed = f"Front revolute joint speed: {obs[10]:.2f}"
    front_lower_leg_angle = f"Front lower leg angle: {obs[11]:.2f}"
    front_lower_leg_speed = f"Front lower leg speed: {obs[12]:.2f}"
    front_leg_ground_contact_flag = f"Front leg ground contact flag: {obs[13]:.2f}"
    
    # Lidars
    lidar_angles = [0.0, 0.15, 0.3, 0.45, 0.6, 0.75, 0.9, 1.05, 1.2, 1.35] # In radians starting from the top of the hull
    lidar_distances = obs[14:24]
    lidar_readings = [f"Lidar {i+1} ({angle:.2f} rad): {distance:.2f}" for i, (angle, distance) in enumerate(zip(lidar_angles, lidar_distances))]
    
    return "\n".join([starter, hull_angle_speed, angular_velocity, x_velocity, y_velocity, back_revolute_joint_angle, back_revolute_joint_speed, back_lower_leg_angle, back_lower_leg_speed, back_leg_ground_contact_flag, front_revolute_joint_angle, front_revolute_joint_speed, front_lower_leg_angle, front_lower_leg_speed, front_leg_ground_contact_flag] + lidar_readings)

def reward_to_text(reward):
    return f"The reward from the last step was: {reward:.2f}"

def text_to_action(text):
    """
        Given an output by the LLM in the form:
            Move Back revolute joint {value}, Back lower leg {value}, Front revolute joint {value}, Front lower leg {value}
        This function will return the corresponding action values for the environment 
    """
    action = [0, 0, 0, 0]
    split_text = text.strip().split(", ")
    for i, action_value in enumerate(split_text):
        action[i] = float(action_value.split(" ")[-1])
    return action
    

In [6]:
primer = """
Imagine you are an expert at controlling a bipedal machine, and you have been given the task of controlling a bipedal machine in a reinforcement learning environment. The challenge is to walk a bipedal machine as far as possible without falling over. The machine has a hull, two legs, a back and front revolute joint, and lower leg. The machine has 10 lidar sensors that can detect the distance to objects in front of it. 

You will be given scenarios in the form of observations from the environment and the reward based on the last action and the Current Total Score. Your goal is to provide the next action in the following format:

'Move Back revolute joint `value`, Back lower leg 'value', Front revolute joint 'value', Front lower leg 'value''

The values must be in the range [-1, 1], and your goal is to maximize the reward and move the machine forward effectively. Only provide the action and no additional explanation.

Example
'Observation from last step: \nHull angle: -0.02\nAngular velocity: -0.03\nX velocity: -0.03\nY velocity: -0.01\nBack revolute joint angle: 0.48\nBack revolute joint speed: 1.00\nBack lower leg angle: 0.07\nBack lower leg speed: -1.00\nBack leg ground contact flag: 1.00\nFront revolute joint angle: 0.38\nFront revolute joint speed: 1.00\nFront lower leg angle: 0.08\nFront lower leg speed: -1.00\nFront leg ground contact flag: 1.00\nLidar 1 (0.00 rad): 0.45\nLidar 2 (0.15 rad): 0.45\nLidar 3 (0.30 rad): 0.47\nLidar 4 (0.45 rad): 0.50\nLidar 5 (0.60 rad): 0.54\nLidar 6 (0.75 rad): 0.61\nLidar 7 (0.90 rad): 0.72\nLidar 8 (1.05 rad): 0.90\nLidar 9 (1.20 rad): 1.00\nLidar 10 (1.35 rad): 1.00'
'The reward from the last step was: -0.25'
Score: -0.25

Action: Move Back revolute joint 0.5, Back lower leg -0.3, Front revolute joint 0.2, Front lower leg 0.7
"""

obs_skeleton = """
_
{obs_step}
{reward_step}
score: {score_step}

Action: 
"""

obs_skeleton_reset = """
Now, given the following observation, rewards and score give me the next action for the following scenario:
_
{obs_step}
reward: None as it is the first step
score: 0

Action:
"""

## Training testing

In [7]:
import openai
import os
import time

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
STEPS = 500
OBS_TEXT_LENGTH = 800 # Approx
PRIMER_STARTER_LENGTH = 2000 # Approx
NUMBER_OF_OBS_MEMORY = 3 
save_dir = "runs/bipedal_walker/"
NUMBER_ACTIONS = 5

In [8]:
# Init environment
env = gym.make("BipedalWalker-v3", render_mode="human")
obs, _ = env.reset()

# Init OpenAI
client = openai.OpenAI()
def get_action(primer, client):
    print("Getting action from LLM")
    response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "user", "content": primer}
    ],
    response_format={
      "type": "text"
    },
    temperature=0.2,
    max_tokens=200,
    top_p=1,
    frequency_penalty=0,
    presence_penalty=0
    )

    action_text = response.choices[0].message.content
    return action_text

def save_obs(obs, reward, score, step, store={}):
    obs_text = obs_to_text(obs)
    reward_text = reward_to_text(reward)
    score_text = f"Score: {score}"
    obs_skeleton_filled = obs_skeleton.format(obs_step=obs_text, reward_step=reward_text, score_step=score_text)
    inner = {
        "obs": obs,
        "obs_text": obs_text,
        "reward": reward,
        "reward_text": reward_text,
        "score": score,
        "skeleton": obs_skeleton_filled
    }
    store[step] = inner


In [None]:
score, reward, done, store = 0, 0, False, {}
obs, _ = env.reset()

# Make first primer
obs_text = obs_to_text(obs)
primer += obs_skeleton_reset.format(obs_step=obs_text)

for i in range(STEPS):
    # sleep to not exceed the rate limit
    time.sleep(5)

    # Get action from gpt
    action_text = get_action(primer, client)
    action = text_to_action(action_text)

    save_obs(obs, reward, score, i, store)

    # Perform action
    obs, reward, done, _, _ = env.step(action)
    if done:
        print("The bipedal machine has fallen over/took too long. The total score was: ", score)
        break

    # Update score
    score += reward

    # Generate new primer
    obs_text = obs_to_text(obs)
    reward_text = reward_to_text(reward)
    obs_step = obs_skeleton.format(obs_step=obs_text, reward_step=reward_text, score_step=score)
    primer += action_text + "\n" + obs_step

    # Check length of primer
    if len(primer) > PRIMER_STARTER_LENGTH + OBS_TEXT_LENGTH*NUMBER_OF_OBS_MEMORY or len(primer) > 4096:
        # split at "_" get the primer front and the continuation
        primer_splits = primer.split("_")
        primer = primer_splits[0] + "_" + "".join(primer_splits[2:]) # Remove the first observation and reward from memory

# Save the store as df
df = pd.DataFrame(store).T

## Testing Increase number of actions per call
- Prompt gpt to give more actions per call
- new reward function

In [84]:
def rewards_to_text(rewards):
    # Gets a list of rewards and returns a string with the rewards in the form "[reward1, reward2, reward3]"
    return "[" + ", ".join([str(reward) for reward in rewards]) + "]"

In [91]:
primer = """
Imagine you are an expert at controlling a bipedal machine, and you have been given the task of controlling a bipedal machine in a reinforcement learning environment. The challenge is to walk a bipedal machine as far as possible without falling over. The machine has a hull, two legs, a back and front revolute joint, and lower leg. The machine has 10 lidar sensors that can detect the distance to objects in front of it. 

You will be given scenarios in the form of observations from the environment and the reward based on the last action and the Current Total Score. Your goal is to provide the next action in the following format:

'Move Back revolute joint `value`, Back lower leg 'value', Front revolute joint 'value', Front lower leg 'value''

The values must be in the range [-1, 1], and your goal is to maximize the reward and move the machine forward effectively. Only provide the action and no additional explanation.

Example
'Observation from last step: \nHull angle: -0.02\nAngular velocity: -0.03\nX velocity: -0.03\nY velocity: -0.01\nBack revolute joint angle: 0.48\nBack revolute joint speed: 1.00\nBack lower leg angle: 0.07\nBack lower leg speed: -1.00\nBack leg ground contact flag: 1.00\nFront revolute joint angle: 0.38\nFront revolute joint speed: 1.00\nFront lower leg angle: 0.08\nFront lower leg speed: -1.00\nFront leg ground contact flag: 1.00\nLidar 1 (0.00 rad): 0.45\nLidar 2 (0.15 rad): 0.45\nLidar 3 (0.30 rad): 0.47\nLidar 4 (0.45 rad): 0.50\nLidar 5 (0.60 rad): 0.54\nLidar 6 (0.75 rad): 0.61\nLidar 7 (0.90 rad): 0.72\nLidar 8 (1.05 rad): 0.90\nLidar 9 (1.20 rad): 1.00\nLidar 10 (1.35 rad): 1.00'
rewards: [-0.25, 0.5, -0.75, 0.25, 0.1]
Score: -0.15

Actions
Move Back revolute joint 0.5, Back lower leg -0.3, Front revolute joint 0.2, Front lower leg 0.7
Move Back revolute joint 0.1, Back lower leg -0.9, Front revolute joint -0.2, Front lower leg 0.3
Move Back revolute joint -0.9, Back lower leg 0.8, Front revolute joint -0.1, Front lower leg -0.2
Move Back revolute joint 0.9, Back lower leg -0.1, Front revolute joint 0.6, Front lower leg 0.4
Move Back revolute joint -0.1, Back lower leg -0.9, Front revolute joint 0.1, Front lower leg 0.9
"""

obs_skeleton = """
_
{obs_step}
rewards: {reward_step}
score: {score_step}

Actions 
"""

obs_skeleton_reset = """
Now, given the following observation, rewards and score give me the optimal best {no_actions} action for the following scenario:
_
{obs_step}
reward: None as it is the first step
score: 0

Actions
"""

In [92]:
import openai
import os
import time

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
STEPS = 100
OBS_TEXT_LENGTH = 1200 # Approx
PRIMER_STARTER_LENGTH = 2400 # Approx
NUMBER_OF_OBS_MEMORY = 4
save_dir = "runs/bipedal_walker/"
NUMBER_ACTIONS = 5 # Prompt also has to be updated

In [93]:
# Init environment
env = gym.make("BipedalWalker-v3", render_mode="human")
obs, _ = env.reset()

# Init OpenAI
client = openai.OpenAI()
def get_action(primer, client):
    print("Getting action from LLM")
    response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "user", "content": primer}
    ],
    response_format={
      "type": "text"
    },
    temperature=0.2,
    max_tokens=200,
    top_p=1,
    frequency_penalty=0,
    presence_penalty=0
    )

    action_text = response.choices[0].message.content
    return action_text

def save_obs(obs, reward, score, step, actions_text, actions, store={}):
    obs_text = obs_to_text(obs)
    reward_text = reward_to_text(reward)
    score_text = f"Score: {score}"
    obs_skeleton_filled = obs_skeleton.format(obs_step=obs_text, reward_step=reward_text, score_step=score_text)
    inner = {
        "obs": obs,
        "obs_text": obs_text,
        "reward": reward,
        "reward_text": reward_text,
        "score": score,
        "skeleton": obs_skeleton_filled,
        "actions_text": actions_text,
        "actions": actions
    }
    store[step] = inner


In [94]:
score, rewards, done, store = 0, [], False, {}
action_text_store, action_store = [], []
obs, _ = env.reset()

# Make first primer
obs_text = obs_to_text(obs)
primer += obs_skeleton_reset.format(obs_step=obs_text, no_actions=NUMBER_ACTIONS)

for i in range(STEPS):
    # sleep to not exceed the rate limit
    time.sleep(20)

    # Get action from gpt
    response = get_action(primer, client)
    action_texts = response.split("\n")
    for action_text in action_texts:
        # Get each of the actions given by the LLM
        action = text_to_action(action_text)
        print(action)
        # perform action
        obs, reward, done, _, _ = env.step(action)

        # Update score
        score += reward
        
        # save the reward
        rewards.append(reward)
        # save the action
        action_text_store.append(action_text)
        action_store.append(action)

        # Add the action to the primer
        primer += action_text + "\n"

        if done:
            print("The bipedal machine has fallen over/took too long. The total score was: ", score)
            break

    save_obs(obs, reward, score, i, action_text_store, action_store, store)

    # Generate new primer
    obs_text = obs_to_text(obs)
    reward_text = rewards_to_text(rewards)
    obs_step = obs_skeleton.format(obs_step=obs_text, reward_step=reward_text, score_step=score)
    primer += obs_step # actions are already added

    # reset rewards
    rewards = []
    action_text_store = []
    action_store = []

    # Check length of primer
    if len(primer) > PRIMER_STARTER_LENGTH + OBS_TEXT_LENGTH*NUMBER_OF_OBS_MEMORY:
        # split at "_" get the primer front and the continuation
        primer_splits = primer.split("_")
        primer = primer_splits[0] + "_" + "".join(primer_splits[2:]) # Remove the first observation and reward from memory

# Save the store as df
df = pd.DataFrame(store).T

Getting action from LLM
[0.1, -0.5, 0.1, 0.5]
[0.2, -0.4, 0.2, 0.4]
[0.0, -0.6, 0.0, 0.6]
[0.3, -0.3, 0.3, 0.3]
[0.1, -0.7, 0.1, 0.7]
Getting action from LLM
[-0.2, 0.5, 0.1, -0.3]
[-0.3, 0.4, 0.2, -0.2]
[-0.1, 0.6, 0.0, -0.4]
[-0.4, 0.3, 0.3, -0.1]
[-0.2, 0.7, 0.1, -0.5]
Getting action from LLM
[-0.3, 0.5, 0.2, -0.3]
[-0.4, 0.4, 0.3, -0.2]
[-0.2, 0.6, 0.1, -0.4]
[-0.5, 0.3, 0.4, -0.1]
[-0.3, 0.7, 0.2, -0.5]
Getting action from LLM
[-0.4, 0.5, 0.3, -0.3]
[-0.5, 0.4, 0.4, -0.2]
[-0.3, 0.6, 0.2, -0.4]
[-0.6, 0.3, 0.5, -0.1]
[-0.4, 0.7, 0.3, -0.5]
Getting action from LLM
[-0.5, 0.5, 0.4, -0.3]
[-0.6, 0.4, 0.5, -0.2]
[-0.4, 0.6, 0.3, -0.4]
[-0.7, 0.3, 0.6, -0.1]
[-0.5, 0.7, 0.4, -0.5]
Getting action from LLM
[0.3, -0.5, -0.2, 0.4]
[-0.4, 0.6, 0.1, -0.3]
[0.2, -0.7, 0.5, 0.2]
[-0.6, 0.4, -0.3, 0.6]
[0.1, -0.2, 0.3, -0.4]
Getting action from LLM
[0.2, -0.6, 0.4, 0.3]
[-0.5, 0.5, 0.2, -0.3]
[0.3, -0.4, 0.1, 0.5]
[-0.4, 0.3, -0.2, 0.4]
[0.1, -0.5, 0.3, -0.2]
Getting action from LLM
[0.1, -0.6,

RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-i2B7mZhvYpqPGnG5AQdKrrdd on requests per day (RPD): Limit 200, Used 200, Requested 1. Please try again in 7m12s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}

In [97]:
# Save the store as df
df = pd.DataFrame(store).T

In [99]:
df.to_csv(save_dir + "run_2.csv")

In [100]:
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', None)
df

Unnamed: 0,obs,obs_text,reward,reward_text,score,skeleton,actions_text,actions
0,"[-0.064194255, -0.042562712, -0.0331809, -0.0065205186, 0.62954736, 1.000009, -0.13839555, -0.99994755, 0.0, 0.06496592, 0.3278666, 1.1970745, -0.00020940353, 0.0, 0.4696442, 0.4749777, 0.4916008, 0.5215678, 0.5690343, 0.6418633, 0.75552875, 0.94387364, 1.0, 1.0]",Observation from last step: \nHull angle: -0.06\nAngular velocity: -0.04\nX velocity: -0.03\nY velocity: -0.01\nBack revolute joint angle: 0.63\nBack revolute joint speed: 1.00\nBack lower leg angle: -0.14\nBack lower leg speed: -1.00\nBack leg ground contact flag: 0.00\nFront revolute joint angle: 0.06\nFront revolute joint speed: 0.33\nFront lower leg angle: 1.20\nFront lower leg speed: -0.00\nFront leg ground contact flag: 0.00\nLidar 1 (0.00 rad): 0.47\nLidar 2 (0.15 rad): 0.47\nLidar 3 (0.30 rad): 0.49\nLidar 4 (0.45 rad): 0.52\nLidar 5 (0.60 rad): 0.57\nLidar 6 (0.75 rad): 0.64\nLidar 7 (0.90 rad): 0.76\nLidar 8 (1.05 rad): 0.94\nLidar 9 (1.20 rad): 1.00\nLidar 10 (1.35 rad): 1.00,-0.176249,The reward from the last step was: -0.18,-0.60187,\n_\nObservation from last step: \nHull angle: -0.06\nAngular velocity: -0.04\nX velocity: -0.03\nY velocity: -0.01\nBack revolute joint angle: 0.63\nBack revolute joint speed: 1.00\nBack lower leg angle: -0.14\nBack lower leg speed: -1.00\nBack leg ground contact flag: 0.00\nFront revolute joint angle: 0.06\nFront revolute joint speed: 0.33\nFront lower leg angle: 1.20\nFront lower leg speed: -0.00\nFront leg ground contact flag: 0.00\nLidar 1 (0.00 rad): 0.47\nLidar 2 (0.15 rad): 0.47\nLidar 3 (0.30 rad): 0.49\nLidar 4 (0.45 rad): 0.52\nLidar 5 (0.60 rad): 0.57\nLidar 6 (0.75 rad): 0.64\nLidar 7 (0.90 rad): 0.76\nLidar 8 (1.05 rad): 0.94\nLidar 9 (1.20 rad): 1.00\nLidar 10 (1.35 rad): 1.00\nrewards: The reward from the last step was: -0.18\nscore: Score: -0.6018701003499329\n\nActions \n,"[Move Back revolute joint 0.1, Back lower leg -0.5, Front revolute joint 0.1, Front lower leg 0.5, Move Back revolute joint 0.2, Back lower leg -0.4, Front revolute joint 0.2, Front lower leg 0.4, Move Back revolute joint 0.0, Back lower leg -0.6, Front revolute joint 0.0, Front lower leg 0.6, Move Back revolute joint 0.3, Back lower leg -0.3, Front revolute joint 0.3, Front lower leg 0.3, Move Back revolute joint 0.1, Back lower leg -0.7, Front revolute joint 0.1, Front lower leg 0.7]","[[0.1, -0.5, 0.1, 0.5], [0.2, -0.4, 0.2, 0.4], [0.0, -0.6, 0.0, 0.6], [0.3, -0.3, 0.3, 0.3], [0.1, -0.7, 0.1, 0.7]]"
1,"[-0.124767184, -0.012801033, -0.0215311, -0.04694961, 0.47704247, -0.7213334, 0.49628854, 1.0, 0.0, 0.40216166, 1.0, 0.3890819, -1.0000001, 0.0, 0.46830943, 0.47362778, 0.49020365, 0.52008545, 0.567417, 0.640039, 0.7533815, 0.9411911, 1.0, 1.0]",Observation from last step: \nHull angle: -0.12\nAngular velocity: -0.01\nX velocity: -0.02\nY velocity: -0.05\nBack revolute joint angle: 0.48\nBack revolute joint speed: -0.72\nBack lower leg angle: 0.50\nBack lower leg speed: 1.00\nBack leg ground contact flag: 0.00\nFront revolute joint angle: 0.40\nFront revolute joint speed: 1.00\nFront lower leg angle: 0.39\nFront lower leg speed: -1.00\nFront leg ground contact flag: 0.00\nLidar 1 (0.00 rad): 0.47\nLidar 2 (0.15 rad): 0.47\nLidar 3 (0.30 rad): 0.49\nLidar 4 (0.45 rad): 0.52\nLidar 5 (0.60 rad): 0.57\nLidar 6 (0.75 rad): 0.64\nLidar 7 (0.90 rad): 0.75\nLidar 8 (1.05 rad): 0.94\nLidar 9 (1.20 rad): 1.00\nLidar 10 (1.35 rad): 1.00,-0.089103,The reward from the last step was: -0.09,-1.176188,\n_\nObservation from last step: \nHull angle: -0.12\nAngular velocity: -0.01\nX velocity: -0.02\nY velocity: -0.05\nBack revolute joint angle: 0.48\nBack revolute joint speed: -0.72\nBack lower leg angle: 0.50\nBack lower leg speed: 1.00\nBack leg ground contact flag: 0.00\nFront revolute joint angle: 0.40\nFront revolute joint speed: 1.00\nFront lower leg angle: 0.39\nFront lower leg speed: -1.00\nFront leg ground contact flag: 0.00\nLidar 1 (0.00 rad): 0.47\nLidar 2 (0.15 rad): 0.47\nLidar 3 (0.30 rad): 0.49\nLidar 4 (0.45 rad): 0.52\nLidar 5 (0.60 rad): 0.57\nLidar 6 (0.75 rad): 0.64\nLidar 7 (0.90 rad): 0.75\nLidar 8 (1.05 rad): 0.94\nLidar 9 (1.20 rad): 1.00\nLidar 10 (1.35 rad): 1.00\nrewards: The reward from the last step was: -0.09\nscore: Score: -1.176187736931197\n\nActions \n,"[Move Back revolute joint -0.2, Back lower leg 0.5, Front revolute joint 0.1, Front lower leg -0.3, Move Back revolute joint -0.3, Back lower leg 0.4, Front revolute joint 0.2, Front lower leg -0.2, Move Back revolute joint -0.1, Back lower leg 0.6, Front revolute joint 0.0, Front lower leg -0.4, Move Back revolute joint -0.4, Back lower leg 0.3, Front revolute joint 0.3, Front lower leg -0.1, Move Back revolute joint -0.2, Back lower leg 0.7, Front revolute joint 0.1, Front lower leg -0.5]","[[-0.2, 0.5, 0.1, -0.3], [-0.3, 0.4, 0.2, -0.2], [-0.1, 0.6, 0.0, -0.4], [-0.4, 0.3, 0.3, -0.1], [-0.2, 0.7, 0.1, -0.5]]"
2,"[-0.12063294, 0.010620165, -0.030922318, 0.050125204, 0.23806335, -1.0000081, 0.9230446, 0.9999995, 0.0, 0.7958852, 1.0000026, -0.18491113, -0.99999666, 0.0, 0.48036873, 0.48582405, 0.50282675, 0.533478, 0.5820284, 0.6565205, 0.7727816, 0.96542746, 1.0, 1.0]",Observation from last step: \nHull angle: -0.12\nAngular velocity: 0.01\nX velocity: -0.03\nY velocity: 0.05\nBack revolute joint angle: 0.24\nBack revolute joint speed: -1.00\nBack lower leg angle: 0.92\nBack lower leg speed: 1.00\nBack leg ground contact flag: 0.00\nFront revolute joint angle: 0.80\nFront revolute joint speed: 1.00\nFront lower leg angle: -0.18\nFront lower leg speed: -1.00\nFront leg ground contact flag: 0.00\nLidar 1 (0.00 rad): 0.48\nLidar 2 (0.15 rad): 0.49\nLidar 3 (0.30 rad): 0.50\nLidar 4 (0.45 rad): 0.53\nLidar 5 (0.60 rad): 0.58\nLidar 6 (0.75 rad): 0.66\nLidar 7 (0.90 rad): 0.77\nLidar 8 (1.05 rad): 0.97\nLidar 9 (1.20 rad): 1.00\nLidar 10 (1.35 rad): 1.00,-0.042615,The reward from the last step was: -0.04,-1.475389,\n_\nObservation from last step: \nHull angle: -0.12\nAngular velocity: 0.01\nX velocity: -0.03\nY velocity: 0.05\nBack revolute joint angle: 0.24\nBack revolute joint speed: -1.00\nBack lower leg angle: 0.92\nBack lower leg speed: 1.00\nBack leg ground contact flag: 0.00\nFront revolute joint angle: 0.80\nFront revolute joint speed: 1.00\nFront lower leg angle: -0.18\nFront lower leg speed: -1.00\nFront leg ground contact flag: 0.00\nLidar 1 (0.00 rad): 0.48\nLidar 2 (0.15 rad): 0.49\nLidar 3 (0.30 rad): 0.50\nLidar 4 (0.45 rad): 0.53\nLidar 5 (0.60 rad): 0.58\nLidar 6 (0.75 rad): 0.66\nLidar 7 (0.90 rad): 0.77\nLidar 8 (1.05 rad): 0.97\nLidar 9 (1.20 rad): 1.00\nLidar 10 (1.35 rad): 1.00\nrewards: The reward from the last step was: -0.04\nscore: Score: -1.4753886198150625\n\nActions \n,"[Move Back revolute joint -0.3, Back lower leg 0.5, Front revolute joint 0.2, Front lower leg -0.3 , Move Back revolute joint -0.4, Back lower leg 0.4, Front revolute joint 0.3, Front lower leg -0.2 , Move Back revolute joint -0.2, Back lower leg 0.6, Front revolute joint 0.1, Front lower leg -0.4 , Move Back revolute joint -0.5, Back lower leg 0.3, Front revolute joint 0.4, Front lower leg -0.1 , Move Back revolute joint -0.3, Back lower leg 0.7, Front revolute joint 0.2, Front lower leg -0.5 ]","[[-0.3, 0.5, 0.2, -0.3], [-0.4, 0.4, 0.3, -0.2], [-0.2, 0.6, 0.1, -0.4], [-0.5, 0.3, 0.4, -0.1], [-0.3, 0.7, 0.2, -0.5]]"
3,"[-0.041420367, 0.047924638, -0.0139922695, -0.026710093, -0.14816827, -1.0000006, 0.9274341, 0.0, 0.0, 1.1132249, 0.0, -0.63458765, 1.5894572e-07, 0.0, 0.4793004, 0.4847436, 0.50170845, 0.5322916, 0.580734, 0.6550604, 0.77106297, 0.9632804, 1.0, 1.0]",Observation from last step: \nHull angle: -0.04\nAngular velocity: 0.05\nX velocity: -0.01\nY velocity: -0.03\nBack revolute joint angle: -0.15\nBack revolute joint speed: -1.00\nBack lower leg angle: 0.93\nBack lower leg speed: 0.00\nBack leg ground contact flag: 0.00\nFront revolute joint angle: 1.11\nFront revolute joint speed: 0.00\nFront lower leg angle: -0.63\nFront lower leg speed: 0.00\nFront leg ground contact flag: 0.00\nLidar 1 (0.00 rad): 0.48\nLidar 2 (0.15 rad): 0.48\nLidar 3 (0.30 rad): 0.50\nLidar 4 (0.45 rad): 0.53\nLidar 5 (0.60 rad): 0.58\nLidar 6 (0.75 rad): 0.66\nLidar 7 (0.90 rad): 0.77\nLidar 8 (1.05 rad): 0.96\nLidar 9 (1.20 rad): 1.00\nLidar 10 (1.35 rad): 1.00,0.055802,The reward from the last step was: 0.06,-1.373861,\n_\nObservation from last step: \nHull angle: -0.04\nAngular velocity: 0.05\nX velocity: -0.01\nY velocity: -0.03\nBack revolute joint angle: -0.15\nBack revolute joint speed: -1.00\nBack lower leg angle: 0.93\nBack lower leg speed: 0.00\nBack leg ground contact flag: 0.00\nFront revolute joint angle: 1.11\nFront revolute joint speed: 0.00\nFront lower leg angle: -0.63\nFront lower leg speed: 0.00\nFront leg ground contact flag: 0.00\nLidar 1 (0.00 rad): 0.48\nLidar 2 (0.15 rad): 0.48\nLidar 3 (0.30 rad): 0.50\nLidar 4 (0.45 rad): 0.53\nLidar 5 (0.60 rad): 0.58\nLidar 6 (0.75 rad): 0.66\nLidar 7 (0.90 rad): 0.77\nLidar 8 (1.05 rad): 0.96\nLidar 9 (1.20 rad): 1.00\nLidar 10 (1.35 rad): 1.00\nrewards: The reward from the last step was: 0.06\nscore: Score: -1.3738606105414553\n\nActions \n,"[Move Back revolute joint -0.4, Back lower leg 0.5, Front revolute joint 0.3, Front lower leg -0.3 , Move Back revolute joint -0.5, Back lower leg 0.4, Front revolute joint 0.4, Front lower leg -0.2 , Move Back revolute joint -0.3, Back lower leg 0.6, Front revolute joint 0.2, Front lower leg -0.4 , Move Back revolute joint -0.6, Back lower leg 0.3, Front revolute joint 0.5, Front lower leg -0.1 , Move Back revolute joint -0.4, Back lower leg 0.7, Front revolute joint 0.3, Front lower leg -0.5 ]","[[-0.4, 0.5, 0.3, -0.3], [-0.5, 0.4, 0.4, -0.2], [-0.3, 0.6, 0.2, -0.4], [-0.6, 0.3, 0.5, -0.1], [-0.4, 0.7, 0.3, -0.5]]"
4,"[0.07836275, 0.047990344, -0.015861316, -0.113013856, -0.5480418, -1.0000005, 0.9295216, 0.0, 0.0, 1.1131788, 0.0, -0.63247657, 3.973643e-08, 0.0, 0.46122152, 0.4664594, 0.48278436, 0.51221395, 0.5588291, 0.63035196, 0.74197894, 0.92694604, 1.0, 1.0]",Observation from last step: \nHull angle: 0.08\nAngular velocity: 0.05\nX velocity: -0.02\nY velocity: -0.11\nBack revolute joint angle: -0.55\nBack revolute joint speed: -1.00\nBack lower leg angle: 0.93\nBack lower leg speed: 0.00\nBack leg ground contact flag: 0.00\nFront revolute joint angle: 1.11\nFront revolute joint speed: 0.00\nFront lower leg angle: -0.63\nFront lower leg speed: 0.00\nFront leg ground contact flag: 0.00\nLidar 1 (0.00 rad): 0.46\nLidar 2 (0.15 rad): 0.47\nLidar 3 (0.30 rad): 0.48\nLidar 4 (0.45 rad): 0.51\nLidar 5 (0.60 rad): 0.56\nLidar 6 (0.75 rad): 0.63\nLidar 7 (0.90 rad): 0.74\nLidar 8 (1.05 rad): 0.93\nLidar 9 (1.20 rad): 1.00\nLidar 10 (1.35 rad): 1.00,-0.190926,The reward from the last step was: -0.19,-1.865749,\n_\nObservation from last step: \nHull angle: 0.08\nAngular velocity: 0.05\nX velocity: -0.02\nY velocity: -0.11\nBack revolute joint angle: -0.55\nBack revolute joint speed: -1.00\nBack lower leg angle: 0.93\nBack lower leg speed: 0.00\nBack leg ground contact flag: 0.00\nFront revolute joint angle: 1.11\nFront revolute joint speed: 0.00\nFront lower leg angle: -0.63\nFront lower leg speed: 0.00\nFront leg ground contact flag: 0.00\nLidar 1 (0.00 rad): 0.46\nLidar 2 (0.15 rad): 0.47\nLidar 3 (0.30 rad): 0.48\nLidar 4 (0.45 rad): 0.51\nLidar 5 (0.60 rad): 0.56\nLidar 6 (0.75 rad): 0.63\nLidar 7 (0.90 rad): 0.74\nLidar 8 (1.05 rad): 0.93\nLidar 9 (1.20 rad): 1.00\nLidar 10 (1.35 rad): 1.00\nrewards: The reward from the last step was: -0.19\nscore: Score: -1.8657486045348162\n\nActions \n,"[Move Back revolute joint -0.5, Back lower leg 0.5, Front revolute joint 0.4, Front lower leg -0.3 , Move Back revolute joint -0.6, Back lower leg 0.4, Front revolute joint 0.5, Front lower leg -0.2 , Move Back revolute joint -0.4, Back lower leg 0.6, Front revolute joint 0.3, Front lower leg -0.4 , Move Back revolute joint -0.7, Back lower leg 0.3, Front revolute joint 0.6, Front lower leg -0.1 , Move Back revolute joint -0.5, Back lower leg 0.7, Front revolute joint 0.4, Front lower leg -0.5 ]","[[-0.5, 0.5, 0.4, -0.3], [-0.6, 0.4, 0.5, -0.2], [-0.4, 0.6, 0.3, -0.4], [-0.7, 0.3, 0.6, -0.1], [-0.5, 0.7, 0.4, -0.5]]"
5,"[0.14224765, 0.031404667, -0.027008932, -0.1874416, -0.72638947, -0.09985064, 0.81732565, -0.9999998, 0.0, 1.1140598, 0.0, -0.37086177, -0.17273533, 0.0, 0.42528698, 0.43011674, 0.44516984, 0.4723065, 0.5152898, 0.58124024, 0.6841701, 0.85472614, 1.0, 1.0]",Observation from last step: \nHull angle: 0.14\nAngular velocity: 0.03\nX velocity: -0.03\nY velocity: -0.19\nBack revolute joint angle: -0.73\nBack revolute joint speed: -0.10\nBack lower leg angle: 0.82\nBack lower leg speed: -1.00\nBack leg ground contact flag: 0.00\nFront revolute joint angle: 1.11\nFront revolute joint speed: 0.00\nFront lower leg angle: -0.37\nFront lower leg speed: -0.17\nFront leg ground contact flag: 0.00\nLidar 1 (0.00 rad): 0.43\nLidar 2 (0.15 rad): 0.43\nLidar 3 (0.30 rad): 0.45\nLidar 4 (0.45 rad): 0.47\nLidar 5 (0.60 rad): 0.52\nLidar 6 (0.75 rad): 0.58\nLidar 7 (0.90 rad): 0.68\nLidar 8 (1.05 rad): 0.85\nLidar 9 (1.20 rad): 1.00\nLidar 10 (1.35 rad): 1.00,-0.126566,The reward from the last step was: -0.13,-2.502554,\n_\nObservation from last step: \nHull angle: 0.14\nAngular velocity: 0.03\nX velocity: -0.03\nY velocity: -0.19\nBack revolute joint angle: -0.73\nBack revolute joint speed: -0.10\nBack lower leg angle: 0.82\nBack lower leg speed: -1.00\nBack leg ground contact flag: 0.00\nFront revolute joint angle: 1.11\nFront revolute joint speed: 0.00\nFront lower leg angle: -0.37\nFront lower leg speed: -0.17\nFront leg ground contact flag: 0.00\nLidar 1 (0.00 rad): 0.43\nLidar 2 (0.15 rad): 0.43\nLidar 3 (0.30 rad): 0.45\nLidar 4 (0.45 rad): 0.47\nLidar 5 (0.60 rad): 0.52\nLidar 6 (0.75 rad): 0.58\nLidar 7 (0.90 rad): 0.68\nLidar 8 (1.05 rad): 0.85\nLidar 9 (1.20 rad): 1.00\nLidar 10 (1.35 rad): 1.00\nrewards: The reward from the last step was: -0.13\nscore: Score: -2.502553942226123\n\nActions \n,"[Move Back revolute joint 0.3, Back lower leg -0.5, Front revolute joint -0.2, Front lower leg 0.4, Move Back revolute joint -0.4, Back lower leg 0.6, Front revolute joint 0.1, Front lower leg -0.3, Move Back revolute joint 0.2, Back lower leg -0.7, Front revolute joint 0.5, Front lower leg 0.2, Move Back revolute joint -0.6, Back lower leg 0.4, Front revolute joint -0.3, Front lower leg 0.6, Move Back revolute joint 0.1, Back lower leg -0.2, Front revolute joint 0.3, Front lower leg -0.4]","[[0.3, -0.5, -0.2, 0.4], [-0.4, 0.6, 0.1, -0.3], [0.2, -0.7, 0.5, 0.2], [-0.6, 0.4, -0.3, 0.6], [0.1, -0.2, 0.3, -0.4]]"
6,"[0.19576944, 0.019558359, -0.031223724, -0.2702174, -0.8349067, 0.043137588, 0.66203934, -1.0, 0.0, 1.0686994, 0.22547579, -0.083728194, -0.2437887, 0.0, 0.37111253, 0.37532708, 0.38846263, 0.41214254, 0.44965044, 0.50719994, 0.5970183, 0.74584836, 1.0, 1.0]",Observation from last step: \nHull angle: 0.20\nAngular velocity: 0.02\nX velocity: -0.03\nY velocity: -0.27\nBack revolute joint angle: -0.83\nBack revolute joint speed: 0.04\nBack lower leg angle: 0.66\nBack lower leg speed: -1.00\nBack leg ground contact flag: 0.00\nFront revolute joint angle: 1.07\nFront revolute joint speed: 0.23\nFront lower leg angle: -0.08\nFront lower leg speed: -0.24\nFront leg ground contact flag: 0.00\nLidar 1 (0.00 rad): 0.37\nLidar 2 (0.15 rad): 0.38\nLidar 3 (0.30 rad): 0.39\nLidar 4 (0.45 rad): 0.41\nLidar 5 (0.60 rad): 0.45\nLidar 6 (0.75 rad): 0.51\nLidar 7 (0.90 rad): 0.60\nLidar 8 (1.05 rad): 0.75\nLidar 9 (1.20 rad): 1.00\nLidar 10 (1.35 rad): 1.00,-0.092308,The reward from the last step was: -0.09,-3.081511,\n_\nObservation from last step: \nHull angle: 0.20\nAngular velocity: 0.02\nX velocity: -0.03\nY velocity: -0.27\nBack revolute joint angle: -0.83\nBack revolute joint speed: 0.04\nBack lower leg angle: 0.66\nBack lower leg speed: -1.00\nBack leg ground contact flag: 0.00\nFront revolute joint angle: 1.07\nFront revolute joint speed: 0.23\nFront lower leg angle: -0.08\nFront lower leg speed: -0.24\nFront leg ground contact flag: 0.00\nLidar 1 (0.00 rad): 0.37\nLidar 2 (0.15 rad): 0.38\nLidar 3 (0.30 rad): 0.39\nLidar 4 (0.45 rad): 0.41\nLidar 5 (0.60 rad): 0.45\nLidar 6 (0.75 rad): 0.51\nLidar 7 (0.90 rad): 0.60\nLidar 8 (1.05 rad): 0.75\nLidar 9 (1.20 rad): 1.00\nLidar 10 (1.35 rad): 1.00\nrewards: The reward from the last step was: -0.09\nscore: Score: -3.081511229990672\n\nActions \n,"[Move Back revolute joint 0.2, Back lower leg -0.6, Front revolute joint 0.4, Front lower leg 0.3, Move Back revolute joint -0.5, Back lower leg 0.5, Front revolute joint 0.2, Front lower leg -0.3, Move Back revolute joint 0.3, Back lower leg -0.4, Front revolute joint 0.1, Front lower leg 0.5, Move Back revolute joint -0.4, Back lower leg 0.3, Front revolute joint -0.2, Front lower leg 0.4, Move Back revolute joint 0.1, Back lower leg -0.5, Front revolute joint 0.3, Front lower leg -0.2]","[[0.2, -0.6, 0.4, 0.3], [-0.5, 0.5, 0.2, -0.3], [0.3, -0.4, 0.1, 0.5], [-0.4, 0.3, -0.2, 0.4], [0.1, -0.5, 0.3, -0.2]]"
7,"[0.19165625, 0.008557221, -0.015007644, -0.3261096, -0.73286235, 0.17337495, 0.25696355, -0.99999994, 1.0, 1.1099733, 0.0, 0.14765656, 0.06867426, 0.0, 0.30005392, 0.3034615, 0.31408194, 0.33322772, 0.36355382, 0.41008407, 0.4827045, 0.60303736, 0.8280599, 1.0]",Observation from last step: \nHull angle: 0.19\nAngular velocity: 0.01\nX velocity: -0.02\nY velocity: -0.33\nBack revolute joint angle: -0.73\nBack revolute joint speed: 0.17\nBack lower leg angle: 0.26\nBack lower leg speed: -1.00\nBack leg ground contact flag: 1.00\nFront revolute joint angle: 1.11\nFront revolute joint speed: 0.00\nFront lower leg angle: 0.15\nFront lower leg speed: 0.07\nFront leg ground contact flag: 0.00\nLidar 1 (0.00 rad): 0.30\nLidar 2 (0.15 rad): 0.30\nLidar 3 (0.30 rad): 0.31\nLidar 4 (0.45 rad): 0.33\nLidar 5 (0.60 rad): 0.36\nLidar 6 (0.75 rad): 0.41\nLidar 7 (0.90 rad): 0.48\nLidar 8 (1.05 rad): 0.60\nLidar 9 (1.20 rad): 0.83\nLidar 10 (1.35 rad): 1.00,-0.054811,The reward from the last step was: -0.05,-3.337307,\n_\nObservation from last step: \nHull angle: 0.19\nAngular velocity: 0.01\nX velocity: -0.02\nY velocity: -0.33\nBack revolute joint angle: -0.73\nBack revolute joint speed: 0.17\nBack lower leg angle: 0.26\nBack lower leg speed: -1.00\nBack leg ground contact flag: 1.00\nFront revolute joint angle: 1.11\nFront revolute joint speed: 0.00\nFront lower leg angle: 0.15\nFront lower leg speed: 0.07\nFront leg ground contact flag: 0.00\nLidar 1 (0.00 rad): 0.30\nLidar 2 (0.15 rad): 0.30\nLidar 3 (0.30 rad): 0.31\nLidar 4 (0.45 rad): 0.33\nLidar 5 (0.60 rad): 0.36\nLidar 6 (0.75 rad): 0.41\nLidar 7 (0.90 rad): 0.48\nLidar 8 (1.05 rad): 0.60\nLidar 9 (1.20 rad): 0.83\nLidar 10 (1.35 rad): 1.00\nrewards: The reward from the last step was: -0.05\nscore: Score: -3.337307404612254\n\nActions \n,"[Move Back revolute joint 0.1, Back lower leg -0.6, Front revolute joint 0.3, Front lower leg 0.2, Move Back revolute joint -0.4, Back lower leg 0.4, Front revolute joint 0.2, Front lower leg -0.3, Move Back revolute joint 0.2, Back lower leg -0.5, Front revolute joint 0.1, Front lower leg 0.4, Move Back revolute joint -0.3, Back lower leg 0.3, Front revolute joint -0.1, Front lower leg 0.5, Move Back revolute joint 0.0, Back lower leg -0.4, Front revolute joint 0.2, Front lower leg -0.2]","[[0.1, -0.6, 0.3, 0.2], [-0.4, 0.4, 0.2, -0.3], [0.2, -0.5, 0.1, 0.4], [-0.3, 0.3, -0.1, 0.5], [0.0, -0.4, 0.2, -0.2]]"
8,"[0.36193708, 0.011618221, -0.044398073, 0.007729818, -0.84322435, -0.0010162592, -0.06452572, -0.0626295, 0.0, 1.0340366, -0.6684821, 0.4116661, 1.324814, 0.0, 0.24649309, 0.24929237, 0.25801703, 0.27374524, 0.29865798, 0.33688238, 0.39653978, 0.49539274, 0.68024784, 1.0]",Observation from last step: \nHull angle: 0.36\nAngular velocity: 0.01\nX velocity: -0.04\nY velocity: 0.01\nBack revolute joint angle: -0.84\nBack revolute joint speed: -0.00\nBack lower leg angle: -0.06\nBack lower leg speed: -0.06\nBack leg ground contact flag: 0.00\nFront revolute joint angle: 1.03\nFront revolute joint speed: -0.67\nFront lower leg angle: 0.41\nFront lower leg speed: 1.32\nFront leg ground contact flag: 0.00\nLidar 1 (0.00 rad): 0.25\nLidar 2 (0.15 rad): 0.25\nLidar 3 (0.30 rad): 0.26\nLidar 4 (0.45 rad): 0.27\nLidar 5 (0.60 rad): 0.30\nLidar 6 (0.75 rad): 0.34\nLidar 7 (0.90 rad): 0.40\nLidar 8 (1.05 rad): 0.50\nLidar 9 (1.20 rad): 0.68\nLidar 10 (1.35 rad): 1.00,-0.029587,The reward from the last step was: -0.03,-4.556107,\n_\nObservation from last step: \nHull angle: 0.36\nAngular velocity: 0.01\nX velocity: -0.04\nY velocity: 0.01\nBack revolute joint angle: -0.84\nBack revolute joint speed: -0.00\nBack lower leg angle: -0.06\nBack lower leg speed: -0.06\nBack leg ground contact flag: 0.00\nFront revolute joint angle: 1.03\nFront revolute joint speed: -0.67\nFront lower leg angle: 0.41\nFront lower leg speed: 1.32\nFront leg ground contact flag: 0.00\nLidar 1 (0.00 rad): 0.25\nLidar 2 (0.15 rad): 0.25\nLidar 3 (0.30 rad): 0.26\nLidar 4 (0.45 rad): 0.27\nLidar 5 (0.60 rad): 0.30\nLidar 6 (0.75 rad): 0.34\nLidar 7 (0.90 rad): 0.40\nLidar 8 (1.05 rad): 0.50\nLidar 9 (1.20 rad): 0.68\nLidar 10 (1.35 rad): 1.00\nrewards: The reward from the last step was: -0.03\nscore: Score: -4.55610657128369\n\nActions \n,"[Move Back revolute joint 0.0, Back lower leg -0.3, Front revolute joint 0.2, Front lower leg 0.1, Move Back revolute joint -0.2, Back lower leg 0.2, Front revolute joint 0.1, Front lower leg -0.2, Move Back revolute joint 0.1, Back lower leg -0.4, Front revolute joint 0.3, Front lower leg 0.2, Move Back revolute joint -0.1, Back lower leg 0.1, Front revolute joint -0.1, Front lower leg 0.3, Move Back revolute joint 0.2, Back lower leg -0.2, Front revolute joint 0.0, Front lower leg -0.1]","[[0.0, -0.3, 0.2, 0.1], [-0.2, 0.2, 0.1, -0.2], [0.1, -0.4, 0.3, 0.2], [-0.1, 0.1, -0.1, 0.3], [0.2, -0.2, 0.0, -0.1]]"
9,"[0.35890242, -0.0027186065, 0.0037267231, 0.009262775, -0.826628, 0.1593083, -0.45623612, -0.99933904, 0.0, 0.99062353, -0.054481182, 0.6153089, 0.23271234, 0.0, 0.25186464, 0.25472495, 0.26363972, 0.27971068, 0.30516633, 0.3442237, 0.40518117, 0.50618833, 0.69507176, 1.0]",Observation from last step: \nHull angle: 0.36\nAngular velocity: -0.00\nX velocity: 0.00\nY velocity: 0.01\nBack revolute joint angle: -0.83\nBack revolute joint speed: 0.16\nBack lower leg angle: -0.46\nBack lower leg speed: -1.00\nBack leg ground contact flag: 0.00\nFront revolute joint angle: 0.99\nFront revolute joint speed: -0.05\nFront lower leg angle: 0.62\nFront lower leg speed: 0.23\nFront leg ground contact flag: 0.00\nLidar 1 (0.00 rad): 0.25\nLidar 2 (0.15 rad): 0.25\nLidar 3 (0.30 rad): 0.26\nLidar 4 (0.45 rad): 0.28\nLidar 5 (0.60 rad): 0.31\nLidar 6 (0.75 rad): 0.34\nLidar 7 (0.90 rad): 0.41\nLidar 8 (1.05 rad): 0.51\nLidar 9 (1.20 rad): 0.70\nLidar 10 (1.35 rad): 1.00,-0.004443,The reward from the last step was: -0.00,-4.650013,\n_\nObservation from last step: \nHull angle: 0.36\nAngular velocity: -0.00\nX velocity: 0.00\nY velocity: 0.01\nBack revolute joint angle: -0.83\nBack revolute joint speed: 0.16\nBack lower leg angle: -0.46\nBack lower leg speed: -1.00\nBack leg ground contact flag: 0.00\nFront revolute joint angle: 0.99\nFront revolute joint speed: -0.05\nFront lower leg angle: 0.62\nFront lower leg speed: 0.23\nFront leg ground contact flag: 0.00\nLidar 1 (0.00 rad): 0.25\nLidar 2 (0.15 rad): 0.25\nLidar 3 (0.30 rad): 0.26\nLidar 4 (0.45 rad): 0.28\nLidar 5 (0.60 rad): 0.31\nLidar 6 (0.75 rad): 0.34\nLidar 7 (0.90 rad): 0.41\nLidar 8 (1.05 rad): 0.51\nLidar 9 (1.20 rad): 0.70\nLidar 10 (1.35 rad): 1.00\nrewards: The reward from the last step was: -0.00\nscore: Score: -4.650012614050261\n\nActions \n,"[Move Back revolute joint 0.1, Back lower leg -0.3, Front revolute joint 0.2, Front lower leg 0.1, Move Back revolute joint -0.2, Back lower leg 0.2, Front revolute joint 0.1, Front lower leg -0.2, Move Back revolute joint 0.0, Back lower leg -0.4, Front revolute joint 0.3, Front lower leg 0.2, Move Back revolute joint -0.1, Back lower leg 0.1, Front revolute joint -0.1, Front lower leg 0.3, Move Back revolute joint 0.2, Back lower leg -0.2, Front revolute joint 0.0, Front lower leg -0.1]","[[0.1, -0.3, 0.2, 0.1], [-0.2, 0.2, 0.1, -0.2], [0.0, -0.4, 0.3, 0.2], [-0.1, 0.1, -0.1, 0.3], [0.2, -0.2, 0.0, -0.1]]"


## Test with other LLMs
- OpenAI API too slow due to rate limits

In [10]:
import torch
from transformers import pipeline
from huggingface_hub import InferenceClient
from openai import OpenAI
import os

## Inference APi

# Running the model locally
- Works but will have to choose a smaller model

In [19]:
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe = pipeline("text-generation", model="meta-llama/Llama-3.2-3B-Instruct", device="cuda")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [22]:
pipe(
    messages,
    max_new_tokens=100,
    pad_token_id=50256,
    return_full_text=True
)

[{'generated_text': [{'role': 'user', 'content': 'Who are you?'},
   {'role': 'assistant',
    'content': 'I\'m an artificial intelligence model known as Llama. Llama stands for "Large Language Model Meta AI."'}]}]

In [12]:
# Llama 1B
pipe = pipeline("text-generation", model="meta-llama/Llama-3.2-1B", device="cuda")

In [34]:
generated = pipe(
    primer,
    pad_token_id=50256,
    max_new_tokens=len(primer) + 100
)

In [36]:
print(generated[0]['generated_text'])


Imagine you are an expert at controlling a bipedal machine, and you have been given the task of controlling a bipedal machine in a reinforcement learning environment. The challenge is to walk a bipedal machine as far as possible without falling over. The machine has a hull, two legs, a back and front revolute joint, and lower leg. The machine has 10 lidar sensors that can detect the distance to objects in front of it. 

You will be given scenarios in the form of observations from the environment and the reward based on the last action and the Current Total Score. Your goal is to provide the next action in the following format:

'Move Back revolute joint `value`, Back lower leg 'value', Front revolute joint 'value', Front lower leg 'value''

The values must be in the range [-1, 1], and your goal is to maximize the reward and move the machine forward effectively. Only provide the action and no additional explanation.

Example
'Observation from last step: 
Hull angle: -0.02
Angular veloc

In [33]:
len("Move Back revolute joint -0.4, Back lower leg 0.4, Front revolute joint 0.0, Front lower leg -0.6")

97

In [30]:
print(primer)


Imagine you are an expert at controlling a bipedal machine, and you have been given the task of controlling a bipedal machine in a reinforcement learning environment. The challenge is to walk a bipedal machine as far as possible without falling over. The machine has a hull, two legs, a back and front revolute joint, and lower leg. The machine has 10 lidar sensors that can detect the distance to objects in front of it. 

You will be given scenarios in the form of observations from the environment and the reward based on the last action and the Current Total Score. Your goal is to provide the next action in the following format:

'Move Back revolute joint `value`, Back lower leg 'value', Front revolute joint 'value', Front lower leg 'value''

The values must be in the range [-1, 1], and your goal is to maximize the reward and move the machine forward effectively. Only provide the action and no additional explanation.

Example
'Observation from last step: 
Hull angle: -0.02
Angular veloc