In [2]:
import gymnasium as gym
import numpy as np

In [12]:
# Build the LunarLander environment; render_mode="human" asks Gymnasium to
# display the simulation while it runs.
env = gym.make("LunarLander-v3", render_mode="human")
# reset() begins a new episode and hands back the initial observation and info.
observation, info = env.reset()

# Play a single episode to completion using random actions.
episode_over = False
while not episode_over:
    # Placeholder policy: a random action is sampled here; a real agent would
    # choose its action from `observation` and `info`.
    action = env.action_space.sample()
    observation, reward, terminated, truncated, info = env.step(action)
    # The episode stops as soon as step() reports either flag.
    episode_over = terminated or truncated

In [11]:
# Close the environment now that it is no longer needed.
env.close()

## Breakdown of 8-dim observation values

- [0] : x-coordinate of lander

- [1] : y-coordinate of lander

- [2] : linear velocity of lander in x

- [3] : linear velocity of lander in y

- [4] : angle of lander

- [5] : angular velocity of lander (multiply by 2.5 to convert to radians per second)

- [6] : boolean flag indicating whether leg 1 (numbering is arbitrary) is in contact with the ground

- [7] : boolean flag indicating whether leg 2 (numbering is arbitrary) is in contact with the ground


In [15]:
def obs_to_text(obs):
    """
    Render an 8-value LunarLander observation as a multi-line text description.

    Args:
        obs: Indexable sequence with at least 8 numeric entries, read as
            [x, y, x velocity, y velocity, angle, angular velocity,
            leg 1 contact flag, leg 2 contact flag].

    Returns:
        str: A header line followed by one labelled line per field, joined
        with newlines. The six continuous values are shown with two decimal
        places; the two contact flags are shown exactly as stored in `obs`.
    """
    # The raw angular-velocity entry is rescaled by 2.5 before display; per the
    # notebook's observation breakdown this converts it to a per-second rate.
    scaled_angular_velocity = obs[5] * 2.5

    lines = [
        "Observation from last step: ",
        # Every continuous field uses the same ".2f" spec. (A spec of "2f"
        # without the dot means "minimum width 2" and prints six decimals.)
        f"X coordinate: {obs[0]:.2f}",
        f"Y coordinate: {obs[1]:.2f}",
        f"X velocity: {obs[2]:.2f}",
        f"Y velocity: {obs[3]:.2f}",
        f"Angle of the lander: {obs[4]:.2f}",
        f"Angular velocity of lander: {scaled_angular_velocity:.2f}",
        f"Leg 1 contact with ground flag: {obs[6]}",
        f"Leg 2 contact with ground flag: {obs[7]}",
    ]
    return "\n".join(lines)


In [16]:
# Show the text rendering of the most recent observation.
print(obs_to_text(observation))

Observation from last step: 
X coordinate: 0.557210
Y coordinate: -0.17
X velocity: 0.87
Y velocity: -0.02
Angle of the lander: -3.25
Angular velocity of lander: 0.19123662263154984
Leg 1 contact with ground flag: 0.0
Leg 2 contact with ground flag: 0.0


In [6]:
def reward_to_text(reward):
    """Return a one-line sentence reporting `reward` with two decimal places."""
    two_decimals = format(reward, ".2f")
    return "The reward from the last step was: " + two_decimals

In [7]:
def text_to_action(text):
    """
    Parse comma-separated LLM output into a 4-slot list of action values.

    The text is split on commas. Within each entry, the last
    whitespace-separated token is parsed as a float, so both bare numbers
    ("0, 1, 0, 0") and labelled entries ("main engine: 1") are accepted.
    Entry i fills slot i of the result. The slots follow the action numbering
    this notebook uses:
        0 - "do nothing"
        1 - "fire left orientation engine"
        2 - "fire main engine"
        3 - "fire right orientation engine"

    Args:
        text: String produced by the LLM, holding at most four
            comma-separated entries. Whitespace around the commas and at
            either end of the string is ignored.

    Returns:
        list: Four elements. Slots covered by `text` hold floats; any
        remaining slots keep their initial value of 0.

    Raises:
        ValueError: If `text` has more than four entries, an entry is empty,
            or an entry's final token is not a valid float.

    NOTE(review): the result is a 4-element list, not a single action index.
    Confirm how the caller converts it before passing it to `env.step`.
    """
    action = [0, 0, 0, 0]

    # Split on the bare comma so "0,1,0,0" and "0, 1, 0, 0" parse the same way.
    entries = text.strip().split(",")
    if len(entries) > len(action):
        raise ValueError(
            f"Expected at most {len(action)} comma-separated action values, "
            f"got {len(entries)}: {text!r}"
        )

    for i, entry in enumerate(entries):
        # str.split() with no argument ignores runs of any whitespace, so a
        # trailing space or newline cannot yield an empty last token.
        tokens = entry.split()
        if not tokens:
            raise ValueError(f"Missing action value in entry {i} of {text!r}")
        action[i] = float(tokens[-1])
    return action