In [None]:
# Import necessary libraries
import time
import re
import subprocess
import os
import datetime
import shutil
import openai
import json
import sys
from pathlib import Path
from loguru import logger
# Import custom utilities
from utils.extract_task_code import file_to_string
from utils.simple_eureka import process_response
from utils.system import (
    check_system_encoding,
    clean_folder,
    copy_folder_sub,
)
from utils.misc import block_until_training
from utils.agent import Agent
from utils.tensorboard_parser import tensorboard_parser

In [None]:
import yaml

class Config:
    """Attribute-style view over a (possibly nested) dictionary.

    Each key of ``dictionary`` becomes an attribute of the instance. Values
    that are themselves ``dict`` objects are wrapped recursively in ``Config``;
    every other value (including lists) is stored unchanged.
    """

    def __init__(self, dictionary):
        for name, raw in dictionary.items():
            # Only plain mappings are wrapped; lists of dicts stay lists of dicts.
            setattr(self, name, Config(raw) if isinstance(raw, dict) else raw)

    def __repr__(self):
        # Same text as the dict of attributes, so nested configs print as nested dicts.
        return str(vars(self))

# Function to load YAML configuration
def load_config(yaml_path):
    """Read a YAML file and expose its contents as a ``Config`` object.

    Args:
        yaml_path: Path of the YAML file to read.

    Returns:
        A ``Config`` built from the parsed document. An empty document
        yields a ``Config`` with no attributes.

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the file is not valid YAML.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default encoding (e.g. gbk / cp1252 on Windows).
    with open(yaml_path, 'r', encoding='utf-8') as file:
        config_dict = yaml.safe_load(file)
    # safe_load returns None for an empty document; Config needs a mapping.
    return Config(config_dict or {})

# Example usage: load the project configuration. The path is relative to the
# notebook's current working directory.
yaml_path = 'config/config.yaml'  # Replace with the path to your YAML file
cfg = load_config(yaml_path)

# Accessing the configuration: nested YAML mappings are reachable as chained
# attributes. These two prints fail with AttributeError if the YAML file has
# no `gym` or `output.overwrite` entry.
print(cfg.gym)
print(cfg.output.overwrite)


In [None]:
# Run constants: Isaac Sim install locations, task selection and training flags.
# Every Isaac path is derived from one install root so the values cannot drift
# apart when the install location changes.
ISAAC_SIM_DIR = "H:/Omniverse/Library/isaac_sim-2023.1.1"
ISAAC_ROOT_DIR = f"{ISAAC_SIM_DIR}/OmniIsaacGymEnvs/omniisaacgymenvs"

PYTHON_PATH = f"{ISAAC_SIM_DIR}/python.bat"
TASK = "Crazyflie"
# Task source file that each generated environment is copied over.
TASK_PATH = f"{ISAAC_ROOT_DIR}/tasks/crazyflie.py"
# Training script, relative to ISAAC_ROOT_DIR (the command first cd's there).
SCRIPTS_DIR = "scripts/rlgames_train.py"
HEADLESS = True
ENABLE_RECORDING = False
MULTIGPU = True

# NOTE(review): the OpenAI client further down is built from cfg.api.model and
# cfg.api.key; these two values are only used for logging in this notebook.
model = "gpt-4-turbo-2024-04-09"
# Never paste a secret into the notebook: read it from the environment.
# Falls back to the previous default (empty string) when the variable is unset.
key = os.environ.get("OPENAI_API_KEY", "")

In [None]:

# Setup platform and result locations
platform = sys.platform
now = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

EUREKA_ROOT_DIR = os.getcwd()
RESULT_DIR = f"{EUREKA_ROOT_DIR}/results/{now}"

# Start from an empty results directory. This has to happen BEFORE the log
# sink is attached: the sink writes output.log inside RESULT_DIR, so wiping the
# directory afterwards would remove the log file that was just created.
shutil.rmtree(RESULT_DIR, ignore_errors=True)
os.makedirs(RESULT_DIR, exist_ok=True)

# Setup logger (console colours enabled, plus a per-run log file)
logger = logger.opt(colors=True)
logger.add(
    sink=f"{RESULT_DIR}/output.log",
    enqueue=True,
    backtrace=True,
    diagnose=True,
)

prompt_dir = f"{EUREKA_ROOT_DIR}/prompts"
# Only report whether a key is present; the secret itself must never be logged.
key_status = "provided" if key else "missing"
logger.debug(f"Using LLM: {model} (API key {key_status})")
code_feedback = file_to_string(f"{prompt_dir}/code_feedback.txt")
task_obs_code_string = file_to_string(f"{EUREKA_ROOT_DIR}/input/{TASK}.py")
policy_feedback = file_to_string(f"{prompt_dir}/policy_feedback.txt")
execution_error_feedback = file_to_string(
    f"{prompt_dir}/execution_error_feedback.txt"
)
code_output_tip = file_to_string(f"{prompt_dir}/actor/code_output_tip.txt")

# Bookkeeping for the evolutionary search
DUMMY_FAILURE = -10000.0
max_successes = []
max_successes_reward_correlation = []
execute_rates = []
best_code_paths = []
max_success_overall = DUMMY_FAILURE
max_success_reward_correlation_overall = DUMMY_FAILURE
max_reward_code_path = None
clean_folder(f"{ISAAC_ROOT_DIR}/runs/{TASK}")

# ---------------------- MESSAGE Assemble------------------#
actor_prompt = Agent(cfg=cfg)
messages = actor_prompt.message()
# The first two messages are concatenated to form the prompt that is archived.
full_prompt = messages[0]["content"] + messages[1]["content"]
# The prompt is arbitrary text: log it with colour-markup parsing switched off,
# otherwise any "<...>" sequence in it is parsed as a colour tag and can raise.
logger.opt(colors=False).info("Full Prompt: " + full_prompt)

# Save the full prompt to a file under {EUREKA_ROOT_DIR}/results
# (explicit UTF-8 so non-ASCII prompt text is written regardless of locale)
with open(f"{RESULT_DIR}/full_prompt.txt", "w", encoding="utf-8") as f:
    f.write(full_prompt)

In [None]:
# ---------------------- Evolution ------------------#
# NOTE: `iter` shadows the builtin; the name is kept because later cells read it.
iter = 0  # Example iteration number, you can loop through multiple iterations
BASE_DIR = f"{RESULT_DIR}/iter{iter}"
# Make sub directories: code, responses, tensorboard, and videos
os.makedirs(BASE_DIR, exist_ok=True)
os.makedirs(f"{BASE_DIR}/{iter}", exist_ok=True)

MAX_ATTEMPTS = 3  # API calls tried per chunk before giving up
HALVE_FROM_ATTEMPT = 1  # from this (0-based) failed attempt on, shrink the chunk

responses = []
response_cur = None
total_samples = 0
prompt_tokens = 0  # stays 0 if no request ever succeeds
total_token = 0
total_completion_token = 0
# A chunk size below 1 would never make progress, so clamp it.
chunk_size: int = max(int(cfg.generation.chunk_size), 1)
# LLM output and exception text are arbitrary strings; log them without
# colour-markup parsing so "<...>" sequences cannot raise inside the logger.
plain_logger = logger.opt(colors=False)
logger.info(
    f"Iteration {iter}: Generating {cfg.generation.sample} samples with {cfg.api.model}"
)
client = openai.OpenAI(api_key=cfg.api.key, base_url=cfg.api.url)
while total_samples < cfg.generation.sample:
    # Reset per chunk: otherwise a chunk whose attempts all fail would see the
    # previous chunk's response, re-append it and loop forever.
    response_cur = None
    for attempt in range(MAX_ATTEMPTS):
        # Never request more completions than are still missing.
        request_size = min(chunk_size, cfg.generation.sample - total_samples)
        try:
            response_cur = client.chat.completions.create(
                model=cfg.api.model,
                messages=messages,
                temperature=cfg.api.temperature,
                max_tokens=cfg.api.max_tokens,
                n=request_size,
            )
            total_samples += request_size
            break
        except Exception as e:
            # The previous threshold (attempt >= 10) could never be reached
            # with only three attempts, so the chunk was never reduced.
            if attempt >= HALVE_FROM_ATTEMPT:
                chunk_size = max(chunk_size // 2, 1)
                logger.info(f"Current Chunk Size {chunk_size}")
            plain_logger.info(f"Attempt {attempt+1} failed with error: {e}")
            time.sleep(1)
    if response_cur is None:
        logger.info("Code terminated due to too many failed attempts!")
        break

    responses.extend(response_cur.choices)
    prompt_tokens = response_cur.usage.prompt_tokens
    total_completion_token += response_cur.usage.completion_tokens
    total_token += response_cur.usage.total_tokens

# Guard on `responses`: generation may have stopped without any completion.
if cfg.generation.sample == 1 and responses:
    plain_logger.info(
        f"Iteration {iter}: GPT Output:\n {responses[0].message.content}\n"
    )

# logger Token Information
logger.info(
    f"Iteration {iter}: Prompt Tokens: {prompt_tokens}, Completion Tokens: {total_completion_token}, Total Tokens: {total_token}"
)


In [None]:
# Initialize variables for code and RL runs
code_runs = []
rl_runs = []

# Regex patterns to extract python code enclosed in GPT response, tried in
# order from the most to the least specific delimiter.
patterns = [
    r"```python(.*?)```",
    r"```(.*?)```",
    r'"""(.*?)"""',
    r'""(.*?)""',
    r'"(.*?)"',
]

# The shell prefix only depends on the platform, so resolve it once and fail
# before any generated file is written or the task source is overwritten.
if platform == "win32":
    # cmd.exe needs an explicit drive switch before `cd` takes effect.
    driver = cfg.gym.omniisaacsimpathenv.split(":")[0]
    command_prefix = f"{driver}: & cd {ISAAC_ROOT_DIR} & "
elif platform == "linux":
    command_prefix = f"cd {ISAAC_ROOT_DIR} && "
else:
    logger.error("Unsupported platform!")
    # exit() would shut down the Jupyter kernel; raise so the cell just stops.
    raise RuntimeError(f"Unsupported platform: {platform}")

# Generation may have stopped early, so never index past the responses we have.
num_runs = min(cfg.generation.sample, len(responses))
if num_runs < cfg.generation.sample:
    logger.warning(
        f"Iteration {iter}: only {num_runs} of {cfg.generation.sample} requested samples are available"
    )

for response_id in range(num_runs):
    # Clean up the omniverse isaac sim environment's output directory
    clean_folder(f"{ISAAC_ROOT_DIR}/runs/{TASK}")
    os.makedirs(f"{BASE_DIR}/{response_id}", exist_ok=True)
    os.makedirs(f"{BASE_DIR}/{response_id}/checkpoint", exist_ok=True)
    # The API may return a choice without text content; treat it as empty.
    response_cur = responses[response_id].message.content or ""

    # Save the response to a file
    with open(
        f"{BASE_DIR}/{response_id}/response.txt", "w", encoding="utf-8"
    ) as f:
        f.write(response_cur)
    logger.info(f"Iteration {iter}: Processing Code Run {response_id}")

    code_string = None
    for pattern in patterns:
        match = re.search(pattern, response_cur, re.DOTALL)
        if match:
            code_string = match.group(1).strip()
            break

    # Use the entire response if no code block is found
    code_string = response_cur if code_string is None else code_string

    # Indent every line by four spaces and drop everything before the first
    # `def` (imports, prose). If there is no `def`, the text is kept unchanged.
    lines = [" " * 4 + line for line in code_string.split("\n")]
    for i, line in enumerate(lines):
        if line.strip().startswith("def "):
            code_string = "\n".join(lines[i:])
            break

    code_runs.append(code_string)

    # Process the response code to obtain the final code string
    code_string, reward_only_string = process_response(
        code_string, task_obs_code_string
    )

    # Save the new environment code. Python reads source files as UTF-8, so
    # write them as UTF-8 instead of the platform default encoding.
    output_file = f"{BASE_DIR}/{response_id}/env_iter{iter}_response{response_id}.py"
    with open(output_file, "w", encoding="utf-8") as file:
        file.write(code_string + "\n")

    with open(
        f"{BASE_DIR}/{response_id}/env_iter{iter}_response{response_id}_rewardonly.py",
        "w",
        encoding="utf-8",
    ) as file:
        file.write(reward_only_string + "\n")

    # Install the generated environment as the task that the trainer will load.
    shutil.copyfile(output_file, f"{TASK_PATH}")
    std_path = f"{BASE_DIR}/{response_id}/env_iter{iter}_response{response_id}_train.log"
    with open(std_path, "w") as f:
        encoding = "utf-8" if check_system_encoding() else "gbk"

        command = (
            f"{command_prefix}{PYTHON_PATH} {SCRIPTS_DIR} "
            f"task={TASK} headless={HEADLESS} "
        )
        if MULTIGPU:
            command += f"multigpu={MULTIGPU} "

        if ENABLE_RECORDING and not MULTIGPU:
            os.makedirs(f"{BASE_DIR}/{response_id}/videos/", exist_ok=True)
            command += f"enable_recording=True recording_dir={BASE_DIR}/{response_id}/videos/"
        else:
            logger.info("Recording is disabled! Either enable recording or use multi-gpu training!")

        logger.info(f"Command: {command}")
        # stdout and stderr of the trainer both go to the per-run log file.
        process = subprocess.Popen(
            command, shell=True, stdout=f, stderr=f, encoding=encoding
        )
    # Watch the training log for the configured success / failure keywords.
    block_until_training(
        std_path,
        success_keyword=cfg.env.success_keyword,
        failure_keyword=cfg.env.failure_keyword,
        log_status=True,
        iter_num=iter,
        response_id=response_id,
    )
    copy_folder_sub(
        f"{ISAAC_ROOT_DIR}/runs/{TASK}", f"{BASE_DIR}/{response_id}/"
    )
    rl_runs.append(process)


In [None]:
# Wait for every RL training process to finish and report how each one ended.
# Output already goes to the per-run log files, so communicate() is used only
# to block until the process exits.
for response_id, rl_run in enumerate(rl_runs):
    rl_run.communicate()
    if rl_run.returncode != 0:
        # A non-zero exit code was silently ignored before; surface it.
        logger.warning(
            f"Code Run {response_id}: training process exited with return code {rl_run.returncode}"
        )
