#### Imports

In [1]:
import time
from openai import OpenAI
from Assistant import AssistantOpenAI
import logging
import os
import json
import gymnasium as gym
import random
import matplotlib.pyplot as plt
import os
import time
from git import Repo

import numpy as np
from collections import deque, namedtuple

# For visualization
import gymnasium.wrappers.record_video as record_video

#### Parameters

In [2]:
num_episodes = 200
ARCLABKEY_OPENAI = "sk-proj-DvHDR3hpgbm2r3kCA9jKT3BlbkFJL57ABXkfaWAIYKBxdhM6"
ARCLABKEY_OPENAI = "sk-proj-GvaDXazpibWA2M1I5Pu2T3BlbkFJxDuKlr9AcoVG98ctJZ7Q"

In [3]:
def store_code_in_file(code, filename):
    """ Store code in a file
    
    Args:
        code: str: code to store
        filename: str: filename to store code in
    
    Returns:
        None
    """
    with open(filename, 'w') as f:
        f.write(code)
        
store_code_in_file_schema = {
    "name": "store_code_in_file",
    "description": "Store code in a file",
    "parameters": {
        "type": "object",
        "properties": {
            "code": {
                "type": "string",
                "description": "The Python code to store."
            },
            "filename": {
                "type": "string",
                "description": "The filename to store the code in."
            }
        },
        "required": ["code", "filename"]
    }
}

TOOLS = [{"type": "function", "function": store_code_in_file_schema}]

### Entorno Lunnar Lander

In [4]:
env = gym.make('LunarLander-v2')
print('State shape: ', env.observation_space.shape)
print('Number of actions: ', env.action_space.n)

State shape:  (8,)
Number of actions:  4


In [5]:
agent_initial_code = """
def act(observation):
    '''
    The function that codifies the action to be taken in each instant of time.

    Args:
        observation (numpy.array):
            "description": "The state of the environment after the action is taken.",
            "positions": {  
                "0": "X position",
                "1": "Y position",
                "2": "X velocity",
                "3": "Y velocity",
                "4": "Angle",
                "5": "Angular velocity",
                "6": "Left contact sensor",
                "7": "Right contact sensor"
            },
            "min_values": [-1.5, -1.5, -5.0, -5.0, -3.14, -5.0, 0, 0],
            "max_values": [1.5, 1.5, 5.0, 5.0, 3.14, 5.0, 1, 1]

    Returns:
        Integer  : The action to be taken.
    '''
    return random.randint(0, 3)
"""

#### Mapeador de logs a JSON

In [6]:
log_description = {
    "description": "Log data for each step of the spacecraft landing environment.",
    "landing attempt": {
        "type": "integer",
        "description": "The episode number."
    },
    "logs": {
        "instant": {
            "type": "integer",
            "description": "The instant within the landing attempt where the current log is taken."
        },
        "action": {
            "type": "integer",
            "description": "The possible actions taken.",
            "options": {
                1 : "Switch off engines",
                2 : "Push left engine",
                3 : "Push both engines (upwards)",
                4 : "Push right engine"
            }
        },  
        "current status": {
            "type": "array",
            "description": "The state of the environment after the action is taken.",
            "positions": {  
                "0": "X position",
                "1": "Y position",
                "2": "X velocity",
                "3": "Y velocity",
                "4": "Angle",
                "5": "Angular velocity",
                "6": "Left contact sensor",
                "7": "Right contact sensor"
            },
            "min_values": [-1.5, -1.5, -5.0, -5.0, -3.14, -5.0, 0, 0],
            "max_values": [1.5, 1.5, 5.0, 5.0, 3.14, 5.0, 1, 1] 
        }, 
        "score": {
            "type": "number",
            "description": "The score received for the action."
        },  
        "completed": {
            "type": "boolean",
            "description": "Whether the landing event has ended (landing or accident)."
        }
    },
    "total score": {
        "type": "number",
        "description": "The total score received for the landing attempt."
    }
}

### UTILS

In [7]:
import json

def format_step_data(instant, action, next_state, reward, terminated):
    """ Format the step data into a JSON string
    
    Args:
        instant: int: the instant within the landing attempt where the current log is taken
        action: int: the action taken
        next_state: np.array: the next state of the environment
        reward: float: the reward received
        terminated: bool: whether the landing event has ended
        
    Returns:
        str: the step data formatted as a JSON string
    """
    # Convertir el array numpy a una lista
    next_state_list = next_state.tolist()

    # Redondear los elementos de la lista a 4 decimales
    next_state_list_rounded = [round(x, 3) for x in next_state_list]

    step_data = {
        'instant': instant,
        'action' : int(action),
        'current status': next_state_list_rounded,  # Convert numpy array to list
        'score': round(reward, 3),
        'completed': terminated,
        #'truncated': truncated
        #'info': info
    }
    
    #print(step_data)

    # Convert the dictionary to a JSON string
    step_data_json = json.dumps(step_data)

    return step_data_json


def format_episode_logs(logs, episode, total_score):
    """ Format the logs into a JSON string
    
    Args:
        logs: list: the logs for each step of the environment
        episode: int: the episode number
        
    Returns:
        str: the logs formatted as a JSON string
    """
    logs_data = {
        'landing attempt': episode,
        'logs': logs,
        'total score': total_score
    }

    # Convert the dictionary to a JSON string
    logs_json = json.dumps(logs_data)

    return logs_json

In [8]:
def commit_changes(repo_path, commit_message):
    """ Commit changes to the repository.

    Args:
        repo_path (str): Path to the repository.
        commit_message (str): The commit message.
    """
    repo = Repo(repo_path)
    repo.git.add(update=True)
    repo.index.commit(commit_message)
    origin = repo.remote(name='origin')
    origin.push()

In [9]:
import Action

def lunar_lander(max_t=1000, n_games=1, display=False, seed=38, agent=None):
    
    # Incluimos la opción del entorno gráfico.
    if display:
        env = gym.make('LunarLander-v2', render_mode='human')
    else:
        env = gym.make('LunarLander-v2')

    logs = []
    for episode in range(1, n_games+1):
        state = env.reset(seed=seed) # Set a seed for the environment
        state = state[0] # Eliminamos el diccionario vacio y dejamos unicamente el estado de 8 elementos.
        score = 0
        instant = 0
        episode_actions = []
        episode_rewards = []
        for i in range(max_t):
            if agent: 
                action = agent.act(state)
            else:
                action = Action.act(state)
                
            next_state, reward, terminated, truncated, info = env.step(action)
            json_logs = format_step_data(instant, action, next_state, reward, terminated)
            episode_rewards.append(reward)
            score += reward
            instant += 1
            if len(episode_actions) == 0 or len(episode_actions) % 3 == 0 or terminated:
                logs.append(json_logs)
            episode_actions.append(action)
            state = next_state
            if terminated:
                break
        json_episode_logs = format_episode_logs(logs, episode, score) 
        
        print(f"Número de instantes: {instant+1}. Tamaño de logs: {len(logs)}")
        print('episode ', episode, 'score %.3f' % float(score), 'avg score %.3f' % (float(score) / instant))
        
    env.close()
    return json_episode_logs

#### Logs del código inicial

In [10]:
initial_logs = lunar_lander(n_games=2, display=True)
initial_logs

Número de instantes: 124. Tamaño de logs: 42
episode  1 score -403.225 avg score -3.278
Número de instantes: 101. Tamaño de logs: 76
episode  2 score -110.980 avg score -1.110


'{"landing attempt": 2, "logs": ["{\\"instant\\": 0, \\"action\\": 2, \\"current status\\": [0.007, 1.418, 0.377, 0.172, -0.009, -0.089, 0.0, 0.0], \\"score\\": -1.45, \\"completed\\": false}", "{\\"instant\\": 3, \\"action\\": 1, \\"current status\\": [0.019, 1.426, 0.366, 0.093, -0.02, -0.043, 0.0, 0.0], \\"score\\": 1.357, \\"completed\\": false}", "{\\"instant\\": 6, \\"action\\": 3, \\"current status\\": [0.029, 1.429, 0.366, 0.013, -0.024, -0.042, 0.0, 0.0], \\"score\\": -0.96, \\"completed\\": false}", "{\\"instant\\": 9, \\"action\\": 1, \\"current status\\": [0.04, 1.43, 0.351, -0.008, -0.03, -0.011, 0.0, 0.0], \\"score\\": 0.929, \\"completed\\": false}", "{\\"instant\\": 12, \\"action\\": 3, \\"current status\\": [0.05, 1.426, 0.352, -0.087, -0.029, -0.015, 0.0, 0.0], \\"score\\": -1.376, \\"completed\\": false}", "{\\"instant\\": 15, \\"action\\": 3, \\"current status\\": [0.061, 1.418, 0.371, -0.134, -0.041, -0.111, 0.0, 0.0], \\"score\\": -2.198, \\"completed\\": false}",

#### Logs exitosos
Utilizar otras semillas para que no memorice el entorno

In [11]:
import DuelingDQN

agent = DuelingDQN.Agent(num_observaciones=8, num_acciones=4, red_modelo=DuelingDQN.DuelingQNetwork, seed=0)
# Cargamos los pesos del agente entrenado.
agent.load_weights('checkpoint_Dueling.pth')

success_logs = lunar_lander(n_games=1, display=True, seed=42, agent=agent)
success_logs

Número de instantes: 294. Tamaño de logs: 99
episode  1 score 250.956 avg score 0.857


'{"landing attempt": 1, "logs": ["{\\"instant\\": 0, \\"action\\": 1, \\"current status\\": [0.004, 1.425, 0.22, 0.295, -0.003, -0.004, 0.0, 0.0], \\"score\\": 2.08, \\"completed\\": false}", "{\\"instant\\": 3, \\"action\\": 0, \\"current status\\": [0.011, 1.441, 0.22, 0.215, -0.003, -0.004, 0.0, 0.0], \\"score\\": 1.413, \\"completed\\": false}", "{\\"instant\\": 6, \\"action\\": 1, \\"current status\\": [0.018, 1.452, 0.211, 0.135, -0.002, 0.032, 0.0, 0.0], \\"score\\": 2.092, \\"completed\\": false}", "{\\"instant\\": 9, \\"action\\": 0, \\"current status\\": [0.024, 1.457, 0.211, 0.055, 0.003, 0.032, 0.0, 0.0], \\"score\\": 0.528, \\"completed\\": false}", "{\\"instant\\": 12, \\"action\\": 0, \\"current status\\": [0.03, 1.458, 0.202, -0.026, 0.011, 0.068, 0.0, 0.0], \\"score\\": -0.45, \\"completed\\": false}", "{\\"instant\\": 15, \\"action\\": 0, \\"current status\\": [0.036, 1.452, 0.202, -0.106, 0.021, 0.068, 0.0, 0.0], \\"score\\": -1.215, \\"completed\\": false}", "{\\"in

#### Bucle iterativo

In [12]:
import traceback, importlib
import Action

Action = importlib.reload(Action)

def create_and_run_llm_loop(Incial_msg, logger, model="gpt-3.5-turbo", num_iterations=10):
    
    agente = AssistantOpenAI(ARCLABKEY_OPENAI)

    # Crea un asistente
    asistente = agente.create_assistant(model=model, description=DESCRIPTION, instructions=INSTRUCTIONS, name=NAME, tools=TOOLS)
    
    # Crea un hilo
    hilo = agente.create_thread()
    
    # Añade un mensaje inicial al hilo.
    msg = agente.add_message(hilo.id, role="user", content=Incial_msg)

    for i in range(num_iterations):
        
        logger.info(f"Iteration: {i+1}")
        compiled = False

        while not compiled:
            # Ejecuta el hilo
            ejecucion = agente.run(hilo.id, asistente.id)
            response = agente.get_run(ejecucion.id, hilo.id)

            # Obtén la respuesta utilizando un bucle de espera a que la respuesta esté lista.
            while response.status not in ["completed", "failed", "requires_action"]:
                logger.info(f"Status: {ejecucion.status}")
                response = agente.get_run(ejecucion.id, hilo.id)
                time.sleep(20)

            #print(f"Status: {response.status}")
            logger.info(f"Status: {response.status}")
            if response.status == "completed":
                return response
            tool_call = response.required_action.submit_tool_outputs.tool_calls
            print(f"Tool call: {tool_call}")
            #code = tool_call.function.arguments[0]
            # Convierte el string a un diccionario
            code_dict = json.loads(tool_call[0].function.arguments)
            logger.info(f"Arguments: {code_dict}")
            # Obtén el código Python de la llamada a la herramienta
            code = code_dict["code"]
            filename = code_dict["filename"]  

            logger.info(f"\nCodigo generado:\n{code}")

            # Ejecuta el código Python
            try:
                #exec(code)
                store_code_in_file(code, filename)
                time.sleep(1) # Pequeño retraso para que el sistema operativo pueda reflejar los cambios en el archivo
                
                # Hacemos commit de los cambios en el repositorio para analizar las modificaciones del agente.
                commit_changes(r"C:\Users\adria\Repos\TFM__LLM_landing_self-refinement", f"First execution. Iteration {i+1}.")
                
                importlib.reload(Action)

                # Ejecutamos el código generado.
                logs = lunar_lander(n_games=1, display=True)
                
                for call in tool_call:
                    agente.devolver_respuesta(response.id, hilo.id, tool_outputs=[{"tool_call_id": call.id, "output": "Run successful."}])            
                compiled = True
                logger.info(f"Compilación exitosa.")
                # logger.info(f"Partida: {ref.game.game_log()}")
                while response.status not in ["completed", "failed", "expired"]:
                    logger.info(f"Status: {response.status}")
                    response = agente.get_run(response.id, hilo.id)
                    time.sleep(20)
                
                msg = f"""These are the logs generated in the last execution{logs}. You have to improve the last code generated to maximize the reward. Please, make it more efficient, faster and more robust."""
                logger.info(msg)    
                agente.add_message(hilo.id, role="user", content=msg)
                
            except Exception as e:
                logger.exception("Error: %s", e)
                error_trace = traceback.format_exc()
                for call in tool_call:
                    agente.devolver_respuesta(response.id, hilo.id, tool_outputs=[{"tool_call_id": call.id, "output": "ERROR."}]) 
                logger.error(f"Error: {e}.")
                while response.status not in ["completed", "failed", "expired"]:
                    logger.info(f"Status: {response.status}")
                    response = agente.get_run(ejecucion.id, hilo.id)
                    time.sleep(30)
                msg = f"The code generated has an error. Please, try again. Error: {e}. Trace: {error_trace}"   
                logger.error(msg)   
                agente.add_message(hilo.id, role="assistant", content=msg)
    
    agente.mostrar_mensajes(hilo.id)  
    vaciar_agente(agente)
    
    logger.info("\nEjecución finalizada.\n\n")       

    return response


def vaciar_agente(agente):
    for assistant in agente.assistants:
        agente.delete_assistant(assistant)

    for thread in agente.threads:
        agente.delete_thread(thread)
        
    print("Asistente vaciado.")


#### Logger

In [13]:
def configura_log(nombre_archivo):
    """ 
    Configura el logger para que guarde los logs en un archivo y los muestre en la consola.
    
    Args:
        nombre_archivo: str Nombre del archivo donde se guardarán los logs.
        
    Returns:
        logger: logging.Logger Objeto logger configurado.
    """
    # Crear la carpeta logs si no existe
    if not os.path.exists('logs'):
        os.makedirs('logs')
    # Configura el logger
    logging.basicConfig(filename=f'logs/{nombre_archivo}', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
    logger = logging.getLogger()
    
    # Añade un StreamHandler para mostrar los logs en la consola
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)
    
    return logger

def summarize_logs(logs):
    """ Summarize the logs of the Lunar Lander game
    
    Args:
        logs: dict: logs of the game
        
    Returns:
        str: summary of the logs
    """
    rewards = logs['rewards']
    actions = logs['actions']

    reward_summary = f"Average reward: {np.mean(rewards)}, Max reward: {np.max(rewards)}, Min reward: {np.min(rewards)}"
    action_summary = f"Action frequencies: {np.bincount(actions)}"

    return reward_summary + "\n" + action_summary

### Ejecución con el asistente

In [14]:
DESCRIPTION = "You are an expert programer in Pyhton. Your specialty is to generate the code responsible for making decisions about actions to be taken in various spacecraft landing environments.The objective is to land the spacecraft within a target zone in the shortest possible time and very gently. A scoring system is used to evaluate the landings, which must be maximized."
INSTRUCTIONS = f"""Your task is:
1. Analyze and reason about the results received in the last landing attempts.
2. Your goal is to be able to make the appropriate decision based on the results of previous iterations. You must code the decision making based on your reasoning in a Python function.
3. Based on the logs you should improve the code generated in the 'act' function in the last iteration, seeking to maximize the score received and generate a higher quality code.
4. Save the code of the act function in the file 'Action.py'.
5. Improve your results and correct any errors you may have generated in your last code if they exist.
"""
NAME = "Spacecraft Landing Master"

initial_msg = f"Take a deep breath and think step by step. This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}. Analyze the results and improve the code."

logger = configura_log('Spacecraft_4o_commits.log')
response = create_and_run_llm_loop(initial_msg, logger, model="gpt-4o", num_iterations=5)

print(response)

2024-06-11 14:10:05,690 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-06-11 14:10:05,897 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-06-11 14:10:06,153 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_3Ry1Cs49Q6aL1fNv9EKBieI9/messages "HTTP/1.1 200 OK"
2024-06-11 14:10:06,291 - INFO - Iteration: 1
2024-06-11 14:10:06,677 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_3Ry1Cs49Q6aL1fNv9EKBieI9/runs "HTTP/1.1 200 OK"
2024-06-11 14:10:06,919 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_3Ry1Cs49Q6aL1fNv9EKBieI9/runs/run_e4mZjcSh90uz5F4boANRe73f "HTTP/1.1 200 OK"
2024-06-11 14:10:06,919 - INFO - Status: queued
2024-06-11 14:10:07,127 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_3Ry1Cs49Q6aL1fNv9EKBieI9/runs/run_e4mZjcSh90uz5F4boANRe73f "HTTP/1.1 200 OK"
2024-06-11 14:10:27,134 - INFO - Status: queued
2024-06-11 14:10:27,486

Tool call: [RequiredActionFunctionToolCall(id='call_KgKHPiHTGhw2NfVsLJSqKhYO', function=Function(arguments='{"code":"def act(observation):\\n    \'\'\'\\n    The function that codifies the action to be taken in each instant of time.\\n\\n    Args:\\n        observation (numpy.array):\\n            \\"description\\": \\"The state of the environment after the action is taken.\\",\\n            \\"positions\\": {  \\n                \\"0\\": \\"X position\\",\\n                \\"1\\": \\"Y position\\",\\n                \\"2\\": \\"X velocity\\",\\n                \\"3\\": \\"Y velocity\\",\\n                \\"4\\": \\"Angle\\",\\n                \\"5\\": \\"Angular velocity\\",\\n                \\"6\\": \\"Left contact sensor\\",\\n                \\"7\\": \\"Right contact sensor\\"\\n            },\\n            \\"min_values\\": [-1.5, -1.5, -5.0, -5.0, -3.14, -5.0, 0, 0],\\n            \\"max_values\\": [1.5, 1.5, 5.0, 5.0, 3.14, 5.0, 1, 1]\\n\\n    Returns:\\n        Integer  : Th

2024-06-11 14:10:58,141 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_3Ry1Cs49Q6aL1fNv9EKBieI9/runs/run_e4mZjcSh90uz5F4boANRe73f/submit_tool_outputs "HTTP/1.1 200 OK"
2024-06-11 14:10:58,146 - INFO - Compilación exitosa.
2024-06-11 14:10:58,150 - INFO - Status: requires_action
2024-06-11 14:10:58,426 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_3Ry1Cs49Q6aL1fNv9EKBieI9/runs/run_e4mZjcSh90uz5F4boANRe73f "HTTP/1.1 200 OK"
2024-06-11 14:11:18,433 - INFO - Status: queued
2024-06-11 14:11:18,718 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_3Ry1Cs49Q6aL1fNv9EKBieI9/runs/run_e4mZjcSh90uz5F4boANRe73f "HTTP/1.1 200 OK"
2024-06-11 14:11:38,722 - INFO - These are the logs generated in the last execution{"landing attempt": 1, "logs": ["{\"instant\": 0, \"action\": 2, \"current status\": [0.007, 1.418, 0.377, 0.172, -0.009, -0.089, 0.0, 0.0], \"score\": -1.45, \"completed\": false}", "{\"instant\": 3, \"action\": 2, \"current stat

Tool call: [RequiredActionFunctionToolCall(id='call_9VPOOInYOhrKUOhA9UPiBPWV', function=Function(arguments='{"code": "def act(observation):\\n    x_vel = observation[4]\\n    if x_vel > 0.5:\\n        return 1\\n    elif x_vel < -0.5:\\n        return 3\\n    else:\\n        return 2", "filename": "Action.py"}', name='store_code_in_file'), type='function'), RequiredActionFunctionToolCall(id='call_YL2Tz00e1Ciid6Zxc6KuXlvn', function=Function(arguments='{"code": "def improved_act(observation):\\n    x_vel = observation[4]\\n    if x_vel > 0.5:\\n        return 1\\n    elif x_vel < -0.5:\\n        return 3\\n    else:\\n        return 2", "filename": "Improved_Action.py"}', name='store_code_in_file'), type='function')]
Número de instantes: 108. Tamaño de logs: 37
episode  1 score -643.809 avg score -6.017


2024-06-11 14:12:26,108 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_3Ry1Cs49Q6aL1fNv9EKBieI9/runs/run_h2rOhEQrUOt8sZWUybULDNeM/submit_tool_outputs "HTTP/1.1 400 Bad Request"
2024-06-11 14:12:26,108 - ERROR - Error: Error code: 400 - {'error': {'message': "Expected tool outputs for call_ids ['call_9VPOOInYOhrKUOhA9UPiBPWV', 'call_YL2Tz00e1Ciid6Zxc6KuXlvn'], got ['call_9VPOOInYOhrKUOhA9UPiBPWV']", 'type': 'invalid_request_error', 'param': None, 'code': None}}
Traceback (most recent call last):
  File "C:\Users\adria\AppData\Local\Temp\ipykernel_9120\244465185.py", line 66, in create_and_run_llm_loop
    agente.devolver_respuesta(response.id, hilo.id, tool_outputs=[{"tool_call_id": call.id, "output": "Run successful."}])
  File "c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\Assistant.py", line 171, in devolver_respuesta
    response = self.client.beta.threads.runs.submit_tool_outputs(run_id, thread_id=thread_id, tool_outputs=tool_outputs, **kwargs)
       

BadRequestError: Error code: 400 - {'error': {'message': "Expected tool outputs for call_ids ['call_9VPOOInYOhrKUOhA9UPiBPWV', 'call_YL2Tz00e1Ciid6Zxc6KuXlvn'], got ['call_9VPOOInYOhrKUOhA9UPiBPWV']", 'type': 'invalid_request_error', 'param': None, 'code': None}}

## Tasks

* Incluir guardado de vídeo.
* Mejorar el prompt inicial.
* Incluir ejemplos positivos.
* Incluir cambios en el código con GIT.