#### Imports

In [15]:
import time
from openai import OpenAI
from Assistant import AssistantOpenAI
import logging
import os
import json
import gymnasium as gym
import random
import matplotlib.pyplot as plt
import os
import time
from git import Repo

import numpy as np
#from collections import deque, namedtuple

# For visualization
import gymnasium.wrappers.record_video as record_video

#### Parameters

In [16]:
num_episodes = 200
ENV_NAME = 'LunarLander-v2'
ARCLABKEY_OPENAI = "sk-proj-DvHDR3hpgbm2r3kCA9jKT3BlbkFJL57ABXkfaWAIYKBxdhM6"
ARCLABKEY_OPENAI = "sk-proj-GvaDXazpibWA2M1I5Pu2T3BlbkFJxDuKlr9AcoVG98ctJZ7Q"

#### Function calling.

Función encargada de almacenar el código generado mediante la opción de function calling del asistente de OpenAI.

In [17]:
def store_code_in_file(reasoning, code, filename,):
    """ Store code in a file
    
    Args:
        reasoning: str: Pre-coding reasoning on log analysis and the influence of actions on observations and steps to follow to achieve a successful landing.
        code: str: code to store. Use the reasoning parameter to generate this code.
        filename: str: filename to store code in
    
    Returns:
        None
    """
    print(f"Model reasoning: {reasoning}")
    with open(filename, 'w') as f:
        f.write(code)
        
store_code_in_file_schema = {
    "name": "store_code_in_file",
    "description": "Store code in a file",
    "parameters": {
        "type": "object",
        "properties": {
            "reasoning": {
                "type": "string",
                "description": "Pre-coding deep reasoning on log analysis and the influence of actions on observations and steps to follow to achieve a successful landing. It is an expnasion but not the code itself."
            },
            "code": {
                "type": "string",
                "description": "The Python code to store. Use the reasoning parameter to generate this code."
            },
            "filename": {
                "type": "string",
                "description": "The filename to store the code in."
            }
        },
        "required": ["code", "filename"]
    }
}

TOOLS = [{"type": "function", "function": store_code_in_file_schema}]

#### Entorno Lunnar Lander

In [18]:
env = gym.make('LunarLander-v2')
print('State shape: ', env.observation_space.shape)
print('Number of actions: ', env.action_space.n)

State shape:  (8,)
Number of actions:  4


#### Código inicial

In [19]:
agent_initial_code = """
import random

def act(observation):
    '''
    The function that codifies the action to be taken in each instant of time.

    Args:
        observation (numpy.array):
            "description": "The state of the environment after the action is taken.",
            "positions": {  
                "0": "X position",
                "1": "Y position",
                "2": "X velocity",
                "3": "Y velocity",
                "4": "Angle",
                "5": "Angular velocity",
                "6": "Left contact sensor",
                "7": "Right contact sensor"
            },
            "min_values": [-1.5, -1.5, -5.0, -5.0, -3.14, -5.0, 0, 0],
            "max_values": [1.5, 1.5, 5.0, 5.0, 3.14, 5.0, 1, 1]

    Returns:
        Integer  : The action to be taken.
        "options": {
                '0' : "Switch off engines",
                '1' : "Push left engine",
                '2' : "Push both engines (upwards)",
                '3' : "Push right engine"
            }
    '''
    return random.randint(0, 3)
"""

#### Mapeador de logs a JSON

In [20]:
log_description = {
    "description": "Log data for each step of the spacecraft landing environment.",
    "landing attempt": {
        "type": "integer",
        "description": "The episode number."
    },
    "logs": {
        "time": {
            "type": "integer",
            "description": "The instant within the landing attempt where the current log is taken."
        },
        "action": {
            "type": "integer",
            "description": "The possible actions taken.",
            "options": {
                '0' : "Switch off engines",
                '1' : "Push left engine",
                '2' : "Push both engines (upwards)",
                '3' : "Push right engine"
            }
        },  
        "current status": {
            "type": "array",
            "description": "The state of the environment after the action is taken.",
            "positions": {  
                "0": {
                    "name": "X position",
                    "description": "The horizontal position of the spacecraft relative to the landing zone."
                },
                "1": {
                    "name": "Y position",
                    "description": "The vertical position of the spacecraft relative to the landing zone."
                },
                "2": {
                    "name": "X velocity",
                    "description": "The horizontal velocity of the spacecraft."
                },
                "3": {
                    "name": "Y velocity",
                    "description": "The vertical velocity of the spacecraft."
                },
                "4": {
                    "name": "Angle",
                    "description": "The angle of the spacecraft relative to the vertical (left negative, right positive)."
                },
                "5": {
                    "name": "Angular velocity",
                    "description": "The rate of change of the angle of the spacecraft."
                },
                "6": {
                    "name": "Left contact sensor with landing zone",
                    "description": "Indicates whether the left side of the spacecraft is in contact with the landing zone."
                },
                "7": {
                    "name": "Right contact sensor with landing zone",
                    "description": "Indicates whether the right side of the spacecraft is in contact with the landing zone."
                }
            },
            "min_values": [-1.5, -1.5, -5.0, -5.0, -3.14, -5.0, 0, 0],
            "max_values": [1.5, 1.5, 5.0, 5.0, 3.14, 5.0, 1, 1],
            
        }, 
        "score": {
            "type": "number",
            "description": "The score received for the action."
        },  
        "completed": {
            "type": "boolean",
            "description": "Whether the landing event has ended (landing or accident)."
        }
    },
    "total score": {
        "type": "number",
        "description": "The total score received for the landing attempt."
    }
}

#### Esquema JSON MODE

In [21]:
json_shcema_response = {
    "reasoning": {
        "type": "string",
        "description": "Pre-coding deep reasoning on log analysis and the influence of actions on observations and steps to follow to achieve a successful landing. It is an expnasion but not the code itself."
    },
    "code": {
        "type": "string",
        "description": "The Python code to store. Use the reasoning parameter to generate this code."
    },
    "filename": {
        "type": "string",
        "description": "The filename to store the code in (Action.py)."
    }
}

#### LOGS

In [22]:
import json

def format_step_data(instant, action, next_state, reward, terminated):
    """ Format the step data into a JSON string
    
    Args:
        instant: int: the instant within the landing attempt where the current log is taken
        action: int: the action taken
        next_state: np.array: the next state of the environment
        reward: float: the reward received
        terminated: bool: whether the landing event has ended
        
    Returns:
        str: the step data formatted as a JSON string
    """
    # Convertir el array numpy a una lista
    next_state_list = next_state.tolist()

    # Redondear los elementos de la lista a 4 decimales
    next_state_list_rounded = [round(x, 3) for x in next_state_list]

    step_data = {
        'time': instant,
        'action' : int(action),
        'current status': next_state_list_rounded,  # Convert numpy array to list
        'score': round(reward, 3),
        #'completed': terminated,
        #'truncated': truncated
        #'info': info
    }

    # Convert the dictionary to a JSON string
    #step_data_json = json.dumps(step_data)

    return step_data


def format_episode_logs(logs, episode, total_score):
    """ Format the logs into a JSON string
    
    Args:
        logs: list: the logs for each step of the environment
        episode: int: the episode number
        
    Returns:
        str: the logs formatted as a JSON string
    """
    logs_data = {
        'landing attempt': episode,
        'logs': logs,
        'total score': round(total_score, 3)
    }

    # Convert the dictionary to a JSON string
    logs_json = json.dumps(logs_data)

    return logs_json

#### Commit changes.

In [23]:
def commit_changes(repo_path, commit_message):
    """ Commit changes to the repository.

    Args:
        repo_path (str): Path to the repository.
        commit_message (str): The commit message.
    """
    repo = Repo(repo_path)
    repo.git.add(update=True)
    repo.index.commit(commit_message)
    origin = repo.remote(name='origin')
    origin.push()

#### Código encargado de ejecutar los eventos en el entorno.

In [24]:
import Action

def lunar_lander(max_t=1000, n_games=1, display=False, seed=38, agent=None, recoder=False, video_filename='video/video.mp4'):
    """ Run the Lunar Lander environment
    
    Args:
        max_t: int: the maximum number of timesteps
        n_games: int: the number of games to play
        display: bool: whether to display the environment
        seed: int: the seed for the environment
        agent: object: the agent to use
        recoder: bool: whether to record the video
        video_filename: str: the filename for the video
        
    Returns:
        str: the logs for the landing attempts
    """
    
    # Incluimos la opción del entorno gráfico y la de grabación.
    if display:
        env = gym.make(ENV_NAME, render_mode='human')
        if recoder:
            env = gym.make(ENV_NAME, render_mode='rgb_array')
            env.reset() 
            video_recorder = record_video.RecordVideo(env, video_filename)
    else:
        env = gym.make(ENV_NAME)

    # Bucle principal de ejecución de los episodios.
    logs = []
    for episode in range(1, n_games+1):
        # Revisamos que la semilla sea un número entero o una lista de semillas.
        if isinstance(seed, list):
            semilla = seed[episode-1]
        else:
            semilla = seed
        print(f"Semilla {semilla}")    
        state = env.reset(seed=semilla) # Set a seed for the environment
        state = state[0] # Eliminamos el diccionario vacio y dejamos unicamente el estado de 8 elementos.
        score = 0
        instant = 0
        episode_actions = []
        if recoder: # En caso de que se quiera grabar el video, se inicia el grabador.
            video_recorder.start_video_recorder()
            
        for i in range(max_t): # Bucle de ejecución de los instantes de tiempo.
            
            # Seleccionamos el método de elegir la acción (agente exitoso o método de decisión del asistente)
            if agent: 
                action = agent.act(state)
            else:
                action = Action.act(state)
            
            # Avanzamos un instante de tiempo en el entorno en función de si se quiere grabar el video o no.
            if recoder:
                next_state, reward, terminated, truncated, info = video_recorder.step(action)
            else:
                next_state, reward, terminated, truncated, info = env.step(action)
            
            # Formateamos los datos del instante de tiempo y los alcenamos.
            json_logs = format_step_data(instant, action, next_state, reward, terminated)
            score += reward
            instant += 1
            if len(episode_actions) < 20 or len(episode_actions) % 2 == 0 or terminated:
                logs.append(json_logs)
            #logs.append(json_logs)
            episode_actions.append(action)
            state = next_state
            if terminated: # Condición de salida del bucle, si el episodio ha terminado.
                break
        json_episode_logs = format_episode_logs(logs, episode, score) 
        
        print(f"Número de instantes: {instant+1}. Tamaño de logs: {len(logs)}")
        print('episode ', episode, 'score %.3f' % float(score), 'avg score %.3f' % (float(score) / instant))
        
    if recoder:
        video_recorder.close()
    else:
        env.close()
        
    return json_episode_logs

#### Logs del código inicial

In [25]:
# Genera una lista de n números enteros aleatorios entre 0 y 1000.
seeds = [random.randint(0, 1000) for i in range(4)]
initial_logs = lunar_lander(n_games=1, seed=38, display=True)
initial_logs

Semilla 38
Número de instantes: 103. Tamaño de logs: 62
episode  1 score -140.751 avg score -1.380


'{"landing attempt": 1, "logs": [{"time": 0, "action": 0, "current status": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], "score": 0.232}, {"time": 1, "action": 0, "current status": [0.011, 1.42, 0.38, 0.104, -0.013, -0.085, 0.0, 0.0], "score": 0.121}, {"time": 2, "action": 3, "current status": [0.015, 1.421, 0.391, 0.077, -0.019, -0.127, 0.0, 0.0], "score": -1.253}, {"time": 3, "action": 0, "current status": [0.019, 1.423, 0.391, 0.051, -0.026, -0.127, 0.0, 0.0], "score": -0.326}, {"time": 4, "action": 0, "current status": [0.023, 1.423, 0.391, 0.024, -0.032, -0.127, 0.0, 0.0], "score": -0.445}, {"time": 5, "action": 3, "current status": [0.027, 1.423, 0.402, -0.003, -0.04, -0.171, 0.0, 0.0], "score": -1.894}, {"time": 6, "action": 1, "current status": [0.031, 1.422, 0.391, -0.029, -0.047, -0.127, 0.0, 0.0], "score": 0.378}, {"time": 7, "action": 0, "current status": [0.034, 1.421, 0.391, -0.056, -0.053, -0.127, 0.0, 0.0], "score": -0.808}, {"time": 8, "action": 3, "current s

#### Logs exitosos
Utilizar otras semillas para que no memorice el entorno

In [26]:
import DuelingDQN

agent = DuelingDQN.Agent(num_observaciones=8, num_acciones=4, red_modelo=DuelingDQN.DuelingQNetwork, seed=0)
# Cargamos los pesos del agente entrenado.
agent.load_weights('checkpoint_Dueling.pth')

seeds = [130, 412]

success_logs = lunar_lander(n_games=len(seeds), display=True, seed=seeds, agent=agent)
success_logs

Semilla 130
Número de instantes: 242. Tamaño de logs: 131
episode  1 score 263.217 avg score 1.092
Semilla 412
Número de instantes: 242. Tamaño de logs: 262
episode  2 score 266.713 avg score 1.107


'{"landing attempt": 2, "logs": [{"time": 0, "action": 1, "current status": [0.006, 1.393, 0.31, -0.402, -0.005, -0.029, 0.0, 0.0], "score": -0.567}, {"time": 1, "action": 1, "current status": [0.009, 1.384, 0.301, -0.429, -0.005, 0.005, 0.0, 0.0], "score": -0.672}, {"time": 2, "action": 1, "current status": [0.012, 1.373, 0.291, -0.456, -0.003, 0.045, 0.0, 0.0], "score": -0.468}, {"time": 3, "action": 3, "current status": [0.015, 1.363, 0.303, -0.483, -0.003, -0.001, 0.0, 0.0], "score": -1.813}, {"time": 4, "action": 1, "current status": [0.018, 1.351, 0.291, -0.509, -0.0, 0.045, 0.0, 0.0], "score": -0.367}, {"time": 5, "action": 3, "current status": [0.021, 1.339, 0.299, -0.536, 0.0, 0.015, 0.0, 0.0], "score": -1.553}, {"time": 6, "action": 3, "current status": [0.024, 1.326, 0.308, -0.563, -0.001, -0.022, 0.0, 0.0], "score": -1.579}, {"time": 7, "action": 1, "current status": [0.027, 1.313, 0.3, -0.59, -0.0, 0.009, 0.0, 0.0], "score": -0.688}, {"time": 8, "action": 2, "current statu

#### Bucle iterativo

Sección principal del código encargada de conectar con el asistente de la API de OpenAI e iterar en la generación de código nuevo a partir de los registros del generado previamente.

In [27]:
import traceback, importlib
import Action

Action = importlib.reload(Action)

def create_and_run_llm_loop(Incial_msg, logger, model="gpt-3.5-turbo", num_iterations=10):
    
    agente = AssistantOpenAI(ARCLABKEY_OPENAI)

    # Crea un asistente
    asistente = agente.create_assistant(model=model, description=DESCRIPTION, instructions=INSTRUCTIONS, name=NAME, tools=TOOLS)
    
    # Crea un hilo
    hilo = agente.create_thread()
    
    # Añade un mensaje inicial al hilo.
    msg = agente.add_message(hilo.id, role="user", content=Incial_msg)

    # Bucle de aprendizaje del asistente.
    for i in range(num_iterations):
        
        logger.info(f"Iteration: {i+1}")
        compiled = False

        # Si en la presente iteración no se ha compilado el código, se pide al asistente corregir los errores antes de iterar.
        while not compiled:
            # Ejecuta el hilo
            ejecucion = agente.run(hilo.id, asistente.id, tool_choice='required')
            response = agente.get_run(ejecucion.id, hilo.id)

            # Esperamos a que la respuesta esté lista.
            while response.status not in ["completed", "failed", "requires_action"]:
                logger.info(f"Status: {ejecucion.status}")
                response = agente.get_run(ejecucion.id, hilo.id)
                time.sleep(20)

            # Descomponemos los elementos de la respuesta.
            logger.info(f"Status: {response.status}")
            if response.status == "completed":
                return response
            tool_call = response.required_action.submit_tool_outputs.tool_calls
            print(f"Tool call: {tool_call}")

            # Convierte el string a un diccionario
            code_dict = json.loads(tool_call[0].function.arguments)
            logger.info(f"Arguments: {code_dict}")
            
            # Obtén el código Python de la llamada a la herramienta
            code = code_dict["code"]
            filename = code_dict["filename"]  
            reasoning = code_dict["reasoning"]

            logger.info(f"\nRazonamiento:\n{reasoning}")
            logger.info(f"\nCodigo generado:\n{code}")

            # Ejecuta el código Python
            try:
                store_code_in_file(reasoning, code, filename)
                time.sleep(1) # Pequeño retraso para que el sistema operativo pueda reflejar los cambios en el archivo
                
                # Hacemos commit de los cambios en el repositorio para analizar las modificaciones del agente.
                commit_changes(r"C:\Users\adria\Repos\TFM__LLM_landing_self-refinement", f"4o Setpback. Iteración {i+1}.") 
                
                importlib.reload(Action) # Recargamos el módulo de acciones para que se actualice con las modificaciones del agente.

                # Ejecutamos el código generado.
                logs = lunar_lander(n_games=1, display=True, recoder=True, video_filename=f"video/iteration_{i+1}.mp4")
                
                # Devolvemos la respuesta al asistente.
                for call in tool_call:
                    agente.devolver_respuesta(response.id, hilo.id, tool_outputs=[{"tool_call_id": call.id, "output": "Run successful."}])            
                compiled = True
                logger.info(f"Compilación exitosa.")
                
                # Esperamos a que el agente esté listo para recibir mensajes y le añadimos el resultado de la iteración.
                while response.status not in ["completed", "failed", "expired"]:
                    logger.info(f"Status: {response.status}")
                    response = agente.get_run(response.id, hilo.id)
                    time.sleep(20)
                
                msg = f"""These are the logs generated by your last code: {logs}. Analyze the performance of the spacecraft and how it differs from the desired result. Be guided by the scoring system. Identify the cause of errors in your code and modify it without fear of making major changes. Think deeply about the priorities of your code and how to order and combine them correctly to achieve success. Remember that the code is a reflection of your reasoning and the way you approach the problem."""
                logger.info(msg)    
                agente.add_message(hilo.id, role="user", content=msg)
            
            # Alimentamos el asistente con el error generado en la ejecución del código.    
            except Exception as e:
                logger.exception("Error: %s", e)
                error_trace = traceback.format_exc()
                for call in tool_call:
                    agente.devolver_respuesta(response.id, hilo.id, tool_outputs=[{"tool_call_id": call.id, "output": "ERROR."}]) 
                logger.error(f"Error: {e}.")
                while response.status not in ["completed", "failed", "expired"]:
                    logger.info(f"Status: {response.status}")
                    response = agente.get_run(ejecucion.id, hilo.id)
                    time.sleep(30)
                msg = f"The code generated has an error. Please, try again. Error: {e}. Trace: {error_trace}"   
                logger.error(msg)   
                agente.add_message(hilo.id, role="assistant", content=msg)
    
    agente.mostrar_mensajes(hilo.id)  
    vaciar_agente(agente)
    
    logger.info("\nEjecución finalizada.\n\n")       

    return response


def vaciar_agente(agente):
    for assistant in agente.assistants:
        agente.delete_assistant(assistant)

    for thread in agente.threads:
        agente.delete_thread(thread)
        
    print("Asistente vaciado.")


#### Logger

In [28]:
def configura_log(nombre_archivo):
    """ 
    Configura el logger para que guarde los logs en un archivo y los muestre en la consola.
    
    Args:
        nombre_archivo: str Nombre del archivo donde se guardarán los logs.
        
    Returns:
        logger: logging.Logger Objeto logger configurado.
    """
    # Crear la carpeta logs si no existe
    if not os.path.exists('logs'):
        os.makedirs('logs')
    # Configura el logger
    logging.basicConfig(filename=f'logs/{nombre_archivo}', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
    logger = logging.getLogger()
    
    # Añade un StreamHandler para mostrar los logs en la consola
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)
    
    return logger

#### Ejecución con el asistente

Prompt inicial y mensajes del sistema para el asistente.

In [None]:
DESCRIPTION = "You are an expert programer in Pyhton. Your specialty is to generate the code responsible for making decisions about actions to be taken in various spacecraft landing environments.The objective is to land the spacecraft within a target zone in the shortest possible time and very gently. A scoring system is used to evaluate the landings, which must be maximized."
INSTRUCTIONS = f"""Your task is:
You have two modes of action depending on whether the use of function call is required. Reason deeply about what you are asked to do or code the requested task.
1. Analyze and reason about the logs received in the last landing attempts.
2. Your goal is to be able to make the correct decision based on what you have learned from the results of previous iterations. You must code the decision making based on your reasoning in a Python function.
3. IMPORTANT. Use the following tips in your reasoning to achieve a successful landing:
    - First you have to stabilize the falling ship (angle and location), keep falling under control and then land gently at the end.
    - It is mandatory to use all the elements of the array of observations received by parameter in your code when deciding what action to take at any given moment. Both position and velocities must be taken into account to know how the ship is doing and towards which states it is heading. All of these must be considered to achieve stability.
    - Learn how actions taken affect the future states of the spacecraft in the logs of past events so that you can take this into account when developing code to reach the landing zone.
    - The landing zone is in the central area of the x-axis.
    - Carefully choose the actions involving the lateral engines according to the rotation of the spacecraft.
4. This is the schema of the landing logs: {log_description}.
5. You should analyze the performance that appear in the logs of the code you have generated. You should improve the code generated in the 'act' function in the last iteration without fear of making major changes, seeking to maximize the score received and generate a higher quality code.
6. Save the code of the act function in the file 'Action.py' using store_code_in_file function tool.
"""
# 6. Improve your results and correct also any programming error you may have generated in your last code if they exist.
NAME = "Spacecraft Landing Master"

initial_msg = f"This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. You have to be able to learn from it to land successfully with any other conditions. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}.Take a deep breath and reason step-by-step. After reasoning analyze the results, learn and make better code."

logger = configura_log('Spacecraft_4o_betterprompt.log')
response = create_and_run_llm_loop(initial_msg, logger, model="gpt-4o", num_iterations=5)

print(response)

2024-07-02 00:37:43,931 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07-02 00:37:43,931 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07-02 00:37:44,122 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-07-02 00:37:44,122 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-07-02 00:37:44,385 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_QoB6MCbRbYhGkgNXtosUNZOa/messages "HTTP/1.1 200 OK"
2024-07-02 00:37:44,385 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_QoB6MCbRbYhGkgNXtosUNZOa/messages "HTTP/1.1 200 OK"
2024-07-02 00:37:44,532 - INFO - Iteration: 1
2024-07-02 00:37:44,532 - INFO - Iteration: 1
2024-07-02 00:37:44,929 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_QoB6MCbRbYhGkgNXtosUNZOa/runs "HTTP/1.1 200 OK"
2024-07-02 00:37:44,929 - INFO - HTTP Request: POST htt

Tool call: [RequiredActionFunctionToolCall(id='call_aFYAf5toEx9JxbWztDqncIya', function=Function(arguments='{"code":"import numpy as np\\n\\ndef act(observation):\\n    \'\'\'\\n    The function that codifies the action to be taken in each instant of time.\\n\\n    Args:\\n        observation (numpy.array):\\n            \\"description\\": \\"The state of the environment after the action is taken.\\",\\n            \\"positions\\": {  \\n                \\"0\\": \\"X position\\",\\n                \\"1\\": \\"Y position\\",\\n                \\"2\\": \\"X velocity\\",\\n                \\"3\\": \\"Y velocity\\",\\n                \\"4\\": \\"Angle\\",\\n                \\"5\\": \\"Angular velocity\\",\\n                \\"6\\": \\"Left contact sensor\\",\\n                \\"7\\": \\"Right contact sensor\\"\\n            },\\n            \\"min_values\\": [-1.5, -1.5, -5.0, -5.0, -3.14, -5.0, 0, 0],\\n            \\"max_values\\": [1.5, 1.5, 5.0, 5.0, 3.14, 5.0, 1, 1]\\n\\n    Returns:

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4



2024-07-02 00:38:30,678 - ERROR - Error: Missing required arguments; Expected either ('thread_id' and 'tool_outputs') or ('thread_id', 'stream' and 'tool_outputs') arguments to be given
Traceback (most recent call last):
  File "C:\Users\adria\AppData\Local\Temp\ipykernel_19840\4102630422.py", line 69, in create_and_run_llm_loop
    agente.devolver_respuesta(response.id, hilo.id, tool_outputs=[{"tool_call_id": call.id, "output": "Run successful."}])
  File "c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\Assistant.py", line 171, in devolver_respuesta
    response = self.client.beta.threads.runs.submit_tool_outputs(run_id, thread_id=thread_id, **kwargs)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\adria\anaconda3\Lib\site-packages\openai\_utils\_utils.py", line 276, in wrapper
    raise TypeError(msg)
TypeError: Missing required arguments; Expected either ('thread_id' and 'tool_outputs') or ('thread_id', '

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4
Número de instantes: 73. Tamaño de logs: 47
episode  1 score -612.950 avg score -8.513


TypeError: Missing required arguments; Expected either ('thread_id' and 'tool_outputs') or ('thread_id', 'stream' and 'tool_outputs') arguments to be given

#### Prueba de cambio grande en el prompt.

In [14]:
DESCRIPTION = "You are an expert spacecraft landing agent. Your specialty is to take the appropriate action at each instant of time based on the environment and state of the spacecraft. To achieve this, you put your knowledge base and the knowledge you acquire by analyzing each execution into a python function in charge of directing the landing."
INSTRUCTIONS = f"""To complete the task you must follow the following steps and indications:
1. Analyze and reason about the records received in the last landing attempts to learn how the spacecraft behaves in the environment. Keep in mind that conditions may vary.
2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:
    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).
    - Increased/decreased the slower/faster the spacecraft is moving.
    - Decreased the more the spacecraft is tilted.
    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engine.
    - Receive 100 points for a successful landing and lose them for crashing.
3. IMPORTANT. Use the following tips in your reasoning to achieve a successful landing:
    - First you have to stabilize the falling ship (speed, angle and location), keep falling under control and then land gently at the end.
    - Use all the elements of the vector of observations, all are relevant to make the right decision.
    - Pay close attention to successful events.
    - Environment and ship conditions may change but your code must be effective for all cases.
    - Find a balance in your landing policy to maximize your score (must exceed 200 points).
    - If you are rotating to the left (negative values), you should fire the left engine and vice versa. But always prioritizing a controlled speed with the main engine.
4. This is the schema of the landing logs: {log_description}.
5. You should analyze the performance that appear in the logs of the code you have generated. You should improve the code generated in the 'act' function in the last iteration without fear of making major changes, seeking to maximize the score received and generate a higher quality code.
6. Save the code of the act function in the file 'Action.py' using store_code_in_file function tool.
"""

# 6. Improve your results and correct also any programming error you may have generated in your last code if they exist.
NAME = "Spacecraft Landing Master"

initial_msg = f"This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}. What does each value and each change in the observations mean? What effects does each action have on the current state observations? What are the steps to follow to achieve a successful landing?. Take a deep breath and reason step-by-step. After reasoning analyze the results, learn and make better code."

logger = configura_log('Spacecraft_4o_Stepback.log')
response = create_and_run_llm_loop(initial_msg, logger, model="gpt-4o", num_iterations=5)

print(response)

2024-07-02 01:28:50,965 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07-02 01:28:51,143 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-07-02 01:28:51,438 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/messages "HTTP/1.1 200 OK"
2024-07-02 01:28:51,571 - INFO - Iteration: 1
2024-07-02 01:28:51,986 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs "HTTP/1.1 200 OK"
2024-07-02 01:28:52,391 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_Pt7gqDwYum9j3Ux59bApeiz7 "HTTP/1.1 200 OK"
2024-07-02 01:28:52,394 - INFO - Status: queued
2024-07-02 01:28:52,595 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_Pt7gqDwYum9j3Ux59bApeiz7 "HTTP/1.1 200 OK"
2024-07-02 01:29:12,599 - INFO - Status: queued
2024-07-02 01:29:12,854

Tool call: [RequiredActionFunctionToolCall(id='call_09KGpk8yWlzAUJfv4sZ0RvbY', function=Function(arguments='{"reasoning": "### Analysis and Reasoning\\n\\n#### Observations and Actions:\\n1. **X and Y Position (`obs[0]`, `obs[1]`)**: These are the horizontal and vertical positions of the spacecraft. The goal is to bring the X position to 0 (centered on the landing pad) and the Y position to 0 (landed on the pad).\\n2. **X and Y Velocity (`obs[2]`, `obs[3]`)**: These are the velocities in the X and Y directions. We aim to reduce these to near zero for a soft landing.\\n3. **Angle (`obs[4]`)**: This is the tilt angle of the spacecraft. We aim to keep this at 0 (vertical) to avoid tipping over.\\n4. **Angular Velocity (`obs[5]`)**: This is the rate at which the angle is changing. This should also be minimized to ensure the spaceship is stable.\\n5. **Left and Right Contact Sensors (`obs[6]`, `obs[7]`)**: These indicate if the left or right side of the spacecraft is in contact with the gro

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4



                                                              

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4
Número de instantes: 145. Tamaño de logs: 83
episode  1 score -23.203 avg score -0.161


2024-07-02 01:29:37,623 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_Pt7gqDwYum9j3Ux59bApeiz7/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-02 01:29:37,625 - INFO - Compilación exitosa.
2024-07-02 01:29:37,626 - INFO - Status: requires_action
2024-07-02 01:29:37,836 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_Pt7gqDwYum9j3Ux59bApeiz7 "HTTP/1.1 200 OK"
2024-07-02 01:29:57,839 - INFO - Status: in_progress
2024-07-02 01:29:58,153 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_Pt7gqDwYum9j3Ux59bApeiz7 "HTTP/1.1 200 OK"
2024-07-02 01:30:18,157 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 0, "current status": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], "score": 0.232}, {"time": 1, "action": 0, "current status": [0.011, 1.42, 0.38, 0.104, -0.013, -0

Tool call: [RequiredActionFunctionToolCall(id='call_UinaJHyHMgLlfrraivUpESvO', function=Function(arguments='{"reasoning":"### Analysis of Performance\\n\\nBased on the latest log, we can observe the following issues and necessary adjustments:\\n\\n1. **Inaction During Flight**: For a significant portion of the logs, the spacecraft did not perform any action (action 0) despite requiring stabilization and velocity adjustments. This led to a continuous accumulation of negative scores.\\n2. **Angular Stabilization**: The initial steps correctly assessed no significant angular issues, but corrective actions continued to be minimal or late.\\n3. **Horizontal and Vertical Control**: Actions to manage horizontal and vertical velocities need to be more responsive and accountable for both current velocity and position.\\n4. **Boosters Activation**: The code needs a more effective control routine for when to use which engines to maximize stabilization and minimize unwanted tilts or velocities.\\n

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4
Número de instantes: 78. Tamaño de logs: 49
episode  1 score -116.178 avg score -1.509


2024-07-02 01:31:03,144 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_k50bHdiqmVqIjvXUI2M1HDLC/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-02 01:31:03,149 - INFO - Compilación exitosa.
2024-07-02 01:31:03,149 - INFO - Status: requires_action
2024-07-02 01:31:03,354 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_k50bHdiqmVqIjvXUI2M1HDLC "HTTP/1.1 200 OK"
2024-07-02 01:31:23,358 - INFO - Status: in_progress
2024-07-02 01:31:23,609 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_k50bHdiqmVqIjvXUI2M1HDLC "HTTP/1.1 200 OK"
2024-07-02 01:31:43,611 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 0, "current status": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], "score": 0.232}, {"time": 1, "action": 0, "current status": [0.011, 1.42, 0.38, 0.104, -0.013, -0

Tool call: [RequiredActionFunctionToolCall(id='call_Xt1NDiRvOUIFCBDveYWYRgeC', function=Function(arguments='{"reasoning":"### Analysis of Performance and Adjustments Needed\\n\\n1. **Inaction During Flight**: The spacecraft remains inactive (action 0) despite needing corrections in its state. This improper action leads to the build-up of negative scores as no stabilization or velocity adjustments are being made.\\n2. **Angular Stabilization and Velocity Correction**: Late or ineffective corrections when the spacecraft starts tilting or gaining excessive velocities (both angular and linear).\\n3. **Too Many Corrections of Single Type**: The spacecraft consistently uses action 3 without achieving the required adjustments.\\n4. **Combining Stabilization and Velocity Control**: Need to manage angular corrections in combination with linear velocity adjustments effectively.\\n\\n### Revised Strategy\\n\\n1. **Continuous Adjustment**: Ensure that adjustments are made continuously rather than 

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4
Número de instantes: 86. Tamaño de logs: 53
episode  1 score -116.692 avg score -1.373


2024-07-02 01:32:29,064 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_WXkvgqehu6HQvfvKUV4zAF8Z/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-02 01:32:29,065 - INFO - Compilación exitosa.
2024-07-02 01:32:29,066 - INFO - Status: requires_action
2024-07-02 01:32:29,273 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_WXkvgqehu6HQvfvKUV4zAF8Z "HTTP/1.1 200 OK"
2024-07-02 01:32:49,276 - INFO - Status: in_progress
2024-07-02 01:32:49,583 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_WXkvgqehu6HQvfvKUV4zAF8Z "HTTP/1.1 200 OK"
2024-07-02 01:33:09,588 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 0, "current status": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], "score": 0.232}, {"time": 1, "action": 0, "current status": [0.011, 1.42, 0.38, 0.104, -0.013, -0

Tool call: [RequiredActionFunctionToolCall(id='call_9SDGGnjEawlwGNfEa36MYxk1', function=Function(arguments='{"reasoning":"### Analysis of Recent Performance\\n\\nThe recent logs show that the spacecraft again failed to land successfully. The code didn\'t manage to correct the trajectory effectively, leading to unnecessary extended periods of inaction and incorrect sequential thrust applications. Notably, the code failed to properly balance horizontal velocity, vertical descent rate, and spacecraft stabilization.\\n\\n### Problems Identified:\\n1. **Extended Inaction**: No action was taken when corrections were evidently needed, leading to continuous increases in vertical and horizontal velocities.\\n2. **Sequential Thrusts**: Excessive repeated side thrusts were applied without balancing or correcting the overall motion effectively.\\n3. **Incorrect Prioritization**: Angular stabilization seemed to overshadow the need to manage descent rate and horizontal drift effectively.\\n\\n### Mo

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4
Número de instantes: 86. Tamaño de logs: 53
episode  1 score -116.692 avg score -1.373


2024-07-02 01:33:54,509 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_e1PASc5cGFVYMq46o9beOyxA/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-02 01:33:54,514 - INFO - Compilación exitosa.
2024-07-02 01:33:54,517 - INFO - Status: requires_action
2024-07-02 01:33:54,740 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_e1PASc5cGFVYMq46o9beOyxA "HTTP/1.1 200 OK"
2024-07-02 01:34:14,742 - INFO - Status: in_progress
2024-07-02 01:34:15,012 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_e1PASc5cGFVYMq46o9beOyxA "HTTP/1.1 200 OK"
2024-07-02 01:34:35,016 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 0, "current status": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], "score": 0.232}, {"time": 1, "action": 0, "current status": [0.011, 1.42, 0.38, 0.104, -0.013, -0

Tool call: [RequiredActionFunctionToolCall(id='call_T5Nlba0CpBh7SAAPOx0rHZCR', function=Function(arguments='{"reasoning":"### Reanalysis and Code Revision\\n\\nUpon carefully analyzing the recent logs and performance, the issue still lies in timely managing the spacecraft\'s trajectory and velocity corrections. The sequence of actions observed has shown prolonged execution of a single action type, leading to insufficient response to accumulated velocity, both horizontal and vertical. The spacecraft’s descent was not controlled properly, leading it to crash.\\n\\n### Identified issues\\n1. **Prolonged Inaction**: Multiple consecutive steps executed no action, making the spaceship quickly deviate further from the desired descent path.\\n2. **Insufficient Handling of Descent Speed**: Although vertical stabilization commands were triggered, they were neither appropriately timed nor frequent enough.\\n3. **Over-corrective Angular Adjustments**: Excessive thrust applications in one direction

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_5.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_5.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_5.mp4\rl-video-episode-0.mp4
Número de instantes: 86. Tamaño de logs: 53
episode  1 score -116.692 avg score -1.373


2024-07-02 01:35:19,698 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_nOjPMiKC59rz6dTUyDqrDpkW/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-02 01:35:19,701 - INFO - Compilación exitosa.
2024-07-02 01:35:19,702 - INFO - Status: requires_action
2024-07-02 01:35:19,905 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_nOjPMiKC59rz6dTUyDqrDpkW "HTTP/1.1 200 OK"
2024-07-02 01:35:39,910 - INFO - Status: in_progress
2024-07-02 01:35:40,156 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_nOjPMiKC59rz6dTUyDqrDpkW "HTTP/1.1 200 OK"
2024-07-02 01:36:00,161 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 0, "current status": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], "score": 0.232}, {"time": 1, "action": 0, "current status": [0.011, 1.42, 0.38, 0.104, -0.013, -0

user: [TextContentBlock(text=Text(annotations=[], value='These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 0, "current status": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], "score": 0.232}, {"time": 1, "action": 0, "current status": [0.011, 1.42, 0.38, 0.104, -0.013, -0.085, 0.0, 0.0], "score": 0.121}, {"time": 2, "action": 0, "current status": [0.015, 1.421, 0.38, 0.077, -0.017, -0.085, 0.0, 0.0], "score": 0.013}, {"time": 3, "action": 0, "current status": [0.019, 1.423, 0.38, 0.051, -0.021, -0.085, 0.0, 0.0], "score": -0.104}, {"time": 4, "action": 0, "current status": [0.023, 1.423, 0.38, 0.024, -0.026, -0.085, 0.0, 0.0], "score": -0.226}, {"time": 5, "action": 0, "current status": [0.026, 1.423, 0.38, -0.003, -0.03, -0.085, 0.0, 0.0], "score": -0.353}, {"time": 6, "action": 0, "current status": [0.03, 1.422, 0.38, -0.029, -0.034, -0.085, 0.0, 0.0], "score": -0.481}, {"time": 7, "action": 0, "current status": [0.034, 1.42

2024-07-02 01:36:01,593 - INFO - HTTP Request: DELETE https://api.openai.com/v1/assistants/asst_oz37cI4Jj4423yR963RqZPpk "HTTP/1.1 200 OK"
2024-07-02 01:36:01,926 - INFO - HTTP Request: DELETE https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v "HTTP/1.1 200 OK"
2024-07-02 01:36:01,927 - INFO - 
Ejecución finalizada.




Asistente vaciado.
Run(id='run_nOjPMiKC59rz6dTUyDqrDpkW', assistant_id='asst_oz37cI4Jj4423yR963RqZPpk', cancelled_at=None, completed_at=1719876926, created_at=1719876875, expires_at=None, failed_at=None, incomplete_details=None, instructions="To complete the task you must follow the following steps and indications:\n1. Analyze and reason about the records received in the last landing attempts to learn how the spacecraft behaves in the environment. Keep in mind that conditions may vary.\n2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:\n    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).\n    - Increased/decreased the slower/faster the spacecraft is moving.\n    - Decreased the more the spacecraft is tilted.\n    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engine.\n    - Receive 100 points 

In [14]:
DESCRIPTION = "You are an expert spacecraft landing agent. Your specialty is to take the appropriate action at each instant of time based on the environment and state of the spacecraft. To achieve this, you put your knowledge base and the knowledge you acquire by analyzing each execution into a python function in charge of directing the landing."
INSTRUCTIONS = f"""To complete the task you must follow the following steps and indications:
1. Analyze and reason about the records received in the last landing attempts to learn how the spacecraft behaves in the environment. Keep in mind that conditions may vary.
2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:
    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).
    - Increased/decreased the slower/faster the spacecraft is moving.
    - Decreased the more the spacecraft is tilted.
    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engine.
    - Receive 100 points for a successful landing and lose them for crashing.
    - A succesful landing must exceed 200 points.
3. This is the log schema:\n {log_description}
4. Add your decision code to the 'act' method and locate errors in it if the landing is unsuccessful.
5. Save the code of the act function in the file 'Action.py' using store_code_in_file function.
"""

# 6. Improve your results and correct also any programming error you may have generated in your last code if they exist.
NAME = "Spacecraft Landing Master"

initial_msg = f"This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}. What does each value and each change in the observations mean? What effects does each action have on the current state observations? What are the steps to follow to achieve a successful landing?. Take a deep breath and reason step-by-step. After reasoning analyze the results, learn and make better code."

logger = configura_log('Spacecraft_4o_Stepback.log')
response = create_and_run_llm_loop(initial_msg, logger, model="gpt-4o", num_iterations=5)

print(response)

2024-07-02 20:13:38,487 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07-02 20:13:38,682 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-07-02 20:13:38,976 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_ZDDzGedNEV4UQxGCR92T9JFg/messages "HTTP/1.1 200 OK"
2024-07-02 20:13:39,134 - INFO - Iteration: 1
2024-07-02 20:13:39,768 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_ZDDzGedNEV4UQxGCR92T9JFg/runs "HTTP/1.1 200 OK"
2024-07-02 20:13:40,001 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_ZDDzGedNEV4UQxGCR92T9JFg/runs/run_Df58B0YObxg72VbXW3CpIngt "HTTP/1.1 200 OK"
2024-07-02 20:13:40,003 - INFO - Status: queued
2024-07-02 20:13:40,209 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_ZDDzGedNEV4UQxGCR92T9JFg/runs/run_Df58B0YObxg72VbXW3CpIngt "HTTP/1.1 200 OK"
2024-07-02 20:14:00,212 - INFO - Status: queued
2024-07-02 20:14:00,474

Tool call: [RequiredActionFunctionToolCall(id='call_OImymi36vVR9f8le1Dvbn3N8', function=Function(arguments='{"reasoning":"Analyzing the successful and unsuccessful landing attempts, the spacecraft\'s behavior can be broken down into several key observations and corresponding actions:\\n\\n1. **X Position and Velocity**: Ideally, keep the spacecraft near the center (X position ≈ 0) with minimal horizontal velocity (X velocity ≈ 0). Pushing the left engine (Action 1) moves the spacecraft right, and pushing the right engine (Action 3) moves it left. Pushing both engines (Action 2) slightly affects the horizontal velocity but mainly affects vertical dynamics.\\n2. **Y Position and Velocity**: The goal is to reduce the Y velocity (downward speed) to zero as the spacecraft approaches the ground. Pushing both engines (Action 2) reduces the Y velocity (slows descent), while inaction (Action 0) allows gravity to increase the descent speed.\\n3. **Angle and Angular Velocity**: Keep the spacecraf

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4
Número de instantes: 77. Tamaño de logs: 49
episode  1 score -141.601 avg score -1.863


2024-07-02 20:14:45,419 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_ZDDzGedNEV4UQxGCR92T9JFg/runs/run_Df58B0YObxg72VbXW3CpIngt/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-02 20:14:45,421 - INFO - Compilación exitosa.
2024-07-02 20:14:45,421 - INFO - Status: requires_action
2024-07-02 20:14:45,650 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_ZDDzGedNEV4UQxGCR92T9JFg/runs/run_Df58B0YObxg72VbXW3CpIngt "HTTP/1.1 200 OK"
2024-07-02 20:15:05,652 - INFO - Status: in_progress
2024-07-02 20:15:05,951 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_ZDDzGedNEV4UQxGCR92T9JFg/runs/run_Df58B0YObxg72VbXW3CpIngt "HTTP/1.1 200 OK"
2024-07-02 20:15:25,954 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 1, "current status": [0.011, 1.42, 0.362, 0.104, -0.007, -

Tool call: [RequiredActionFunctionToolCall(id='call_IuQmHgh7jeZem8GlDbZdLGRx', function=Function(arguments='{"reasoning":"Analyzing the logs from the recent attempt, it\'s clear that the spacecraft faced several key issues:\\n\\n1. **Excessive Use of Side Engines (Action 1 and Action 3)**: The spacecraft increased its horizontal velocity and did not stabilize its angle, which led to the angle drifting significantly from 0 over time.\\n2. **Angle and Angular Velocity Control**: The spacecraft\'s angle increased continuously, leading to stability issues. Even though the code attempted to adjust based on the angle, it resulted in continuous use of the side engines, which exacerbated the problem.\\n3. **Insufficient Control of Descent Rate**: The spacecraft did not appropriately manage its vertical velocity. An effective descent strategy would require prioritizing control of Y velocity before worrying excessively about horizontal corrections.\\n4. **Score Loss Due to Engine Usage**: Excess

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4



                                                              

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4
Número de instantes: 99. Tamaño de logs: 60
episode  1 score -109.814 avg score -1.121


2024-07-02 20:16:51,570 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_ZDDzGedNEV4UQxGCR92T9JFg/runs/run_ziFNMzggavQwam4pp36ngD0H/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-02 20:16:51,572 - INFO - Compilación exitosa.
2024-07-02 20:16:51,572 - INFO - Status: requires_action
2024-07-02 20:16:51,817 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_ZDDzGedNEV4UQxGCR92T9JFg/runs/run_ziFNMzggavQwam4pp36ngD0H "HTTP/1.1 200 OK"
2024-07-02 20:17:11,819 - INFO - Status: in_progress
2024-07-02 20:17:12,068 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_ZDDzGedNEV4UQxGCR92T9JFg/runs/run_ziFNMzggavQwam4pp36ngD0H "HTTP/1.1 200 OK"
2024-07-02 20:17:32,071 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 1, "current status": [0.011, 1.42, 0.362, 0.104, -0.007, -

Tool call: [RequiredActionFunctionToolCall(id='call_hpZ7X0VgEGQI9e5lcz98Ldxi', function=Function(arguments='{"reasoning":"The latest logs indicate several persistent issues and some new insights:\\n1. **Angle and Angular Velocity Control**: The angle control improved initially, but after a few steps, the angle started diverging again. This implies that the simple threshold-based approach is insufficient. The spacecraft\'s tilt and changing angular velocity need more dynamic adjustments.\\n2. **Vertical Descent Rate**: The vertical descent was relatively stable, but the score impact due to continued usage of engines was still significant. Hence, a more refined strategy is required that uses the center engine sparingly.\\n3. **Excessive Engine Use**: The alternate firing between left and right engines caused continued oscillation, which degraded performance. The back-and-forth corrections must be minimized, and a more stable state maintained.\\n4. **Complex Maneuver Handling**: In the sc

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4



                                                              

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4
Número de instantes: 99. Tamaño de logs: 60
episode  1 score -109.814 avg score -1.121


2024-07-02 20:18:58,745 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_ZDDzGedNEV4UQxGCR92T9JFg/runs/run_ir3lt7EzkHRznOaTSWXQuDKy/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-02 20:18:58,748 - INFO - Compilación exitosa.
2024-07-02 20:18:58,749 - INFO - Status: requires_action
2024-07-02 20:18:59,058 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_ZDDzGedNEV4UQxGCR92T9JFg/runs/run_ir3lt7EzkHRznOaTSWXQuDKy "HTTP/1.1 200 OK"
2024-07-02 20:19:19,060 - INFO - Status: in_progress
2024-07-02 20:19:19,351 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_ZDDzGedNEV4UQxGCR92T9JFg/runs/run_ir3lt7EzkHRznOaTSWXQuDKy "HTTP/1.1 200 OK"
2024-07-02 20:19:39,353 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 1, "current status": [0.011, 1.42, 0.362, 0.104, -0.007, -

Tool call: [RequiredActionFunctionToolCall(id='call_5qe8TtFnaQJcNSZMM9eOMOyn', function=Function(arguments='{"reasoning":"From reviewing the recent logs, here are the persistent issues and insights:\\n\\n1. **Excessive Engine Use (Horizontal Control)**: Even with the angle control, continuous use of left and right engines persists, leading to oscillations and increased horizontal speed.\\n2. **Angle and Angular Velocity Control**: While combined, the strategy still fails at dynamically responding to changing velocities quickly enough, often leading to opposite engine fires which compound the problem.\\n3. **Vertical Descent and Engine Use**: Over-correcting with the center engine (Action 2) was noted, thus repeated vertical descents remained unstable particularly near critical phases.\\n4. **Contact and Land Efficiency**: Improvements still required for contact sensor utilization to reflect touchdown dynamics effectively.\\n\\n### Updated Strategy for Successful Landing:\\n1. **Primary

2024-07-02 20:21:04,367 - ERROR - Error: invalid syntax (Action.py, line 50)
Traceback (most recent call last):
  File "C:\Users\adria\AppData\Local\Temp\ipykernel_22056\3249554590.py", line 64, in create_and_run_llm_loop
    importlib.reload(Action) # Recargamos el módulo de acciones para que se actualice con las modificaciones del agente.
    ^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\adria\anaconda3\Lib\importlib\__init__.py", line 169, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 621, in _exec
  File "<frozen importlib._bootstrap_external>", line 936, in exec_module
  File "<frozen importlib._bootstrap_external>", line 1074, in get_code
  File "<frozen importlib._bootstrap_external>", line 1004, in source_to_code
  File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
  File "c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\Action.py", line 50
    elif Y_vel < descent_speed_limit>:
                             

Tool call: [RequiredActionFunctionToolCall(id='call_dpxYJo5tg4btyRlZi3WUFi1R', function=Function(arguments='{"reasoning":"The syntax error from the previous attempt has been corrected. The focus remains on angle and angular velocity stabilization, vertical descent speed management, and horizontal position corrections. The adjustments aim to minimize engine use and ensure controlled and efficient landing.","code":"import math\\n\\ndef act(observation):\\n    \'\'\'\\n    The function that codifies the action to be taken in each instant of time.\\n\\n    Args:\\n        observation (numpy.array):\\n            \\"description\\": \\"The state of the environment after the action is taken.\\",\\n            \\"positions\\": {  \\n                \\"0\\": \\"X position\\",\\n                \\"1\\": \\"Y position\\",\\n                \\"2\\": \\"X velocity\\",\\n                \\"3\\": \\"Y velocity\\",\\n                \\"4\\": \\"Angle\\",\\n                \\"5\\": \\"Angular velocity\

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4
Número de instantes: 77. Tamaño de logs: 49
episode  1 score -139.587 avg score -1.837


2024-07-02 20:31:25,926 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_ZDDzGedNEV4UQxGCR92T9JFg/runs/run_Mbk3Bg1ezOMUtB3M2yjxtIIC/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-02 20:31:25,927 - INFO - Compilación exitosa.
2024-07-02 20:31:25,928 - INFO - Status: requires_action
2024-07-02 20:31:26,133 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_ZDDzGedNEV4UQxGCR92T9JFg/runs/run_Mbk3Bg1ezOMUtB3M2yjxtIIC "HTTP/1.1 200 OK"
2024-07-02 20:31:46,135 - INFO - Status: in_progress
2024-07-02 20:31:46,390 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_ZDDzGedNEV4UQxGCR92T9JFg/runs/run_Mbk3Bg1ezOMUtB3M2yjxtIIC "HTTP/1.1 200 OK"
2024-07-02 20:32:06,393 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 1, "current status": [0.011, 1.42, 0.362, 0.104, -0.007, -

Tool call: [RequiredActionFunctionToolCall(id='call_I9MbO74c2ydNkxlHZTE5XqWU', function=Function(arguments='{"reasoning":"The logs reveal persistent challenges: \\n1. **Excessive Engine Usage**: Continuous use of left and right engines leads to increased horizontal speed. \\n2. **Angle and Angular Velocity Control**: Stabilizing these parameters hasn\'t been sufficiently dynamic. \\n3. **Vertical Stability**: Over-correction with the center engine is notable, impacting descent stability. \\n4. **Inefficiency Near Touchdown**: Contact sensor engagement shows instability, indicating poor fine control near the ground.\\n\\n### Updated Strategy for Successful Landing:\\n1. **Primary Focus**: Prioritize dynamic angle and angular velocity stabilization. \\n2. **Secondary Focus**: Ensure vertical stability using center engines judiciously.\\n3. **Tertiary Focus**: Horizontal position corrections based on stable angle maintenance.\\n4. **Efficient Engine Use**: Minimize firing duration to cons

KeyError: 'code'

#### Prompt antiguo exitoso.

In [15]:
DESCRIPTION = "You are an expert programer in Pyhton. Your specialty is to generate the code responsible for making decisions about actions to be taken in various spacecraft landing environments.The objective is to land the spacecraft within a target zone in the shortest possible time and very gently. A scoring system is used to evaluate the landings, which must be maximized."
INSTRUCTIONS = f"""Your task is: 
    1. Analyze and reason about the results received in the last landing attempts. This is the schema of the landing logs: {log_description}.
    2. Your goal is to be able to make the appropriate decision based on the results of previous iterations. You must code the decision making based on your reasoning in a Python function.
    3. Use these tips to get a good landing:
        - First you have to stabilize the falling ship and then land gently.
        - Use all observations when deciding what action to take at any given moment. Both position and velocities must be taken into account to know how the ship is doing and towards which states it is heading. All of these must be considered to achieve stability.
        - Landing zone is in the central area of the x-axis.
        - Learn how actions taken affect the future states of the spacecraft in the logs of past events so that you can take this into account when developing code to reach the landing zone.
    4. Based on the logs you should improve the code generated in the 'act' function in the last iteration, seeking to maximize the score received and generate a higher quality code.
    5. Save the code of the act function in the file 'Action.py' using the tools given.
    6. Improve your results and correct any errors you may have generated in your last code if they exist."""

NAME = "Spacecraft Landing Master"

initial_msg = f"Take a deep breath and think step by step. This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}. Analyze the results and improve the code."
logger = configura_log('Spacecraft_4o_exitosoAntiguo.log')
response = create_and_run_llm_loop(initial_msg, logger, model="gpt-4o", num_iterations=5)

print(response)

2024-07-03 17:34:11,146 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07-03 17:34:11,342 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-07-03 17:34:11,635 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv/messages "HTTP/1.1 200 OK"
2024-07-03 17:34:11,779 - INFO - Iteration: 1
2024-07-03 17:34:12,239 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv/runs "HTTP/1.1 200 OK"
2024-07-03 17:34:12,469 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv/runs/run_eS9Z979IPNxLpwlCs5S2JEtJ "HTTP/1.1 200 OK"
2024-07-03 17:34:12,477 - INFO - Status: queued
2024-07-03 17:34:12,708 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv/runs/run_eS9Z979IPNxLpwlCs5S2JEtJ "HTTP/1.1 200 OK"
2024-07-03 17:34:32,711 - INFO - Status: queued
2024-07-03 17:34:33,007

Tool call: [RequiredActionFunctionToolCall(id='call_6Kl599fRlbIZSXnqisbrKZ2R', function=Function(arguments='{"reasoning":"After analyzing the provided logs for both a successful and unsuccessful landing, the following strategies can be derived:\\n1. Stabilize the spacecraft\'s vertical speed and angle before attempting to land.\\n2. Keep the X position close to the center.\\n3. Use actions that counteract undesirable velocities and angles.\\n4. Ensure the craft\'s vertical descent is minimized, especially as it gets closer to ground.\\n\\nUsing these strategies, a proportional control approach can be implemented, which continually adjusts the actions based on the observed state, targeting zero horizontal and vertical speeds, and a stable angle. Minor adjustments should be made until the vessel is stably landed.\\n\\nHere is an improved version of the \'act\' function that uses proportional control logic to decide on the appropriate action based on real-time observations:\\n","code":"de

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4
Número de instantes: 75. Tamaño de logs: 48
episode  1 score -634.961 avg score -8.581


2024-07-03 17:34:56,773 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv/runs/run_eS9Z979IPNxLpwlCs5S2JEtJ/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-03 17:34:56,775 - INFO - Compilación exitosa.
2024-07-03 17:34:56,775 - INFO - Status: requires_action
2024-07-03 17:34:56,995 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv/runs/run_eS9Z979IPNxLpwlCs5S2JEtJ "HTTP/1.1 200 OK"
2024-07-03 17:35:16,997 - INFO - Status: in_progress
2024-07-03 17:35:17,268 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv/runs/run_eS9Z979IPNxLpwlCs5S2JEtJ "HTTP/1.1 200 OK"
2024-07-03 17:35:37,277 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 1, "current status": [0.011, 1.42, 0.362, 0.104, -0.007, -

Tool call: [RequiredActionFunctionToolCall(id='call_OwfUk3ugbkYARczRJ5eCjKg9', function=Function(arguments='{"reasoning":"After analyzing the logs from the improved function, it is clear that the descent was not properly controlled. The spacecraft continuously accelerated downward and the angle got worse over time, ultimately leading to a crash.\\n\\nHere are the critical observations:\\n1. The angular velocity and angle control were not prioritized correctly; the craft became increasingly tilted, affecting the descent.\\n2. The horizontal velocity control measures were ineffective given the negative impact on vertical descent control.\\n3. The current logic fires the left engine repeatedly, which did not address the descent in an effective manner.\\n\\nRevised Strategy:\\n- Prioritize reducing the downward vertical velocity before making finer adjustments to horizontal velocity and angle.\\n- Introduce thresholds where actions are changed to stabilize not just based on velocity but al

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4
Número de instantes: 96. Tamaño de logs: 58
episode  1 score -559.795 avg score -5.893


2024-07-03 17:36:22,599 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv/runs/run_ixH1C2dlHMahlL7rGD2o5PDF/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-03 17:36:22,601 - INFO - Compilación exitosa.
2024-07-03 17:36:22,602 - INFO - Status: requires_action
2024-07-03 17:36:22,818 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv/runs/run_ixH1C2dlHMahlL7rGD2o5PDF "HTTP/1.1 200 OK"
2024-07-03 17:36:42,821 - INFO - Status: in_progress
2024-07-03 17:36:43,085 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv/runs/run_ixH1C2dlHMahlL7rGD2o5PDF "HTTP/1.1 200 OK"
2024-07-03 17:37:03,090 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 0, "current status": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], "score": 0.232}, {"time": 1, "action": 0, "current status": [0.011, 1.42, 0.38, 0.104, -0.013, -0

Tool call: [RequiredActionFunctionToolCall(id='call_Bipx9QUnNhS5fVMb83nb8eR1', function=Function(arguments='{"reasoning":"From the analysis of the latest logs, it is clear that the prioritization is still off and the control logic for vertical and horizontal motion as well as angular control needs rethinking. The spacecraft becomes increasingly out of control as the descent progresses, leading to significant acceleration and tilting.\\n\\nKey Observations:\\n1. Actions are not effectively counteracting the horizontal and vertical movements nor the angular velocity as they should.\\n2. The spacecraft\'s angle continues to worsen without a proper control mechanism in place.\\n\\nImproved Strategy:\\n- Implement a proportional control system that adjusts the actions depending on how far the state is from the desired values (velocity close to zero, angle close to zero).\\n- Separate the concerns of controlling horizontal motion, vertical motion, and angular deviation, ensuring finer contro

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4
Número de instantes: 94. Tamaño de logs: 57
episode  1 score -557.953 avg score -5.999


2024-07-03 17:37:49,118 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv/runs/run_2vq3dkmwcrfIPlOSECXHFnSB/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-03 17:37:49,121 - INFO - Compilación exitosa.
2024-07-03 17:37:49,122 - INFO - Status: requires_action
2024-07-03 17:37:49,327 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv/runs/run_2vq3dkmwcrfIPlOSECXHFnSB "HTTP/1.1 200 OK"
2024-07-03 17:38:09,331 - INFO - Status: in_progress
2024-07-03 17:38:09,601 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv/runs/run_2vq3dkmwcrfIPlOSECXHFnSB "HTTP/1.1 200 OK"
2024-07-03 17:38:29,605 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 0, "current status": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], "score": 0.232}, {"time": 1, "action": 0, "current status": [0.011, 1.42, 0.38, 0.104, -0.013, -0

Tool call: [RequiredActionFunctionToolCall(id='call_JhFF75RhrfedwDl9TqRzlIOK', function=Function(arguments='{"reasoning":"From the logs of the last attempt, it is apparent that the spacecraft remained in a free fall state for a significant amount of time. The primary issue is that decisions to change actions weren\'t being made promptly. There was not enough emphasis on changing the course of action when required, leading to ineffective controls.\\n\\n1. **Vertical Descent Control**: The log shows that the downward velocity wasn\'t properly managed initially, leading to increasing speeds and worsening conditions over time. The vertical thrust should have been initiated much earlier to manage the descent speed effectively.\\n2. **Angle and Angular Velocity Control**: The spacecraft\'s angle and angular velocity controls were not adequately addressed. They need to be stable to maintain the correct posture for landing.\\n3. **Horizontal Position and Velocity Control**: The control for hor

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4
Número de instantes: 94. Tamaño de logs: 57
episode  1 score -557.953 avg score -5.999


2024-07-03 17:39:15,196 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv/runs/run_gH9X9AudHQfsei6zG8mipoXw/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-03 17:39:15,199 - INFO - Compilación exitosa.
2024-07-03 17:39:15,199 - INFO - Status: requires_action
2024-07-03 17:39:15,415 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv/runs/run_gH9X9AudHQfsei6zG8mipoXw "HTTP/1.1 200 OK"
2024-07-03 17:39:35,417 - INFO - Status: in_progress
2024-07-03 17:39:35,675 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv/runs/run_gH9X9AudHQfsei6zG8mipoXw "HTTP/1.1 200 OK"
2024-07-03 17:39:55,678 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 0, "current status": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], "score": 0.232}, {"time": 1, "action": 0, "current status": [0.011, 1.42, 0.38, 0.104, -0.013, -0

Tool call: [RequiredActionFunctionToolCall(id='call_wtlZ1POzpJdZV1apZWWgsi9v', function=Function(arguments='{"reasoning":"After analyzing the provided logs from the recent landing attempt, here are key observations to understand why the spacecraft performed poorly:\\n\\n1. **Vertical Descent Control**: The spacecraft was in free fall for a significant amount of time (e.g., action \'0\' repeated). No intervention was made to control the downward velocity early enough.\\n2. **Angle Control**: The angle was not actively controlled; it allowed for continuous deviation, further complicating descent stability.\\n3. **Action Delays**: The decisions to change actions need to be more responsive to environmental changes. The same actions were repeated without resulting in desired outcomes, leading to continuous poor scoring.\\n\\nImproved Strategy:\\n- Implement immediate and continuous checks and actions for vertical velocity right from the start to manage descent effectively.\\n- Actively cont

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_5.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_5.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_5.mp4\rl-video-episode-0.mp4
Número de instantes: 94. Tamaño de logs: 57
episode  1 score -557.953 avg score -5.999


2024-07-03 17:40:40,964 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv/runs/run_80UymoumzihMvIMeNc56gOs5/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-03 17:40:40,966 - INFO - Compilación exitosa.
2024-07-03 17:40:40,967 - INFO - Status: requires_action
2024-07-03 17:40:41,188 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv/runs/run_80UymoumzihMvIMeNc56gOs5 "HTTP/1.1 200 OK"
2024-07-03 17:41:01,192 - INFO - Status: in_progress
2024-07-03 17:41:01,454 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv/runs/run_80UymoumzihMvIMeNc56gOs5 "HTTP/1.1 200 OK"
2024-07-03 17:41:21,457 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 0, "current status": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], "score": 0.232}, {"time": 1, "action": 0, "current status": [0.011, 1.42, 0.38, 0.104, -0.013, -0

user: [TextContentBlock(text=Text(annotations=[], value='These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 0, "current status": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], "score": 0.232}, {"time": 1, "action": 0, "current status": [0.011, 1.42, 0.38, 0.104, -0.013, -0.085, 0.0, 0.0], "score": 0.121}, {"time": 2, "action": 0, "current status": [0.015, 1.421, 0.38, 0.077, -0.017, -0.085, 0.0, 0.0], "score": 0.013}, {"time": 3, "action": 0, "current status": [0.019, 1.423, 0.38, 0.051, -0.021, -0.085, 0.0, 0.0], "score": -0.104}, {"time": 4, "action": 0, "current status": [0.023, 1.423, 0.38, 0.024, -0.026, -0.085, 0.0, 0.0], "score": -0.226}, {"time": 5, "action": 0, "current status": [0.026, 1.423, 0.38, -0.003, -0.03, -0.085, 0.0, 0.0], "score": -0.353}, {"time": 6, "action": 0, "current status": [0.03, 1.422, 0.38, -0.029, -0.034, -0.085, 0.0, 0.0], "score": -0.481}, {"time": 7, "action": 0, "current status": [0.034, 1.42

2024-07-03 17:41:22,476 - INFO - HTTP Request: DELETE https://api.openai.com/v1/assistants/asst_PkpU83LGyU1PPCgGtjUcUqx8 "HTTP/1.1 200 OK"
2024-07-03 17:41:22,692 - INFO - HTTP Request: DELETE https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv "HTTP/1.1 200 OK"
2024-07-03 17:41:22,693 - INFO - 
Ejecución finalizada.




Asistente vaciado.
Run(id='run_80UymoumzihMvIMeNc56gOs5', assistant_id='asst_PkpU83LGyU1PPCgGtjUcUqx8', cancelled_at=None, completed_at=1720021245, created_at=1720021195, expires_at=None, failed_at=None, incomplete_details=None, instructions="Your task is: \n    1. Analyze and reason about the results received in the last landing attempts. This is the schema of the landing logs: {'description': 'Log data for each step of the spacecraft landing environment.', 'landing attempt': {'type': 'integer', 'description': 'The episode number.'}, 'logs': {'time': {'type': 'integer', 'description': 'The instant within the landing attempt where the current log is taken.'}, 'action': {'type': 'integer', 'description': 'The possible actions taken.', 'options': {'0': 'Switch off engines', '1': 'Push left engine', '2': 'Push both engines (upwards)', '3': 'Push right engine'}}, 'current status': {'type': 'array', 'description': 'The state of the environment after the action is taken.', 'positions': {'0':

#### Prompt zero shot.

In [15]:
DESCRIPTION = "You are an expert programer in Pyhton. Your specialty is to generate the code responsible for making decisions about actions to be taken in various spacecraft landing environments.The objective is to land the spacecraft within a target zone in the shortest possible time and very gently. A scoring system is used to evaluate the landings, which must be maximized."
INSTRUCTIONS = f"""Your task is: 
    1. Analyze and reason about the results received in the last landing attempts. This is the schema of the landing logs: {log_description}.
    2. Your goal is to be able to make the appropriate decision based on the results of previous iterations. You must code the decision making based on your reasoning in a Python function.
    3. Use these tips to get a good landing:
        - First you have to stabilize the falling ship and then land gently.
        - Use all observations when deciding what action to take at any given moment. Both position and velocities must be taken into account to know how the ship is doing and towards which states it is heading. All of these must be considered to achieve stability.
        - Landing zone is in the central area of the x-axis.
        - Learn how actions taken affect the future states of the spacecraft in the logs of past events so that you can take this into account when developing code to reach the landing zone.
    4. Based on the logs you should improve the code generated in the 'act' function in the last iteration, seeking to maximize the score received and generate a higher quality code.
    5. Save the code of the act function in the file 'Action.py' using the tools given.
    6. Improve your results and correct any errors you may have generated in your last code if they exist."""

NAME = "Spacecraft Landing Master"

initial_msg = f"Take a deep breath and think step by step. Analyze the results and improve the code."
logger = configura_log('Spacecraft_4o_ZeroShot.log')
response = create_and_run_llm_loop(initial_msg, logger, model="gpt-4o", num_iterations=10)

print(response)

2024-07-04 14:21:13,448 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07-04 14:21:13,637 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-07-04 14:21:13,841 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_JfVWjl5dkODgi1Bi0ILHk9fX/messages "HTTP/1.1 200 OK"
2024-07-04 14:21:13,850 - INFO - Iteration: 1
2024-07-04 14:21:14,508 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_JfVWjl5dkODgi1Bi0ILHk9fX/runs "HTTP/1.1 200 OK"
2024-07-04 14:21:14,757 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_JfVWjl5dkODgi1Bi0ILHk9fX/runs/run_lLx8RkJh51IikTQ9omjijrYk "HTTP/1.1 200 OK"
2024-07-04 14:21:14,759 - INFO - Status: queued
2024-07-04 14:21:15,003 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_JfVWjl5dkODgi1Bi0ILHk9fX/runs/run_lLx8RkJh51IikTQ9omjijrYk "HTTP/1.1 200 OK"
2024-07-04 14:21:35,006 - INFO - Status: queued
2024-07-04 14:21:35,261

Tool call: [RequiredActionFunctionToolCall(id='call_xwA7YjxuDntwpFeRPOidQ8CV', function=Function(arguments='{"reasoning": "After examining the schema of landing logs and considering the guidelines for a good landing, it was essential to stabilize the spacecraft first, given its position, velocity, and angle. Following the analysis of how actions influence future states in the logs, the primary goal will be to stabilize the fall within the central x-axis. This step-by-step stabilization involves adjusting the inclination and velocity while approaching the landing zone. Here\'s the enhanced code for the \'act\' function based on results and analysis.", "code": "import numpy as np\\n\\nclass SpacecraftLandingAgent:\\n    def __init__(self):\\n        pass\\n\\n    def act(self, state):\\n        x_pos, y_pos, x_vel, y_vel, angle, angular_vel, left_contact, right_contact = state\\n\\n        # Stabilize angular position first if it\'s too large\\n        if abs(angle) > 0.1:\\n            

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4



                                                              

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4
Número de instantes: 100. Tamaño de logs: 60
episode  1 score -207.662 avg score -2.098


2024-07-04 14:21:59,065 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_JfVWjl5dkODgi1Bi0ILHk9fX/runs/run_lLx8RkJh51IikTQ9omjijrYk/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-04 14:21:59,068 - INFO - Compilación exitosa.
2024-07-04 14:21:59,068 - INFO - Status: requires_action
2024-07-04 14:21:59,271 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_JfVWjl5dkODgi1Bi0ILHk9fX/runs/run_lLx8RkJh51IikTQ9omjijrYk "HTTP/1.1 200 OK"
2024-07-04 14:22:19,273 - INFO - Status: in_progress
2024-07-04 14:22:19,517 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_JfVWjl5dkODgi1Bi0ILHk9fX/runs/run_lLx8RkJh51IikTQ9omjijrYk "HTTP/1.1 200 OK"
2024-07-04 14:22:39,523 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 3, "current status": [0.008, 1.417, 0.39, 0.131, -0.01, -0.123, 0.0, 0.0], "score": -0.871}, {"time": 1, "action": 3, "current status": [0.012, 1.42, 0.401, 0.104, -0.019, -

Tool call: [RequiredActionFunctionToolCall(id='call_c9RmrcdPAt7hkMynZoj6rC7W', function=Function(arguments='{"reasoning":"The previous logs indicate that the spacecraft failed to achieve stability and had a high final score penalty. The primary issues were excessive horizontal and vertical velocities, as well as poor angular stability. The new approach to address these involves setting priorities correctly: stabilizing the angle, then minimizing horizontal velocity, and finally, ensuring a gentle vertical descent towards the landing zone. A more systematic and effective strategy is coded below, focusing on limiting excessive changes in states and improving the gradual reduction of unwanted velocities while considering the proximity to the landing zone and sensor contacts for finalizing the landing.","code":"import numpy as np\\n\\nclass SpacecraftLandingAgent:\\n    def __init__(self):\\n        pass\\n\\n    def act(self, state):\\n        x_pos, y_pos, x_vel, y_vel, angle, angular_ve

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4
Número de instantes: 97. Tamaño de logs: 59
episode  1 score -344.970 avg score -3.593


2024-07-04 14:23:24,526 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_JfVWjl5dkODgi1Bi0ILHk9fX/runs/run_kmjqDllKniD4I5cCtO1WpjY7/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-04 14:23:24,529 - INFO - Compilación exitosa.
2024-07-04 14:23:24,530 - INFO - Status: requires_action
2024-07-04 14:23:24,757 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_JfVWjl5dkODgi1Bi0ILHk9fX/runs/run_kmjqDllKniD4I5cCtO1WpjY7 "HTTP/1.1 200 OK"
2024-07-04 14:23:44,761 - INFO - Status: in_progress
2024-07-04 14:23:45,072 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_JfVWjl5dkODgi1Bi0ILHk9fX/runs/run_kmjqDllKniD4I5cCtO1WpjY7 "HTTP/1.1 200 OK"
2024-07-04 14:24:05,076 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 2, "current status": [0.007, 1.418, 0.377, 0.172, -0.009, -0.089, 0.0, 0.0], "score": -1.45}, {"time": 1, "action": 3, "current status": [0.011, 1.422, 0.389, 0.145, -0.016,

Tool call: [RequiredActionFunctionToolCall(id='call_LDROKJy8fxFPdnpZyCzoJ7Pe', function=Function(arguments='{"reasoning":"The recent logs indicate a continued failure in achieving stability, particularly with horizontal and vertical velocities increasing beyond safe limits. Reflecting deeply, the following improvements are crucial:\\n1. **Immediate Angular Correction:** Rapidly correct any noticeable angular deviation.\\n2. **Controlled Descent:** Carefully monitor and adjust vertical velocity and position.\\n3. **Smooth Horizontal Velocity Adjustments:** Ensure that horizontal movements do not induce excessive velocities while the craft approaches the central x-axis.\\n3. **Enhanced Decision List:** Make sure each decision point is strictly aligned with safety and landing priorities while considering proximity to the ground.\\n\\nHere\'s the code reflecting the refined reasoning, focusing on a more rigorous stabilization approach and smoother descent control.","code":"import numpy as 

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4
Número de instantes: 92. Tamaño de logs: 56
episode  1 score -160.172 avg score -1.760


2024-07-04 14:24:50,272 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_JfVWjl5dkODgi1Bi0ILHk9fX/runs/run_YQcUQbEHwtvugrxQsljKyJnO/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-04 14:24:50,273 - INFO - Compilación exitosa.
2024-07-04 14:24:50,274 - INFO - Status: requires_action
2024-07-04 14:24:50,579 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_JfVWjl5dkODgi1Bi0ILHk9fX/runs/run_YQcUQbEHwtvugrxQsljKyJnO "HTTP/1.1 200 OK"
2024-07-04 14:25:10,582 - INFO - Status: in_progress
2024-07-04 14:25:10,882 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_JfVWjl5dkODgi1Bi0ILHk9fX/runs/run_YQcUQbEHwtvugrxQsljKyJnO "HTTP/1.1 200 OK"
2024-07-04 14:25:30,884 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 3, "current status": [0.011, 1.42, 0.382, 0.104, -0.011, -

Tool call: [RequiredActionFunctionToolCall(id='call_xs1OAFlXirggmML3UgxsOGoI', function=Function(arguments='{"reasoning":"The logs indicate an improvement over previous attempts, but there are still critical issues to address. The key improvement areas remain angular correction, horizontal and vertical velocity control, and gentle landing strategies. The primary causes of failure include excessive horizontal and vertical velocities and improper prioritization of actions. Reflecting deeply, the updated code should emphasize faster angular correction, more precise control over horizontal and vertical movements, and a better decision framework based on the craft\'s state. Let\'s refine the priorities and ensure a smoother stabilization and descent process.\\n\\nHere\'s the revised code, incorporating a step-by-step logic that balances the importance of stabilization and controlled descent.","code":"import numpy as np\\n\\nclass SpacecraftLandingAgent:\\n    def __init__(self):\\n        p

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4



                                                              

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4




Número de instantes: 109. Tamaño de logs: 65
episode  1 score -236.439 avg score -2.189


2024-07-04 14:26:16,203 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_JfVWjl5dkODgi1Bi0ILHk9fX/runs/run_mEYRRxa0PpeCwltjbOJIcFOV/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-04 14:26:16,206 - INFO - Compilación exitosa.
2024-07-04 14:26:16,207 - INFO - Status: requires_action
2024-07-04 14:26:16,438 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_JfVWjl5dkODgi1Bi0ILHk9fX/runs/run_mEYRRxa0PpeCwltjbOJIcFOV "HTTP/1.1 200 OK"
2024-07-04 14:26:36,442 - INFO - Status: in_progress
2024-07-04 14:26:36,686 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_JfVWjl5dkODgi1Bi0ILHk9fX/runs/run_mEYRRxa0PpeCwltjbOJIcFOV "HTTP/1.1 200 OK"
2024-07-04 14:26:56,688 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 0, "current status": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], "score": 0.232}, {"time": 1, "action": 0, "current status": [0.011, 1.42, 0.38, 0.104, -0.013, -0

Tool call: [RequiredActionFunctionToolCall(id='call_3dxywyLWXghXGb4kYO3HdfH2', function=Function(arguments='{"reasoning":"Reviewing the latest logs, we observe that the priorities need to be adjusted for better stabilization. The spacecraft faces issues with increasing vertical and horizontal velocities, causing a failure in final landing. The updated strategy pushes for:\\n\\n1. **Immediate Angular Correction:** Rapidly adjusting any significant angular deviations.\\n2. **Controlled Descent and Horizontal Movement:** Balancing horizontal and vertical control simultaneously for stability.\\n3. **Final Gentle Landing:** Using a combination of engine thrust and sensor inputs to finish the landing gently.\\n\\nThis refined approach revisits how we prioritize and respond to states concurrently, ensuring the code aligns more closely with the desired behavior.","code":"import numpy as np\\n\\nclass SpacecraftLandingAgent:\\n    def __init__(self):\\n        pass\\n\\n    def act(self, state)

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_5.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_5.mp4\rl-video-episode-0.mp4



                                                              

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_5.mp4\rl-video-episode-0.mp4
Número de instantes: 107. Tamaño de logs: 64
episode  1 score -134.717 avg score -1.271


2024-07-04 14:27:41,866 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_JfVWjl5dkODgi1Bi0ILHk9fX/runs/run_R6CfLCfojXP6kbdvBCFPtbdq/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-04 14:27:41,870 - INFO - Compilación exitosa.
2024-07-04 14:27:41,871 - INFO - Status: requires_action
2024-07-04 14:27:42,080 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_JfVWjl5dkODgi1Bi0ILHk9fX/runs/run_R6CfLCfojXP6kbdvBCFPtbdq "HTTP/1.1 200 OK"
2024-07-04 14:28:02,082 - INFO - Status: queued
2024-07-04 14:28:02,419 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_JfVWjl5dkODgi1Bi0ILHk9fX/runs/run_R6CfLCfojXP6kbdvBCFPtbdq "HTTP/1.1 200 OK"
2024-07-04 14:28:22,424 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 1, "current status": [0.011, 1.42, 0.362, 0.104, -0.007, -0.011

Tool call: [RequiredActionFunctionToolCall(id='call_TDVX8rS7pZyvjmdngClVUsM9', function=Function(arguments='{"reasoning":"The new logs reveal notable improvements, but there are still issues with stability and control. The key problems are excessive horizontal and vertical velocities, and complex fluctuations in angular adjustments. The final error indicates uncontrolled high-velocity impact. The revision needs to ensure smoother transition and precise control focused on balancing all state variables effectively. Here is the refined version of the code to better achieve these goals.","code":"import numpy as np\\n\\nclass SpacecraftLandingAgent:\\n    def __init__(self):\\n        pass\\n\\n    def act(self, state):\\n        x_pos, y_pos, x_vel, y_vel, angle, angular_vel, left_contact, right_contact = state\\n\\n        # 1. Immediate Angular Correction\\n        if angle < -0.1:\\n            return 3  # Push right engine to rotate counter-clockwise\\n        elif angle > 0.1:\\n     

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_6.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_6.mp4\rl-video-episode-0.mp4



                                                              

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_6.mp4\rl-video-episode-0.mp4
Número de instantes: 121. Tamaño de logs: 71
episode  1 score -276.309 avg score -2.303


2024-07-04 14:29:07,682 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_JfVWjl5dkODgi1Bi0ILHk9fX/runs/run_mWxuzZQaAy1qUqLJzOrvnZZb/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-04 14:29:07,685 - INFO - Compilación exitosa.
2024-07-04 14:29:07,686 - INFO - Status: requires_action
2024-07-04 14:29:07,898 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_JfVWjl5dkODgi1Bi0ILHk9fX/runs/run_mWxuzZQaAy1qUqLJzOrvnZZb "HTTP/1.1 200 OK"
2024-07-04 14:29:27,901 - INFO - Status: in_progress
2024-07-04 14:29:28,177 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_JfVWjl5dkODgi1Bi0ILHk9fX/runs/run_mWxuzZQaAy1qUqLJzOrvnZZb "HTTP/1.1 200 OK"
2024-07-04 14:29:48,182 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 2, "current status": [0.011, 1.42, 0.382, 0.137, -0.008, -

Tool call: [RequiredActionFunctionToolCall(id='call_f60X7QrWcfLZnSLRFA2XWlXm', function=Function(arguments='{"reasoning":"Analyzing the recent logs reveals persistent issues with vertical velocities, horizontal control, and stability leading to a high-impact landing. The updated strategy must ensure precise control over the spacecraft\'s movements through a well-ordered prioritization process. This means combining stability, vertical control, and sensor-based gentle landing. Below is the refined code that should better address these aspects.","code":"import numpy as np\\n\\nclass SpacecraftLandingAgent:\\n    def __init__(self):\\n        pass\\n\\n    def act(self, state):\\n        x_pos, y_pos, x_vel, y_vel, angle, angular_vel, left_contact, right_contact = state\\n\\n        # 1. Immediate Angular Correction\\n        if angle < -0.1:\\n            return 3  # Push right engine to rotate counter-clockwise\\n        elif angle > 0.1:\\n            return 1  # Push left engine to rot

                                                              

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_7.mp4\rl-video-episode-0.mp4
Número de instantes: 101. Tamaño de logs: 61
episode  1 score -246.971 avg score -2.470


2024-07-04 14:30:34,525 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_JfVWjl5dkODgi1Bi0ILHk9fX/runs/run_OcbmPp0b5ofY3kGoPeO2bEnC/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-04 14:30:34,526 - INFO - Compilación exitosa.
2024-07-04 14:30:34,527 - INFO - Status: requires_action
2024-07-04 14:30:34,770 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_JfVWjl5dkODgi1Bi0ILHk9fX/runs/run_OcbmPp0b5ofY3kGoPeO2bEnC "HTTP/1.1 200 OK"
2024-07-04 14:30:54,772 - INFO - Status: in_progress
2024-07-04 14:30:55,030 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_JfVWjl5dkODgi1Bi0ILHk9fX/runs/run_OcbmPp0b5ofY3kGoPeO2bEnC "HTTP/1.1 200 OK"
2024-07-04 14:31:15,034 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 2, "current status": [0.007, 1.418, 0.377, 0.172, -0.009, -0.089, 0.0, 0.0], "score": -1.45}, {"time": 1, "action": 0, "current status": [0.011, 1.422, 0.377, 0.146, -0.013,

Tool call: [RequiredActionFunctionToolCall(id='call_VDCQS6BgZiXOo9HLjRzSqKp3', function=Function(arguments='{"reasoning":"In reviewing the recent logs, issues with the vertical and horizontal velocities persisted alongside deteriorating stability control. The refined strategy should handle control of velocities more consistently to mitigate sudden impacts and stabilize descent. Below is a structured approach to tackling these performance concerns through better state prioritization and control logic.","code":"import numpy as np\\n\\nclass SpacecraftLandingAgent:\\n    def __init__(self):\\n        pass\\n\\n    def act(self, state):\\n        x_pos, y_pos, x_vel, y_vel, angle, angular_vel, left_contact, right_contact = state\\n\\n        # 1. Stabilize orientation first\\n        if angle < -0.1:\\n            return 3  # Push right engine to rotate counter-clockwise\\n        elif angle > 0.1:\\n            return 1  # Push left engine to rotate clockwise\\n\\n        # 2. Control hor

                                                              

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_8.mp4\rl-video-episode-0.mp4
Número de instantes: 100. Tamaño de logs: 60
episode  1 score -223.398 avg score -2.257


2024-07-04 14:32:00,068 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_JfVWjl5dkODgi1Bi0ILHk9fX/runs/run_99gVMtzK0tXBTVn36XilLW3s/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-04 14:32:00,071 - INFO - Compilación exitosa.
2024-07-04 14:32:00,071 - INFO - Status: requires_action
2024-07-04 14:32:00,318 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_JfVWjl5dkODgi1Bi0ILHk9fX/runs/run_99gVMtzK0tXBTVn36XilLW3s "HTTP/1.1 200 OK"
2024-07-04 14:32:20,321 - INFO - Status: in_progress
2024-07-04 14:32:20,619 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_JfVWjl5dkODgi1Bi0ILHk9fX/runs/run_99gVMtzK0tXBTVn36XilLW3s "HTTP/1.1 200 OK"
2024-07-04 14:32:40,636 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 0, "current status": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], "score": 0.232}, {"time": 1, "action": 2, "current status": [0.011, 1.42, 0.392, 0.137, -0.012, -

Tool call: [RequiredActionFunctionToolCall(id='call_9YpPWcZfMFDB2E1yq3tymmfi', function=Function(arguments='{"reasoning":"The landing logs show that while orientation stabilization is effective, vertical movement control remains problematic. The pattern indicates that descent becomes difficult to manage with the combined thrust and switching strategies not effectively reducing vertical velocity, leading to unstable landings. The refined approach needs enhanced vertical velocity control and smoother state transitions for improved stability. Here is an improved version of the control strategy.","code":"import numpy as np\\n\\nclass SpacecraftLandingAgent:\\n    def __init__(self):\\n        pass\\n\\n    def act(self, state):\\n        x_pos, y_pos, x_vel, y_vel, angle, angular_vel, left_contact, right_contact = state\\n\\n        # 1. Stabilize orientation\\n        if angle < -0.1:\\n            return 3  # Push right engine to rotate counter-clockwise\\n        elif angle > 0.1:\\n   

                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_9.mp4\rl-video-episode-0.mp4
Número de instantes: 97. Tamaño de logs: 59
episode  1 score -327.620 avg score -3.413


2024-07-04 14:33:25,639 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_JfVWjl5dkODgi1Bi0ILHk9fX/runs/run_7Aj33JTxR46jalSL5m1F1tt2/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-04 14:33:25,642 - INFO - Compilación exitosa.
2024-07-04 14:33:25,642 - INFO - Status: requires_action
2024-07-04 14:33:25,850 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_JfVWjl5dkODgi1Bi0ILHk9fX/runs/run_7Aj33JTxR46jalSL5m1F1tt2 "HTTP/1.1 200 OK"
2024-07-04 14:33:45,854 - INFO - Status: in_progress
2024-07-04 14:33:46,149 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_JfVWjl5dkODgi1Bi0ILHk9fX/runs/run_7Aj33JTxR46jalSL5m1F1tt2 "HTTP/1.1 200 OK"
2024-07-04 14:34:06,153 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 2, "current status": [0.011, 1.42, 0.382, 0.137, -0.008, -

Tool call: [RequiredActionFunctionToolCall(id='call_qarkE2dOwbM2OcduKMu5pCp6', function=Function(arguments='{"reasoning":"Analyzing the logs highlights continued issues with managing vertical descent and horizontal stability, leading to unstable velocities and eventual crashes. To improve, we must refine the approach to focus on more reliable vertical velocity control and smoother transitions for descent, while also managing horizontal adjustments effectively. Below is an updated function to address these aspects.","code":"import numpy as np\\n\\nclass SpacecraftLandingAgent:\\n    def __init__(self):\\n        pass\\n\\n    def act(self, state):\\n        x_pos, y_pos, x_vel, y_vel, angle, angular_vel, left_contact, right_contact = state\\n\\n        # Stabilize orientation\\n        if abs(angle) > 0.1:\\n            return 1 if angle > 0 else 3\\n\\n        # Control horizontal velocity\\n        if abs(x_vel) > 0.1:\\n            return 1 if x_vel > 0 else 3\\n\\n        # Manage v

                                                              

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_10.mp4\rl-video-episode-0.mp4
Número de instantes: 110. Tamaño de logs: 65
episode  1 score -39.725 avg score -0.364


2024-07-04 14:34:52,103 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_JfVWjl5dkODgi1Bi0ILHk9fX/runs/run_yURAJqGsTAHkjz164f8yzw3l/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-04 14:34:52,106 - INFO - Compilación exitosa.
2024-07-04 14:34:52,107 - INFO - Status: requires_action
2024-07-04 14:34:52,334 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_JfVWjl5dkODgi1Bi0ILHk9fX/runs/run_yURAJqGsTAHkjz164f8yzw3l "HTTP/1.1 200 OK"
2024-07-04 14:35:12,337 - INFO - Status: in_progress
2024-07-04 14:35:12,620 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_JfVWjl5dkODgi1Bi0ILHk9fX/runs/run_yURAJqGsTAHkjz164f8yzw3l "HTTP/1.1 200 OK"
2024-07-04 14:35:32,622 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 2, "current status": [0.007, 1.418, 0.377, 0.172, -0.009, -0.089, 0.0, 0.0], "score": -1.45}, {"time": 1, "action": 1, "current status": [0.011, 1.422, 0.369, 0.146, -0.012,

user: [TextContentBlock(text=Text(annotations=[], value='These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 2, "current status": [0.007, 1.418, 0.377, 0.172, -0.009, -0.089, 0.0, 0.0], "score": -1.45}, {"time": 1, "action": 1, "current status": [0.011, 1.422, 0.369, 0.146, -0.012, -0.056, 0.0, 0.0], "score": 1.171}, {"time": 2, "action": 2, "current status": [0.015, 1.425, 0.374, 0.166, -0.014, -0.051, 0.0, 0.0], "score": -2.218}, {"time": 3, "action": 2, "current status": [0.018, 1.43, 0.365, 0.193, -0.017, -0.062, 0.0, 0.0], "score": -1.343}, {"time": 4, "action": 1, "current status": [0.022, 1.433, 0.355, 0.166, -0.018, -0.024, 0.0, 0.0], "score": 1.52}, {"time": 5, "action": 3, "current status": [0.026, 1.437, 0.366, 0.139, -0.022, -0.067, 0.0, 0.0], "score": -0.641}, {"time": 6, "action": 2, "current status": [0.029, 1.44, 0.36, 0.152, -0.026, -0.075, 0.0, 0.0], "score": -0.916}, {"time": 7, "action": 1, "current status": [0.033, 

2024-07-04 14:35:33,939 - INFO - HTTP Request: DELETE https://api.openai.com/v1/assistants/asst_rBkqI4OV97N0wBG3W8I37J1E "HTTP/1.1 200 OK"
2024-07-04 14:35:34,221 - INFO - HTTP Request: DELETE https://api.openai.com/v1/threads/thread_JfVWjl5dkODgi1Bi0ILHk9fX "HTTP/1.1 200 OK"
2024-07-04 14:35:34,222 - INFO - 
Ejecución finalizada.




Asistente vaciado.
Run(id='run_yURAJqGsTAHkjz164f8yzw3l', assistant_id='asst_rBkqI4OV97N0wBG3W8I37J1E', cancelled_at=None, completed_at=1720096502, created_at=1720096446, expires_at=None, failed_at=None, incomplete_details=None, instructions="Your task is: \n    1. Analyze and reason about the results received in the last landing attempts. This is the schema of the landing logs: {'description': 'Log data for each step of the spacecraft landing environment.', 'landing attempt': {'type': 'integer', 'description': 'The episode number.'}, 'logs': {'time': {'type': 'integer', 'description': 'The instant within the landing attempt where the current log is taken.'}, 'action': {'type': 'integer', 'description': 'The possible actions taken.', 'options': {'0': 'Switch off engines', '1': 'Push left engine', '2': 'Push both engines (upwards)', '3': 'Push right engine'}}, 'current status': {'type': 'array', 'description': 'The state of the environment after the action is taken.', 'positions': {'0':

#### Take a step back.
Abstraer primero el problema.

### Incluir supervisor encargado del stepback

In [14]:
def inicializar_agente(model, msg, description, instructions, name, tools):
    """ Inicializa un agente con un asistente y un hilo.
    
    Args:
        model: str: el modelo de lenguaje a utilizar
        msg: str: el mensaje inicial del hilo
        
    Returns:
        agente: AssistantOpenAI: el agente inicializado
        asistente: dict: el asistente creado
        hilo: dict: el hilo creado
    """
    
    agente = AssistantOpenAI(ARCLABKEY_OPENAI)
    # Crea un asistente
    asistente = agente.create_assistant(model=model, description=description, instructions=instructions, name=name, tools=tools)
    # Crea un hilo
    hilo = agente.create_thread()
    # Añade un mensaje inicial al hilo.
    msg = agente.add_message(hilo.id, role="user", content=msg)
    
    return agente, asistente, hilo


def run_message_assistant(agente, asistente, hilo, tool_choice="auto"):
    """ Ejecuta un mensaje en el asistente.
    
    Args:
        agente: AssistantOpenAI: el agente
        asistente: dict: el asistente
        hilo: dict: el hilo
        tool_choice: str: la elección de herramienta a utilizar
        
    Returns:
        ejecucion: dict: la ejecución del mensaje
        response: dict: la respuesta del asistente
    """
    # Ejecuta el hilo
    ejecucion = agente.run(hilo.id, asistente.id, tool_choice=tool_choice)
    response = agente.get_run(ejecucion.id, hilo.id)

            # Esperamos a que la respuesta esté lista.
    while response.status not in ["completed", "failed", "requires_action"]:
        logger.info(f"Status: {ejecucion.status}")
        response = agente.get_run(ejecucion.id, hilo.id)
        time.sleep(20)
    return ejecucion, response


In [14]:
import traceback, importlib
import Action

Action = importlib.reload(Action)

def create_and_run_llm_loop(Incial_msg, code_msg, logger, model="gpt-3.5-turbo", num_iterations=5):
    
    # Creamos el asistente.
    #agente, asistente, hilo = inicializar_agente(model, Incial_msg, DESCRIPTION, INSTRUCTIONS, NAME, TOOLS)
    agente = AssistantOpenAI(ARCLABKEY_OPENAI)
    # Crea un asistente
    asistente = agente.create_assistant(model=model, description=DESCRIPTION, instructions=INSTRUCTIONS, name=NAME, tools=TOOLS)
    # Crea un hilo
    hilo = agente.create_thread()
    # Añade un mensaje inicial al hilo.
    msg = agente.add_message(hilo.id, role="user", content=Incial_msg)
    
    # Primero ejecutamos el razonamiento de STEP BACK.
    #ejecucion, response = run_message_assistant(agent, asistente, hilo, tool_choice='none')
    # Ejecuta el hilo
    ejecucion = agente.run(hilo.id, asistente.id, tool_choice='none')
    response = agente.get_run(ejecucion.id, hilo.id)

            # Esperamos a que la respuesta esté lista.
    while response.status not in ["completed", "failed", "requires_action"]:
        logger.info(f"Status: {ejecucion.status}")
        response = agente.get_run(ejecucion.id, hilo.id)
        print("Waiting for response...")
        time.sleep(20)
    
    #print(response)
    msgs = agente.mostrar_mensajes(hilo.id)
    print(msgs[0].data[0].content[0].text.value)
    
    # Añadimos el mensaje de generación de código.
    msg = agente.add_message(hilo.id, role="user", content=code_msg)
    
    # Bucle de aprendizaje del asistente.
    for i in range(num_iterations):
        
        logger.info(f"Iteration: {i+1}")
        compiled = False

        # Si en la presente iteración no se ha compilado el código, se pide al asistente corregir los errores antes de iterar.
        while not compiled:
            # Ejecuta el hilo
            ejecucion = agente.run(hilo.id, asistente.id, tool_choice='required')
            response = agente.get_run(ejecucion.id, hilo.id)

            # Descomponemos los elementos de la respuesta.
            logger.info(f"Status: {response.status}")
            if response.status == "completed":
                return response
            print(response)
            tool_call = response.required_action.submit_tool_outputs.tool_calls
            print(f"Tool call: {tool_call}")

            # Convierte el string a un diccionario
            code_dict = json.loads(tool_call[0].function.arguments)
            logger.info(f"Arguments: {code_dict}")
            
            # Obtén el código Python de la llamada a la herramienta
            code = code_dict["code"]
            filename = code_dict["filename"]  

            logger.info(f"\nCodigo generado:\n{code}")

            # Ejecuta el código Python
            try:
                store_code_in_file(code, filename)
                time.sleep(1) # Pequeño retraso para que el sistema operativo pueda reflejar los cambios en el archivo
                
                # Hacemos commit de los cambios en el repositorio para analizar las modificaciones del agente.
                commit_changes(r"C:\Users\adria\Repos\TFM__LLM_landing_self-refinement", f"Stepback 4o. Iteración {i+1}.") 
                
                importlib.reload(Action) # Recargamos el módulo de acciones para que se actualice con las modificaciones del agente.

                # Ejecutamos el código generado.
                logs = lunar_lander(n_games=1, display=True, recoder=True, video_filename=f"video/iteration_{i+1}.mp4")
                
                # Devolvemos la respuesta al asistente.
                for call in tool_call:
                    agente.devolver_respuesta(response.id, hilo.id, tool_outputs=[{"tool_call_id": call.id, "output": "Run successful."}])            
                compiled = True
                logger.info(f"Compilación exitosa.")
                
                # Esperamos a que el agente esté listo para recibir mensajes y le añadimos el resultado de la iteración.
                while response.status not in ["completed", "failed", "expired"]:
                    logger.info(f"Status: {response.status}")
                    response = agente.get_run(response.id, hilo.id)
                    time.sleep(20)
                
                msg = f"""These are the logs generated by your last code: {logs}. Analyze the performance of the spacecraft and how it differs from the desired result. Be guided by the scoring system. Identify the cause of errors in your code and modify it without fear of making major changes. Think deeply about the priorities of your code and how to order and combine them correctly to achieve success."""
                logger.info(msg)    
                agente.add_message(hilo.id, role="user", content=msg)
            
            # Alimentamos el asistente con el error generado en la ejecución del código.    
            except Exception as e:
                logger.exception("Error: %s", e)
                error_trace = traceback.format_exc()
                for call in tool_call:
                    agente.devolver_respuesta(response.id, hilo.id, tool_outputs=[{"tool_call_id": call.id, "output": "ERROR."}]) 
                logger.error(f"Error: {e}.")
                while response.status not in ["completed", "failed", "expired"]:
                    logger.info(f"Status: {response.status}")
                    response = agente.get_run(ejecucion.id, hilo.id)
                    time.sleep(30)
                msg = f"The code generated has an error. Please, try again. Error: {e}. Trace: {error_trace}"   
                logger.error(msg)   
                agente.add_message(hilo.id, role="assistant", content=msg)
    
    agente.mostrar_mensajes(hilo.id)  
    vaciar_agente(agente)
    
    logger.info("\nEjecución finalizada.\n\n")       

    return response


def vaciar_agente(agente):
    for assistant in agente.assistants:
        agente.delete_assistant(assistant)

    for thread in agente.threads:
        agente.delete_thread(thread)
        
    print("Asistente vaciado.")


In [15]:
DESCRIPTION = "You are an expert spacecraft landing agent. Your specialty is to take the appropriate action at each instant of time based on the environment and state of the spacecraft. To achieve this, you put your knowledge base and the knowledge you acquire by analyzing each execution into a python function in charge of directing the landing."
INSTRUCTIONS = f"""To complete the task you must follow the following steps and indications:
1. Analyze and reason about the records received in the last landing attempts to learn how the spacecraft behaves in the environment. Keep in mind that conditions may vary.
2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:
    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).
    - Increased/decreased the slower/faster the spacecraft is moving.
    - Decreased the more the spacecraft is tilted.
    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engine.
    - Receive 100 points for a successful landing and lose them for crashing.
3. IMPORTANT. Use the following tips in your reasoning to achieve a successful landing:
    - First you have to stabilize the falling ship (speed, angle and location), keep falling under control and then land gently at the end.
    - Use all the elements of the vector of observations, all are relevant to make the right decision.
    - Pay close attention to successful events.
    - Environment and ship conditions may change but your code must be effective for all cases.
    - Find a balance in your landing policy to maximize your score (must exceed 200 points).
    - If you are rotating to the left (negative values), you should fire the left engine and vice versa. But always prioritizing a controlled speed with the main engine.
4. Add your decision code to the 'act' method and locate errors in it if the landing is unsuccessful.
5. Save the code of the act function in the file 'Action.py' using store_code_in_file function.
"""
# 6. Improve your results and correct also any programming error you may have generated in your last code if they exist.
NAME = "Spacecraft Landing Decision coder"


initial_msg = f"This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}. Take a deep breath and reason step-by-step.What does each value and each change in the observations mean? What effects does each action have on the current state observations? What are the steps to follow to achieve a successful landing? Do not implement any code."
code_msg = f"After the reasoning with the processes behind the decision making, code very carefully when structuring your code, keep in mind that certain actions can be left with starvation. Save the code of the act function in the file 'Action.py' using store_code_in_file function tool, function calling is required."



In [16]:
logger = configura_log('Spacecraft_4o_Stepback.log')
response = create_and_run_llm_loop(initial_msg, code_msg, logger, model="gpt-4o", num_iterations=5)

print(response)

2024-07-01 20:24:30,902 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07-01 20:24:31,107 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-07-01 20:24:31,353 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_Z2XFcAqd7BpvTGXwLKXUdXLi/messages "HTTP/1.1 200 OK"
2024-07-01 20:24:31,917 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_Z2XFcAqd7BpvTGXwLKXUdXLi/runs "HTTP/1.1 200 OK"
2024-07-01 20:24:32,130 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_Z2XFcAqd7BpvTGXwLKXUdXLi/runs/run_rBlWNoPJc2HIQNThxrjQvaiT "HTTP/1.1 200 OK"
2024-07-01 20:24:32,132 - INFO - Status: queued
2024-07-01 20:24:32,336 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_Z2XFcAqd7BpvTGXwLKXUdXLi/runs/run_rBlWNoPJc2HIQNThxrjQvaiT "HTTP/1.1 200 OK"


Waiting for response...


### Dos agentes

In [15]:
import traceback, importlib
import Action

Action = importlib.reload(Action)

def create_and_run_llm_loop(Incial_msg, code_msg, logger, model="gpt-3.5-turbo", num_iterations=5):
    
    # Creamos el asistente.
    #agente, asistente, hilo = inicializar_agente(model, Incial_msg, DESCRIPTION, INSTRUCTIONS, NAME, TOOLS)
    agente = AssistantOpenAI(ARCLABKEY_OPENAI)
    # Crea un asistente supervisor y codificador.
    codificador = agente.create_assistant(model=model, description=DESCRIPTION, instructions=INSTRUCTIONS, name=NAME, tools=TOOLS)
    supervisor = agente.create_assistant(model=model, description=DESCRIPTION_SUPERVISOR, instructions=INSTRUCTIONS_SUPERVISOR, name=NAME_SUPERVISOR)
    # Crea un hilo
    hilo_supervisor = agente.create_thread()
    hilo_codificador = agente.create_thread()
    
    # Añade un mensaje inicial al hilo.
    msg = agente.add_message(hilo_supervisor.id, role="user", content=Incial_msg)
    
    # Primero ejecutamos el razonamiento de STEP BACK.
    #ejecucion, response = run_message_assistant(agent, asistente, hilo, tool_choice='none')
    # Ejecuta el hilo
    ejecucion = agente.run(hilo_supervisor.id, supervisor.id, tool_choice='none')
    response = agente.get_run(ejecucion.id, hilo_supervisor.id)

            # Esperamos a que la respuesta esté lista.
    while response.status not in ["completed", "failed", "requires_action"]:
        logger.info(f"Status: {ejecucion.status}")
        response = agente.get_run(ejecucion.id, hilo_supervisor.id)
        print("Waiting for response...")
        time.sleep(20)
    
    #print(response)
    msgs = agente.mostrar_mensajes(hilo_supervisor.id)
    logger.info(msgs[0].data[0].content[0].text.value)
    
    # Añadimos el mensaje de generación de código.
    msg = code_msg +  "\nSupervisor feedback: " + msgs[0].data[0].content[0].text.value
    print(msg)
    msg = agente.add_message(hilo_codificador.id, role="user", content=msg)
    
    # Bucle de aprendizaje del asistente.
    for i in range(num_iterations):
        
        logger.info(f"Iteration: {i+1}")
        compiled = False

        # Si en la presente iteración no se ha compilado el código, se pide al asistente corregir los errores antes de iterar.
        while not compiled:
            # Ejecuta el hilo
            ejecucion = agente.run(hilo_codificador.id, codificador.id)
            response = agente.get_run(ejecucion.id, hilo_codificador.id)

            # Descomponemos los elementos de la respuesta.
            logger.info(f"Status: {response.status}")
            if response.status == "completed":
                return response
            print(response)
            tool_call = response.required_action.submit_tool_outputs.tool_calls
            print(f"Tool call: {tool_call}")

            # Convierte el string a un diccionario
            code_dict = json.loads(tool_call[0].function.arguments)
            logger.info(f"Arguments: {code_dict}")
            
            # Obtén el código Python de la llamada a la herramienta
            code = code_dict["code"]
            filename = code_dict["filename"]  

            logger.info(f"\nCodigo generado:\n{code}")

            # Ejecuta el código Python
            try:
                store_code_in_file(code, filename)
                time.sleep(1) # Pequeño retraso para que el sistema operativo pueda reflejar los cambios en el archivo
                
                # Hacemos commit de los cambios en el repositorio para analizar las modificaciones del agente.
                commit_changes(r"C:\Users\adria\Repos\TFM__LLM_landing_self-refinement", f"Stepback 4o. Iteración {i+1}.") 
                
                importlib.reload(Action) # Recargamos el módulo de acciones para que se actualice con las modificaciones del agente.

                # Ejecutamos el código generado.
                logs = lunar_lander(n_games=1, display=True, recoder=True, video_filename=f"video/iteration_{i+1}.mp4")
                
                # Devolvemos la respuesta al asistente.
                for call in tool_call:
                    agente.devolver_respuesta(response.id, hilo_codificador.id, tool_outputs=[{"tool_call_id": call.id, "output": "Run successful."}])            
                compiled = True
                logger.info(f"Compilación exitosa.")
                
                # Esperamos a que el agente esté listo para recibir mensajes y le añadimos el resultado de la iteración.
                while response.status not in ["completed", "failed", "expired"]:
                    logger.info(f"Status: {response.status}")
                    response = agente.get_run(response.id, hilo_codificador.id)
                    time.sleep(20)
                
                msg = f"""These are the logs generated by your last code: {logs}. Analyze the performance of the spacecraft and how it differs from the desired result. Be guided by the scoring system. Identify the cause of errors in your code and modify it without fear of making major changes. Think deeply about the priorities of your code and how to order and combine them correctly to achieve success."""
                logger.info(msg)    
                agente.add_message(hilo_codificador.id, role="user", content=msg)
            
            # Alimentamos el asistente con el error generado en la ejecución del código.    
            except Exception as e:
                logger.exception("Error: %s", e)
                error_trace = traceback.format_exc()
                for call in tool_call:
                    agente.devolver_respuesta(response.id, hilo_codificador.id, tool_outputs=[{"tool_call_id": call.id, "output": "ERROR."}]) 
                logger.error(f"Error: {e}.")
                while response.status not in ["completed", "failed", "expired"]:
                    logger.info(f"Status: {response.status}")
                    response = agente.get_run(ejecucion.id, hilo_codificador.id)
                    time.sleep(30)
                msg = f"The code generated has an error. Please, try again. Error: {e}. Trace: {error_trace}"   
                logger.error(msg)   
                agente.add_message(hilo_codificador.id, role="assistant", content=msg)
    
    agente.mostrar_mensajes(hilo_codificador.id)  
    vaciar_agente(agente)
    
    logger.info("\nEjecución finalizada.\n\n")       

    return response


def vaciar_agente(agente):
    for assistant in agente.assistants:
        agente.delete_assistant(assistant)

    for thread in agente.threads:
        agente.delete_thread(thread)
        
    print("Asistente vaciado.")


In [16]:
DESCRIPTION = "You are an expert agent in spacecraft landing and decision making codificiation. Based on the landing records received and the indications received by a supervising agent, you must generate the code to land the spacecraft in the environment whatever the initial conditions are."
DESCRIPTION_SUPERVISOR = "You are an expert agent in spacecraft landing environments. Your mission is to analyze landing logs and the code generated by the agent specialized in coding, to guide him in improving the performance of his decision making code."
INSTRUCTIONS = f"""To complete the task you must follow the following steps and indications:
1. Analyze and reason about the records received in the last landing attempts to learn how the spacecraft behaves in the environment. Keep in mind that conditions may vary.
2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:
    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).
    - Increased/decreased the slower/faster the spacecraft is moving.
    - Decreased the more the spacecraft is tilted.
    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engine.
    - Receive 100 points for a successful landing and lose them for crashing.
    - A succesful landing must exceed 200 points.
3. This is the log schema:\n {log_description}
4. Add your decision code to the 'act' method and locate errors in it if the landing is unsuccessful.
5. Save the code of the act function in the file 'Action.py' using store_code_in_file function.
"""
INSTRUCTIONS_SUPERVISOR = f"""To complete the task you must follow the following steps and indications:
1. Revisa aterrizajes exitosos y fracasados para analizar el efecto de las acciones en cada una de las observaciones y decidir que decisión tomar en cada momento.
2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:
    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).
    - Increased/decreased the slower/faster the spacecraft is moving.
    - Decreased the more the spacecraft is tilted.
    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engine.
    - Receive 100 points for a successful landing and lose them for crashing.
    - A succesful landing must exceed 200 points.
3. This is the log schema:\n {log_description}
4. All the knowledge obtained must be expressed in the best possible way to the coding LLM agent so that it can improve the performance of the generated decision making code.
"""
# 6. Improve your results and correct also any programming error you may have generated in your last code if they exist.
NAME = "Spacecraft Landing Decision coder"
NAME_SUPERVISOR = "Spacecraft Landing Supervisor"


supervisor_msg = f"This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}. Take a deep breath and reason step-by-step.What does each value and each change in the observations mean? What effects does each action have on the current state observations? What are the steps to follow to achieve a successful landing? Do not implement any code."
code_msg = f"This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}. After the reasoning with the processes behind the decision making, code very carefully when structuring your code, keep in mind that certain actions can be left with starvation. Save the code of the act function in the file 'Action.py' using store_code_in_file function tool, function calling is required."



In [17]:
logger = configura_log('Spacecraft_4o_Stepback.log')
response = create_and_run_llm_loop(supervisor_msg, code_msg, logger, model="gpt-4o", num_iterations=5)

print(response)

2024-07-03 20:13:38,787 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07-03 20:13:39,055 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07-03 20:13:39,320 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-07-03 20:13:39,541 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-07-03 20:13:39,833 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_PViG42Y34iK8TOuGG4SDc7qu/messages "HTTP/1.1 200 OK"
2024-07-03 20:13:40,506 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_PViG42Y34iK8TOuGG4SDc7qu/runs "HTTP/1.1 200 OK"
2024-07-03 20:13:40,738 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_PViG42Y34iK8TOuGG4SDc7qu/runs/run_EMBnqChalCCiYeU88LPwgzd3 "HTTP/1.1 200 OK"
2024-07-03 20:13:40,740 - INFO - Status: queued
2024-07-03 20:13:40,971 - INFO - HTTP Request: GET https://api.openai.com/v

Waiting for response...


2024-07-03 20:14:00,973 - INFO - Status: queued
2024-07-03 20:14:01,271 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_PViG42Y34iK8TOuGG4SDc7qu/runs/run_EMBnqChalCCiYeU88LPwgzd3 "HTTP/1.1 200 OK"


Waiting for response...


2024-07-03 20:14:21,576 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_PViG42Y34iK8TOuGG4SDc7qu/messages "HTTP/1.1 200 OK"
2024-07-03 20:14:21,818 - INFO - To achieve a successful landing in the spacecraft landing environment, it's essential to understand each observation's values and the effects of each action. Here's a detailed breakdown:

### Observation Values Explained:

1. **X Position:** 
   - The horizontal position of the spacecraft relative to the landing zone.
   - Range: [-1.5, 1.5]

2. **Y Position:**
   - The vertical position of the spacecraft.
   - Range: [-1.5, 1.5]

3. **X Velocity:**
   - The horizontal velocity of the spacecraft.
   - Range: [-5.0, 5.0]

4. **Y Velocity:**
   - The vertical velocity of the spacecraft.
   - Range: [-5.0, 5.0]

5. **Angle:**
   - The angle of the spacecraft relative to vertical.
   - Range: [-3.14, 3.14] (in radians, negative for left, positive for right)

6. **Angular Velocity:**
   - The rate of change of the an

assistant: [TextContentBlock(text=Text(annotations=[], value="To achieve a successful landing in the spacecraft landing environment, it's essential to understand each observation's values and the effects of each action. Here's a detailed breakdown:\n\n### Observation Values Explained:\n\n1. **X Position:** \n   - The horizontal position of the spacecraft relative to the landing zone.\n   - Range: [-1.5, 1.5]\n\n2. **Y Position:**\n   - The vertical position of the spacecraft.\n   - Range: [-1.5, 1.5]\n\n3. **X Velocity:**\n   - The horizontal velocity of the spacecraft.\n   - Range: [-5.0, 5.0]\n\n4. **Y Velocity:**\n   - The vertical velocity of the spacecraft.\n   - Range: [-5.0, 5.0]\n\n5. **Angle:**\n   - The angle of the spacecraft relative to vertical.\n   - Range: [-3.14, 3.14] (in radians, negative for left, positive for right)\n\n6. **Angular Velocity:**\n   - The rate of change of the angle.\n   - Range: [-5.0, 5.0]\n\n7. **Left Contact Sensor:**\n   - Indicates whether the l

2024-07-03 20:14:22,079 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_zTBYfsYvK9fZkvfeXS0HuSxj/messages "HTTP/1.1 200 OK"
2024-07-03 20:14:22,331 - INFO - Iteration: 1
2024-07-03 20:14:22,783 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_zTBYfsYvK9fZkvfeXS0HuSxj/runs "HTTP/1.1 200 OK"
2024-07-03 20:14:22,997 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_zTBYfsYvK9fZkvfeXS0HuSxj/runs/run_PZ9Ujmr3s8AmLYHK1k13iWrQ "HTTP/1.1 200 OK"
2024-07-03 20:14:23,000 - INFO - Status: in_progress


Run(id='run_PZ9Ujmr3s8AmLYHK1k13iWrQ', assistant_id='asst_HjIUSni3KRFfV0KDx36Hcajr', cancelled_at=None, completed_at=None, created_at=1720030462, expires_at=1720031062, failed_at=None, incomplete_details=None, instructions="To complete the task you must follow the following steps and indications:\n1. Analyze and reason about the records received in the last landing attempts to learn how the spacecraft behaves in the environment. Keep in mind that conditions may vary.\n2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:\n    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).\n    - Increased/decreased the slower/faster the spacecraft is moving.\n    - Decreased the more the spacecraft is tilted.\n    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engine.\n    - Receive 100 points for a successful la

AttributeError: 'NoneType' object has no attribute 'submit_tool_outputs'

### JSON MODE

In [15]:
import traceback, importlib
import Action

Action = importlib.reload(Action)

def create_and_run_llm_loop(Incial_msg, logger, model="gpt-3.5-turbo", num_iterations=10):
    
    agente = AssistantOpenAI(ARCLABKEY_OPENAI)

    # Crea un asistente
    asistente = agente.create_assistant_json_mode(model=model, description=DESCRIPTION, instructions=INSTRUCTIONS, name=NAME)
    
    # Crea un hilo
    hilo = agente.create_thread()
    
    # Añade un mensaje inicial al hilo.
    msg = agente.add_message(hilo.id, role="user", content=Incial_msg)

    # Bucle de aprendizaje del asistente.
    for i in range(num_iterations):
        
        logger.info(f"Iteration: {i+1}")
        compiled = False

        # Si en la presente iteración no se ha compilado el código, se pide al asistente corregir los errores antes de iterar.
        while not compiled:
            # Ejecuta el hilo
            ejecucion = agente.run(hilo.id, asistente.id)
            response = agente.get_run(ejecucion.id, hilo.id)

            # Esperamos a que la respuesta esté lista.
            while response.status not in ["completed", "failed", "requires_action"]:
                logger.info(f"Status: {ejecucion.status}")
                response = agente.get_run(ejecucion.id, hilo.id)
                time.sleep(10)
            
            print(response.response_format)
            msgs = agente.mostrar_mensajes(hilo.id)
            msg = msgs[0].data[0].content[0].text.value
            logger.info(msg)

            # # Descomponemos los elementos de la respuesta.
            # logger.info(f"Status: {response.status}")
            # if response.status == "completed":
            #     return response
            # tool_call = response.required_action.submit_tool_outputs.tool_calls
            # print(f"Tool call: {tool_call}")

            # Convierte el string a un diccionario
            code_dict = json.loads(msg)
            logger.info(f"Arguments: {code_dict}")
            
            # Obtén el código Python de la llamada a la herramienta
            code = code_dict["code"]
            filename = code_dict["filename"]  
            reasoning = code_dict["reasoning"]

            logger.info(f"\nRazonamiento:\n{reasoning}")
            logger.info(f"\nCodigo generado:\n{code}")

            # Ejecuta el código Python
            try:
                store_code_in_file(reasoning, code, filename)
                time.sleep(1) # Pequeño retraso para que el sistema operativo pueda reflejar los cambios en el archivo
                
                # Hacemos commit de los cambios en el repositorio para analizar las modificaciones del agente.
                commit_changes(r"C:\Users\adria\Repos\TFM__LLM_landing_self-refinement", f"4o JSONMODE. Iteración {i+1}.") 
                
                importlib.reload(Action) # Recargamos el módulo de acciones para que se actualice con las modificaciones del agente.

                # Ejecutamos el código generado.
                logs = lunar_lander(n_games=1, display=True, recoder=True, video_filename=f"video/iteration_{i+1}.mp4")
                
                # # Devolvemos la respuesta al asistente.
                # for call in tool_call:
                #     agente.devolver_respuesta(response.id, hilo.id, tool_outputs=[{"tool_call_id": call.id, "output": "Run successful."}])            
                compiled = True
                logger.info(f"Compilación exitosa.")
                
                # Esperamos a que el agente esté listo para recibir mensajes y le añadimos el resultado de la iteración.
                # while response.status not in ["completed", "failed", "expired"]:
                #     logger.info(f"Status: {response.status}")
                #     response = agente.get_run(response.id, hilo.id)
                #     time.sleep(20)
                
                msg = f"""These are the logs generated by your last code: {logs}. Analyze the performance of the spacecraft and how it differs from the desired result. Be guided by the scoring system. Identify the cause of errors in your code and modify it without fear of making major changes. Think deeply about the priorities of your code and how to order and combine them correctly to achieve success. Remember that the code is a reflection of your reasoning and the way you approach the problem."""
                logger.info(msg)    
                agente.add_message(hilo.id, role="user", content=msg)
            
            # Alimentamos el asistente con el error generado en la ejecución del código.    
            except Exception as e:
                logger.exception("Error: %s", e)
                error_trace = traceback.format_exc()
                # for call in tool_call:
                #     agente.devolver_respuesta(response.id, hilo.id, tool_outputs=[{"tool_call_id": call.id, "output": "ERROR."}]) 
                # logger.error(f"Error: {e}.")
                while response.status not in ["completed", "failed", "expired"]:
                    logger.info(f"Status: {response.status}")
                    response = agente.get_run(ejecucion.id, hilo.id)
                    time.sleep(30)
                msg = f"The code generated has an error. Please, try again. Error: {e}. Trace: {error_trace}"   
                logger.error(msg)   
                agente.add_message(hilo.id, role="assistant", content=msg)
    
    agente.mostrar_mensajes(hilo.id)  
    vaciar_agente(agente)
    
    logger.info("\nEjecución finalizada.\n\n")       

    return response


def vaciar_agente(agente):
    for assistant in agente.assistants:
        agente.delete_assistant(assistant)

    for thread in agente.threads:
        agente.delete_thread(thread)
        
    print("Asistente vaciado.")

In [21]:
DESCRIPTION = "You are an expert spacecraft landing agent. Your specialty is to take the appropriate action at each instant of time based on the environment and state of the spacecraft. To achieve this, you put your knowledge base and the knowledge you acquire by analyzing each execution into a python function in charge of directing the landing."
INSTRUCTIONS = f"""To complete the task you must follow the following steps and indications:
1. Analyze and reason about the records received in the last landing attempts to learn how the spacecraft behaves in the environment. Keep in mind that conditions may vary.
2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:
    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).
    - Increased/decreased the slower/faster the spacecraft is moving.
    - Decreased the more the spacecraft is tilted.
    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engine.
    - Receive 100 points for a successful landing and lose them for crashing.
    - A succesful landing must exceed 200 points.
3. This is the log schema:\n {log_description}
4. Add your decision code to the 'act' method and locate errors in it if the landing is unsuccessful.
5. Provide output in valid JSON format. The data schema should be as follows:  {json.dumps(json_shcema_response)}.
6. Generate the code of the act function in the file 'Action.py'.
"""

# 6. Improve your results and correct also any programming error you may have generated in your last code if they exist.
NAME = "Spacecraft Landing Master"

initial_msg = "This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}. What does each value and each change in the observations mean? What effects does each action have on the current state observations? What are the steps to follow to achieve a successful landing?. Take a deep breath and reason step-by-step. After reasoning analyze the results, learn and make better code."

logger = configura_log('Spacecraft_4o_JsonMode.log')
response = create_and_run_llm_loop(initial_msg, logger, model="gpt-4o", num_iterations=5)

print(response)

NameError: name 'json_shcema_response' is not defined

### Genetico

#### JSON Mode

In [29]:
def init_population(agente, model, initial_msg, logger, mode='json', N=5):
    dic_population = {}
    for i in range(N):
        if mode == 'json':
            asistente = agente.create_assistant_json_mode(model=model, description=DESCRIPTION, instructions=INSTRUCTIONS, name=NAME)
        else:
            asistente = agente.create_assistant(model=model, description=DESCRIPTION, instructions=INSTRUCTIONS, name=NAME, tools=TOOLS)
        hilo = agente.create_thread()
        msg = agente.add_message(hilo.id, role="user", content=initial_msg)
        logger.info(f"Asistente {i+1} creado. ID: {asistente.id}")
        dic_population[asistente.id] = (hilo, msg)
        
    return dic_population    

In [31]:
import traceback, importlib
import Action

Action = importlib.reload(Action)

def create_and_run_llm_loop(Incial_msg, logger, model="gpt-3.5-turbo", num_iterations=10):
    
    agente = AssistantOpenAI(ARCLABKEY_OPENAI)

    # Crea un asistente
    dic_population = init_population(agente, model, Incial_msg, logger, N=4)

    # Bucle de aprendizaje del asistente.
    for i in range(num_iterations):
        
        logger.info(f"Iteration: {i+1}")
        compiled = False
        resultados = [1, float("-inf")]
        # Ejecuta los asistentes al mismo tiempo.
        for asistente_id in dic_population.keys():
            logger.info(f"Asistente: {asistente_id}")
            hilo, msg = dic_population[asistente_id]
            asistente = agente.get_assistant(asistente_id)
            compiled = False
            
            # Si en la presente iteración no se ha compilado el código, se pide al asistente corregir los errores antes de iterar.
            while not compiled:
                # Ejecuta el hilo
                ejecucion = agente.run(hilo.id, asistente.id)
                response = agente.get_run(ejecucion.id, hilo.id)

                # Esperamos a que la respuesta esté lista.
                while response.status not in ["completed", "failed", "requires_action"]:
                    logger.info(f"Status: {ejecucion.status}")
                    response = agente.get_run(ejecucion.id, hilo.id)
                    time.sleep(10)
                
                msgs = agente.mostrar_mensajes(hilo.id)
                msg = msgs[0].data[0].content[0].text.value
                #logger.info(msg)

                # Convierte el string a un diccionario
                code_dict = json.loads(msg)
                #logger.info(f"Arguments: {code_dict}")
                
                # Obtén el código Python de la llamada a la herramienta
                code = code_dict["code"]
                filename = code_dict["filename"]  
                reasoning = code_dict["reasoning"]

                logger.info(f"\nRazonamiento:\n{reasoning}")
                logger.info(f"\nCodigo generado:\n{code}")

                # Ejecuta el código Python
                try:
                    store_code_in_file(reasoning, code, filename)
                    time.sleep(1) # Pequeño retraso para que el sistema operativo pueda reflejar los cambios en el archivo
                    
                    # Hacemos commit de los cambios en el repositorio para analizar las modificaciones del agente.
                    commit_changes(r"C:\Users\adria\Repos\TFM__LLM_landing_self-refinement", f"4o genetic JSON. Iteración {i+1}.") 
                    
                    importlib.reload(Action) # Recargamos el módulo de acciones para que se actualice con las modificaciones del agente.

                    # Ejecutamos el código generado.
                    logs = lunar_lander(n_games=1, display=True, recoder=True, video_filename=f"video/Asitente_{asistente_id}/iteration_{i+1}.mp4")
                    json_logs = json.loads(logs)
                    
                    if float(json_logs['total score']) > resultados[1]:
                        resultados = [asistente_id, float(json_logs["total score"]), code, logs]
                    resultados.append(json_logs["total score"])
                             
                    compiled = True
                    logger.info(f"Compilación exitosa.")
                    
                    msg = f"""These are the logs generated by your last code: {logs}. Analyze the performance of the spacecraft and how it differs from the desired result. Be guided by the scoring system. Identify the cause of errors in your code and modify it without fear of making major changes. Think deeply about the priorities of your code and how to order and combine them correctly to achieve success. Remember that the code is a reflection of your reasoning and the way you approach the problem."""
                    logger.info(msg)    
                    agente.add_message(hilo.id, role="user", content=msg)
                
                # Alimentamos el asistente con el error generado en la ejecución del código.    
                except Exception as e:
                    logger.exception("Error: %s", e)
                    error_trace = traceback.format_exc()

                    while response.status not in ["completed", "failed", "expired"]:
                        logger.info(f"Status: {response.status}")
                        response = agente.get_run(ejecucion.id, hilo.id)
                        time.sleep(30)
                    msg = f"The code generated has an error. Please, try again. Error: {e}. Trace: {error_trace}"   
                    logger.error(msg)   
                    agente.add_message(hilo.id, role="assistant", content=msg)
        logger.info(f"Mejor resultado: Asistentente {resultados[0]} con puntuación {resultados[1]}")
        # Después de determinar el asistente con la máxima puntuación y antes de finalizar el bucle de iteraciones
        for asistente_id in dic_population.keys():
            if asistente_id != resultados[0]:  # Verifica que no sea el asistente con la máxima puntuación
                hilo, _ = dic_population[asistente_id]
                # Aquí puedes añadir el resultado al asistente que no tiene la máxima puntuación
                # Por ejemplo, suponiendo que tienes una función para añadir mensajes o resultados:
                msg_resultado = f"The best result for the population was {resultados[1]} achieved with the following code: {resultados[2]}\ and the following logs:{resultados[3]}. \nCompare it with yours and draw conclusions. Re-generate the code step by step and think deeply to improve both results."
                agente.add_message(hilo.id, role="user", content=msg_resultado)
        
    
    agente.mostrar_mensajes(hilo.id)  
    vaciar_agente(agente)
    
    logger.info("\nEjecución finalizada.\n\n")       

    return response


def vaciar_agente(agente):
    for assistant in agente.assistants:
        agente.delete_assistant(assistant)

    for thread in agente.threads:
        agente.delete_thread(thread)
        
    print("Asistente vaciado.")

In [32]:
DESCRIPTION = "You are an expert spacecraft landing agent. Your specialty is to take the appropriate action at each instant of time based on the environment and state of the spacecraft. To achieve this, you put your knowledge base and the knowledge you acquire by analyzing each execution into a python function in charge of directing the landing."
INSTRUCTIONS = f"""To complete the task you must follow the following steps and indications:
1. Analyze and reason about the records received in the last landing attempts to learn how the spacecraft behaves in the environment. Keep in mind that conditions may vary.
2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:
    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).
    - Increased/decreased the slower/faster the spacecraft is moving.
    - Decreased the more the spacecraft is tilted.
    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engine.
    - Receive 100 points for a successful landing and lose them for crashing.
    - A succesful landing must exceed 200 points.
3. This is the log schema:\n {log_description}
4. Add your decision code to the 'act' method and locate errors in it if the landing is unsuccessful.
5. Provide output in valid JSON format. The data schema should be as follows:  {json.dumps(json_shcema_response)}.
6. Generate the code of the act function in the file 'Action.py'.
"""

# 6. Improve your results and correct also any programming error you may have generated in your last code if they exist.
NAME = "Spacecraft Landing Master"

initial_msg = f"This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}. What does each value and each change in the observations mean? What effects does each action have on the current state observations? What are the steps to follow to achieve a successful landing?. Take a deep breath and reason step-by-step. After reasoning analyze the results, learn and make better code."

logger = configura_log('Spacecraft_4o_genetic.log')
response = create_and_run_llm_loop(initial_msg, logger, model="gpt-4o", num_iterations=5)

print(response)

2024-07-04 17:50:01,448 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07-04 17:50:01,722 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-07-04 17:50:02,054 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_FH4fDp5HvchEd7XnQGU95h8x/messages "HTTP/1.1 200 OK"
2024-07-04 17:50:02,208 - INFO - Asistente 1 creado. ID: asst_jBPjpPzklEWz1TYe4sMOxZnG
2024-07-04 17:50:02,476 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07-04 17:50:02,680 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-07-04 17:50:02,962 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_jIB556bkpr7V8RCKfsbMdBq9/messages "HTTP/1.1 200 OK"
2024-07-04 17:50:03,112 - INFO - Asistente 2 creado. ID: asst_4wtHjzk2JkW11VNLz0kIWx7U
2024-07-04 17:50:03,361 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07

assistant: [TextContentBlock(text=Text(annotations=[], value='{"reasoning": "The log provided contains a sequence of actions and their respective impact on the spacecraft\'s status during a landing attempt. To achieve a successful landing, it\'s essential to understand the effects of each action on the observations and adjust them accordingly.\\n\\n1. **Current Status Variables:**\\n   - `X position`: horizontal position relative to the landing zone (negative is left, positive is right).\\n   - `Y position`: vertical position relative to the landing zone (higher values are higher up).\\n   - `X velocity`: horizontal velocity (negative is moving left, positive is moving right).\\n   - `Y velocity`: vertical velocity (negative is moving down, positive is moving up).\\n   - `Angle`: angle relative to the vertical (negative is tilting left, positive is tilting right).\\n   - `Angular velocity`: rate of change of the angle (negative means rotating counterclockwise, positive means rotating c

2024-07-04 17:50:41,239 - INFO - Compilación exitosa.         
2024-07-04 17:50:41,240 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 0, "current status": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], "score": 0.232}, {"time": 1, "action": 0, "current status": [0.011, 1.42, 0.38, 0.104, -0.013, -0.085, 0.0, 0.0], "score": 0.121}, {"time": 2, "action": 0, "current status": [0.015, 1.421, 0.38, 0.077, -0.017, -0.085, 0.0, 0.0], "score": 0.013}, {"time": 3, "action": 0, "current status": [0.019, 1.423, 0.38, 0.051, -0.021, -0.085, 0.0, 0.0], "score": -0.104}, {"time": 4, "action": 0, "current status": [0.023, 1.423, 0.38, 0.024, -0.026, -0.085, 0.0, 0.0], "score": -0.226}, {"time": 5, "action": 0, "current status": [0.026, 1.423, 0.38, -0.003, -0.03, -0.085, 0.0, 0.0], "score": -0.353}, {"time": 6, "action": 0, "current status": [0.03, 1.422, 0.38, -0.029, -0.034, -0.085, 0.0, 0.0], "score": -0.481}, {"time": 7, "act

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\Asitente_asst_jBPjpPzklEWz1TYe4sMOxZnG\iteration_1.mp4\rl-video-episode-0.mp4
Número de instantes: 126. Tamaño de logs: 73
episode  1 score -405.068 avg score -3.241


2024-07-04 17:50:41,490 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_FH4fDp5HvchEd7XnQGU95h8x/messages "HTTP/1.1 200 OK"
2024-07-04 17:50:41,492 - INFO - Asistente: asst_4wtHjzk2JkW11VNLz0kIWx7U
2024-07-04 17:50:41,724 - INFO - HTTP Request: GET https://api.openai.com/v1/assistants/asst_4wtHjzk2JkW11VNLz0kIWx7U "HTTP/1.1 200 OK"
2024-07-04 17:50:42,131 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_jIB556bkpr7V8RCKfsbMdBq9/runs "HTTP/1.1 200 OK"
2024-07-04 17:50:42,410 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_jIB556bkpr7V8RCKfsbMdBq9/runs/run_JAcPx9mjK2pqkyxRv7TwKVJo "HTTP/1.1 200 OK"
2024-07-04 17:50:42,415 - INFO - Status: queued
2024-07-04 17:50:42,649 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_jIB556bkpr7V8RCKfsbMdBq9/runs/run_JAcPx9mjK2pqkyxRv7TwKVJo "HTTP/1.1 200 OK"
2024-07-04 17:50:52,652 - INFO - Status: queued
2024-07-04 17:50:52,925 - INFO - HTTP Request: GET https://api.openai.

assistant: [TextContentBlock(text=Text(annotations=[], value='{\n  "reasoning": "The goal is to achieve a successful landing where the score exceeds 200 points. We need to control various aspects of the spacecraft\'s status: its position, velocities, and angles, minimizing the impact of lateral and angular movements. In the initial logs, we see specific actions yielding positive or negative scores. To create a successful landing strategy:\\n1. Minimize horizontal velocity (X velocity) and vertical velocity (Y velocity) to bring them close to zero.\\n2. Keep the spacecraft\'s angle (Angle) within a small range closer to zero and minimize angular velocity (Angular velocity).\\n3. Fire the engines judiciously to reduce velocities while conserving score penalties for engine use.\\n4. Keep horizontal position (X position) near zero as the target is the landing zone (0, 0).\\nSteps to follow:\\n1. If the Y velocity is high (falling too fast), fire the main engines (action 2) to slow descent.

2024-07-04 17:51:06,321 - INFO - Compilación exitosa.        
2024-07-04 17:51:06,322 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 1, "current status": [0.011, 1.42, 0.362, 0.104, -0.007, -0.011, 0.0, 0.0], "score": 1.282}, {"time": 2, "action": 1, "current status": [0.015, 1.421, 0.353, 0.077, -0.006, 0.026, 0.0, 0.0], "score": 1.473}, {"time": 3, "action": 1, "current status": [0.018, 1.423, 0.341, 0.051, -0.002, 0.072, 0.0, 0.0], "score": 1.809}, {"time": 4, "action": 1, "current status": [0.021, 1.423, 0.332, 0.024, 0.003, 0.11, 0.0, 0.0], "score": 1.038}, {"time": 5, "action": 1, "current status": [0.025, 1.423, 0.323, -0.003, 0.011, 0.146, 0.0, 0.0], "score": 0.221}, {"time": 6, "action": 1, "current status": [0.028, 1.422, 0.312, -0.03, 0.02, 0.189, 0.0, 0.0], "score": 0.039}, {"time": 7, "action": 1,

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\Asitente_asst_4wtHjzk2JkW11VNLz0kIWx7U\iteration_1.mp4\rl-video-episode-0.mp4
Número de instantes: 73. Tamaño de logs: 47
episode  1 score -612.950 avg score -8.513


2024-07-04 17:51:06,579 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_jIB556bkpr7V8RCKfsbMdBq9/messages "HTTP/1.1 200 OK"
2024-07-04 17:51:06,581 - INFO - Asistente: asst_hhLnPLWTPq2uXlfKRuX88qXX
2024-07-04 17:51:06,780 - INFO - HTTP Request: GET https://api.openai.com/v1/assistants/asst_hhLnPLWTPq2uXlfKRuX88qXX "HTTP/1.1 200 OK"
2024-07-04 17:51:07,366 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_mmolgscBiTK6UYJ6lVkPivoL/runs "HTTP/1.1 200 OK"
2024-07-04 17:51:07,598 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_mmolgscBiTK6UYJ6lVkPivoL/runs/run_ZEf53qGfAzKHSDfF6KlS1TKE "HTTP/1.1 200 OK"
2024-07-04 17:51:07,602 - INFO - Status: queued
2024-07-04 17:51:07,806 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_mmolgscBiTK6UYJ6lVkPivoL/runs/run_ZEf53qGfAzKHSDfF6KlS1TKE "HTTP/1.1 200 OK"
2024-07-04 17:51:17,811 - INFO - Status: queued
2024-07-04 17:51:18,067 - INFO - HTTP Request: GET https://api.openai.

assistant: [TextContentBlock(text=Text(annotations=[], value='{"reasoning": "The primary goal during landing is to get the spacecraft as close to zero velocity and zero angle upon landing and keep the score from decreasing significantly. Here’s a step-by-step approach:\\n\\n1. **Initial Analysis**:\\n- The ideal scenario for landing is keeping the horizontal velocity (X velocity), vertical velocity (Y velocity), and angle close to zero as the spacecraft approaches the landing zone.\\n- The sensors should indicate that both left and right sides are in contact with the landing zone.\\n\\n2. **Actions and Observations**:\\n- **Action 0**: Switch off engines, generally maintains the status without changes unless there are significant velocity and angle offsets, risking higher velocities due to gravity pulling the spacecraft downwards. Sometimes useful to let the spacecraft stabilize.\\n- **Action 1**: Push left engine, useful to correct right tilt (positive angle) or move left if there\'s 

KeyError: 'code'

#### Function calling.

In [16]:
import traceback, importlib
import Action

Action = importlib.reload(Action)

def create_and_run_llm_loop(Incial_msg, logger, model="gpt-3.5-turbo", num_iterations=10):
    
    agente = AssistantOpenAI(ARCLABKEY_OPENAI)

    # Crea un asistente
    dic_population = init_population(agente, model, Incial_msg, logger, mode='fc', N=3)

    # Bucle de aprendizaje del asistente.
    for i in range(num_iterations):
        
        logger.info(f"Iteration: {i+1}")
        compiled = False
        resultados = [1, float("-inf")]
        # Ejecuta los asistentes al mismo tiempo.
        for asistente_id in dic_population.keys():
            logger.info(f"Asistente: {asistente_id}")
            hilo, msg = dic_population[asistente_id]
            asistente = agente.get_assistant(asistente_id)
            compiled = False
            
            # Si en la presente iteración no se ha compilado el código, se pide al asistente corregir los errores antes de iterar.
            while not compiled:
                # Ejecuta el hilo
                ejecucion = agente.run(hilo.id, asistente.id, tool_choice='required')
                response = agente.get_run(ejecucion.id, hilo.id)

                # Esperamos a que la respuesta esté lista.
                while response.status not in ["completed", "failed", "requires_action"]:
                    logger.info(f"Status: {ejecucion.status}")
                    response = agente.get_run(ejecucion.id, hilo.id)
                    time.sleep(20)

                # Descomponemos los elementos de la respuesta.
                logger.info(f"Status: {response.status}")
                if response.status == "completed":
                    return response
                tool_call = response.required_action.submit_tool_outputs.tool_calls
                print(f"Tool call: {tool_call}")

                # Convierte el string a un diccionario
                code_dict = json.loads(tool_call[0].function.arguments)
                logger.info(f"Arguments: {code_dict}")
                
                # Obtén el código Python de la llamada a la herramienta
                code = code_dict["code"]
                filename = code_dict["filename"]  
                reasoning = code_dict["reasoning"]

                logger.info(f"\nRazonamiento:\n{reasoning}")
                logger.info(f"\nCodigo generado:\n{code}")

                # Ejecuta el código Python
                try:
                    store_code_in_file(reasoning, code, filename)
                    time.sleep(1) # Pequeño retraso para que el sistema operativo pueda reflejar los cambios en el archivo
                    
                    # Hacemos commit de los cambios en el repositorio para analizar las modificaciones del agente.
                    commit_changes(r"C:\Users\adria\Repos\TFM__LLM_landing_self-refinement", f"4o genetic FC. Iteración {i+1}.") 
                    
                    importlib.reload(Action) # Recargamos el módulo de acciones para que se actualice con las modificaciones del agente.

                    # Ejecutamos el código generado.
                    logs = lunar_lander(n_games=1, display=True, recoder=True, video_filename=f"video/Asitente_{asistente_id}/iteration_{i+1}.mp4")
                    json_logs = json.loads(logs)
                    
                    if float(json_logs['total score']) > resultados[1]:
                        resultados = [asistente_id, float(json_logs["total score"]), code, logs]
                    resultados.append(json_logs["total score"])
                    
                    # Devolvemos la respuesta al asistente.
                    for call in tool_call:
                        agente.devolver_respuesta(response.id, hilo.id, tool_outputs=[{"tool_call_id": call.id, "output": "Run successful."}])            
                    compiled = True
                    logger.info(f"Compilación exitosa.")
                    
                    # Esperamos a que el agente esté listo para recibir mensajes y le añadimos el resultado de la iteración.
                    while response.status not in ["completed", "failed", "expired"]:
                        logger.info(f"Status: {response.status}")
                        response = agente.get_run(response.id, hilo.id)
                        time.sleep(20)
                             
                    compiled = True
                    logger.info(f"Compilación exitosa.")
                    
                    msg = f"""These are the logs generated by your last code: {logs}. Analyze the performance of the spacecraft and how it differs from the desired result. Be guided by the scoring system. Identify the cause of errors in your code and modify it without fear of making major changes. Think deeply about the priorities of your code and how to order and combine them correctly to achieve success. Remember that the code is a reflection of your reasoning and the way you approach the problem."""
                    logger.info(msg)    
                    agente.add_message(hilo.id, role="user", content=msg)
                
                # Alimentamos el asistente con el error generado en la ejecución del código.    
                except Exception as e:
                    logger.exception("Error: %s", e)
                    error_trace = traceback.format_exc()
                    for call in tool_call:
                        agente.devolver_respuesta(response.id, hilo.id, tool_outputs=[{"tool_call_id": call.id, "output": "ERROR."}]) 
                    logger.error(f"Error: {e}.")
                    while response.status not in ["completed", "failed", "expired"]:
                        logger.info(f"Status: {response.status}")
                        response = agente.get_run(ejecucion.id, hilo.id)
                        time.sleep(30)
                    msg = f"The code generated has an error. Please, try again. Error: {e}. Trace: {error_trace}"   
                    logger.error(msg)   
                    agente.add_message(hilo.id, role="assistant", content=msg)
        logger.info(f"Mejor resultado: Asistentente {resultados[0]} con puntuación {resultados[1]}")
        # Después de determinar el asistente con la máxima puntuación y antes de finalizar el bucle de iteraciones
        for asistente_id in dic_population.keys():
            if asistente_id != resultados[0]:  # Verifica que no sea el asistente con la máxima puntuación
                hilo, _ = dic_population[asistente_id]
                # Aquí puedes añadir el resultado al asistente que no tiene la máxima puntuación
                # Por ejemplo, suponiendo que tienes una función para añadir mensajes o resultados:
                msg_resultado = f"The best result for the population was {resultados[1]} achieved with the following code: {resultados[2]}\n and the following logs: {resultados[3]}. \nCompare it with yours and draw conclusions. Re-generate the code step by step and think deeply to improve both results."
                agente.add_message(hilo.id, role="user", content=msg_resultado)
        
    
    agente.mostrar_mensajes(hilo.id)  
    vaciar_agente(agente)
    
    logger.info("\nEjecución finalizada.\n\n")       

    return response


def vaciar_agente(agente):
    for assistant in agente.assistants:
        agente.delete_assistant(assistant)

    for thread in agente.threads:
        agente.delete_thread(thread)
        
    print("Asistente vaciado.")

In [17]:
DESCRIPTION = "You are an expert spacecraft landing agent. Your specialty is to take the appropriate action at each instant of time based on the environment and state of the spacecraft. To achieve this, you put your knowledge base and the knowledge you acquire by analyzing each execution into a python function in charge of directing the landing."
INSTRUCTIONS = f"""To complete the task you must follow the following steps and indications:
1. Analyze and reason about the records received in the last landing attempts to learn how the spacecraft behaves in the environment. Keep in mind that conditions may vary.
2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:
    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).
    - Increased/decreased the slower/faster the spacecraft is moving.
    - Decreased the more the spacecraft is tilted.
    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engine.
    - Receive 100 points for a successful landing and lose them for crashing.
    - A succesful landing must exceed 200 points.
3. This is the log schema:\n {log_description}
4. Add your decision code to the 'act' method and locate errors in it if the landing is unsuccessful.
5. Save the code of the act function in the file 'Action.py' using store_code_in_file function.
"""

# 6. Improve your results and correct also any programming error you may have generated in your last code if they exist.
NAME = "Spacecraft Landing Master"

initial_msg = f"This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. You have to be able to learn from it to land successfully with any other conditions. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}.Take a deep breath and reason step-by-step. After reasoning analyze the results, learn and make better code."
logger = configura_log('Spacecraft_4o_genetic_FC.log')
response = create_and_run_llm_loop(initial_msg, logger, model="gpt-4o", num_iterations=5)

print(response)

2024-07-04 15:09:52,911 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07-04 15:09:53,106 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-07-04 15:09:53,374 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_Bi4enb6Ysg1O8zExCXAWPkfi/messages "HTTP/1.1 200 OK"
2024-07-04 15:09:53,520 - INFO - Asistente 1 creado. ID: asst_UX6U0urLzKC4Etm1ldYodysW
2024-07-04 15:09:53,773 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07-04 15:09:53,993 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-07-04 15:09:54,249 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_Pt7jR1Z5xzNuNS9hmjfu1y2c/messages "HTTP/1.1 200 OK"
2024-07-04 15:09:54,391 - INFO - Asistente 2 creado. ID: asst_FKJIQTSp4DfUvNG0AZqxnvcg
2024-07-04 15:09:54,642 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07

Tool call: [RequiredActionFunctionToolCall(id='call_fEj4NA4QVIlV8p6B45hkmS9p', function=Function(arguments='{"reasoning": "1. Analyze Data:\\n- Successful landings show a pattern: reducing vertical and horizontal velocities, maintaining minimal tilt, and using engines judiciously.\\n2. Key Insights:\\n- Excessive tilt or angular velocity should be minimized.\\n- Engines should mostly be used to control descent speed and stabilize position.\\n- Horizontal and vertical speeds must be reduced as they approach minimal values near the ground.\\n3. Strategy Development:\\n- Prioritize stabilizing the spacecraft by balancing horizontal and vertical speeds to avoid high angular velocity and tilt.\\n- Fire engines to control both descent and positional stability, ensuring gradual reduction in speed and tilt.\\n- Implement conditional logic based on velocity, position, and angular status to decide the optimal action.\\n- Continuously fine-tune adjustments.\\n4. Implementation: Integrate above st

                                                              

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\Asitente_asst_UX6U0urLzKC4Etm1ldYodysW\iteration_1.mp4\rl-video-episode-0.mp4
Número de instantes: 112. Tamaño de logs: 66
episode  1 score -1101.679 avg score -9.925


2024-07-04 15:10:41,518 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_Bi4enb6Ysg1O8zExCXAWPkfi/runs/run_1Sz3fabp3qcEUVvzCZtdmbsV/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-04 15:10:41,521 - INFO - Compilación exitosa.
2024-07-04 15:10:41,522 - INFO - Status: requires_action
2024-07-04 15:10:41,737 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_Bi4enb6Ysg1O8zExCXAWPkfi/runs/run_1Sz3fabp3qcEUVvzCZtdmbsV "HTTP/1.1 200 OK"
2024-07-04 15:11:01,740 - INFO - Status: in_progress
2024-07-04 15:11:02,076 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_Bi4enb6Ysg1O8zExCXAWPkfi/runs/run_1Sz3fabp3qcEUVvzCZtdmbsV "HTTP/1.1 200 OK"
2024-07-04 15:11:22,080 - INFO - Compilación exitosa.
2024-07-04 15:11:22,082 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 2, "current status": [0.007, 1.418, 0.377, 0.172, -0.009, -0.089, 0.0, 0.0], "score": -1.45}, {"time": 1, "action": 2, 

Tool call: [RequiredActionFunctionToolCall(id='call_xYJGsEPmLBzDe37hYoTEyBca', function=Function(arguments='{"reasoning": "First, we need to analyze the successful landing logs to extract patterns and critical conditions that lead to a successful landing. We\'ll implement this learning in the \'act\' function.\\n\\n1. **Stability and Speed Control**: The logs indicate that control actions significantly influence the horizontal and vertical speeds. For instance, vertical speed (Y velocity) was carefully managed by pushing both engines (action=2) frequently. Horizontal speed (X velocity) was kept minimal and slight adjustments were made using left and right engines (actions 1 and 3).\\n\\n2. **Angle Management**: Constant adjustments to the angle were made. Keeping the angle close to 0 degrees (vertical) is crucial. The score significantly drops with large angles and angular velocities.\\n\\n3. **Altitude Management**: The previous logs show that the Y position was gradually decreased, i

                                                              

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\Asitente_asst_FKJIQTSp4DfUvNG0AZqxnvcg\iteration_1.mp4\rl-video-episode-0.mp4
Número de instantes: 117. Tamaño de logs: 69
episode  1 score -1157.908 avg score -9.982


2024-07-04 15:12:07,428 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_Pt7jR1Z5xzNuNS9hmjfu1y2c/runs/run_dQnCZE6Z47fFM8k3YH6uLMnz/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-04 15:12:07,430 - INFO - Compilación exitosa.
2024-07-04 15:12:07,431 - INFO - Status: requires_action
2024-07-04 15:12:07,637 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_Pt7jR1Z5xzNuNS9hmjfu1y2c/runs/run_dQnCZE6Z47fFM8k3YH6uLMnz "HTTP/1.1 200 OK"
2024-07-04 15:12:27,640 - INFO - Status: in_progress
2024-07-04 15:12:27,889 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_Pt7jR1Z5xzNuNS9hmjfu1y2c/runs/run_dQnCZE6Z47fFM8k3YH6uLMnz "HTTP/1.1 200 OK"
2024-07-04 15:12:47,892 - INFO - Compilación exitosa.
2024-07-04 15:12:47,892 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 2, "current status": [0.007, 1.418, 0.377, 0.172, -0.009, -0.089, 0.0, 0.0], "score": -1.45}, {"time": 1, "action": 2, 

Tool call: [RequiredActionFunctionToolCall(id='call_sa5p6x5LVMeLiF4hIdOAlHD8', function=Function(arguments='{"reasoning":"First, we analyze the logs of the successful landing to uncover some patterns. Here are some key observations:\\n\\n1. **Altitude Control**: The agent frequently fires both engines to control vertical speed, especially during the descent phase. This is evident from the repeated calls to action 2.\\n\\n2. **Horizontal and Angular Correction**: The agent occasionally fires the left or right engines (action 1 or 3) to correct horizontal position and angle. The corrections become more frequent when the spacecraft’s horizontal velocity or tilt becomes significant.\\n\\n3. **Engine Control Based on Velocity**: The agent prefers to stabilize the velocities (both horizontal and vertical) through precise engine control. For example, when the horizontal speed is low and manageable, actions to correct the angle are performed to ensure minimal tilt.\\n\\nBased on these observat

2024-07-04 15:13:32,261 - ERROR - Error: expected an indented block after 'if' statement on line 9 (Action.py, line 10)
Traceback (most recent call last):
  File "C:\Users\adria\AppData\Local\Temp\ipykernel_20920\4028744065.py", line 65, in create_and_run_llm_loop
    importlib.reload(Action) # Recargamos el módulo de acciones para que se actualice con las modificaciones del agente.
    ^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\adria\anaconda3\Lib\importlib\__init__.py", line 169, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 621, in _exec
  File "<frozen importlib._bootstrap_external>", line 936, in exec_module
  File "<frozen importlib._bootstrap_external>", line 1074, in get_code
  File "<frozen importlib._bootstrap_external>", line 1004, in source_to_code
  File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
  File "c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\Action.py", line 10
    return 2 # fire both 

Tool call: [RequiredActionFunctionToolCall(id='call_1fHgEaU893cK4LuUR177BvL1', function=Function(arguments='{"reasoning":"First, we analyze the logs of the successful landing to uncover some patterns. Here are some key observations:\\n\\n1. **Altitude Control**: The agent frequently fires both engines to control vertical speed, especially during the descent phase. This is evident from the repeated calls to action 2.\\n\\n2. **Horizontal and Angular Correction**: The agent occasionally fires the left or right engines (action 1 or 3) to correct horizontal position and angle. The corrections become more frequent when the spacecraft’s horizontal velocity or tilt becomes significant.\\n\\n3. **Engine Control Based on Velocity**: The agent prefers to stabilize the velocities (both horizontal and vertical) through precise engine control. For example, when the horizontal speed is low and manageable, actions to correct the angle are performed to ensure minimal tilt.\\n\\nBased on these observat

                                                              

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\Asitente_asst_LqyJxwvI8l3UmBR9UXptFG2I\iteration_1.mp4\rl-video-episode-0.mp4




Número de instantes: 108. Tamaño de logs: 64
episode  1 score -643.809 avg score -6.017


2024-07-04 15:24:24,284 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_i1hvZbysSwEZuEt8VwO9sy6R/runs/run_YRXNUTrrcMTWeDft1RTTCzz2/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-04 15:24:24,286 - INFO - Compilación exitosa.
2024-07-04 15:24:24,286 - INFO - Status: requires_action
2024-07-04 15:24:24,527 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_i1hvZbysSwEZuEt8VwO9sy6R/runs/run_YRXNUTrrcMTWeDft1RTTCzz2 "HTTP/1.1 200 OK"
2024-07-04 15:24:44,533 - INFO - Status: in_progress
2024-07-04 15:24:44,834 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_i1hvZbysSwEZuEt8VwO9sy6R/runs/run_YRXNUTrrcMTWeDft1RTTCzz2 "HTTP/1.1 200 OK"
2024-07-04 15:25:04,838 - INFO - Compilación exitosa.
2024-07-04 15:25:04,839 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 2, "current status": [0.007, 1.418, 0.377, 0.172, -0.009, -0.089, 0.0, 0.0], "score": -1.45}, {"time": 1, "action": 2, 

Tool call: [RequiredActionFunctionToolCall(id='call_eI4bDZXLz6MYnsD1saUUSTZG', function=Function(arguments='{"reasoning":"The new approach of the previous user showcases a simpler decision-making process, focusing primarily on correcting any angular deviations and controlling vertical and horizontal speeds. Here are the steps to follow to craft a further improved strategy:\\n\\n1. **Adjust Vertical Speed**: Prioritize reducing the vertical speed if it exceeds the safe threshold value, which is crucial to land softly.\\n\\n2. **Correct Angles**: Adjust angles if they deviate. Correct the angle using left and right engines to keep the spacecraft upright.\\n\\n3. **Control Horizontal Speed**: Maintain horizontal speed within a safe range to avoid large lateral movements which could lead to a crash.\\n\\n4. **Idle When Stable**: Turn off engines if the spacecraft is stable (velocities and angles are within safe thresholds).\\n\\n5. **Additional Checks for Extreme Values**: Introduce checks

                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\Asitente_asst_UX6U0urLzKC4Etm1ldYodysW\iteration_2.mp4\rl-video-episode-0.mp4
Número de instantes: 91. Tamaño de logs: 56
episode  1 score -671.640 avg score -7.463


2024-07-04 15:25:51,838 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_Bi4enb6Ysg1O8zExCXAWPkfi/runs/run_3GuBXX5Jq9tnqM7BlbUU0Kw4/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-04 15:25:51,841 - INFO - Compilación exitosa.
2024-07-04 15:25:51,842 - INFO - Status: requires_action
2024-07-04 15:25:52,134 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_Bi4enb6Ysg1O8zExCXAWPkfi/runs/run_3GuBXX5Jq9tnqM7BlbUU0Kw4 "HTTP/1.1 200 OK"
2024-07-04 15:26:12,139 - INFO - Status: in_progress
2024-07-04 15:26:12,416 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_Bi4enb6Ysg1O8zExCXAWPkfi/runs/run_3GuBXX5Jq9tnqM7BlbUU0Kw4 "HTTP/1.1 200 OK"
2024-07-04 15:26:32,418 - INFO - Compilación exitosa.
2024-07-04 15:26:32,420 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 1, "

Tool call: [RequiredActionFunctionToolCall(id='call_oLmcdvPsvCTUJa7QmRhgbThN', function=Function(arguments='{"reasoning":"The code with the result -643.809 is simpler and deals primarily with thresholds for vertical speed, horizontal speed, and the angular deviation. This simpler approach is providing a much better result than the overly complicated multiple criteria checks in the previous code.\\n\\nBelow are some issues observed in the previous code and steps taken to fix them:\\n\\n1. **Excessive Firing of Both Engines (Action 2)**: The previous code frequently activated both engines, which quickly depleted the score. We need to be more conservative with this action.\\n\\n2. **Neglecting Horizontal Speed Control in Your Code**: Our prior code did not well handle scenarios where horizontal drift is minimal but needs fine adjustments for stabilization near landing. The updated code will handle this better.\\n\\n3. **Simplified Criteria**: The multiple conditional checks cause overlaps

                                                              

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\Asitente_asst_FKJIQTSp4DfUvNG0AZqxnvcg\iteration_2.mp4\rl-video-episode-0.mp4




Número de instantes: 108. Tamaño de logs: 64
episode  1 score -643.809 avg score -6.017


2024-07-04 15:27:17,655 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_Pt7jR1Z5xzNuNS9hmjfu1y2c/runs/run_d4KEbNPwFPoF9Ta21wv6uU6K/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-04 15:27:17,657 - INFO - Compilación exitosa.
2024-07-04 15:27:17,658 - INFO - Status: requires_action
2024-07-04 15:27:17,897 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_Pt7jR1Z5xzNuNS9hmjfu1y2c/runs/run_d4KEbNPwFPoF9Ta21wv6uU6K "HTTP/1.1 200 OK"
2024-07-04 15:27:37,913 - INFO - Status: queued
2024-07-04 15:27:38,261 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_Pt7jR1Z5xzNuNS9hmjfu1y2c/runs/run_d4KEbNPwFPoF9Ta21wv6uU6K "HTTP/1.1 200 OK"
2024-07-04 15:27:58,266 - INFO - Compilación exitosa.
2024-07-04 15:27:58,269 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 2, "current status": [0.007, 1.418, 0.377, 0.172, -0.009, -0.089, 0.0, 0.0], "score": -1.45}, {"time": 1, "action": 2, "curr

Tool call: [RequiredActionFunctionToolCall(id='call_iB63eSRayG3gx6pD2c1fXVmw', function=Function(arguments='{"reasoning":"From the logs, it\'s evident that the new policy fired both engines too frequently, causing excessive fuel consumption and poor control over the spacecraft\'s velocity and position. The spacecraft\'s vertical and horizontal velocities increased continuously, leading to high speeds that are detrimental to a successful landing.\\n\\nKey issues identified:\\n1. **Excessive Engine Firing**: Continuously firing both engines led to uncontrollable speeds and high fuel consumption.\\n2. **Improper Handling of Horizontal Velocity and Angle**: The spaceship\'s angle and horizontal velocity were not adequately controlled, affecting the landing stability.\\n\\nUpdated Approach:\\n1. Implement a dynamic control strategy that adjusts actions based on the current state, focusing on gradual adjustments rather than continuous engine firing.\\n2. Add conditions to handle edge cases, 

                                                              

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\Asitente_asst_LqyJxwvI8l3UmBR9UXptFG2I\iteration_2.mp4\rl-video-episode-0.mp4
Número de instantes: 108. Tamaño de logs: 64
episode  1 score -643.809 avg score -6.017


2024-07-04 15:29:04,054 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_i1hvZbysSwEZuEt8VwO9sy6R/runs/run_833MG8mx7lrs7pLiQkfUMnoB/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-04 15:29:04,057 - INFO - Compilación exitosa.
2024-07-04 15:29:04,058 - INFO - Status: requires_action
2024-07-04 15:29:04,312 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_i1hvZbysSwEZuEt8VwO9sy6R/runs/run_833MG8mx7lrs7pLiQkfUMnoB "HTTP/1.1 200 OK"
2024-07-04 15:29:24,315 - INFO - Status: queued
2024-07-04 15:29:24,616 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_i1hvZbysSwEZuEt8VwO9sy6R/runs/run_833MG8mx7lrs7pLiQkfUMnoB "HTTP/1.1 200 OK"
2024-07-04 15:29:44,620 - INFO - Compilación exitosa.
2024-07-04 15:29:44,620 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 2, "current status": [0.007, 1.418, 0.377, 0.172, -0.009, -0.089, 0.0, 0.0], "score": -1.45}, {"time": 1, "action": 2, "curr

Tool call: [RequiredActionFunctionToolCall(id='call_5wXHUQeOgxZNFGpUkkWarED0', function=Function(arguments='{"reasoning":"Both the best population code and my previous approach focus on controlling vertical speed, horizontal speed, and angle. However, my previous code introduced additional checks which did not perform as intended under the given constraints. Let\'s refine the code by taking the following steps:\\n\\n1. **Vertical Speed Control**: Continue prioritizing vertical speed but finetune the threshold.\\n2. **Horizontal Speed Control**: React to horizontal speeds that exceed the threshold, avoiding random oscillations due to small deviations.\\n3. **Angle Control**: Adjust the spacecraft\'s angle, ensuring it remains close to vertical.\\n4. **Iterative Adjustments**: Fine-tune the thresholds and implement practical checks to optimize the landing process further while avoiding unnecessary actions.\\n\\nFinal steps will consolidate these priorities.","code":"import numpy as np\\n

                                                              

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\Asitente_asst_UX6U0urLzKC4Etm1ldYodysW\iteration_3.mp4\rl-video-episode-0.mp4
Número de instantes: 108. Tamaño de logs: 64
episode  1 score -643.809 avg score -6.017


2024-07-04 15:30:30,368 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_Bi4enb6Ysg1O8zExCXAWPkfi/runs/run_KG19EndREk2xyZ10n0EYdYgV/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-04 15:30:30,370 - INFO - Compilación exitosa.
2024-07-04 15:30:30,371 - INFO - Status: requires_action
2024-07-04 15:30:30,631 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_Bi4enb6Ysg1O8zExCXAWPkfi/runs/run_KG19EndREk2xyZ10n0EYdYgV "HTTP/1.1 200 OK"
2024-07-04 15:30:50,633 - INFO - Status: in_progress
2024-07-04 15:30:50,970 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_Bi4enb6Ysg1O8zExCXAWPkfi/runs/run_KG19EndREk2xyZ10n0EYdYgV "HTTP/1.1 200 OK"
2024-07-04 15:31:10,972 - INFO - Compilación exitosa.
2024-07-04 15:31:10,973 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 2, "current status": [0.007, 1.418, 0.377, 0.172, -0.009, -0.089, 0.0, 0.0], "score": -1.45}, {"time": 1, "action": 2, 

Tool call: [RequiredActionFunctionToolCall(id='call_ToCWXkpZZXPkBz3aUtRkW5kq', function=Function(arguments='{"reasoning":"The previous scoring logs show that the spacecraft was rising instead of descending. This happened because the engines were activated, increasing the Y position until the spacecraft crashed. This reversal in the upward direction is undesirable. Here are some changes and improvements to address this situation:\\n\\n1. **Control Vertical Acceleration with Less Aggressive Engine Use**: The engines were too aggressively firing both engines (action 2) in previous attempts; this needs to be more controlled. We\'ll use the vertical speed and vertical position more effectively to decide when to fire engines.\\n\\n2. **Include Horizontal Position Management**: We have previously neglected the control over horizontal positions. We should add constraints on the X position to navigate towards the vertical axis over the landing zone when close to the ground.\\n\\n3. **Combinatio

                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\Asitente_asst_FKJIQTSp4DfUvNG0AZqxnvcg\iteration_3.mp4\rl-video-episode-0.mp4
Número de instantes: 73. Tamaño de logs: 47
episode  1 score -618.694 avg score -8.593


2024-07-04 15:31:56,033 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_Pt7jR1Z5xzNuNS9hmjfu1y2c/runs/run_qbK11cA4sFC9mYMWFBRFqKEt/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-04 15:31:56,037 - INFO - Compilación exitosa.
2024-07-04 15:31:56,038 - INFO - Status: requires_action
2024-07-04 15:31:56,240 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_Pt7jR1Z5xzNuNS9hmjfu1y2c/runs/run_qbK11cA4sFC9mYMWFBRFqKEt "HTTP/1.1 200 OK"
2024-07-04 15:32:16,243 - INFO - Status: in_progress
2024-07-04 15:32:16,501 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_Pt7jR1Z5xzNuNS9hmjfu1y2c/runs/run_qbK11cA4sFC9mYMWFBRFqKEt "HTTP/1.1 200 OK"
2024-07-04 15:32:36,506 - INFO - Compilación exitosa.
2024-07-04 15:32:36,506 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 1, "

Tool call: [RequiredActionFunctionToolCall(id='call_PfpF2HNcszFI82BxNC6ENowE', function=Function(arguments='{"reasoning":"Comparing the provided code and the updated approach that yielded -643.809, we notice both versions tend to over-use the central engines, causing unintended acceleration and fuel consumption. The crafted logic did not properly balance the demands, suggesting the need for incremental adjustments rather than abrupt, excessive corrections.\\n\\n#### Conclusions:\\n1. **Over-Reaction**: Both codes quickly resort to firing both engines, escalating speed rather than fire engines proportionally.\\n2. **Lack of Proportionality**: There\'s no consideration for the magnitude of velocity or angle; it’s a binary all-or-nothing approach.\\n3. **Prioritization Frequency**: Crucial balancing decisions weren\'t frequent enough to accommodate dynamic changes in environmental conditions.\\n4. **Progressive Adjustment**: Introduce adjustments more gradually and variably to better mimi

                                                              

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\Asitente_asst_LqyJxwvI8l3UmBR9UXptFG2I\iteration_3.mp4\rl-video-episode-0.mp4




Número de instantes: 108. Tamaño de logs: 64
episode  1 score -643.809 avg score -6.017


2024-07-04 15:33:22,043 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_i1hvZbysSwEZuEt8VwO9sy6R/runs/run_5eEcD1LPirqXxRa2LJTOa0Bi/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-04 15:33:22,046 - INFO - Compilación exitosa.
2024-07-04 15:33:22,047 - INFO - Status: requires_action
2024-07-04 15:33:22,287 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_i1hvZbysSwEZuEt8VwO9sy6R/runs/run_5eEcD1LPirqXxRa2LJTOa0Bi "HTTP/1.1 200 OK"
2024-07-04 15:33:42,290 - INFO - Status: in_progress
2024-07-04 15:33:42,588 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_i1hvZbysSwEZuEt8VwO9sy6R/runs/run_5eEcD1LPirqXxRa2LJTOa0Bi "HTTP/1.1 200 OK"
2024-07-04 15:34:02,592 - INFO - Compilación exitosa.
2024-07-04 15:34:02,593 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 2, "current status": [0.007, 1.418, 0.377, 0.172, -0.009, -0.089, 0.0, 0.0], "score": -1.45}, {"time": 1, "action": 2, 

Tool call: [RequiredActionFunctionToolCall(id='call_EMfwbk1lc4rYslUJjp4gYlQV', function=Function(arguments='{"reasoning":"This population code introduces checks for close-to-ground conditions, corrects for x- and y-positions, and includes final checks for safe landing which simplifies and supports better control under various conditions. To further improve performance, the new code will incorporate these strategies using these steps:\\n\\n1. **Close-to-Ground Checks**: Implement checks for vertical speed only if the spacecraft is nearing the ground to prevent overshooting.\\n2. **Horizontal Speed Control**: Employ corrections based on the current horizontal velocity, pushing engines in the opposite direction to dampen drifts.\\n3. **Angle Correction**: Adjust angles only when they deviate notably from vertical to limit the spacecraft\'s tilt.\\n4. **Position Control**: Use position thresholds to engage engines, ensuring the spacecraft maintains a safe descent path.\\n5. **Safe Landing 

                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\Asitente_asst_UX6U0urLzKC4Etm1ldYodysW\iteration_4.mp4\rl-video-episode-0.mp4
Número de instantes: 73. Tamaño de logs: 47
episode  1 score -618.694 avg score -8.593


2024-07-04 15:34:48,315 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_Bi4enb6Ysg1O8zExCXAWPkfi/runs/run_imPCrel5q4jA6miiRVqWpJdt/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-04 15:34:48,317 - INFO - Compilación exitosa.
2024-07-04 15:34:48,318 - INFO - Status: requires_action
2024-07-04 15:34:48,646 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_Bi4enb6Ysg1O8zExCXAWPkfi/runs/run_imPCrel5q4jA6miiRVqWpJdt "HTTP/1.1 200 OK"
2024-07-04 15:35:08,650 - INFO - Status: in_progress
2024-07-04 15:35:08,957 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_Bi4enb6Ysg1O8zExCXAWPkfi/runs/run_imPCrel5q4jA6miiRVqWpJdt "HTTP/1.1 200 OK"
2024-07-04 15:35:28,959 - INFO - Compilación exitosa.
2024-07-04 15:35:28,961 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 1, "

Tool call: [RequiredActionFunctionToolCall(id='call_b7PItz7NSXoT4q6T40wel6Ex', function=Function(arguments='{"reasoning":"From the last logs, we can see the spacecraft was constantly firing the left engine, leading to an increasing angle and losing control eventually. This indicates the logic for engine firing when stabilizing angle needs to be rethought.\\n\\n### Issues Identified:\\n1. **Overuse of Action 1 (Left Engine)**: The spacecraft used the left engine excessively to correct the angle, growing progressively larger.\\n2. **Lack of Correction for Right Angles**: The code doesn\'t switch back to action 0 or action 3 efficiently.\\n3. **Improper Descent Management**: The code applies excessive vertical thrust, leading to uncontrollable states.\\n\\n### Solution Approach:\\n1. **Horizontal and Angle Stability**: Addition of more checks and balances based on both angles and horizontal speed, and ensuring right and left engines are not overused sequentially.\\n2. **Control Descent:**

                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\Asitente_asst_FKJIQTSp4DfUvNG0AZqxnvcg\iteration_4.mp4\rl-video-episode-0.mp4
Número de instantes: 91. Tamaño de logs: 56
episode  1 score -705.548 avg score -7.839


2024-07-04 15:36:14,425 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_Pt7jR1Z5xzNuNS9hmjfu1y2c/runs/run_yjrPjgIkf4FR5tsQx1hmTPCm/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-04 15:36:14,427 - INFO - Compilación exitosa.
2024-07-04 15:36:14,427 - INFO - Status: requires_action
2024-07-04 15:36:14,649 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_Pt7jR1Z5xzNuNS9hmjfu1y2c/runs/run_yjrPjgIkf4FR5tsQx1hmTPCm "HTTP/1.1 200 OK"
2024-07-04 15:36:34,652 - INFO - Status: in_progress
2024-07-04 15:36:34,965 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_Pt7jR1Z5xzNuNS9hmjfu1y2c/runs/run_yjrPjgIkf4FR5tsQx1hmTPCm "HTTP/1.1 200 OK"
2024-07-04 15:36:54,969 - INFO - Compilación exitosa.
2024-07-04 15:36:54,970 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 1, "

Tool call: [RequiredActionFunctionToolCall(id='call_z7ZHc2b4QxHRUGzMZ96MmJIe', function=Function(arguments='{"reasoning":"Comparing the provided code (achieved -618.694) with our most recent iteration, several distinct strategies and criteria emerge. Notably, the best-performing code introduces checks to modulate engine firing, leveraging position proximity to the ground and effective thrust adjustments on yaw (balance conditions). Here’s an analysis of differences and a guided blend to further refine performance.\\n\\n#### Key Insights from Comparison:\\n1. **Positional Awareness**: The optimal code uses `y_position` astutely, firing engines based on proximity to the ground—a crucial insight absent from previous versions.\\n2. **Balanced Controls**: The optimal code combines xy-velocity and angle checks with `if-else` prioritization—allowing smoother control, unlike the all-or-nothing blanket thrusting approach resulting in minimal impact.\\n3. **Vertical Suppression**: The superior c

                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\Asitente_asst_LqyJxwvI8l3UmBR9UXptFG2I\iteration_4.mp4\rl-video-episode-0.mp4
Número de instantes: 73. Tamaño de logs: 47
episode  1 score -618.694 avg score -8.593


2024-07-04 15:38:01,541 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_i1hvZbysSwEZuEt8VwO9sy6R/runs/run_sHu4MjSQmH3g33bwk1crRfE3/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-04 15:38:01,544 - INFO - Compilación exitosa.
2024-07-04 15:38:01,545 - INFO - Status: requires_action
2024-07-04 15:38:01,771 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_i1hvZbysSwEZuEt8VwO9sy6R/runs/run_sHu4MjSQmH3g33bwk1crRfE3 "HTTP/1.1 200 OK"
2024-07-04 15:38:21,776 - INFO - Status: in_progress
2024-07-04 15:38:22,051 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_i1hvZbysSwEZuEt8VwO9sy6R/runs/run_sHu4MjSQmH3g33bwk1crRfE3 "HTTP/1.1 200 OK"
2024-07-04 15:38:42,055 - INFO - Compilación exitosa.
2024-07-04 15:38:42,056 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 1, "

Tool call: [RequiredActionFunctionToolCall(id='call_WIeU9n0cquB5owszXt63rYCf', function=Function(arguments='{"reasoning":"To improve from the last attempt and achieve better results, we need to adjust the priorities and refine thresholds with specific conditions gained from comparison and iteration.\\n\\nKey insights from comparing population logs:\\n1. The past logs showed a constant downward vertical speed, resulting in a heavier landing and crashes. We need to ensure slower descent near the ground.\\n2. Horizontally drift should be corrected better when the descent control is in action.\\n3. Timing of angle corrections and engine use is crucial for stability.\\n\\nUpdated Strategy:\\n1. **Stabilize Descent Speed**: Reduce downwards speed more effectively in the final approach towards the ground, with stricter and dynamic vertical speed controls.\n2. **Horizontal and Angular Stability**: Tightening thresholds for x-velocity and the angle deviations, ensuring continuous position check

JSONDecodeError: Invalid control character at: line 1 column 745 (char 744)

## Tasks
- Corregir rotación.
- Cantidad de logs.
- Partir de ejemplo exitoso para afinarlo. (y generalizar)
- Incluir take step back.
- Probar con 2 modelos. Uno encargado del razonamiento y otro de codificar. (O uno que retroalimente al otro con las conclusiones obtenidas)