#### Imports

In [1]:
import time
from openai import OpenAI
from Assistant import AssistantOpenAI
import logging
import os
import json
import gymnasium as gym
import random
import matplotlib.pyplot as plt
import os
import time
from git import Repo

import numpy as np
#from collections import deque, namedtuple

# For visualization
import gymnasium.wrappers.record_video as record_video

#### Parameters

In [2]:
num_episodes = 200
ENV_NAME = 'LunarLander-v2'
ARCLABKEY_OPENAI = "sk-proj-DvHDR3hpgbm2r3kCA9jKT3BlbkFJL57ABXkfaWAIYKBxdhM6"
ARCLABKEY_OPENAI = "sk-proj-GvaDXazpibWA2M1I5Pu2T3BlbkFJxDuKlr9AcoVG98ctJZ7Q"

#### Function calling.

Función encargada de almacenar el código generado mediante la opción de function calling del asistente de OpenAI.

In [3]:
def store_code_in_file(reasoning, code, filename,):
    """ Store code in a file
    
    Args:
        reasoning: str: Pre-coding reasoning on log analysis and the influence of actions on observations and steps to follow to achieve a successful landing.
        code: str: code to store. Use the reasoning parameter to generate this code.
        filename: str: filename to store code in
    
    Returns:
        None
    """
    print(f"Model reasoning: {reasoning}")
    with open(filename, 'w') as f:
        f.write(code)
        
store_code_in_file_schema = {
    "name": "store_code_in_file",
    "description": "Store code in a file",
    "parameters": {
        "type": "object",
        "properties": {
            "reasoning": {
                "type": "string",
                "description": "Pre-coding deep reasoning on log analysis and the influence of actions on observations and steps to follow to achieve a successful landing. It is an expnasion but not the code itself."
            },
            "code": {
                "type": "string",
                "description": "The Python code to store. Use the reasoning parameter to generate this code."
            },
            "filename": {
                "type": "string",
                "description": "The filename to store the code in."
            }
        },
        "required": ["code", "filename"]
    }
}

TOOLS = [{"type": "function", "function": store_code_in_file_schema}]

#### Entorno Lunnar Lander

In [4]:
env = gym.make('LunarLander-v2')
print('State shape: ', env.observation_space.shape)
print('Number of actions: ', env.action_space.n)

State shape:  (8,)
Number of actions:  4


#### Código inicial

In [5]:
agent_initial_code = """
import random

def act(observation):
    '''
    The function that codifies the action to be taken in each instant of time.

    Args:
        observation (numpy.array):
            "description": "The state of the environment after the action is taken.",
            "positions": {  
                "0": "X position",
                "1": "Y position",
                "2": "X velocity",
                "3": "Y velocity",
                "4": "Angle",
                "5": "Angular velocity",
                "6": "Left contact sensor",
                "7": "Right contact sensor"
            },
            "min_values": [-1.5, -1.5, -5.0, -5.0, -3.14, -5.0, 0, 0],
            "max_values": [1.5, 1.5, 5.0, 5.0, 3.14, 5.0, 1, 1]

    Returns:
        Integer  : The action to be taken.
        "options": {
                '0' : "Switch off engines",
                '1' : "Push left engine",
                '2' : "Push both engines (upwards)",
                '3' : "Push right engine"
            }
    '''
    return random.randint(0, 3)
"""

#### Mapeador de logs a JSON

In [6]:
log_description = {
    "description": "Log data for each step of the spacecraft landing environment.",
    "landing attempt": {
        "type": "integer",
        "description": "The episode number."
    },
    "logs": {
        "time": {
            "type": "integer",
            "description": "The instant within the landing attempt where the current log is taken."
        },
        "action": {
            "type": "integer",
            "description": "The possible actions taken.",
            "options": {
                '0' : "Switch off engines",
                '1' : "Push left engine",
                '2' : "Push both engines (upwards)",
                '3' : "Push right engine"
            }
        },  
        "current status": {
            "type": "array",
            "description": "The state of the environment after the action is taken.",
            "positions": {  
                "0": {
                    "name": "X position",
                    "description": "The horizontal position of the spacecraft relative to the landing zone."
                },
                "1": {
                    "name": "Y position",
                    "description": "The vertical position of the spacecraft relative to the landing zone."
                },
                "2": {
                    "name": "X velocity",
                    "description": "The horizontal velocity of the spacecraft."
                },
                "3": {
                    "name": "Y velocity",
                    "description": "The vertical velocity of the spacecraft."
                },
                "4": {
                    "name": "Angle",
                    "description": "The angle of the spacecraft relative to the vertical (left negative, right positive)."
                },
                "5": {
                    "name": "Angular velocity",
                    "description": "The rate of change of the angle of the spacecraft."
                },
                "6": {
                    "name": "Left contact sensor with landing zone",
                    "description": "Indicates whether the left side of the spacecraft is in contact with the landing zone."
                },
                "7": {
                    "name": "Right contact sensor with landing zone",
                    "description": "Indicates whether the right side of the spacecraft is in contact with the landing zone."
                }
            },
            "min_values": [-1.5, -1.5, -5.0, -5.0, -3.14, -5.0, 0, 0],
            "max_values": [1.5, 1.5, 5.0, 5.0, 3.14, 5.0, 1, 1],
            
        }, 
        "score": {
            "type": "number",
            "description": "The score received for the action."
        },  
        "completed": {
            "type": "boolean",
            "description": "Whether the landing event has ended (landing or accident)."
        }
    },
    "total score": {
        "type": "number",
        "description": "The total score received for the landing attempt."
    }
}

#### Esquema JSON MODE

In [7]:
json_shcema_response = {
    "reasoning": {
        "type": "string",
        "description": "Pre-coding deep reasoning on log analysis and the influence of actions on observations and steps to follow to achieve a successful landing. It is an expnasion but not the code itself."
    },
    "code": {
        "type": "string",
        "description": "The Python code to store. Use the reasoning parameter to generate this code."
    },
    "filename": {
        "type": "string",
        "description": "The filename to store the code in (Action.py)."
    }
}

#### LOGS

In [8]:
import json

def format_step_data(instant, action, next_state, reward, terminated):
    """ Format the step data into a JSON string
    
    Args:
        instant: int: the instant within the landing attempt where the current log is taken
        action: int: the action taken
        next_state: np.array: the next state of the environment
        reward: float: the reward received
        terminated: bool: whether the landing event has ended
        
    Returns:
        str: the step data formatted as a JSON string
    """
    # Convertir el array numpy a una lista
    next_state_list = next_state.tolist()

    # Redondear los elementos de la lista a 4 decimales
    next_state_list_rounded = [round(x, 3) for x in next_state_list]

    step_data = {
        'time': instant,
        'action' : int(action),
        'current status': next_state_list_rounded,  # Convert numpy array to list
        'score': round(reward, 3),
        #'completed': terminated,
        #'truncated': truncated
        #'info': info
    }

    # Convert the dictionary to a JSON string
    #step_data_json = json.dumps(step_data)

    return step_data


def format_episode_logs(logs, episode, total_score):
    """ Format the logs into a JSON string
    
    Args:
        logs: list: the logs for each step of the environment
        episode: int: the episode number
        
    Returns:
        str: the logs formatted as a JSON string
    """
    logs_data = {
        'landing attempt': episode,
        'logs': logs,
        'total score': round(total_score, 3)
    }

    # Convert the dictionary to a JSON string
    logs_json = json.dumps(logs_data)

    return logs_json

#### Commit changes.

In [9]:
def commit_changes(repo_path, commit_message):
    """ Commit changes to the repository.

    Args:
        repo_path (str): Path to the repository.
        commit_message (str): The commit message.
    """
    repo = Repo(repo_path)
    repo.git.add(update=True)
    repo.index.commit(commit_message)
    origin = repo.remote(name='origin')
    origin.push()

#### Código encargado de ejecutar los eventos en el entorno.

In [10]:
import Action

def lunar_lander(max_t=1000, n_games=1, display=False, seed=38, agent=None, recoder=False, video_filename='video/video.mp4'):
    """ Run the Lunar Lander environment
    
    Args:
        max_t: int: the maximum number of timesteps
        n_games: int: the number of games to play
        display: bool: whether to display the environment
        seed: int: the seed for the environment
        agent: object: the agent to use
        recoder: bool: whether to record the video
        video_filename: str: the filename for the video
        
    Returns:
        str: the logs for the landing attempts
    """
    
    # Incluimos la opción del entorno gráfico y la de grabación.
    if display:
        env = gym.make(ENV_NAME, render_mode='human')
        if recoder:
            env = gym.make(ENV_NAME, render_mode='rgb_array')
            env.reset() 
            video_recorder = record_video.RecordVideo(env, video_filename)
    else:
        env = gym.make(ENV_NAME)

    # Bucle principal de ejecución de los episodios.
    logs = []
    for episode in range(1, n_games+1):
        # Revisamos que la semilla sea un número entero o una lista de semillas.
        if isinstance(seed, list):
            semilla = seed[episode-1]
        else:
            semilla = seed
        print(f"Semilla {semilla}")    
        state = env.reset(seed=semilla) # Set a seed for the environment
        state = state[0] # Eliminamos el diccionario vacio y dejamos unicamente el estado de 8 elementos.
        score = 0
        instant = 0
        episode_actions = []
        if recoder: # En caso de que se quiera grabar el video, se inicia el grabador.
            video_recorder.start_video_recorder()
            
        for i in range(max_t): # Bucle de ejecución de los instantes de tiempo.
            
            # Seleccionamos el método de elegir la acción (agente exitoso o método de decisión del asistente)
            if agent: 
                action = agent.act(state)
            else:
                action = Action.act(state)
            
            # Avanzamos un instante de tiempo en el entorno en función de si se quiere grabar el video o no.
            if recoder:
                next_state, reward, terminated, truncated, info = video_recorder.step(action)
            else:
                next_state, reward, terminated, truncated, info = env.step(action)
            
            # Formateamos los datos del instante de tiempo y los alcenamos.
            json_logs = format_step_data(instant, action, next_state, reward, terminated)
            score += reward
            instant += 1
            if len(episode_actions) < 20 or len(episode_actions) % 2 == 0 or terminated:
                logs.append(json_logs)
            #logs.append(json_logs)
            episode_actions.append(action)
            state = next_state
            if terminated: # Condición de salida del bucle, si el episodio ha terminado.
                break
        json_episode_logs = format_episode_logs(logs, episode, score) 
        
        print(f"Número de instantes: {instant+1}. Tamaño de logs: {len(logs)}")
        print('episode ', episode, 'score %.3f' % float(score), 'avg score %.3f' % (float(score) / instant))
        
    if recoder:
        video_recorder.close()
    else:
        env.close()
        
    return json_episode_logs

#### Logs del código inicial

In [11]:
# Genera una lista de n números enteros aleatorios entre 0 y 1000.
seeds = [random.randint(0, 1000) for i in range(4)]
initial_logs = lunar_lander(n_games=4, seed=seeds, display=True)
initial_logs

Semilla 626
Número de instantes: 414. Tamaño de logs: 217
episode  1 score 237.296 avg score 0.575
Semilla 411
Número de instantes: 175. Tamaño de logs: 315
episode  2 score -491.257 avg score -2.823
Semilla 217
Número de instantes: 147. Tamaño de logs: 399
episode  3 score -549.970 avg score -3.767
Semilla 533
Número de instantes: 247. Tamaño de logs: 533
episode  4 score -116.103 avg score -0.472


'{"landing attempt": 4, "logs": [{"time": 0, "action": 3, "current status": [-0.011, 1.431, -0.528, 0.425, 0.01, 0.085, 0.0, 0.0], "score": 0.977}, {"time": 1, "action": 3, "current status": [-0.016, 1.44, -0.517, 0.398, 0.012, 0.039, 0.0, 0.0], "score": 1.409}, {"time": 2, "action": 3, "current status": [-0.021, 1.448, -0.505, 0.371, 0.012, -0.009, 0.0, 0.0], "score": 1.735}, {"time": 3, "action": 3, "current status": [-0.026, 1.456, -0.494, 0.344, 0.009, -0.052, 0.0, 0.0], "score": 1.9}, {"time": 4, "action": 3, "current status": [-0.031, 1.463, -0.485, 0.318, 0.005, -0.087, 0.0, 0.0], "score": 1.909}, {"time": 5, "action": 3, "current status": [-0.035, 1.469, -0.478, 0.291, -0.001, -0.118, 0.0, 0.0], "score": 1.756}, {"time": 6, "action": 3, "current status": [-0.04, 1.475, -0.469, 0.265, -0.009, -0.155, 0.0, 0.0], "score": 0.703}, {"time": 7, "action": 3, "current status": [-0.045, 1.481, -0.461, 0.238, -0.018, -0.186, 0.0, 0.0], "score": 0.474}, {"time": 8, "action": 3, "current s

#### Logs exitosos
Utilizar otras semillas para que no memorice el entorno

In [12]:
import DuelingDQN

agent = DuelingDQN.Agent(num_observaciones=8, num_acciones=4, red_modelo=DuelingDQN.DuelingQNetwork, seed=0)
# Cargamos los pesos del agente entrenado.
agent.load_weights('checkpoint_Dueling.pth')

seeds = [130, 412]

success_logs = lunar_lander(n_games=len(seeds), display=True, seed=seeds, agent=agent)
success_logs

KeyboardInterrupt: 

#### Bucle iterativo

Sección principal del código encargada de conectar con el asistente de la API de OpenAI e iterar en la generación de código nuevo a partir de los registros del generado previamente.

In [None]:
import traceback, importlib
import Action

Action = importlib.reload(Action)

def create_and_run_llm_loop(Incial_msg, logger, model="gpt-3.5-turbo", num_iterations=10):
    
    agente = AssistantOpenAI(ARCLABKEY_OPENAI)

    # Crea un asistente
    asistente = agente.create_assistant(model=model, description=DESCRIPTION, instructions=INSTRUCTIONS, name=NAME, tools=TOOLS)
    
    # Crea un hilo
    hilo = agente.create_thread()
    
    # Añade un mensaje inicial al hilo.
    msg = agente.add_message(hilo.id, role="user", content=Incial_msg)

    # Bucle de aprendizaje del asistente.
    for i in range(num_iterations):
        
        logger.info(f"Iteration: {i+1}")
        compiled = False

        # Si en la presente iteración no se ha compilado el código, se pide al asistente corregir los errores antes de iterar.
        while not compiled:
            # Ejecuta el hilo
            ejecucion = agente.run(hilo.id, asistente.id, tool_choice='required')
            response = agente.get_run(ejecucion.id, hilo.id)

            # Esperamos a que la respuesta esté lista.
            while response.status not in ["completed", "failed", "requires_action"]:
                logger.info(f"Status: {ejecucion.status}")
                response = agente.get_run(ejecucion.id, hilo.id)
                time.sleep(20)

            # Descomponemos los elementos de la respuesta.
            logger.info(f"Status: {response.status}")
            if response.status == "completed":
                return response
            tool_call = response.required_action.submit_tool_outputs.tool_calls
            print(f"Tool call: {tool_call}")

            # Convierte el string a un diccionario
            code_dict = json.loads(tool_call[0].function.arguments)
            logger.info(f"Arguments: {code_dict}")
            
            # Obtén el código Python de la llamada a la herramienta
            code = code_dict["code"]
            filename = code_dict["filename"]  
            reasoning = code_dict["reasoning"]

            logger.info(f"\nRazonamiento:\n{reasoning}")
            logger.info(f"\nCodigo generado:\n{code}")

            # Ejecuta el código Python
            try:
                store_code_in_file(reasoning, code, filename)
                time.sleep(1) # Pequeño retraso para que el sistema operativo pueda reflejar los cambios en el archivo
                
                # Hacemos commit de los cambios en el repositorio para analizar las modificaciones del agente.
                commit_changes(r"C:\Users\adria\Repos\TFM__LLM_landing_self-refinement", f"4o Setpback. Iteración {i+1}.") 
                
                importlib.reload(Action) # Recargamos el módulo de acciones para que se actualice con las modificaciones del agente.

                # Ejecutamos el código generado.
                logs = lunar_lander(n_games=1, display=True, recoder=True, video_filename=f"video/iteration_{i+1}.mp4")
                
                # Devolvemos la respuesta al asistente.
                for call in tool_call:
                    agente.devolver_respuesta(response.id, hilo.id, tool_outputs=[{"tool_call_id": call.id, "output": "Run successful."}])            
                compiled = True
                logger.info(f"Compilación exitosa.")
                
                # Esperamos a que el agente esté listo para recibir mensajes y le añadimos el resultado de la iteración.
                while response.status not in ["completed", "failed", "expired"]:
                    logger.info(f"Status: {response.status}")
                    response = agente.get_run(response.id, hilo.id)
                    time.sleep(20)
                
                msg = f"""These are the logs generated by your last code: {logs}. Analyze the performance of the spacecraft and how it differs from the desired result. Be guided by the scoring system. Identify the cause of errors in your code and modify it without fear of making major changes. Think deeply about the priorities of your code and how to order and combine them correctly to achieve success. Remember that the code is a reflection of your reasoning and the way you approach the problem."""
                logger.info(msg)    
                agente.add_message(hilo.id, role="user", content=msg)
            
            # Alimentamos el asistente con el error generado en la ejecución del código.    
            except Exception as e:
                logger.exception("Error: %s", e)
                error_trace = traceback.format_exc()
                for call in tool_call:
                    agente.devolver_respuesta(response.id, hilo.id, tool_outputs=[{"tool_call_id": call.id, "output": "ERROR."}]) 
                logger.error(f"Error: {e}.")
                while response.status not in ["completed", "failed", "expired"]:
                    logger.info(f"Status: {response.status}")
                    response = agente.get_run(ejecucion.id, hilo.id)
                    time.sleep(30)
                msg = f"The code generated has an error. Please, try again. Error: {e}. Trace: {error_trace}"   
                logger.error(msg)   
                agente.add_message(hilo.id, role="assistant", content=msg)
    
    agente.mostrar_mensajes(hilo.id)  
    vaciar_agente(agente)
    
    logger.info("\nEjecución finalizada.\n\n")       

    return response


def vaciar_agente(agente):
    for assistant in agente.assistants:
        agente.delete_assistant(assistant)

    for thread in agente.threads:
        agente.delete_thread(thread)
        
    print("Asistente vaciado.")


#### Logger

In [None]:
def configura_log(nombre_archivo):
    """ 
    Configura el logger para que guarde los logs en un archivo y los muestre en la consola.
    
    Args:
        nombre_archivo: str Nombre del archivo donde se guardarán los logs.
        
    Returns:
        logger: logging.Logger Objeto logger configurado.
    """
    # Crear la carpeta logs si no existe
    if not os.path.exists('logs'):
        os.makedirs('logs')
    # Configura el logger
    logging.basicConfig(filename=f'logs/{nombre_archivo}', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
    logger = logging.getLogger()
    
    # Añade un StreamHandler para mostrar los logs en la consola
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)
    
    return logger

#### Ejecución con el asistente

Prompt inicial y mensajes del sistema para el asistente.

In [None]:
DESCRIPTION = "You are an expert programer in Pyhton. Your specialty is to generate the code responsible for making decisions about actions to be taken in various spacecraft landing environments.The objective is to land the spacecraft within a target zone in the shortest possible time and very gently. A scoring system is used to evaluate the landings, which must be maximized."
INSTRUCTIONS = f"""Your task is:
You have two modes of action depending on whether the use of function call is required. Reason deeply about what you are asked to do or code the requested task.
1. Analyze and reason about the logs received in the last landing attempts.
2. Your goal is to be able to make the correct decision based on what you have learned from the results of previous iterations. You must code the decision making based on your reasoning in a Python function.
3. IMPORTANT. Use the following tips in your reasoning to achieve a successful landing:
    - First you have to stabilize the falling ship (angle and location), keep falling under control and then land gently at the end.
    - It is mandatory to use all the elements of the array of observations received by parameter in your code when deciding what action to take at any given moment. Both position and velocities must be taken into account to know how the ship is doing and towards which states it is heading. All of these must be considered to achieve stability.
    - Learn how actions taken affect the future states of the spacecraft in the logs of past events so that you can take this into account when developing code to reach the landing zone.
    - The landing zone is in the central area of the x-axis.
    - Carefully choose the actions involving the lateral engines according to the rotation of the spacecraft.
4. This is the schema of the landing logs: {log_description}.
5. You should analyze the performance that appear in the logs of the code you have generated. You should improve the code generated in the 'act' function in the last iteration without fear of making major changes, seeking to maximize the score received and generate a higher quality code.
6. Save the code of the act function in the file 'Action.py' using store_code_in_file function tool.
"""
# 6. Improve your results and correct also any programming error you may have generated in your last code if they exist.
NAME = "Spacecraft Landing Master"

initial_msg = f"This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. You have to be able to learn from it to land successfully with any other conditions. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}.Take a deep breath and reason step-by-step. After reasoning analyze the results, learn and make better code."

logger = configura_log('Spacecraft_4o_betterprompt.log')
response = create_and_run_llm_loop(initial_msg, logger, model="gpt-4o", num_iterations=5)

print(response)

2024-07-02 00:37:43,931 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07-02 00:37:43,931 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07-02 00:37:44,122 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-07-02 00:37:44,122 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-07-02 00:37:44,385 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_QoB6MCbRbYhGkgNXtosUNZOa/messages "HTTP/1.1 200 OK"
2024-07-02 00:37:44,385 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_QoB6MCbRbYhGkgNXtosUNZOa/messages "HTTP/1.1 200 OK"
2024-07-02 00:37:44,532 - INFO - Iteration: 1
2024-07-02 00:37:44,532 - INFO - Iteration: 1
2024-07-02 00:37:44,929 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_QoB6MCbRbYhGkgNXtosUNZOa/runs "HTTP/1.1 200 OK"
2024-07-02 00:37:44,929 - INFO - HTTP Request: POST htt

Tool call: [RequiredActionFunctionToolCall(id='call_aFYAf5toEx9JxbWztDqncIya', function=Function(arguments='{"code":"import numpy as np\\n\\ndef act(observation):\\n    \'\'\'\\n    The function that codifies the action to be taken in each instant of time.\\n\\n    Args:\\n        observation (numpy.array):\\n            \\"description\\": \\"The state of the environment after the action is taken.\\",\\n            \\"positions\\": {  \\n                \\"0\\": \\"X position\\",\\n                \\"1\\": \\"Y position\\",\\n                \\"2\\": \\"X velocity\\",\\n                \\"3\\": \\"Y velocity\\",\\n                \\"4\\": \\"Angle\\",\\n                \\"5\\": \\"Angular velocity\\",\\n                \\"6\\": \\"Left contact sensor\\",\\n                \\"7\\": \\"Right contact sensor\\"\\n            },\\n            \\"min_values\\": [-1.5, -1.5, -5.0, -5.0, -3.14, -5.0, 0, 0],\\n            \\"max_values\\": [1.5, 1.5, 5.0, 5.0, 3.14, 5.0, 1, 1]\\n\\n    Returns:

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4



2024-07-02 00:38:30,678 - ERROR - Error: Missing required arguments; Expected either ('thread_id' and 'tool_outputs') or ('thread_id', 'stream' and 'tool_outputs') arguments to be given
Traceback (most recent call last):
  File "C:\Users\adria\AppData\Local\Temp\ipykernel_19840\4102630422.py", line 69, in create_and_run_llm_loop
    agente.devolver_respuesta(response.id, hilo.id, tool_outputs=[{"tool_call_id": call.id, "output": "Run successful."}])
  File "c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\Assistant.py", line 171, in devolver_respuesta
    response = self.client.beta.threads.runs.submit_tool_outputs(run_id, thread_id=thread_id, **kwargs)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\adria\anaconda3\Lib\site-packages\openai\_utils\_utils.py", line 276, in wrapper
    raise TypeError(msg)
TypeError: Missing required arguments; Expected either ('thread_id' and 'tool_outputs') or ('thread_id', '

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4
Número de instantes: 73. Tamaño de logs: 47
episode  1 score -612.950 avg score -8.513


TypeError: Missing required arguments; Expected either ('thread_id' and 'tool_outputs') or ('thread_id', 'stream' and 'tool_outputs') arguments to be given

#### Prueba de cambio grande en el prompt.

In [14]:
DESCRIPTION = "You are an expert spacecraft landing agent. Your specialty is to take the appropriate action at each instant of time based on the environment and state of the spacecraft. To achieve this, you put your knowledge base and the knowledge you acquire by analyzing each execution into a python function in charge of directing the landing."
INSTRUCTIONS = f"""To complete the task you must follow the following steps and indications:
1. Analyze and reason about the records received in the last landing attempts to learn how the spacecraft behaves in the environment. Keep in mind that conditions may vary.
2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:
    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).
    - Increased/decreased the slower/faster the spacecraft is moving.
    - Decreased the more the spacecraft is tilted.
    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engine.
    - Receive 100 points for a successful landing and lose them for crashing.
3. IMPORTANT. Use the following tips in your reasoning to achieve a successful landing:
    - First you have to stabilize the falling ship (speed, angle and location), keep falling under control and then land gently at the end.
    - Use all the elements of the vector of observations, all are relevant to make the right decision.
    - Pay close attention to successful events.
    - Environment and ship conditions may change but your code must be effective for all cases.
    - Find a balance in your landing policy to maximize your score (must exceed 200 points).
    - If you are rotating to the left (negative values), you should fire the left engine and vice versa. But always prioritizing a controlled speed with the main engine.
4. This is the schema of the landing logs: {log_description}.
5. You should analyze the performance that appear in the logs of the code you have generated. You should improve the code generated in the 'act' function in the last iteration without fear of making major changes, seeking to maximize the score received and generate a higher quality code.
6. Save the code of the act function in the file 'Action.py' using store_code_in_file function tool.
"""

# 6. Improve your results and correct also any programming error you may have generated in your last code if they exist.
NAME = "Spacecraft Landing Master"

initial_msg = f"This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}. What does each value and each change in the observations mean? What effects does each action have on the current state observations? What are the steps to follow to achieve a successful landing?. Take a deep breath and reason step-by-step. After reasoning analyze the results, learn and make better code."

logger = configura_log('Spacecraft_4o_Stepback.log')
response = create_and_run_llm_loop(initial_msg, logger, model="gpt-4o", num_iterations=5)

print(response)

2024-07-02 01:28:50,965 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07-02 01:28:51,143 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-07-02 01:28:51,438 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/messages "HTTP/1.1 200 OK"
2024-07-02 01:28:51,571 - INFO - Iteration: 1
2024-07-02 01:28:51,986 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs "HTTP/1.1 200 OK"
2024-07-02 01:28:52,391 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_Pt7gqDwYum9j3Ux59bApeiz7 "HTTP/1.1 200 OK"
2024-07-02 01:28:52,394 - INFO - Status: queued
2024-07-02 01:28:52,595 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_Pt7gqDwYum9j3Ux59bApeiz7 "HTTP/1.1 200 OK"
2024-07-02 01:29:12,599 - INFO - Status: queued
2024-07-02 01:29:12,854

Tool call: [RequiredActionFunctionToolCall(id='call_09KGpk8yWlzAUJfv4sZ0RvbY', function=Function(arguments='{"reasoning": "### Analysis and Reasoning\\n\\n#### Observations and Actions:\\n1. **X and Y Position (`obs[0]`, `obs[1]`)**: These are the horizontal and vertical positions of the spacecraft. The goal is to bring the X position to 0 (centered on the landing pad) and the Y position to 0 (landed on the pad).\\n2. **X and Y Velocity (`obs[2]`, `obs[3]`)**: These are the velocities in the X and Y directions. We aim to reduce these to near zero for a soft landing.\\n3. **Angle (`obs[4]`)**: This is the tilt angle of the spacecraft. We aim to keep this at 0 (vertical) to avoid tipping over.\\n4. **Angular Velocity (`obs[5]`)**: This is the rate at which the angle is changing. This should also be minimized to ensure the spaceship is stable.\\n5. **Left and Right Contact Sensors (`obs[6]`, `obs[7]`)**: These indicate if the left or right side of the spacecraft is in contact with the gro

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4



                                                              

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4
Número de instantes: 145. Tamaño de logs: 83
episode  1 score -23.203 avg score -0.161


2024-07-02 01:29:37,623 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_Pt7gqDwYum9j3Ux59bApeiz7/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-02 01:29:37,625 - INFO - Compilación exitosa.
2024-07-02 01:29:37,626 - INFO - Status: requires_action
2024-07-02 01:29:37,836 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_Pt7gqDwYum9j3Ux59bApeiz7 "HTTP/1.1 200 OK"
2024-07-02 01:29:57,839 - INFO - Status: in_progress
2024-07-02 01:29:58,153 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_Pt7gqDwYum9j3Ux59bApeiz7 "HTTP/1.1 200 OK"
2024-07-02 01:30:18,157 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 0, "current status": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], "score": 0.232}, {"time": 1, "action": 0, "current status": [0.011, 1.42, 0.38, 0.104, -0.013, -0

Tool call: [RequiredActionFunctionToolCall(id='call_UinaJHyHMgLlfrraivUpESvO', function=Function(arguments='{"reasoning":"### Analysis of Performance\\n\\nBased on the latest log, we can observe the following issues and necessary adjustments:\\n\\n1. **Inaction During Flight**: For a significant portion of the logs, the spacecraft did not perform any action (action 0) despite requiring stabilization and velocity adjustments. This led to a continuous accumulation of negative scores.\\n2. **Angular Stabilization**: The initial steps correctly assessed no significant angular issues, but corrective actions continued to be minimal or late.\\n3. **Horizontal and Vertical Control**: Actions to manage horizontal and vertical velocities need to be more responsive and accountable for both current velocity and position.\\n4. **Boosters Activation**: The code needs a more effective control routine for when to use which engines to maximize stabilization and minimize unwanted tilts or velocities.\\n

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4
Número de instantes: 78. Tamaño de logs: 49
episode  1 score -116.178 avg score -1.509


2024-07-02 01:31:03,144 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_k50bHdiqmVqIjvXUI2M1HDLC/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-02 01:31:03,149 - INFO - Compilación exitosa.
2024-07-02 01:31:03,149 - INFO - Status: requires_action
2024-07-02 01:31:03,354 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_k50bHdiqmVqIjvXUI2M1HDLC "HTTP/1.1 200 OK"
2024-07-02 01:31:23,358 - INFO - Status: in_progress
2024-07-02 01:31:23,609 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_k50bHdiqmVqIjvXUI2M1HDLC "HTTP/1.1 200 OK"
2024-07-02 01:31:43,611 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 0, "current status": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], "score": 0.232}, {"time": 1, "action": 0, "current status": [0.011, 1.42, 0.38, 0.104, -0.013, -0

Tool call: [RequiredActionFunctionToolCall(id='call_Xt1NDiRvOUIFCBDveYWYRgeC', function=Function(arguments='{"reasoning":"### Analysis of Performance and Adjustments Needed\\n\\n1. **Inaction During Flight**: The spacecraft remains inactive (action 0) despite needing corrections in its state. This improper action leads to the build-up of negative scores as no stabilization or velocity adjustments are being made.\\n2. **Angular Stabilization and Velocity Correction**: Late or ineffective corrections when the spacecraft starts tilting or gaining excessive velocities (both angular and linear).\\n3. **Too Many Corrections of Single Type**: The spacecraft consistently uses action 3 without achieving the required adjustments.\\n4. **Combining Stabilization and Velocity Control**: Need to manage angular corrections in combination with linear velocity adjustments effectively.\\n\\n### Revised Strategy\\n\\n1. **Continuous Adjustment**: Ensure that adjustments are made continuously rather than 

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4
Número de instantes: 86. Tamaño de logs: 53
episode  1 score -116.692 avg score -1.373


2024-07-02 01:32:29,064 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_WXkvgqehu6HQvfvKUV4zAF8Z/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-02 01:32:29,065 - INFO - Compilación exitosa.
2024-07-02 01:32:29,066 - INFO - Status: requires_action
2024-07-02 01:32:29,273 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_WXkvgqehu6HQvfvKUV4zAF8Z "HTTP/1.1 200 OK"
2024-07-02 01:32:49,276 - INFO - Status: in_progress
2024-07-02 01:32:49,583 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_WXkvgqehu6HQvfvKUV4zAF8Z "HTTP/1.1 200 OK"
2024-07-02 01:33:09,588 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 0, "current status": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], "score": 0.232}, {"time": 1, "action": 0, "current status": [0.011, 1.42, 0.38, 0.104, -0.013, -0

Tool call: [RequiredActionFunctionToolCall(id='call_9SDGGnjEawlwGNfEa36MYxk1', function=Function(arguments='{"reasoning":"### Analysis of Recent Performance\\n\\nThe recent logs show that the spacecraft again failed to land successfully. The code didn\'t manage to correct the trajectory effectively, leading to unnecessary extended periods of inaction and incorrect sequential thrust applications. Notably, the code failed to properly balance horizontal velocity, vertical descent rate, and spacecraft stabilization.\\n\\n### Problems Identified:\\n1. **Extended Inaction**: No action was taken when corrections were evidently needed, leading to continuous increases in vertical and horizontal velocities.\\n2. **Sequential Thrusts**: Excessive repeated side thrusts were applied without balancing or correcting the overall motion effectively.\\n3. **Incorrect Prioritization**: Angular stabilization seemed to overshadow the need to manage descent rate and horizontal drift effectively.\\n\\n### Mo

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4
Número de instantes: 86. Tamaño de logs: 53
episode  1 score -116.692 avg score -1.373


2024-07-02 01:33:54,509 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_e1PASc5cGFVYMq46o9beOyxA/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-02 01:33:54,514 - INFO - Compilación exitosa.
2024-07-02 01:33:54,517 - INFO - Status: requires_action
2024-07-02 01:33:54,740 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_e1PASc5cGFVYMq46o9beOyxA "HTTP/1.1 200 OK"
2024-07-02 01:34:14,742 - INFO - Status: in_progress
2024-07-02 01:34:15,012 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_e1PASc5cGFVYMq46o9beOyxA "HTTP/1.1 200 OK"
2024-07-02 01:34:35,016 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 0, "current status": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], "score": 0.232}, {"time": 1, "action": 0, "current status": [0.011, 1.42, 0.38, 0.104, -0.013, -0

Tool call: [RequiredActionFunctionToolCall(id='call_T5Nlba0CpBh7SAAPOx0rHZCR', function=Function(arguments='{"reasoning":"### Reanalysis and Code Revision\\n\\nUpon carefully analyzing the recent logs and performance, the issue still lies in timely managing the spacecraft\'s trajectory and velocity corrections. The sequence of actions observed has shown prolonged execution of a single action type, leading to insufficient response to accumulated velocity, both horizontal and vertical. The spacecraft’s descent was not controlled properly, leading it to crash.\\n\\n### Identified issues\\n1. **Prolonged Inaction**: Multiple consecutive steps executed no action, making the spaceship quickly deviate further from the desired descent path.\\n2. **Insufficient Handling of Descent Speed**: Although vertical stabilization commands were triggered, they were neither appropriately timed nor frequent enough.\\n3. **Over-corrective Angular Adjustments**: Excessive thrust applications in one direction

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_5.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_5.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_5.mp4\rl-video-episode-0.mp4
Número de instantes: 86. Tamaño de logs: 53
episode  1 score -116.692 avg score -1.373


2024-07-02 01:35:19,698 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_nOjPMiKC59rz6dTUyDqrDpkW/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-02 01:35:19,701 - INFO - Compilación exitosa.
2024-07-02 01:35:19,702 - INFO - Status: requires_action
2024-07-02 01:35:19,905 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_nOjPMiKC59rz6dTUyDqrDpkW "HTTP/1.1 200 OK"
2024-07-02 01:35:39,910 - INFO - Status: in_progress
2024-07-02 01:35:40,156 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_nOjPMiKC59rz6dTUyDqrDpkW "HTTP/1.1 200 OK"
2024-07-02 01:36:00,161 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 0, "current status": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], "score": 0.232}, {"time": 1, "action": 0, "current status": [0.011, 1.42, 0.38, 0.104, -0.013, -0

user: [TextContentBlock(text=Text(annotations=[], value='These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 0, "current status": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], "score": 0.232}, {"time": 1, "action": 0, "current status": [0.011, 1.42, 0.38, 0.104, -0.013, -0.085, 0.0, 0.0], "score": 0.121}, {"time": 2, "action": 0, "current status": [0.015, 1.421, 0.38, 0.077, -0.017, -0.085, 0.0, 0.0], "score": 0.013}, {"time": 3, "action": 0, "current status": [0.019, 1.423, 0.38, 0.051, -0.021, -0.085, 0.0, 0.0], "score": -0.104}, {"time": 4, "action": 0, "current status": [0.023, 1.423, 0.38, 0.024, -0.026, -0.085, 0.0, 0.0], "score": -0.226}, {"time": 5, "action": 0, "current status": [0.026, 1.423, 0.38, -0.003, -0.03, -0.085, 0.0, 0.0], "score": -0.353}, {"time": 6, "action": 0, "current status": [0.03, 1.422, 0.38, -0.029, -0.034, -0.085, 0.0, 0.0], "score": -0.481}, {"time": 7, "action": 0, "current status": [0.034, 1.42

2024-07-02 01:36:01,593 - INFO - HTTP Request: DELETE https://api.openai.com/v1/assistants/asst_oz37cI4Jj4423yR963RqZPpk "HTTP/1.1 200 OK"
2024-07-02 01:36:01,926 - INFO - HTTP Request: DELETE https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v "HTTP/1.1 200 OK"
2024-07-02 01:36:01,927 - INFO - 
Ejecución finalizada.




Asistente vaciado.
Run(id='run_nOjPMiKC59rz6dTUyDqrDpkW', assistant_id='asst_oz37cI4Jj4423yR963RqZPpk', cancelled_at=None, completed_at=1719876926, created_at=1719876875, expires_at=None, failed_at=None, incomplete_details=None, instructions="To complete the task you must follow the following steps and indications:\n1. Analyze and reason about the records received in the last landing attempts to learn how the spacecraft behaves in the environment. Keep in mind that conditions may vary.\n2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:\n    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).\n    - Increased/decreased the slower/faster the spacecraft is moving.\n    - Decreased the more the spacecraft is tilted.\n    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engine.\n    - Receive 100 points 

In [14]:
DESCRIPTION = "You are an expert spacecraft landing agent. Your specialty is to take the appropriate action at each instant of time based on the environment and state of the spacecraft. To achieve this, you put your knowledge base and the knowledge you acquire by analyzing each execution into a python function in charge of directing the landing."
INSTRUCTIONS = f"""To complete the task you must follow the following steps and indications:
1. Analyze and reason about the records received in the last landing attempts to learn how the spacecraft behaves in the environment. Keep in mind that conditions may vary.
2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:
    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).
    - Increased/decreased the slower/faster the spacecraft is moving.
    - Decreased the more the spacecraft is tilted.
    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engine.
    - Receive 100 points for a successful landing and lose them for crashing.
    - A succesful landing must exceed 200 points.
3. This is the log schema:\n {log_description}
4. Add your decision code to the 'act' method and locate errors in it if the landing is unsuccessful.
5. Save the code of the act function in the file 'Action.py' using store_code_in_file function.
"""

# 6. Improve your results and correct also any programming error you may have generated in your last code if they exist.
NAME = "Spacecraft Landing Master"

initial_msg = f"This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}. What does each value and each change in the observations mean? What effects does each action have on the current state observations? What are the steps to follow to achieve a successful landing?. Take a deep breath and reason step-by-step. After reasoning analyze the results, learn and make better code."

logger = configura_log('Spacecraft_4o_Stepback.log')
response = create_and_run_llm_loop(initial_msg, logger, model="gpt-4o", num_iterations=5)

print(response)

2024-07-02 20:13:38,487 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07-02 20:13:38,682 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-07-02 20:13:38,976 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_ZDDzGedNEV4UQxGCR92T9JFg/messages "HTTP/1.1 200 OK"
2024-07-02 20:13:39,134 - INFO - Iteration: 1
2024-07-02 20:13:39,768 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_ZDDzGedNEV4UQxGCR92T9JFg/runs "HTTP/1.1 200 OK"
2024-07-02 20:13:40,001 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_ZDDzGedNEV4UQxGCR92T9JFg/runs/run_Df58B0YObxg72VbXW3CpIngt "HTTP/1.1 200 OK"
2024-07-02 20:13:40,003 - INFO - Status: queued
2024-07-02 20:13:40,209 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_ZDDzGedNEV4UQxGCR92T9JFg/runs/run_Df58B0YObxg72VbXW3CpIngt "HTTP/1.1 200 OK"
2024-07-02 20:14:00,212 - INFO - Status: queued
2024-07-02 20:14:00,474

Tool call: [RequiredActionFunctionToolCall(id='call_OImymi36vVR9f8le1Dvbn3N8', function=Function(arguments='{"reasoning":"Analyzing the successful and unsuccessful landing attempts, the spacecraft\'s behavior can be broken down into several key observations and corresponding actions:\\n\\n1. **X Position and Velocity**: Ideally, keep the spacecraft near the center (X position ≈ 0) with minimal horizontal velocity (X velocity ≈ 0). Pushing the left engine (Action 1) moves the spacecraft right, and pushing the right engine (Action 3) moves it left. Pushing both engines (Action 2) slightly affects the horizontal velocity but mainly affects vertical dynamics.\\n2. **Y Position and Velocity**: The goal is to reduce the Y velocity (downward speed) to zero as the spacecraft approaches the ground. Pushing both engines (Action 2) reduces the Y velocity (slows descent), while inaction (Action 0) allows gravity to increase the descent speed.\\n3. **Angle and Angular Velocity**: Keep the spacecraf

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4
Número de instantes: 77. Tamaño de logs: 49
episode  1 score -141.601 avg score -1.863


2024-07-02 20:14:45,419 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_ZDDzGedNEV4UQxGCR92T9JFg/runs/run_Df58B0YObxg72VbXW3CpIngt/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-02 20:14:45,421 - INFO - Compilación exitosa.
2024-07-02 20:14:45,421 - INFO - Status: requires_action
2024-07-02 20:14:45,650 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_ZDDzGedNEV4UQxGCR92T9JFg/runs/run_Df58B0YObxg72VbXW3CpIngt "HTTP/1.1 200 OK"
2024-07-02 20:15:05,652 - INFO - Status: in_progress
2024-07-02 20:15:05,951 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_ZDDzGedNEV4UQxGCR92T9JFg/runs/run_Df58B0YObxg72VbXW3CpIngt "HTTP/1.1 200 OK"
2024-07-02 20:15:25,954 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 1, "current status": [0.011, 1.42, 0.362, 0.104, -0.007, -

Tool call: [RequiredActionFunctionToolCall(id='call_IuQmHgh7jeZem8GlDbZdLGRx', function=Function(arguments='{"reasoning":"Analyzing the logs from the recent attempt, it\'s clear that the spacecraft faced several key issues:\\n\\n1. **Excessive Use of Side Engines (Action 1 and Action 3)**: The spacecraft increased its horizontal velocity and did not stabilize its angle, which led to the angle drifting significantly from 0 over time.\\n2. **Angle and Angular Velocity Control**: The spacecraft\'s angle increased continuously, leading to stability issues. Even though the code attempted to adjust based on the angle, it resulted in continuous use of the side engines, which exacerbated the problem.\\n3. **Insufficient Control of Descent Rate**: The spacecraft did not appropriately manage its vertical velocity. An effective descent strategy would require prioritizing control of Y velocity before worrying excessively about horizontal corrections.\\n4. **Score Loss Due to Engine Usage**: Excess

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4



                                                              

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4
Número de instantes: 99. Tamaño de logs: 60
episode  1 score -109.814 avg score -1.121


2024-07-02 20:16:51,570 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_ZDDzGedNEV4UQxGCR92T9JFg/runs/run_ziFNMzggavQwam4pp36ngD0H/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-02 20:16:51,572 - INFO - Compilación exitosa.
2024-07-02 20:16:51,572 - INFO - Status: requires_action
2024-07-02 20:16:51,817 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_ZDDzGedNEV4UQxGCR92T9JFg/runs/run_ziFNMzggavQwam4pp36ngD0H "HTTP/1.1 200 OK"
2024-07-02 20:17:11,819 - INFO - Status: in_progress
2024-07-02 20:17:12,068 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_ZDDzGedNEV4UQxGCR92T9JFg/runs/run_ziFNMzggavQwam4pp36ngD0H "HTTP/1.1 200 OK"
2024-07-02 20:17:32,071 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 1, "current status": [0.011, 1.42, 0.362, 0.104, -0.007, -

Tool call: [RequiredActionFunctionToolCall(id='call_hpZ7X0VgEGQI9e5lcz98Ldxi', function=Function(arguments='{"reasoning":"The latest logs indicate several persistent issues and some new insights:\\n1. **Angle and Angular Velocity Control**: The angle control improved initially, but after a few steps, the angle started diverging again. This implies that the simple threshold-based approach is insufficient. The spacecraft\'s tilt and changing angular velocity need more dynamic adjustments.\\n2. **Vertical Descent Rate**: The vertical descent was relatively stable, but the score impact due to continued usage of engines was still significant. Hence, a more refined strategy is required that uses the center engine sparingly.\\n3. **Excessive Engine Use**: The alternate firing between left and right engines caused continued oscillation, which degraded performance. The back-and-forth corrections must be minimized, and a more stable state maintained.\\n4. **Complex Maneuver Handling**: In the sc

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4



                                                              

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4
Número de instantes: 99. Tamaño de logs: 60
episode  1 score -109.814 avg score -1.121


2024-07-02 20:18:58,745 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_ZDDzGedNEV4UQxGCR92T9JFg/runs/run_ir3lt7EzkHRznOaTSWXQuDKy/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-02 20:18:58,748 - INFO - Compilación exitosa.
2024-07-02 20:18:58,749 - INFO - Status: requires_action
2024-07-02 20:18:59,058 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_ZDDzGedNEV4UQxGCR92T9JFg/runs/run_ir3lt7EzkHRznOaTSWXQuDKy "HTTP/1.1 200 OK"
2024-07-02 20:19:19,060 - INFO - Status: in_progress
2024-07-02 20:19:19,351 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_ZDDzGedNEV4UQxGCR92T9JFg/runs/run_ir3lt7EzkHRznOaTSWXQuDKy "HTTP/1.1 200 OK"
2024-07-02 20:19:39,353 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 1, "current status": [0.011, 1.42, 0.362, 0.104, -0.007, -

Tool call: [RequiredActionFunctionToolCall(id='call_5qe8TtFnaQJcNSZMM9eOMOyn', function=Function(arguments='{"reasoning":"From reviewing the recent logs, here are the persistent issues and insights:\\n\\n1. **Excessive Engine Use (Horizontal Control)**: Even with the angle control, continuous use of left and right engines persists, leading to oscillations and increased horizontal speed.\\n2. **Angle and Angular Velocity Control**: While combined, the strategy still fails at dynamically responding to changing velocities quickly enough, often leading to opposite engine fires which compound the problem.\\n3. **Vertical Descent and Engine Use**: Over-correcting with the center engine (Action 2) was noted, thus repeated vertical descents remained unstable particularly near critical phases.\\n4. **Contact and Land Efficiency**: Improvements still required for contact sensor utilization to reflect touchdown dynamics effectively.\\n\\n### Updated Strategy for Successful Landing:\\n1. **Primary

2024-07-02 20:21:04,367 - ERROR - Error: invalid syntax (Action.py, line 50)
Traceback (most recent call last):
  File "C:\Users\adria\AppData\Local\Temp\ipykernel_22056\3249554590.py", line 64, in create_and_run_llm_loop
    importlib.reload(Action) # Recargamos el módulo de acciones para que se actualice con las modificaciones del agente.
    ^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\adria\anaconda3\Lib\importlib\__init__.py", line 169, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 621, in _exec
  File "<frozen importlib._bootstrap_external>", line 936, in exec_module
  File "<frozen importlib._bootstrap_external>", line 1074, in get_code
  File "<frozen importlib._bootstrap_external>", line 1004, in source_to_code
  File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
  File "c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\Action.py", line 50
    elif Y_vel < descent_speed_limit>:
                             

Tool call: [RequiredActionFunctionToolCall(id='call_dpxYJo5tg4btyRlZi3WUFi1R', function=Function(arguments='{"reasoning":"The syntax error from the previous attempt has been corrected. The focus remains on angle and angular velocity stabilization, vertical descent speed management, and horizontal position corrections. The adjustments aim to minimize engine use and ensure controlled and efficient landing.","code":"import math\\n\\ndef act(observation):\\n    \'\'\'\\n    The function that codifies the action to be taken in each instant of time.\\n\\n    Args:\\n        observation (numpy.array):\\n            \\"description\\": \\"The state of the environment after the action is taken.\\",\\n            \\"positions\\": {  \\n                \\"0\\": \\"X position\\",\\n                \\"1\\": \\"Y position\\",\\n                \\"2\\": \\"X velocity\\",\\n                \\"3\\": \\"Y velocity\\",\\n                \\"4\\": \\"Angle\\",\\n                \\"5\\": \\"Angular velocity\

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4
Número de instantes: 77. Tamaño de logs: 49
episode  1 score -139.587 avg score -1.837


2024-07-02 20:31:25,926 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_ZDDzGedNEV4UQxGCR92T9JFg/runs/run_Mbk3Bg1ezOMUtB3M2yjxtIIC/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-02 20:31:25,927 - INFO - Compilación exitosa.
2024-07-02 20:31:25,928 - INFO - Status: requires_action
2024-07-02 20:31:26,133 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_ZDDzGedNEV4UQxGCR92T9JFg/runs/run_Mbk3Bg1ezOMUtB3M2yjxtIIC "HTTP/1.1 200 OK"
2024-07-02 20:31:46,135 - INFO - Status: in_progress
2024-07-02 20:31:46,390 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_ZDDzGedNEV4UQxGCR92T9JFg/runs/run_Mbk3Bg1ezOMUtB3M2yjxtIIC "HTTP/1.1 200 OK"
2024-07-02 20:32:06,393 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 1, "current status": [0.011, 1.42, 0.362, 0.104, -0.007, -

Tool call: [RequiredActionFunctionToolCall(id='call_I9MbO74c2ydNkxlHZTE5XqWU', function=Function(arguments='{"reasoning":"The logs reveal persistent challenges: \\n1. **Excessive Engine Usage**: Continuous use of left and right engines leads to increased horizontal speed. \\n2. **Angle and Angular Velocity Control**: Stabilizing these parameters hasn\'t been sufficiently dynamic. \\n3. **Vertical Stability**: Over-correction with the center engine is notable, impacting descent stability. \\n4. **Inefficiency Near Touchdown**: Contact sensor engagement shows instability, indicating poor fine control near the ground.\\n\\n### Updated Strategy for Successful Landing:\\n1. **Primary Focus**: Prioritize dynamic angle and angular velocity stabilization. \\n2. **Secondary Focus**: Ensure vertical stability using center engines judiciously.\\n3. **Tertiary Focus**: Horizontal position corrections based on stable angle maintenance.\\n4. **Efficient Engine Use**: Minimize firing duration to cons

KeyError: 'code'

#### Prompt antiguo exitoso.

In [15]:
DESCRIPTION = "You are an expert programer in Pyhton. Your specialty is to generate the code responsible for making decisions about actions to be taken in various spacecraft landing environments.The objective is to land the spacecraft within a target zone in the shortest possible time and very gently. A scoring system is used to evaluate the landings, which must be maximized."
INSTRUCTIONS = f"""Your task is: 
    1. Analyze and reason about the results received in the last landing attempts. This is the schema of the landing logs: {log_description}.
    2. Your goal is to be able to make the appropriate decision based on the results of previous iterations. You must code the decision making based on your reasoning in a Python function.
    3. Use these tips to get a good landing:
        - First you have to stabilize the falling ship and then land gently.
        - Use all observations when deciding what action to take at any given moment. Both position and velocities must be taken into account to know how the ship is doing and towards which states it is heading. All of these must be considered to achieve stability.
        - Landing zone is in the central area of the x-axis.
        - Learn how actions taken affect the future states of the spacecraft in the logs of past events so that you can take this into account when developing code to reach the landing zone.
    4. Based on the logs you should improve the code generated in the 'act' function in the last iteration, seeking to maximize the score received and generate a higher quality code.
    5. Save the code of the act function in the file 'Action.py' using the tools given.
    6. Improve your results and correct any errors you may have generated in your last code if they exist."""

NAME = "Spacecraft Landing Master"

initial_msg = f"Take a deep breath and think step by step. This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}. Analyze the results and improve the code."
logger = configura_log('Spacecraft_4o_exitosoAntiguo.log')
response = create_and_run_llm_loop(initial_msg, logger, model="gpt-4o", num_iterations=5)

print(response)

2024-07-03 17:34:11,146 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07-03 17:34:11,342 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-07-03 17:34:11,635 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv/messages "HTTP/1.1 200 OK"
2024-07-03 17:34:11,779 - INFO - Iteration: 1
2024-07-03 17:34:12,239 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv/runs "HTTP/1.1 200 OK"
2024-07-03 17:34:12,469 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv/runs/run_eS9Z979IPNxLpwlCs5S2JEtJ "HTTP/1.1 200 OK"
2024-07-03 17:34:12,477 - INFO - Status: queued
2024-07-03 17:34:12,708 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv/runs/run_eS9Z979IPNxLpwlCs5S2JEtJ "HTTP/1.1 200 OK"
2024-07-03 17:34:32,711 - INFO - Status: queued
2024-07-03 17:34:33,007

Tool call: [RequiredActionFunctionToolCall(id='call_6Kl599fRlbIZSXnqisbrKZ2R', function=Function(arguments='{"reasoning":"After analyzing the provided logs for both a successful and unsuccessful landing, the following strategies can be derived:\\n1. Stabilize the spacecraft\'s vertical speed and angle before attempting to land.\\n2. Keep the X position close to the center.\\n3. Use actions that counteract undesirable velocities and angles.\\n4. Ensure the craft\'s vertical descent is minimized, especially as it gets closer to ground.\\n\\nUsing these strategies, a proportional control approach can be implemented, which continually adjusts the actions based on the observed state, targeting zero horizontal and vertical speeds, and a stable angle. Minor adjustments should be made until the vessel is stably landed.\\n\\nHere is an improved version of the \'act\' function that uses proportional control logic to decide on the appropriate action based on real-time observations:\\n","code":"de

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4
Número de instantes: 75. Tamaño de logs: 48
episode  1 score -634.961 avg score -8.581


2024-07-03 17:34:56,773 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv/runs/run_eS9Z979IPNxLpwlCs5S2JEtJ/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-03 17:34:56,775 - INFO - Compilación exitosa.
2024-07-03 17:34:56,775 - INFO - Status: requires_action
2024-07-03 17:34:56,995 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv/runs/run_eS9Z979IPNxLpwlCs5S2JEtJ "HTTP/1.1 200 OK"
2024-07-03 17:35:16,997 - INFO - Status: in_progress
2024-07-03 17:35:17,268 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv/runs/run_eS9Z979IPNxLpwlCs5S2JEtJ "HTTP/1.1 200 OK"
2024-07-03 17:35:37,277 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 1, "current status": [0.011, 1.42, 0.362, 0.104, -0.007, -

Tool call: [RequiredActionFunctionToolCall(id='call_OwfUk3ugbkYARczRJ5eCjKg9', function=Function(arguments='{"reasoning":"After analyzing the logs from the improved function, it is clear that the descent was not properly controlled. The spacecraft continuously accelerated downward and the angle got worse over time, ultimately leading to a crash.\\n\\nHere are the critical observations:\\n1. The angular velocity and angle control were not prioritized correctly; the craft became increasingly tilted, affecting the descent.\\n2. The horizontal velocity control measures were ineffective given the negative impact on vertical descent control.\\n3. The current logic fires the left engine repeatedly, which did not address the descent in an effective manner.\\n\\nRevised Strategy:\\n- Prioritize reducing the downward vertical velocity before making finer adjustments to horizontal velocity and angle.\\n- Introduce thresholds where actions are changed to stabilize not just based on velocity but al

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4
Número de instantes: 96. Tamaño de logs: 58
episode  1 score -559.795 avg score -5.893


2024-07-03 17:36:22,599 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv/runs/run_ixH1C2dlHMahlL7rGD2o5PDF/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-03 17:36:22,601 - INFO - Compilación exitosa.
2024-07-03 17:36:22,602 - INFO - Status: requires_action
2024-07-03 17:36:22,818 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv/runs/run_ixH1C2dlHMahlL7rGD2o5PDF "HTTP/1.1 200 OK"
2024-07-03 17:36:42,821 - INFO - Status: in_progress
2024-07-03 17:36:43,085 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv/runs/run_ixH1C2dlHMahlL7rGD2o5PDF "HTTP/1.1 200 OK"
2024-07-03 17:37:03,090 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 0, "current status": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], "score": 0.232}, {"time": 1, "action": 0, "current status": [0.011, 1.42, 0.38, 0.104, -0.013, -0

Tool call: [RequiredActionFunctionToolCall(id='call_Bipx9QUnNhS5fVMb83nb8eR1', function=Function(arguments='{"reasoning":"From the analysis of the latest logs, it is clear that the prioritization is still off and the control logic for vertical and horizontal motion as well as angular control needs rethinking. The spacecraft becomes increasingly out of control as the descent progresses, leading to significant acceleration and tilting.\\n\\nKey Observations:\\n1. Actions are not effectively counteracting the horizontal and vertical movements nor the angular velocity as they should.\\n2. The spacecraft\'s angle continues to worsen without a proper control mechanism in place.\\n\\nImproved Strategy:\\n- Implement a proportional control system that adjusts the actions depending on how far the state is from the desired values (velocity close to zero, angle close to zero).\\n- Separate the concerns of controlling horizontal motion, vertical motion, and angular deviation, ensuring finer contro

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4
Número de instantes: 94. Tamaño de logs: 57
episode  1 score -557.953 avg score -5.999


2024-07-03 17:37:49,118 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv/runs/run_2vq3dkmwcrfIPlOSECXHFnSB/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-03 17:37:49,121 - INFO - Compilación exitosa.
2024-07-03 17:37:49,122 - INFO - Status: requires_action
2024-07-03 17:37:49,327 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv/runs/run_2vq3dkmwcrfIPlOSECXHFnSB "HTTP/1.1 200 OK"
2024-07-03 17:38:09,331 - INFO - Status: in_progress
2024-07-03 17:38:09,601 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv/runs/run_2vq3dkmwcrfIPlOSECXHFnSB "HTTP/1.1 200 OK"
2024-07-03 17:38:29,605 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 0, "current status": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], "score": 0.232}, {"time": 1, "action": 0, "current status": [0.011, 1.42, 0.38, 0.104, -0.013, -0

Tool call: [RequiredActionFunctionToolCall(id='call_JhFF75RhrfedwDl9TqRzlIOK', function=Function(arguments='{"reasoning":"From the logs of the last attempt, it is apparent that the spacecraft remained in a free fall state for a significant amount of time. The primary issue is that decisions to change actions weren\'t being made promptly. There was not enough emphasis on changing the course of action when required, leading to ineffective controls.\\n\\n1. **Vertical Descent Control**: The log shows that the downward velocity wasn\'t properly managed initially, leading to increasing speeds and worsening conditions over time. The vertical thrust should have been initiated much earlier to manage the descent speed effectively.\\n2. **Angle and Angular Velocity Control**: The spacecraft\'s angle and angular velocity controls were not adequately addressed. They need to be stable to maintain the correct posture for landing.\\n3. **Horizontal Position and Velocity Control**: The control for hor

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4
Número de instantes: 94. Tamaño de logs: 57
episode  1 score -557.953 avg score -5.999


2024-07-03 17:39:15,196 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv/runs/run_gH9X9AudHQfsei6zG8mipoXw/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-03 17:39:15,199 - INFO - Compilación exitosa.
2024-07-03 17:39:15,199 - INFO - Status: requires_action
2024-07-03 17:39:15,415 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv/runs/run_gH9X9AudHQfsei6zG8mipoXw "HTTP/1.1 200 OK"
2024-07-03 17:39:35,417 - INFO - Status: in_progress
2024-07-03 17:39:35,675 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv/runs/run_gH9X9AudHQfsei6zG8mipoXw "HTTP/1.1 200 OK"
2024-07-03 17:39:55,678 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 0, "current status": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], "score": 0.232}, {"time": 1, "action": 0, "current status": [0.011, 1.42, 0.38, 0.104, -0.013, -0

Tool call: [RequiredActionFunctionToolCall(id='call_wtlZ1POzpJdZV1apZWWgsi9v', function=Function(arguments='{"reasoning":"After analyzing the provided logs from the recent landing attempt, here are key observations to understand why the spacecraft performed poorly:\\n\\n1. **Vertical Descent Control**: The spacecraft was in free fall for a significant amount of time (e.g., action \'0\' repeated). No intervention was made to control the downward velocity early enough.\\n2. **Angle Control**: The angle was not actively controlled; it allowed for continuous deviation, further complicating descent stability.\\n3. **Action Delays**: The decisions to change actions need to be more responsive to environmental changes. The same actions were repeated without resulting in desired outcomes, leading to continuous poor scoring.\\n\\nImproved Strategy:\\n- Implement immediate and continuous checks and actions for vertical velocity right from the start to manage descent effectively.\\n- Actively cont

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_5.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_5.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_5.mp4\rl-video-episode-0.mp4
Número de instantes: 94. Tamaño de logs: 57
episode  1 score -557.953 avg score -5.999


2024-07-03 17:40:40,964 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv/runs/run_80UymoumzihMvIMeNc56gOs5/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-03 17:40:40,966 - INFO - Compilación exitosa.
2024-07-03 17:40:40,967 - INFO - Status: requires_action
2024-07-03 17:40:41,188 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv/runs/run_80UymoumzihMvIMeNc56gOs5 "HTTP/1.1 200 OK"
2024-07-03 17:41:01,192 - INFO - Status: in_progress
2024-07-03 17:41:01,454 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv/runs/run_80UymoumzihMvIMeNc56gOs5 "HTTP/1.1 200 OK"
2024-07-03 17:41:21,457 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 0, "current status": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], "score": 0.232}, {"time": 1, "action": 0, "current status": [0.011, 1.42, 0.38, 0.104, -0.013, -0

user: [TextContentBlock(text=Text(annotations=[], value='These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 0, "current status": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], "score": 0.232}, {"time": 1, "action": 0, "current status": [0.011, 1.42, 0.38, 0.104, -0.013, -0.085, 0.0, 0.0], "score": 0.121}, {"time": 2, "action": 0, "current status": [0.015, 1.421, 0.38, 0.077, -0.017, -0.085, 0.0, 0.0], "score": 0.013}, {"time": 3, "action": 0, "current status": [0.019, 1.423, 0.38, 0.051, -0.021, -0.085, 0.0, 0.0], "score": -0.104}, {"time": 4, "action": 0, "current status": [0.023, 1.423, 0.38, 0.024, -0.026, -0.085, 0.0, 0.0], "score": -0.226}, {"time": 5, "action": 0, "current status": [0.026, 1.423, 0.38, -0.003, -0.03, -0.085, 0.0, 0.0], "score": -0.353}, {"time": 6, "action": 0, "current status": [0.03, 1.422, 0.38, -0.029, -0.034, -0.085, 0.0, 0.0], "score": -0.481}, {"time": 7, "action": 0, "current status": [0.034, 1.42

2024-07-03 17:41:22,476 - INFO - HTTP Request: DELETE https://api.openai.com/v1/assistants/asst_PkpU83LGyU1PPCgGtjUcUqx8 "HTTP/1.1 200 OK"
2024-07-03 17:41:22,692 - INFO - HTTP Request: DELETE https://api.openai.com/v1/threads/thread_6wvsBNrPNjBxrOkvv9jtQGOv "HTTP/1.1 200 OK"
2024-07-03 17:41:22,693 - INFO - 
Ejecución finalizada.




Asistente vaciado.
Run(id='run_80UymoumzihMvIMeNc56gOs5', assistant_id='asst_PkpU83LGyU1PPCgGtjUcUqx8', cancelled_at=None, completed_at=1720021245, created_at=1720021195, expires_at=None, failed_at=None, incomplete_details=None, instructions="Your task is: \n    1. Analyze and reason about the results received in the last landing attempts. This is the schema of the landing logs: {'description': 'Log data for each step of the spacecraft landing environment.', 'landing attempt': {'type': 'integer', 'description': 'The episode number.'}, 'logs': {'time': {'type': 'integer', 'description': 'The instant within the landing attempt where the current log is taken.'}, 'action': {'type': 'integer', 'description': 'The possible actions taken.', 'options': {'0': 'Switch off engines', '1': 'Push left engine', '2': 'Push both engines (upwards)', '3': 'Push right engine'}}, 'current status': {'type': 'array', 'description': 'The state of the environment after the action is taken.', 'positions': {'0':

#### Prompt zero shot.

In [15]:
DESCRIPTION = "You are an expert programer in Pyhton. Your specialty is to generate the code responsible for making decisions about actions to be taken in various spacecraft landing environments.The objective is to land the spacecraft within a target zone in the shortest possible time and very gently. A scoring system is used to evaluate the landings, which must be maximized."
INSTRUCTIONS = f"""Your task is: 
    1. Analyze and reason about the results received in the last landing attempts. This is the schema of the landing logs: {log_description}.
    2. Your goal is to be able to make the appropriate decision based on the results of previous iterations. You must code the decision making based on your reasoning in a Python function.
    3. Use these tips to get a good landing:
        - First you have to stabilize the falling ship and then land gently.
        - Use all observations when deciding what action to take at any given moment. Both position and velocities must be taken into account to know how the ship is doing and towards which states it is heading. All of these must be considered to achieve stability.
        - Landing zone is in the central area of the x-axis.
        - Learn how actions taken affect the future states of the spacecraft in the logs of past events so that you can take this into account when developing code to reach the landing zone.
    4. Based on the logs you should improve the code generated in the 'act' function in the last iteration, seeking to maximize the score received and generate a higher quality code.
    5. Save the code of the act function in the file 'Action.py' using the tools given.
    6. Improve your results and correct any errors you may have generated in your last code if they exist."""

NAME = "Spacecraft Landing Master"

initial_msg = f"Take a deep breath and think step by step. Analyze the results and improve the code."
logger = configura_log('Spacecraft_4o_ZeroShot.log')
response = create_and_run_llm_loop(initial_msg, logger, model="gpt-4o", num_iterations=5)

print(response)

2024-07-03 19:59:17,407 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07-03 19:59:17,633 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-07-03 19:59:17,853 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_oJWBt4hwO9PwfGOyZ8H3CfAK/messages "HTTP/1.1 200 OK"
2024-07-03 19:59:17,860 - INFO - Iteration: 1
2024-07-03 19:59:18,345 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_oJWBt4hwO9PwfGOyZ8H3CfAK/runs "HTTP/1.1 200 OK"
2024-07-03 19:59:18,599 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_oJWBt4hwO9PwfGOyZ8H3CfAK/runs/run_H69x6DxzTn03SaBNidVYRPxc "HTTP/1.1 200 OK"
2024-07-03 19:59:18,609 - INFO - Status: queued
2024-07-03 19:59:18,830 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_oJWBt4hwO9PwfGOyZ8H3CfAK/runs/run_H69x6DxzTn03SaBNidVYRPxc "HTTP/1.1 200 OK"
2024-07-03 19:59:38,833 - INFO - Status: queued
2024-07-03 19:59:39,151

Tool call: [RequiredActionFunctionToolCall(id='call_nviXa99spIO45VA5pvsXasDp', function=Function(arguments='{"reasoning": "Analyzing the landing logs reveals a structured approach to stabilizing and landing the spacecraft. First, we need to focus on stabilizing the spacecraft by minimizing its angular velocity, horizontal and vertical velocities, and ensuring it is over the landing zone. Then, we can perform a gentle landing. To accomplish this, the code should react to the current state of the spaceship, focusing on four key factors: X velocity, Y velocity, Angle, and Angular velocity.\\n\\nHere\'s a step-by-step reasoning:\\n1. **Stabilization**:\\n    - If the ship\'s angle is not close to zero, adjust it by pushing the appropriate side engine (left or right) to reduce the angular velocity.\\n    - If the X velocity is high, push the opposite side engine to counter this.\\n2. **Horizontal Positioning**:\\n    - If the ship is too far left or right (X position), push the opposite sid

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4



                                                              

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4
Número de instantes: 117. Tamaño de logs: 69
episode  1 score -1157.908 avg score -9.982


2024-07-03 20:00:03,206 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_oJWBt4hwO9PwfGOyZ8H3CfAK/runs/run_H69x6DxzTn03SaBNidVYRPxc/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-03 20:00:03,210 - INFO - Compilación exitosa.
2024-07-03 20:00:03,211 - INFO - Status: requires_action
2024-07-03 20:00:03,437 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_oJWBt4hwO9PwfGOyZ8H3CfAK/runs/run_H69x6DxzTn03SaBNidVYRPxc "HTTP/1.1 200 OK"
2024-07-03 20:00:23,441 - INFO - Status: in_progress
2024-07-03 20:00:27,428 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_oJWBt4hwO9PwfGOyZ8H3CfAK/runs/run_H69x6DxzTn03SaBNidVYRPxc "HTTP/1.1 200 OK"
2024-07-03 20:00:47,431 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 2, "current status": [0.007, 1.418, 0.377, 0.172, -0.009, -0.089, 0.0, 0.0], "score": -1.45}, {"time": 1, "action": 2, "current status": [0.011, 1.422, 0.389, 0.179, -0.013,

Tool call: [RequiredActionFunctionToolCall(id='call_d7wRDl2y1CyhJiIUr5r7wjC9', function=Function(arguments='{"reasoning":"From the analysis of the logs, it is clear that the spacecraft is not stabilizing properly and is continuously gaining speed. The action decisions are not adequately controlling the angular velocity or the descent speed. Several observations can be made:\\n\\n1. **Angular Velocity**: The angular velocity continues to grow negatively, indicating a need to counteract effectively and stabilize the angle.\\n2. **Y Velocity**: The vertical descent speed increases, requiring a more aggressive use of upward thrust to slow down.\\n3. **X Velocity and Position**: The spacecraft is continuously drifting horizontally out of the center, indicating that counter-thrust mechanisms to balance horizontal positioning are ineffective.\\n\\n### Updated Decision-Making Process\\n\\nTo improve the landing attempt, we need to introduce a more granular decision-making process that prioriti

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4
Número de instantes: 75. Tamaño de logs: 48
episode  1 score -634.961 avg score -8.581


2024-07-03 20:01:32,896 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_oJWBt4hwO9PwfGOyZ8H3CfAK/runs/run_hNlYP5zxn2XG9REpMG8nKQnK/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-03 20:01:32,900 - INFO - Compilación exitosa.
2024-07-03 20:01:32,901 - INFO - Status: requires_action
2024-07-03 20:01:33,148 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_oJWBt4hwO9PwfGOyZ8H3CfAK/runs/run_hNlYP5zxn2XG9REpMG8nKQnK "HTTP/1.1 200 OK"
2024-07-03 20:01:53,152 - INFO - Status: in_progress
2024-07-03 20:01:53,516 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_oJWBt4hwO9PwfGOyZ8H3CfAK/runs/run_hNlYP5zxn2XG9REpMG8nKQnK "HTTP/1.1 200 OK"
2024-07-03 20:02:13,519 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 1, "current status": [0.011, 1.42, 0.362, 0.104, -0.007, -

Tool call: [RequiredActionFunctionToolCall(id='call_hLuyNHvhIsBgkXWi5JKX7ks7', function=Function(arguments='{"reasoning":"### Log Analysis Summary:\\n\\nThe spacecraft seems to be consistently pushing the left engine (action `1`), leading to a gradual increase in the angle, angular velocity, and horizontal velocity in one direction. Eventually, this causes an uncontrollable state that results in failure. The overall behavior indicates that the sequence of actions is not achieving the required stabilization and controlled descent.\\n\\n### Strategic Errors Observed:\\n\\n1. **Angular Stabilization:** The current approach is not effectively countering angular velocity. It seems to set off a chain reaction, increasing angular velocity and position without ever stabilizing it.\\n2. **Descent Control:** The descent is not being managed properly. Even when the vertical velocity is high, it does not appropriately deploy both engines for stabilization, leading to a rapid descent.\\n3. **Horizo

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4
Número de instantes: 94. Tamaño de logs: 57
episode  1 score -701.602 avg score -7.544


2024-07-03 20:02:58,960 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_oJWBt4hwO9PwfGOyZ8H3CfAK/runs/run_2qhc7W7QKm0AONK1VBQJPRvk/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-03 20:02:58,963 - INFO - Compilación exitosa.
2024-07-03 20:02:58,965 - INFO - Status: requires_action
2024-07-03 20:02:59,185 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_oJWBt4hwO9PwfGOyZ8H3CfAK/runs/run_2qhc7W7QKm0AONK1VBQJPRvk "HTTP/1.1 200 OK"
2024-07-03 20:03:19,190 - INFO - Status: in_progress
2024-07-03 20:03:19,526 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_oJWBt4hwO9PwfGOyZ8H3CfAK/runs/run_2qhc7W7QKm0AONK1VBQJPRvk "HTTP/1.1 200 OK"
2024-07-03 20:03:39,530 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 1, "current status": [0.011, 1.42, 0.362, 0.104, -0.007, -

Tool call: [RequiredActionFunctionToolCall(id='call_xZ1xQNwPz3NcDpU0aLmhWRpf', function=Function(arguments='{"reasoning":"### Log Analysis Summary:\\n\\nThe function shows a consistent failure to maintain control over horizontal and vertical movements, resulting in high velocities and increasing angle deviation. Continuous use of both engines when vertical velocity is high doesn\'t adequately prioritize controlling horizontal drift and angular velocity.\\n\\n### Strategic Errors Observed:\\n\\n1. **Angular Stabilization:** The angular stabilization is not happening effectively. The craft fails to correct its angle early enough, leading to large deviations and increasing instability.\\n2. **Horizontal Control:** The craft is drifting more horizontally without corrective actions to bring it back to the center effectively.\\n3. **Descent Control:** The vertical thrusters are not engaged appropriately to manage both descent speed and horizontal drift. Vertical stabilization needs to be mor

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4
Número de instantes: 98. Tamaño de logs: 59
episode  1 score -94.877 avg score -0.978


2024-07-03 20:04:24,836 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_oJWBt4hwO9PwfGOyZ8H3CfAK/runs/run_vHPmA5PFtSNgSxI65tpycGtk/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-03 20:04:24,840 - INFO - Compilación exitosa.
2024-07-03 20:04:24,840 - INFO - Status: requires_action
2024-07-03 20:04:25,058 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_oJWBt4hwO9PwfGOyZ8H3CfAK/runs/run_vHPmA5PFtSNgSxI65tpycGtk "HTTP/1.1 200 OK"
2024-07-03 20:04:45,061 - INFO - Status: in_progress
2024-07-03 20:04:45,351 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_oJWBt4hwO9PwfGOyZ8H3CfAK/runs/run_vHPmA5PFtSNgSxI65tpycGtk "HTTP/1.1 200 OK"
2024-07-03 20:05:05,355 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 1, "current status": [0.011, 1.42, 0.362, 0.104, -0.007, -

Tool call: [RequiredActionFunctionToolCall(id='call_M5XhofXBrFTAM0WzW8ILYnlY', function=Function(arguments='{"reasoning":"### Log Analysis Summary:\\n\\nThe spacecraft\'s performance showed improvement in managing initial states but gradually lost control over vertical and horizontal velocities, despite appropriate angular stabilization. By the last few steps, the descent speed increased significantly, causing a sharp downward trajectory and leading to a crash.\\n\\n### Strategic Errors Observed:\\n\\n1. **Vertical Control:** The descent rate tends to become uncontrollable as the attempts progress. Both engines need to counter this proactively when descent speed begins accelerating past thresholds.\\n2. **Horizontal Drift:** While the angular stabilization was effective, horizontal drift controls need to be more dynamically adjusted relative to horizontal velocities.\\n3. **Dynamic Priority Rebalancing:** The function needs to weigh the urgency of stabilizing vertical descent against o

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_5.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_5.mp4\rl-video-episode-0.mp4



                                                                

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_5.mp4\rl-video-episode-0.mp4
Número de instantes: 846. Tamaño de logs: 433
episode  1 score -371.466 avg score -0.440


2024-07-03 20:05:53,779 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_oJWBt4hwO9PwfGOyZ8H3CfAK/runs/run_BKsENnhWSmT5dVK3Ea2WwbY9/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-03 20:05:53,783 - INFO - Compilación exitosa.
2024-07-03 20:05:53,784 - INFO - Status: requires_action
2024-07-03 20:05:54,046 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_oJWBt4hwO9PwfGOyZ8H3CfAK/runs/run_BKsENnhWSmT5dVK3Ea2WwbY9 "HTTP/1.1 200 OK"
2024-07-03 20:06:14,050 - INFO - Status: in_progress
2024-07-03 20:06:14,325 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_oJWBt4hwO9PwfGOyZ8H3CfAK/runs/run_BKsENnhWSmT5dVK3Ea2WwbY9 "HTTP/1.1 200 OK"
2024-07-03 20:06:34,330 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 1, "current status": [0.011, 1.42, 0.362, 0.104, -0.007, -

user: [TextContentBlock(text=Text(annotations=[], value='These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 1, "current status": [0.011, 1.42, 0.362, 0.104, -0.007, -0.011, 0.0, 0.0], "score": 1.282}, {"time": 2, "action": 1, "current status": [0.015, 1.421, 0.353, 0.077, -0.006, 0.026, 0.0, 0.0], "score": 1.473}, {"time": 3, "action": 1, "current status": [0.018, 1.423, 0.341, 0.051, -0.002, 0.072, 0.0, 0.0], "score": 1.809}, {"time": 4, "action": 1, "current status": [0.021, 1.423, 0.332, 0.024, 0.003, 0.11, 0.0, 0.0], "score": 1.038}, {"time": 5, "action": 1, "current status": [0.025, 1.423, 0.323, -0.003, 0.011, 0.146, 0.0, 0.0], "score": 0.221}, {"time": 6, "action": 1, "current status": [0.028, 1.422, 0.312, -0.03, 0.02, 0.189, 0.0, 0.0], "score": 0.039}, {"time": 7, "action": 1, "current status": [0.031, 1.421, 0.30

2024-07-03 20:06:35,754 - INFO - HTTP Request: DELETE https://api.openai.com/v1/assistants/asst_BEVYe7HSWmLAsK1s6KakV1M6 "HTTP/1.1 200 OK"
2024-07-03 20:06:35,978 - INFO - HTTP Request: DELETE https://api.openai.com/v1/threads/thread_oJWBt4hwO9PwfGOyZ8H3CfAK "HTTP/1.1 200 OK"
2024-07-03 20:06:35,981 - INFO - 
Ejecución finalizada.




Asistente vaciado.
Run(id='run_BKsENnhWSmT5dVK3Ea2WwbY9', assistant_id='asst_BEVYe7HSWmLAsK1s6KakV1M6', cancelled_at=None, completed_at=1720029954, created_at=1720029905, expires_at=None, failed_at=None, incomplete_details=None, instructions="Your task is: \n    1. Analyze and reason about the results received in the last landing attempts. This is the schema of the landing logs: {'description': 'Log data for each step of the spacecraft landing environment.', 'landing attempt': {'type': 'integer', 'description': 'The episode number.'}, 'logs': {'time': {'type': 'integer', 'description': 'The instant within the landing attempt where the current log is taken.'}, 'action': {'type': 'integer', 'description': 'The possible actions taken.', 'options': {'0': 'Switch off engines', '1': 'Push left engine', '2': 'Push both engines (upwards)', '3': 'Push right engine'}}, 'current status': {'type': 'array', 'description': 'The state of the environment after the action is taken.', 'positions': {'0':

#### Take a step back.
Abstraer primero el problema.

### Incluir supervisor encargado del stepback

In [14]:
def inicializar_agente(model, msg, description, instructions, name, tools):
    """ Inicializa un agente con un asistente y un hilo.
    
    Args:
        model: str: el modelo de lenguaje a utilizar
        msg: str: el mensaje inicial del hilo
        
    Returns:
        agente: AssistantOpenAI: el agente inicializado
        asistente: dict: el asistente creado
        hilo: dict: el hilo creado
    """
    
    agente = AssistantOpenAI(ARCLABKEY_OPENAI)
    # Crea un asistente
    asistente = agente.create_assistant(model=model, description=description, instructions=instructions, name=name, tools=tools)
    # Crea un hilo
    hilo = agente.create_thread()
    # Añade un mensaje inicial al hilo.
    msg = agente.add_message(hilo.id, role="user", content=msg)
    
    return agente, asistente, hilo


def run_message_assistant(agente, asistente, hilo, tool_choice="auto"):
    """ Ejecuta un mensaje en el asistente.
    
    Args:
        agente: AssistantOpenAI: el agente
        asistente: dict: el asistente
        hilo: dict: el hilo
        tool_choice: str: la elección de herramienta a utilizar
        
    Returns:
        ejecucion: dict: la ejecución del mensaje
        response: dict: la respuesta del asistente
    """
    # Ejecuta el hilo
    ejecucion = agente.run(hilo.id, asistente.id, tool_choice=tool_choice)
    response = agente.get_run(ejecucion.id, hilo.id)

            # Esperamos a que la respuesta esté lista.
    while response.status not in ["completed", "failed", "requires_action"]:
        logger.info(f"Status: {ejecucion.status}")
        response = agente.get_run(ejecucion.id, hilo.id)
        time.sleep(20)
    return ejecucion, response


In [14]:
import traceback, importlib
import Action

Action = importlib.reload(Action)

def create_and_run_llm_loop(Incial_msg, code_msg, logger, model="gpt-3.5-turbo", num_iterations=5):
    
    # Creamos el asistente.
    #agente, asistente, hilo = inicializar_agente(model, Incial_msg, DESCRIPTION, INSTRUCTIONS, NAME, TOOLS)
    agente = AssistantOpenAI(ARCLABKEY_OPENAI)
    # Crea un asistente
    asistente = agente.create_assistant(model=model, description=DESCRIPTION, instructions=INSTRUCTIONS, name=NAME, tools=TOOLS)
    # Crea un hilo
    hilo = agente.create_thread()
    # Añade un mensaje inicial al hilo.
    msg = agente.add_message(hilo.id, role="user", content=Incial_msg)
    
    # Primero ejecutamos el razonamiento de STEP BACK.
    #ejecucion, response = run_message_assistant(agent, asistente, hilo, tool_choice='none')
    # Ejecuta el hilo
    ejecucion = agente.run(hilo.id, asistente.id, tool_choice='none')
    response = agente.get_run(ejecucion.id, hilo.id)

            # Esperamos a que la respuesta esté lista.
    while response.status not in ["completed", "failed", "requires_action"]:
        logger.info(f"Status: {ejecucion.status}")
        response = agente.get_run(ejecucion.id, hilo.id)
        print("Waiting for response...")
        time.sleep(20)
    
    #print(response)
    msgs = agente.mostrar_mensajes(hilo.id)
    print(msgs[0].data[0].content[0].text.value)
    
    # Añadimos el mensaje de generación de código.
    msg = agente.add_message(hilo.id, role="user", content=code_msg)
    
    # Bucle de aprendizaje del asistente.
    for i in range(num_iterations):
        
        logger.info(f"Iteration: {i+1}")
        compiled = False

        # Si en la presente iteración no se ha compilado el código, se pide al asistente corregir los errores antes de iterar.
        while not compiled:
            # Ejecuta el hilo
            ejecucion = agente.run(hilo.id, asistente.id, tool_choice='required')
            response = agente.get_run(ejecucion.id, hilo.id)

            # Descomponemos los elementos de la respuesta.
            logger.info(f"Status: {response.status}")
            if response.status == "completed":
                return response
            print(response)
            tool_call = response.required_action.submit_tool_outputs.tool_calls
            print(f"Tool call: {tool_call}")

            # Convierte el string a un diccionario
            code_dict = json.loads(tool_call[0].function.arguments)
            logger.info(f"Arguments: {code_dict}")
            
            # Obtén el código Python de la llamada a la herramienta
            code = code_dict["code"]
            filename = code_dict["filename"]  

            logger.info(f"\nCodigo generado:\n{code}")

            # Ejecuta el código Python
            try:
                store_code_in_file(code, filename)
                time.sleep(1) # Pequeño retraso para que el sistema operativo pueda reflejar los cambios en el archivo
                
                # Hacemos commit de los cambios en el repositorio para analizar las modificaciones del agente.
                commit_changes(r"C:\Users\adria\Repos\TFM__LLM_landing_self-refinement", f"Stepback 4o. Iteración {i+1}.") 
                
                importlib.reload(Action) # Recargamos el módulo de acciones para que se actualice con las modificaciones del agente.

                # Ejecutamos el código generado.
                logs = lunar_lander(n_games=1, display=True, recoder=True, video_filename=f"video/iteration_{i+1}.mp4")
                
                # Devolvemos la respuesta al asistente.
                for call in tool_call:
                    agente.devolver_respuesta(response.id, hilo.id, tool_outputs=[{"tool_call_id": call.id, "output": "Run successful."}])            
                compiled = True
                logger.info(f"Compilación exitosa.")
                
                # Esperamos a que el agente esté listo para recibir mensajes y le añadimos el resultado de la iteración.
                while response.status not in ["completed", "failed", "expired"]:
                    logger.info(f"Status: {response.status}")
                    response = agente.get_run(response.id, hilo.id)
                    time.sleep(20)
                
                msg = f"""These are the logs generated by your last code: {logs}. Analyze the performance of the spacecraft and how it differs from the desired result. Be guided by the scoring system. Identify the cause of errors in your code and modify it without fear of making major changes. Think deeply about the priorities of your code and how to order and combine them correctly to achieve success."""
                logger.info(msg)    
                agente.add_message(hilo.id, role="user", content=msg)
            
            # Alimentamos el asistente con el error generado en la ejecución del código.    
            except Exception as e:
                logger.exception("Error: %s", e)
                error_trace = traceback.format_exc()
                for call in tool_call:
                    agente.devolver_respuesta(response.id, hilo.id, tool_outputs=[{"tool_call_id": call.id, "output": "ERROR."}]) 
                logger.error(f"Error: {e}.")
                while response.status not in ["completed", "failed", "expired"]:
                    logger.info(f"Status: {response.status}")
                    response = agente.get_run(ejecucion.id, hilo.id)
                    time.sleep(30)
                msg = f"The code generated has an error. Please, try again. Error: {e}. Trace: {error_trace}"   
                logger.error(msg)   
                agente.add_message(hilo.id, role="assistant", content=msg)
    
    agente.mostrar_mensajes(hilo.id)  
    vaciar_agente(agente)
    
    logger.info("\nEjecución finalizada.\n\n")       

    return response


def vaciar_agente(agente):
    for assistant in agente.assistants:
        agente.delete_assistant(assistant)

    for thread in agente.threads:
        agente.delete_thread(thread)
        
    print("Asistente vaciado.")


In [15]:
DESCRIPTION = "You are an expert spacecraft landing agent. Your specialty is to take the appropriate action at each instant of time based on the environment and state of the spacecraft. To achieve this, you put your knowledge base and the knowledge you acquire by analyzing each execution into a python function in charge of directing the landing."
INSTRUCTIONS = f"""To complete the task you must follow the following steps and indications:
1. Analyze and reason about the records received in the last landing attempts to learn how the spacecraft behaves in the environment. Keep in mind that conditions may vary.
2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:
    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).
    - Increased/decreased the slower/faster the spacecraft is moving.
    - Decreased the more the spacecraft is tilted.
    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engine.
    - Receive 100 points for a successful landing and lose them for crashing.
3. IMPORTANT. Use the following tips in your reasoning to achieve a successful landing:
    - First you have to stabilize the falling ship (speed, angle and location), keep falling under control and then land gently at the end.
    - Use all the elements of the vector of observations, all are relevant to make the right decision.
    - Pay close attention to successful events.
    - Environment and ship conditions may change but your code must be effective for all cases.
    - Find a balance in your landing policy to maximize your score (must exceed 200 points).
    - If you are rotating to the left (negative values), you should fire the left engine and vice versa. But always prioritizing a controlled speed with the main engine.
4. Add your decision code to the 'act' method and locate errors in it if the landing is unsuccessful.
5. Save the code of the act function in the file 'Action.py' using store_code_in_file function.
"""
# 6. Improve your results and correct also any programming error you may have generated in your last code if they exist.
NAME = "Spacecraft Landing Decision coder"


initial_msg = f"This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}. Take a deep breath and reason step-by-step.What does each value and each change in the observations mean? What effects does each action have on the current state observations? What are the steps to follow to achieve a successful landing? Do not implement any code."
code_msg = f"After the reasoning with the processes behind the decision making, code very carefully when structuring your code, keep in mind that certain actions can be left with starvation. Save the code of the act function in the file 'Action.py' using store_code_in_file function tool, function calling is required."



In [16]:
logger = configura_log('Spacecraft_4o_Stepback.log')
response = create_and_run_llm_loop(initial_msg, code_msg, logger, model="gpt-4o", num_iterations=5)

print(response)

2024-07-01 20:24:30,902 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07-01 20:24:31,107 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-07-01 20:24:31,353 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_Z2XFcAqd7BpvTGXwLKXUdXLi/messages "HTTP/1.1 200 OK"
2024-07-01 20:24:31,917 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_Z2XFcAqd7BpvTGXwLKXUdXLi/runs "HTTP/1.1 200 OK"
2024-07-01 20:24:32,130 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_Z2XFcAqd7BpvTGXwLKXUdXLi/runs/run_rBlWNoPJc2HIQNThxrjQvaiT "HTTP/1.1 200 OK"
2024-07-01 20:24:32,132 - INFO - Status: queued
2024-07-01 20:24:32,336 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_Z2XFcAqd7BpvTGXwLKXUdXLi/runs/run_rBlWNoPJc2HIQNThxrjQvaiT "HTTP/1.1 200 OK"


Waiting for response...


In [None]:
agente = AssistantOpenAI(ARCLABKEY_OPENAI)

# Crea un asistente
asistente = agente.create_assistant(model="gpt-3.5-turbo", description=DESCRIPTION, instructions=INSTRUCTIONS, name=NAME, tools=TOOLS)
    
# Crea un hilo
hilo = agente.create_thread()

initial_msg = f"This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}. Before coding take a deep breath and reason step-by-step.What does each value and each change in the observations mean? What effects does each action have on the current state observations? What are the steps to follow to achieve a successful landing?"
    
# Añade un mensaje inicial al hilo.
msg = agente.add_message(hilo.id, role="user", content=initial_msg)


# Ejecuta el hilo
ejecucion = agente.run(hilo.id, asistente.id, tool_choice='none')
response = agente.get_run(ejecucion.id, hilo.id)

# Esperamos a que la respuesta esté lista.
while response.status not in ["completed", "failed", "requires_action"]:
    #logger.info(f"Status: {ejecucion.status}")
    print("Waiting for response...")
    response = agente.get_run(ejecucion.id, hilo.id)
    time.sleep(20)
print(response)


agente.mostrar_mensajes(hilo.id)

Waiting for response...
Waiting for response...
Run(id='run_5ip2qIKcf1KIU1L1XCyvPVyK', assistant_id='asst_qqONwHALF6YCJlMyXttFXsnu', cancelled_at=None, completed_at=1719491913, created_at=1719491908, expires_at=None, failed_at=None, incomplete_details=None, instructions="To complete the task you must follow the following steps and indications:\n1. Analyze and reason about the records received in the last landing attempts to learn how the spacecraft behaves in the environment. Keep in mind that conditions may vary.\n2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:\n    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).\n    - Increased/decreased the slower/faster the spacecraft is moving.\n    - Decreased the more the spacecraft is tilted.\n    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engin

(SyncCursorPage[Message](data=[Message(id='msg_4CYNsLrcWXj8sEnvIm9EyV4G', assistant_id='asst_qqONwHALF6YCJlMyXttFXsnu', attachments=[], completed_at=None, content=[TextContentBlock(text=Text(annotations=[], value="Based on the successful landing record and the scoring system provided, we can infer some strategies to achieve a successful landing with a high score:\n\n1. Initially, the spacecraft should start with a gentle descent while maintaining a controlled speed. This helps in stabilizing the falling ship.\n2. As the spacecraft approaches the landing area, it should decrease the altitude gradually and adjust the horizontal position to align with the landing location.\n3. The spacecraft should minimize lateral movement and keep the speed under control to ensure a safe landing.\n4. Utilize the main engine for controlled descent and adjust the tilt angle to reduce lateral velocity.\n5. When close to the landing area, reduce the speed further and ensure the spacecraft is aligned properl

In [19]:
msgs, ids = agente.mostrar_mensajes(hilo.id)
print(msgs)
print(msgs.data[0].content[0].text.value)

assistant: [TextContentBlock(text=Text(annotations=[], value="Based on the successful landing record and the scoring system provided, we can infer some strategies to achieve a successful landing with a high score:\n\n1. Initially, the spacecraft should start with a gentle descent while maintaining a controlled speed. This helps in stabilizing the falling ship.\n2. As the spacecraft approaches the landing area, it should decrease the altitude gradually and adjust the horizontal position to align with the landing location.\n3. The spacecraft should minimize lateral movement and keep the speed under control to ensure a safe landing.\n4. Utilize the main engine for controlled descent and adjust the tilt angle to reduce lateral velocity.\n5. When close to the landing area, reduce the speed further and ensure the spacecraft is aligned properly for a gentle landing.\n6. Avoid unnecessary firing of the side engines as they decrease the overall score. Only use them for small adjustments if nece

TypeError: 'SyncCursorPage[Message]' object is not subscriptable

### Dos agentes

In [15]:
import traceback, importlib
import Action

Action = importlib.reload(Action)

def create_and_run_llm_loop(Incial_msg, code_msg, logger, model="gpt-3.5-turbo", num_iterations=5):
    
    # Creamos el asistente.
    #agente, asistente, hilo = inicializar_agente(model, Incial_msg, DESCRIPTION, INSTRUCTIONS, NAME, TOOLS)
    agente = AssistantOpenAI(ARCLABKEY_OPENAI)
    # Crea un asistente supervisor y codificador.
    codificador = agente.create_assistant(model=model, description=DESCRIPTION, instructions=INSTRUCTIONS, name=NAME, tools=TOOLS)
    supervisor = agente.create_assistant(model=model, description=DESCRIPTION_SUPERVISOR, instructions=INSTRUCTIONS_SUPERVISOR, name=NAME_SUPERVISOR)
    # Crea un hilo
    hilo_supervisor = agente.create_thread()
    hilo_codificador = agente.create_thread()
    
    # Añade un mensaje inicial al hilo.
    msg = agente.add_message(hilo_supervisor.id, role="user", content=Incial_msg)
    
    # Primero ejecutamos el razonamiento de STEP BACK.
    #ejecucion, response = run_message_assistant(agent, asistente, hilo, tool_choice='none')
    # Ejecuta el hilo
    ejecucion = agente.run(hilo_supervisor.id, supervisor.id, tool_choice='none')
    response = agente.get_run(ejecucion.id, hilo_supervisor.id)

            # Esperamos a que la respuesta esté lista.
    while response.status not in ["completed", "failed", "requires_action"]:
        logger.info(f"Status: {ejecucion.status}")
        response = agente.get_run(ejecucion.id, hilo_supervisor.id)
        print("Waiting for response...")
        time.sleep(20)
    
    #print(response)
    msgs = agente.mostrar_mensajes(hilo_supervisor.id)
    logger.info(msgs[0].data[0].content[0].text.value)
    
    # Añadimos el mensaje de generación de código.
    msg = code_msg +  "\nSupervisor feedback: " + msgs[0].data[0].content[0].text.value
    print(msg)
    msg = agente.add_message(hilo_codificador.id, role="user", content=msg)
    
    # Bucle de aprendizaje del asistente.
    for i in range(num_iterations):
        
        logger.info(f"Iteration: {i+1}")
        compiled = False

        # Si en la presente iteración no se ha compilado el código, se pide al asistente corregir los errores antes de iterar.
        while not compiled:
            # Ejecuta el hilo
            ejecucion = agente.run(hilo_codificador.id, codificador.id)
            response = agente.get_run(ejecucion.id, hilo_codificador.id)

            # Descomponemos los elementos de la respuesta.
            logger.info(f"Status: {response.status}")
            if response.status == "completed":
                return response
            print(response)
            tool_call = response.required_action.submit_tool_outputs.tool_calls
            print(f"Tool call: {tool_call}")

            # Convierte el string a un diccionario
            code_dict = json.loads(tool_call[0].function.arguments)
            logger.info(f"Arguments: {code_dict}")
            
            # Obtén el código Python de la llamada a la herramienta
            code = code_dict["code"]
            filename = code_dict["filename"]  

            logger.info(f"\nCodigo generado:\n{code}")

            # Ejecuta el código Python
            try:
                store_code_in_file(code, filename)
                time.sleep(1) # Pequeño retraso para que el sistema operativo pueda reflejar los cambios en el archivo
                
                # Hacemos commit de los cambios en el repositorio para analizar las modificaciones del agente.
                commit_changes(r"C:\Users\adria\Repos\TFM__LLM_landing_self-refinement", f"Stepback 4o. Iteración {i+1}.") 
                
                importlib.reload(Action) # Recargamos el módulo de acciones para que se actualice con las modificaciones del agente.

                # Ejecutamos el código generado.
                logs = lunar_lander(n_games=1, display=True, recoder=True, video_filename=f"video/iteration_{i+1}.mp4")
                
                # Devolvemos la respuesta al asistente.
                for call in tool_call:
                    agente.devolver_respuesta(response.id, hilo_codificador.id, tool_outputs=[{"tool_call_id": call.id, "output": "Run successful."}])            
                compiled = True
                logger.info(f"Compilación exitosa.")
                
                # Esperamos a que el agente esté listo para recibir mensajes y le añadimos el resultado de la iteración.
                while response.status not in ["completed", "failed", "expired"]:
                    logger.info(f"Status: {response.status}")
                    response = agente.get_run(response.id, hilo_codificador.id)
                    time.sleep(20)
                
                msg = f"""These are the logs generated by your last code: {logs}. Analyze the performance of the spacecraft and how it differs from the desired result. Be guided by the scoring system. Identify the cause of errors in your code and modify it without fear of making major changes. Think deeply about the priorities of your code and how to order and combine them correctly to achieve success."""
                logger.info(msg)    
                agente.add_message(hilo_codificador.id, role="user", content=msg)
            
            # Alimentamos el asistente con el error generado en la ejecución del código.    
            except Exception as e:
                logger.exception("Error: %s", e)
                error_trace = traceback.format_exc()
                for call in tool_call:
                    agente.devolver_respuesta(response.id, hilo_codificador.id, tool_outputs=[{"tool_call_id": call.id, "output": "ERROR."}]) 
                logger.error(f"Error: {e}.")
                while response.status not in ["completed", "failed", "expired"]:
                    logger.info(f"Status: {response.status}")
                    response = agente.get_run(ejecucion.id, hilo_codificador.id)
                    time.sleep(30)
                msg = f"The code generated has an error. Please, try again. Error: {e}. Trace: {error_trace}"   
                logger.error(msg)   
                agente.add_message(hilo_codificador.id, role="assistant", content=msg)
    
    agente.mostrar_mensajes(hilo_codificador.id)  
    vaciar_agente(agente)
    
    logger.info("\nEjecución finalizada.\n\n")       

    return response


def vaciar_agente(agente):
    for assistant in agente.assistants:
        agente.delete_assistant(assistant)

    for thread in agente.threads:
        agente.delete_thread(thread)
        
    print("Asistente vaciado.")


In [16]:
DESCRIPTION = "You are an expert agent in spacecraft landing and decision making codificiation. Based on the landing records received and the indications received by a supervising agent, you must generate the code to land the spacecraft in the environment whatever the initial conditions are."
DESCRIPTION_SUPERVISOR = "You are an expert agent in spacecraft landing environments. Your mission is to analyze landing logs and the code generated by the agent specialized in coding, to guide him in improving the performance of his decision making code."
INSTRUCTIONS = f"""To complete the task you must follow the following steps and indications:
1. Analyze and reason about the records received in the last landing attempts to learn how the spacecraft behaves in the environment. Keep in mind that conditions may vary.
2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:
    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).
    - Increased/decreased the slower/faster the spacecraft is moving.
    - Decreased the more the spacecraft is tilted.
    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engine.
    - Receive 100 points for a successful landing and lose them for crashing.
    - A succesful landing must exceed 200 points.
3. This is the log schema:\n {log_description}
4. Add your decision code to the 'act' method and locate errors in it if the landing is unsuccessful.
5. Save the code of the act function in the file 'Action.py' using store_code_in_file function.
"""
INSTRUCTIONS_SUPERVISOR = f"""To complete the task you must follow the following steps and indications:
1. Revisa aterrizajes exitosos y fracasados para analizar el efecto de las acciones en cada una de las observaciones y decidir que decisión tomar en cada momento.
2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:
    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).
    - Increased/decreased the slower/faster the spacecraft is moving.
    - Decreased the more the spacecraft is tilted.
    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engine.
    - Receive 100 points for a successful landing and lose them for crashing.
    - A succesful landing must exceed 200 points.
3. This is the log schema:\n {log_description}
4. All the knowledge obtained must be expressed in the best possible way to the coding LLM agent so that it can improve the performance of the generated decision making code.
"""
# 6. Improve your results and correct also any programming error you may have generated in your last code if they exist.
NAME = "Spacecraft Landing Decision coder"
NAME_SUPERVISOR = "Spacecraft Landing Supervisor"


supervisor_msg = f"This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}. Take a deep breath and reason step-by-step.What does each value and each change in the observations mean? What effects does each action have on the current state observations? What are the steps to follow to achieve a successful landing? Do not implement any code."
code_msg = f"This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}. After the reasoning with the processes behind the decision making, code very carefully when structuring your code, keep in mind that certain actions can be left with starvation. Save the code of the act function in the file 'Action.py' using store_code_in_file function tool, function calling is required."



In [17]:
logger = configura_log('Spacecraft_4o_Stepback.log')
response = create_and_run_llm_loop(supervisor_msg, code_msg, logger, model="gpt-4o", num_iterations=5)

print(response)

2024-07-03 20:13:38,787 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07-03 20:13:39,055 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07-03 20:13:39,320 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-07-03 20:13:39,541 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-07-03 20:13:39,833 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_PViG42Y34iK8TOuGG4SDc7qu/messages "HTTP/1.1 200 OK"
2024-07-03 20:13:40,506 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_PViG42Y34iK8TOuGG4SDc7qu/runs "HTTP/1.1 200 OK"
2024-07-03 20:13:40,738 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_PViG42Y34iK8TOuGG4SDc7qu/runs/run_EMBnqChalCCiYeU88LPwgzd3 "HTTP/1.1 200 OK"
2024-07-03 20:13:40,740 - INFO - Status: queued
2024-07-03 20:13:40,971 - INFO - HTTP Request: GET https://api.openai.com/v

Waiting for response...


2024-07-03 20:14:00,973 - INFO - Status: queued
2024-07-03 20:14:01,271 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_PViG42Y34iK8TOuGG4SDc7qu/runs/run_EMBnqChalCCiYeU88LPwgzd3 "HTTP/1.1 200 OK"


Waiting for response...


2024-07-03 20:14:21,576 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_PViG42Y34iK8TOuGG4SDc7qu/messages "HTTP/1.1 200 OK"
2024-07-03 20:14:21,818 - INFO - To achieve a successful landing in the spacecraft landing environment, it's essential to understand each observation's values and the effects of each action. Here's a detailed breakdown:

### Observation Values Explained:

1. **X Position:** 
   - The horizontal position of the spacecraft relative to the landing zone.
   - Range: [-1.5, 1.5]

2. **Y Position:**
   - The vertical position of the spacecraft.
   - Range: [-1.5, 1.5]

3. **X Velocity:**
   - The horizontal velocity of the spacecraft.
   - Range: [-5.0, 5.0]

4. **Y Velocity:**
   - The vertical velocity of the spacecraft.
   - Range: [-5.0, 5.0]

5. **Angle:**
   - The angle of the spacecraft relative to vertical.
   - Range: [-3.14, 3.14] (in radians, negative for left, positive for right)

6. **Angular Velocity:**
   - The rate of change of the an

assistant: [TextContentBlock(text=Text(annotations=[], value="To achieve a successful landing in the spacecraft landing environment, it's essential to understand each observation's values and the effects of each action. Here's a detailed breakdown:\n\n### Observation Values Explained:\n\n1. **X Position:** \n   - The horizontal position of the spacecraft relative to the landing zone.\n   - Range: [-1.5, 1.5]\n\n2. **Y Position:**\n   - The vertical position of the spacecraft.\n   - Range: [-1.5, 1.5]\n\n3. **X Velocity:**\n   - The horizontal velocity of the spacecraft.\n   - Range: [-5.0, 5.0]\n\n4. **Y Velocity:**\n   - The vertical velocity of the spacecraft.\n   - Range: [-5.0, 5.0]\n\n5. **Angle:**\n   - The angle of the spacecraft relative to vertical.\n   - Range: [-3.14, 3.14] (in radians, negative for left, positive for right)\n\n6. **Angular Velocity:**\n   - The rate of change of the angle.\n   - Range: [-5.0, 5.0]\n\n7. **Left Contact Sensor:**\n   - Indicates whether the l

2024-07-03 20:14:22,079 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_zTBYfsYvK9fZkvfeXS0HuSxj/messages "HTTP/1.1 200 OK"
2024-07-03 20:14:22,331 - INFO - Iteration: 1
2024-07-03 20:14:22,783 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_zTBYfsYvK9fZkvfeXS0HuSxj/runs "HTTP/1.1 200 OK"
2024-07-03 20:14:22,997 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_zTBYfsYvK9fZkvfeXS0HuSxj/runs/run_PZ9Ujmr3s8AmLYHK1k13iWrQ "HTTP/1.1 200 OK"
2024-07-03 20:14:23,000 - INFO - Status: in_progress


Run(id='run_PZ9Ujmr3s8AmLYHK1k13iWrQ', assistant_id='asst_HjIUSni3KRFfV0KDx36Hcajr', cancelled_at=None, completed_at=None, created_at=1720030462, expires_at=1720031062, failed_at=None, incomplete_details=None, instructions="To complete the task you must follow the following steps and indications:\n1. Analyze and reason about the records received in the last landing attempts to learn how the spacecraft behaves in the environment. Keep in mind that conditions may vary.\n2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:\n    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).\n    - Increased/decreased the slower/faster the spacecraft is moving.\n    - Decreased the more the spacecraft is tilted.\n    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engine.\n    - Receive 100 points for a successful la

AttributeError: 'NoneType' object has no attribute 'submit_tool_outputs'

### JSON MODE

In [15]:
import traceback, importlib
import Action

Action = importlib.reload(Action)

def create_and_run_llm_loop(Incial_msg, logger, model="gpt-3.5-turbo", num_iterations=10):
    
    agente = AssistantOpenAI(ARCLABKEY_OPENAI)

    # Crea un asistente
    asistente = agente.create_assistant_json_mode(model=model, description=DESCRIPTION, instructions=INSTRUCTIONS, name=NAME)
    
    # Crea un hilo
    hilo = agente.create_thread()
    
    # Añade un mensaje inicial al hilo.
    msg = agente.add_message(hilo.id, role="user", content=Incial_msg)

    # Bucle de aprendizaje del asistente.
    for i in range(num_iterations):
        
        logger.info(f"Iteration: {i+1}")
        compiled = False

        # Si en la presente iteración no se ha compilado el código, se pide al asistente corregir los errores antes de iterar.
        while not compiled:
            # Ejecuta el hilo
            ejecucion = agente.run(hilo.id, asistente.id)
            response = agente.get_run(ejecucion.id, hilo.id)

            # Esperamos a que la respuesta esté lista.
            while response.status not in ["completed", "failed", "requires_action"]:
                logger.info(f"Status: {ejecucion.status}")
                response = agente.get_run(ejecucion.id, hilo.id)
                time.sleep(10)
            
            print(response.response_format)
            msgs = agente.mostrar_mensajes(hilo.id)
            msg = msgs[0].data[0].content[0].text.value
            logger.info(msg)

            # # Descomponemos los elementos de la respuesta.
            # logger.info(f"Status: {response.status}")
            # if response.status == "completed":
            #     return response
            # tool_call = response.required_action.submit_tool_outputs.tool_calls
            # print(f"Tool call: {tool_call}")

            # Convierte el string a un diccionario
            code_dict = json.loads(msg)
            logger.info(f"Arguments: {code_dict}")
            
            # Obtén el código Python de la llamada a la herramienta
            code = code_dict["code"]
            filename = code_dict["filename"]  
            reasoning = code_dict["reasoning"]

            logger.info(f"\nRazonamiento:\n{reasoning}")
            logger.info(f"\nCodigo generado:\n{code}")

            # Ejecuta el código Python
            try:
                store_code_in_file(reasoning, code, filename)
                time.sleep(1) # Pequeño retraso para que el sistema operativo pueda reflejar los cambios en el archivo
                
                # Hacemos commit de los cambios en el repositorio para analizar las modificaciones del agente.
                commit_changes(r"C:\Users\adria\Repos\TFM__LLM_landing_self-refinement", f"4o JSONMODE. Iteración {i+1}.") 
                
                importlib.reload(Action) # Recargamos el módulo de acciones para que se actualice con las modificaciones del agente.

                # Ejecutamos el código generado.
                logs = lunar_lander(n_games=1, display=True, recoder=True, video_filename=f"video/iteration_{i+1}.mp4")
                
                # # Devolvemos la respuesta al asistente.
                # for call in tool_call:
                #     agente.devolver_respuesta(response.id, hilo.id, tool_outputs=[{"tool_call_id": call.id, "output": "Run successful."}])            
                compiled = True
                logger.info(f"Compilación exitosa.")
                
                # Esperamos a que el agente esté listo para recibir mensajes y le añadimos el resultado de la iteración.
                # while response.status not in ["completed", "failed", "expired"]:
                #     logger.info(f"Status: {response.status}")
                #     response = agente.get_run(response.id, hilo.id)
                #     time.sleep(20)
                
                msg = f"""These are the logs generated by your last code: {logs}. Analyze the performance of the spacecraft and how it differs from the desired result. Be guided by the scoring system. Identify the cause of errors in your code and modify it without fear of making major changes. Think deeply about the priorities of your code and how to order and combine them correctly to achieve success. Remember that the code is a reflection of your reasoning and the way you approach the problem."""
                logger.info(msg)    
                agente.add_message(hilo.id, role="user", content=msg)
            
            # Alimentamos el asistente con el error generado en la ejecución del código.    
            except Exception as e:
                logger.exception("Error: %s", e)
                error_trace = traceback.format_exc()
                # for call in tool_call:
                #     agente.devolver_respuesta(response.id, hilo.id, tool_outputs=[{"tool_call_id": call.id, "output": "ERROR."}]) 
                # logger.error(f"Error: {e}.")
                while response.status not in ["completed", "failed", "expired"]:
                    logger.info(f"Status: {response.status}")
                    response = agente.get_run(ejecucion.id, hilo.id)
                    time.sleep(30)
                msg = f"The code generated has an error. Please, try again. Error: {e}. Trace: {error_trace}"   
                logger.error(msg)   
                agente.add_message(hilo.id, role="assistant", content=msg)
    
    agente.mostrar_mensajes(hilo.id)  
    vaciar_agente(agente)
    
    logger.info("\nEjecución finalizada.\n\n")       

    return response


def vaciar_agente(agente):
    for assistant in agente.assistants:
        agente.delete_assistant(assistant)

    for thread in agente.threads:
        agente.delete_thread(thread)
        
    print("Asistente vaciado.")

In [21]:
DESCRIPTION = "You are an expert spacecraft landing agent. Your specialty is to take the appropriate action at each instant of time based on the environment and state of the spacecraft. To achieve this, you put your knowledge base and the knowledge you acquire by analyzing each execution into a python function in charge of directing the landing."
INSTRUCTIONS = f"""To complete the task you must follow the following steps and indications:
1. Analyze and reason about the records received in the last landing attempts to learn how the spacecraft behaves in the environment. Keep in mind that conditions may vary.
2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:
    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).
    - Increased/decreased the slower/faster the spacecraft is moving.
    - Decreased the more the spacecraft is tilted.
    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engine.
    - Receive 100 points for a successful landing and lose them for crashing.
    - A succesful landing must exceed 200 points.
3. This is the log schema:\n {log_description}
4. Add your decision code to the 'act' method and locate errors in it if the landing is unsuccessful.
5. Provide output in valid JSON format. The data schema should be as follows:  {json.dumps(json_shcema_response)}.
6. Generate the code of the act function in the file 'Action.py'.
"""

# 6. Improve your results and correct also any programming error you may have generated in your last code if they exist.
NAME = "Spacecraft Landing Master"

initial_msg = "This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}. What does each value and each change in the observations mean? What effects does each action have on the current state observations? What are the steps to follow to achieve a successful landing?. Take a deep breath and reason step-by-step. After reasoning analyze the results, learn and make better code."

logger = configura_log('Spacecraft_4o_JsonMode.log')
response = create_and_run_llm_loop(initial_msg, logger, model="gpt-4o", num_iterations=5)

print(response)

NameError: name 'json_shcema_response' is not defined

### Genetico

#### JSON Mode

In [16]:
def init_population(agente, model, initial_msg, logger, mode='json', N=5):
    dic_population = {}
    for i in range(N):
        if mode == 'json':
            asistente = agente.create_assistant_json_mode(model=model, description=DESCRIPTION, instructions=INSTRUCTIONS, name=NAME)
        else:
            asistente = agente.create_assistant(model=model, description=DESCRIPTION, instructions=INSTRUCTIONS, name=NAME, tools=TOOLS)
        asistente = agente.create_assistant_json_mode(model=model, description=DESCRIPTION, instructions=INSTRUCTIONS, name=NAME)
        hilo = agente.create_thread()
        msg = agente.add_message(hilo.id, role="user", content=initial_msg)
        logger.info(f"Asistente {i+1} creado. ID: {asistente.id}")
        dic_population[asistente.id] = (hilo, msg)
        
    return dic_population    

In [17]:
import traceback, importlib
import Action

Action = importlib.reload(Action)

def create_and_run_llm_loop(Incial_msg, logger, model="gpt-3.5-turbo", num_iterations=10):
    
    agente = AssistantOpenAI(ARCLABKEY_OPENAI)

    # Crea un asistente
    dic_population = init_population(agente, model, Incial_msg, logger, N=3)

    # Bucle de aprendizaje del asistente.
    for i in range(num_iterations):
        
        logger.info(f"Iteration: {i+1}")
        compiled = False
        resultados = [1, float("-inf")]
        # Ejecuta los asistentes al mismo tiempo.
        for asistente_id in dic_population.keys():
            logger.info(f"Asistente: {asistente_id}")
            hilo, msg = dic_population[asistente_id]
            asistente = agente.get_assistant(asistente_id)
            compiled = False
            
            # Si en la presente iteración no se ha compilado el código, se pide al asistente corregir los errores antes de iterar.
            while not compiled:
                # Ejecuta el hilo
                ejecucion = agente.run(hilo.id, asistente.id)
                response = agente.get_run(ejecucion.id, hilo.id)

                # Esperamos a que la respuesta esté lista.
                while response.status not in ["completed", "failed", "requires_action"]:
                    logger.info(f"Status: {ejecucion.status}")
                    response = agente.get_run(ejecucion.id, hilo.id)
                    time.sleep(10)
                
                msgs = agente.mostrar_mensajes(hilo.id)
                msg = msgs[0].data[0].content[0].text.value
                #logger.info(msg)

                # Convierte el string a un diccionario
                code_dict = json.loads(msg)
                #logger.info(f"Arguments: {code_dict}")
                
                # Obtén el código Python de la llamada a la herramienta
                code = code_dict["code"]
                filename = code_dict["filename"]  
                reasoning = code_dict["reasoning"]

                logger.info(f"\nRazonamiento:\n{reasoning}")
                logger.info(f"\nCodigo generado:\n{code}")

                # Ejecuta el código Python
                try:
                    store_code_in_file(reasoning, code, filename)
                    time.sleep(1) # Pequeño retraso para que el sistema operativo pueda reflejar los cambios en el archivo
                    
                    # Hacemos commit de los cambios en el repositorio para analizar las modificaciones del agente.
                    commit_changes(r"C:\Users\adria\Repos\TFM__LLM_landing_self-refinement", f"4o genetic JSON
                                   . Iteración {i+1}.") 
                    
                    importlib.reload(Action) # Recargamos el módulo de acciones para que se actualice con las modificaciones del agente.

                    # Ejecutamos el código generado.
                    logs = lunar_lander(n_games=1, display=True, recoder=True, video_filename=f"video/Asitente_{asistente_id}/iteration_{i+1}.mp4")
                    json_logs = json.loads(logs)
                    
                    if float(json_logs['total score']) > resultados[1]:
                        resultados = [asistente_id, float(json_logs["total score"]), code]
                    resultados.append(json_logs["total score"])
                             
                    compiled = True
                    logger.info(f"Compilación exitosa.")
                    
                    msg = f"""These are the logs generated by your last code: {logs}. Analyze the performance of the spacecraft and how it differs from the desired result. Be guided by the scoring system. Identify the cause of errors in your code and modify it without fear of making major changes. Think deeply about the priorities of your code and how to order and combine them correctly to achieve success. Remember that the code is a reflection of your reasoning and the way you approach the problem."""
                    logger.info(msg)    
                    agente.add_message(hilo.id, role="user", content=msg)
                
                # Alimentamos el asistente con el error generado en la ejecución del código.    
                except Exception as e:
                    logger.exception("Error: %s", e)
                    error_trace = traceback.format_exc()

                    while response.status not in ["completed", "failed", "expired"]:
                        logger.info(f"Status: {response.status}")
                        response = agente.get_run(ejecucion.id, hilo.id)
                        time.sleep(30)
                    msg = f"The code generated has an error. Please, try again. Error: {e}. Trace: {error_trace}"   
                    logger.error(msg)   
                    agente.add_message(hilo.id, role="assistant", content=msg)
        logger.info(f"Mejor resultado: Asistentente {resultados[0]} con puntuación {resultados[1]}")
        # Después de determinar el asistente con la máxima puntuación y antes de finalizar el bucle de iteraciones
        for asistente_id in dic_population.keys():
            if asistente_id != resultados[0]:  # Verifica que no sea el asistente con la máxima puntuación
                hilo, _ = dic_population[asistente_id]
                # Aquí puedes añadir el resultado al asistente que no tiene la máxima puntuación
                # Por ejemplo, suponiendo que tienes una función para añadir mensajes o resultados:
                msg_resultado = f"The best result for the population was {resultados[1]} achieved with the following code: {resultados[2]}. \nCompare it with yours and draw conclusions. Re-generate the code step by step and think deeply to improve both results."
                agente.add_message(hilo.id, role="user", content=msg_resultado)
        
    
    agente.mostrar_mensajes(hilo.id)  
    vaciar_agente(agente)
    
    logger.info("\nEjecución finalizada.\n\n")       

    return response


def vaciar_agente(agente):
    for assistant in agente.assistants:
        agente.delete_assistant(assistant)

    for thread in agente.threads:
        agente.delete_thread(thread)
        
    print("Asistente vaciado.")

In [18]:
DESCRIPTION = "You are an expert spacecraft landing agent. Your specialty is to take the appropriate action at each instant of time based on the environment and state of the spacecraft. To achieve this, you put your knowledge base and the knowledge you acquire by analyzing each execution into a python function in charge of directing the landing."
INSTRUCTIONS = f"""To complete the task you must follow the following steps and indications:
1. Analyze and reason about the records received in the last landing attempts to learn how the spacecraft behaves in the environment. Keep in mind that conditions may vary.
2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:
    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).
    - Increased/decreased the slower/faster the spacecraft is moving.
    - Decreased the more the spacecraft is tilted.
    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engine.
    - Receive 100 points for a successful landing and lose them for crashing.
    - A succesful landing must exceed 200 points.
3. This is the log schema:\n {log_description}
4. Add your decision code to the 'act' method and locate errors in it if the landing is unsuccessful.
5. Provide output in valid JSON format. The data schema should be as follows:  {json.dumps(json_shcema_response)}.
6. Generate the code of the act function in the file 'Action.py'.
"""

# 6. Improve your results and correct also any programming error you may have generated in your last code if they exist.
NAME = "Spacecraft Landing Master"

initial_msg = f"This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}. What does each value and each change in the observations mean? What effects does each action have on the current state observations? What are the steps to follow to achieve a successful landing?. Take a deep breath and reason step-by-step. After reasoning analyze the results, learn and make better code."

logger = configura_log('Spacecraft_4o_genetic.log')
response = create_and_run_llm_loop(initial_msg, logger, model="gpt-4o", num_iterations=5)

print(response)

2024-07-03 12:41:46,651 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07-03 12:41:46,954 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-07-03 12:41:47,202 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_6lXS5bEJWx9DfAyfgXLrgrLA/messages "HTTP/1.1 200 OK"
2024-07-03 12:41:47,400 - INFO - Asistente 1 creado. ID: asst_g80WvbiVksILFWSWqlrjR0oD
2024-07-03 12:41:47,675 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07-03 12:41:47,883 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-07-03 12:41:48,182 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_OyFKkBI3jwxZSukSkfxlteFr/messages "HTTP/1.1 200 OK"
2024-07-03 12:41:48,392 - INFO - Asistente 2 creado. ID: asst_1qm0ClDWY6sldqhJdjf5xafQ
2024-07-03 12:41:48,693 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07

assistant: [TextContentBlock(text=Text(annotations=[], value='{"reasoning": "To achieve a successful landing, the spacecraft must be guided to a safe descent. The successful landing example provided sheds light on the relationship between actions and observations: We can deduce the following influences of actions on state parameters: \\n- Action **0**: "  , \n "code":  "import numpy as np\\n\\n# Constants\\nLEFT_ENGINE_COST = 0.03\\nCENTER_ENGINE_COST = 0.3\\nRIGHT_ENGINE_COST = 0.03\\n\\n# Indices for observation for clarity\\nX_POS = 0\\nY_POS = 1\\nX_VEL = 2\\nY_VEL = 3\\nANGLE = 4\\nANG_VEL = 5\\nLEFT_CONTACT = 6\\nRIGHT_CONTACT = 7\\n\\n\\ndef act(observation):\\n    \'\'\'\\n    The function that codifies the action to be taken in each instant of time.\\n\\n    Args:\\n        observation (numpy.array):\\n            Description: The state of the environment after the action is taken.\\n            Positions: {  \\n                0: X position,\\n                1: Y position,\\

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4



2024-07-03 12:42:16,952 - INFO - Compilación exitosa.         
2024-07-03 12:42:16,953 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 2, "current status": [0.007, 1.418, 0.377, 0.172, -0.009, -0.089, 0.0, 0.0], "score": -1.45}, {"time": 1, "action": 2, "current status": [0.011, 1.422, 0.389, 0.179, -0.013, -0.077, 0.0, 0.0], "score": -2.465}, {"time": 2, "action": 2, "current status": [0.015, 1.427, 0.395, 0.2, -0.016, -0.072, 0.0, 0.0], "score": -2.498}, {"time": 3, "action": 2, "current status": [0.019, 1.432, 0.385, 0.226, -0.02, -0.083, 0.0, 0.0], "score": -1.675}, {"time": 4, "action": 2, "current status": [0.023, 1.437, 0.389, 0.229, -0.024, -0.08, 0.0, 0.0], "score": -1.691}, {"time": 5, "action": 2, "current status": [0.027, 1.443, 0.398, 0.251, -0.028, -0.074, 0.0, 0.0], "score": -3.085}, {"time": 6, "action": 2, "current status": [0.031, 1.449, 0.392, 0.264, -0.032, -0.081, 0.0, 0.0], "score": -1.511}, {"time": 7

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4
Número de instantes: 158. Tamaño de logs: 89
episode  1 score -1417.727 avg score -9.030


2024-07-03 12:42:17,267 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_6lXS5bEJWx9DfAyfgXLrgrLA/messages "HTTP/1.1 200 OK"
2024-07-03 12:42:17,290 - INFO - Asistente: asst_1qm0ClDWY6sldqhJdjf5xafQ
2024-07-03 12:42:17,575 - INFO - HTTP Request: GET https://api.openai.com/v1/assistants/asst_1qm0ClDWY6sldqhJdjf5xafQ "HTTP/1.1 200 OK"
2024-07-03 12:42:17,983 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_OyFKkBI3jwxZSukSkfxlteFr/runs "HTTP/1.1 200 OK"
2024-07-03 12:42:18,223 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_OyFKkBI3jwxZSukSkfxlteFr/runs/run_1kxf80lo3OuZSu0Uz8jczXLQ "HTTP/1.1 200 OK"
2024-07-03 12:42:18,226 - INFO - Status: queued
2024-07-03 12:42:18,496 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_OyFKkBI3jwxZSukSkfxlteFr/runs/run_1kxf80lo3OuZSu0Uz8jczXLQ "HTTP/1.1 200 OK"
2024-07-03 12:42:28,500 - INFO - Status: queued
2024-07-03 12:42:28,814 - INFO - HTTP Request: GET https://api.openai.

assistant: [TextContentBlock(text=Text(annotations=[], value='{"reasoning": "In the task of landing a spacecraft, each component of the current observation array plays a vital role in understanding its condition. Here is an analysis of each observation and the observed effects of actions taken:\\n\\n**Observations:**\\n1. `X position`: The horizontal position of the spacecraft relative to the landing zone. Values close to 0 indicate that the spacecraft is near the landing zone horizontally.\\n2. `Y position`: The vertical position of the spacecraft relative to the landing zone. A value of 0 indicates that the spacecraft has reached ground level.\\n3. `X velocity`: The horizontal velocity of the spacecraft. A smaller absolute value (close to 0) indicates that the spacecraft is moving slower horizontally, which is ideal for a safe landing.\\n4. `Y velocity`: The vertical velocity of the spacecraft. Negative values represent a downward motion. A smaller absolute value is also preferable h

2024-07-03 12:42:54,123 - INFO - Compilación exitosa.        
2024-07-03 12:42:54,124 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 0, "current status": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], "score": 0.232}, {"time": 1, "action": 0, "current status": [0.011, 1.42, 0.38, 0.104, -0.013, -0.085, 0.0, 0.0], "score": 0.121}, {"time": 2, "action": 0, "current status": [0.015, 1.421, 0.38, 0.077, -0.017, -0.085, 0.0, 0.0], "score": 0.013}, {"time": 3, "action": 0, "current status": [0.019, 1.423, 0.38, 0.051, -0.021, -0.085, 0.0, 0.0], "score": -0.104}, {"time": 4, "action": 0, "current status": [0.023, 1.423, 0.38, 0.024, -0.026, -0.085, 0.0, 0.0], "score": -0.226}, {"time": 5, "action": 0, "current status": [0.026, 1.423, 0.38, -0.003, -0.03, -0.085, 0.0, 0.0], "score": -0.353}, {"time": 6, "action": 0, "current status": [0.03, 1.422, 0.38, -0.029, -0.034, -0.085, 0.0, 0.0], "score": -0.481}, {"time": 7, "acti

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4
Número de instantes: 90. Tamaño de logs: 55
episode  1 score -474.946 avg score -5.336


2024-07-03 12:42:54,438 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_OyFKkBI3jwxZSukSkfxlteFr/messages "HTTP/1.1 200 OK"
2024-07-03 12:42:54,441 - INFO - Asistente: asst_IWiXcKSkXWwTtBvguf9N1XGP
2024-07-03 12:42:54,745 - INFO - HTTP Request: GET https://api.openai.com/v1/assistants/asst_IWiXcKSkXWwTtBvguf9N1XGP "HTTP/1.1 200 OK"
2024-07-03 12:42:55,360 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_XEJdeQ1OtT6lVxpNjoNGctL8/runs "HTTP/1.1 200 OK"
2024-07-03 12:42:55,590 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_XEJdeQ1OtT6lVxpNjoNGctL8/runs/run_VA6crnSmX1bw2grez8x59lHp "HTTP/1.1 200 OK"
2024-07-03 12:42:55,594 - INFO - Status: queued
2024-07-03 12:42:55,871 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_XEJdeQ1OtT6lVxpNjoNGctL8/runs/run_VA6crnSmX1bw2grez8x59lHp "HTTP/1.1 200 OK"
2024-07-03 12:43:05,875 - INFO - Status: queued
2024-07-03 12:43:06,203 - INFO - HTTP Request: GET https://api.openai.

assistant: [TextContentBlock(text=Text(annotations=[], value='{\n  "reasoning": "To achieve a successful landing, we need to analyze the observations and understand the impact of each action on the status of the spacecraft. The key observations include X and Y positions, X and Y velocities, angle, and angular velocity. The desired outcome is to land with minimum horizontal and vertical velocities, a small angle, and no angular velocity. Each of the actions affects these observations as follows:\\n\\n1. **Switch off engines (0)**: Halts any current thrust application leading to a natural descent due to gravity, with gradual increases in vertical velocity and stabilization of horizontal velocity.\\n2. **Push left engine (1)**: Applies thrust from the left engine, which adjusts the X velocity (usually decreasing rightward movement or increasing leftward movement) and introduces torque that changes the angle (making it tilt to the right) and angular velocity.\\n3. **Push both engines (2)**

2024-07-03 12:43:31,328 - INFO - Compilación exitosa.        
2024-07-03 12:43:31,328 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 1, "current status": [0.011, 1.42, 0.362, 0.104, -0.007, -0.011, 0.0, 0.0], "score": 1.282}, {"time": 2, "action": 1, "current status": [0.015, 1.421, 0.353, 0.077, -0.006, 0.026, 0.0, 0.0], "score": 1.473}, {"time": 3, "action": 1, "current status": [0.018, 1.423, 0.341, 0.051, -0.002, 0.072, 0.0, 0.0], "score": 1.809}, {"time": 4, "action": 1, "current status": [0.021, 1.423, 0.332, 0.024, 0.003, 0.11, 0.0, 0.0], "score": 1.038}, {"time": 5, "action": 1, "current status": [0.025, 1.423, 0.323, -0.003, 0.011, 0.146, 0.0, 0.0], "score": 0.221}, {"time": 6, "action": 1, "current status": [0.028, 1.422, 0.312, -0.03, 0.02, 0.189, 0.0, 0.0], "score": 0.039}, {"time": 7, "action": 1,

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4
Número de instantes: 73. Tamaño de logs: 47
episode  1 score -612.950 avg score -8.513


2024-07-03 12:43:31,610 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_XEJdeQ1OtT6lVxpNjoNGctL8/messages "HTTP/1.1 200 OK"
2024-07-03 12:43:31,613 - INFO - Mejor resultado: Asistentente asst_1qm0ClDWY6sldqhJdjf5xafQ con puntuación -474.946
2024-07-03 12:43:31,828 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_6lXS5bEJWx9DfAyfgXLrgrLA/messages "HTTP/1.1 200 OK"
2024-07-03 12:43:32,121 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_XEJdeQ1OtT6lVxpNjoNGctL8/messages "HTTP/1.1 200 OK"
2024-07-03 12:43:32,124 - INFO - Iteration: 2
2024-07-03 12:43:32,127 - INFO - Asistente: asst_g80WvbiVksILFWSWqlrjR0oD
2024-07-03 12:43:32,428 - INFO - HTTP Request: GET https://api.openai.com/v1/assistants/asst_g80WvbiVksILFWSWqlrjR0oD "HTTP/1.1 200 OK"
2024-07-03 12:43:33,043 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_6lXS5bEJWx9DfAyfgXLrgrLA/runs "HTTP/1.1 200 OK"
2024-07-03 12:43:33,268 - INFO - HTTP Request: GET 

assistant: [TextContentBlock(text=Text(annotations=[], value='{"reasoning": "The last code provided achieved the best result of -474.946 by focusing on the main aspects of the spacecraft landing. Below is a comparison and analysis between the previous code and the best result:\\n\\n- **Horizontal Movement Adjustment**: The previous attempts were not adequately adjusting for X position and X velocity. The best code adjusts the horizontal movement if both the position and velocity exceed certain thresholds.\\n\\n- **Vertical Control**: The best code emphasizes maintaining vertical control by checking for significant negative Y velocity. When Y velocity drops below -0.3 or when the Y position is more than 0.1 with a slight downward velocity, it fires both engines.\\n\\n- **Angle Control**: The best code adjusts for angular errors more conservatively. It checks both the angle and angular velocity before deciding to push the left or right engine.\\n\\n- **Landing Detection**: Both codes det

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4



2024-07-03 12:44:08,779 - INFO - Compilación exitosa.        
2024-07-03 12:44:08,780 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 0, "current status": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], "score": 0.232}, {"time": 1, "action": 0, "current status": [0.011, 1.42, 0.38, 0.104, -0.013, -0.085, 0.0, 0.0], "score": 0.121}, {"time": 2, "action": 0, "current status": [0.015, 1.421, 0.38, 0.077, -0.017, -0.085, 0.0, 0.0], "score": 0.013}, {"time": 3, "action": 0, "current status": [0.019, 1.423, 0.38, 0.051, -0.021, -0.085, 0.0, 0.0], "score": -0.104}, {"time": 4, "action": 0, "current status": [0.023, 1.423, 0.38, 0.024, -0.026, -0.085, 0.0, 0.0], "score": -0.226}, {"time": 5, "action": 0, "current status": [0.026, 1.423, 0.38, -0.003, -0.03, -0.085, 0.0, 0.0], "score": -0.353}, {"time": 6, "action": 0, "current status": [0.03, 1.422, 0.38, -0.029, -0.034, -0.085, 0.0, 0.0], "score": -0.481}, {"time": 7, "acti

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4
Número de instantes: 90. Tamaño de logs: 55
episode  1 score -474.946 avg score -5.336


2024-07-03 12:44:09,192 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_6lXS5bEJWx9DfAyfgXLrgrLA/messages "HTTP/1.1 200 OK"
2024-07-03 12:44:09,196 - INFO - Asistente: asst_1qm0ClDWY6sldqhJdjf5xafQ
2024-07-03 12:44:09,445 - INFO - HTTP Request: GET https://api.openai.com/v1/assistants/asst_1qm0ClDWY6sldqhJdjf5xafQ "HTTP/1.1 200 OK"
2024-07-03 12:44:10,050 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_OyFKkBI3jwxZSukSkfxlteFr/runs "HTTP/1.1 200 OK"
2024-07-03 12:44:10,254 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_OyFKkBI3jwxZSukSkfxlteFr/runs/run_GzGJHGEa1V39i8rGUhedGdo7 "HTTP/1.1 200 OK"
2024-07-03 12:44:10,256 - INFO - Status: queued
2024-07-03 12:44:10,472 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_OyFKkBI3jwxZSukSkfxlteFr/runs/run_GzGJHGEa1V39i8rGUhedGdo7 "HTTP/1.1 200 OK"
2024-07-03 12:44:20,477 - INFO - Status: queued
2024-07-03 12:44:20,865 - INFO - HTTP Request: GET https://api.openai.

assistant: [TextContentBlock(text=Text(annotations=[], value='{"reasoning": "From the logs, the spacecraft consistently failed to stabilize, maintain its velocity, and control its angle, leading to a crash with a significant negative score. The key areas needing improvement involve ensuring horizontal and vertical velocities are reduced to safe levels and maintaining a stable angle throughout the descent. Here are the steps and rationale for modifying the code:\\n\\n1. **Control the descent (Y velocity):** Action 2 (push both engines) needs to be used more judiciously to manage the descent speed effectively.\\n2. **Stabilize horizontal movement (X velocity):** Actions 1 and 3 (push left/right engines) should be used to correct horizontal drift and maintain a central position over the landing zone.\\n3. **Maintain vertical orientation (angle control):** Adjust angle using actions 1 and 3 to keep it close to zero, ensuring stable descent.\\n4. **Reduce reliance on continuous engine thrus

2024-07-03 12:44:46,090 - INFO - Compilación exitosa.         
2024-07-03 12:44:46,091 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 0, "current status": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], "score": 0.232}, {"time": 1, "action": 0, "current status": [0.011, 1.42, 0.38, 0.104, -0.013, -0.085, 0.0, 0.0], "score": 0.121}, {"time": 2, "action": 0, "current status": [0.015, 1.421, 0.38, 0.077, -0.017, -0.085, 0.0, 0.0], "score": 0.013}, {"time": 3, "action": 0, "current status": [0.019, 1.423, 0.38, 0.051, -0.021, -0.085, 0.0, 0.0], "score": -0.104}, {"time": 4, "action": 0, "current status": [0.023, 1.423, 0.38, 0.024, -0.026, -0.085, 0.0, 0.0], "score": -0.226}, {"time": 5, "action": 0, "current status": [0.026, 1.423, 0.38, -0.003, -0.03, -0.085, 0.0, 0.0], "score": -0.353}, {"time": 6, "action": 0, "current status": [0.03, 1.422, 0.38, -0.029, -0.034, -0.085, 0.0, 0.0], "score": -0.481}, {"time": 7, "act

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4
Número de instantes: 126. Tamaño de logs: 73
episode  1 score -405.068 avg score -3.241


2024-07-03 12:44:46,361 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_OyFKkBI3jwxZSukSkfxlteFr/messages "HTTP/1.1 200 OK"
2024-07-03 12:44:46,366 - INFO - Asistente: asst_IWiXcKSkXWwTtBvguf9N1XGP
2024-07-03 12:44:47,116 - INFO - HTTP Request: GET https://api.openai.com/v1/assistants/asst_IWiXcKSkXWwTtBvguf9N1XGP "HTTP/1.1 200 OK"
2024-07-03 12:44:47,694 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_XEJdeQ1OtT6lVxpNjoNGctL8/runs "HTTP/1.1 200 OK"
2024-07-03 12:44:47,918 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_XEJdeQ1OtT6lVxpNjoNGctL8/runs/run_h59xofczsv9SMnT75taXFmzG "HTTP/1.1 200 OK"
2024-07-03 12:44:47,922 - INFO - Status: queued
2024-07-03 12:44:48,204 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_XEJdeQ1OtT6lVxpNjoNGctL8/runs/run_h59xofczsv9SMnT75taXFmzG "HTTP/1.1 200 OK"
2024-07-03 12:44:58,207 - INFO - Status: queued
2024-07-03 12:44:58,547 - INFO - HTTP Request: GET https://api.openai.

assistant: [TextContentBlock(text=Text(annotations=[], value='{\n  "reasoning": "After analyzing both codes and their respective performances, it is evident that the best-scoring code leverages the observation values more effectively to make decisions. The priorities include managing both horizontal (X) and vertical (Y) velocities, correcting the angle, and ensuring smooth landing by reducing velocities. This example shows that the spacecraft needs constant adjustments in both horizontal and vertical movements and angle orientation to facilitate a successful landing, considering critical conditions like velocities and angles.\\n\\nTo improve the performance of the spacecraft landing, the following incremental improvements can be implemented:\\n1. **Horizontal Movement Correction**: Adjust horizontal displacement by controlling the side engines, ensuring the X position is closer to 0 with minimal X velocity.\\n2. **Vertical Movement and Descent Control**: Regulate Y velocity by using bo

2024-07-03 12:45:12,914 - INFO - Compilación exitosa.        
2024-07-03 12:45:12,914 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 1, "current status": [0.011, 1.42, 0.362, 0.104, -0.007, -0.011, 0.0, 0.0], "score": 1.282}, {"time": 2, "action": 1, "current status": [0.015, 1.421, 0.353, 0.077, -0.006, 0.026, 0.0, 0.0], "score": 1.473}, {"time": 3, "action": 1, "current status": [0.018, 1.423, 0.341, 0.051, -0.002, 0.072, 0.0, 0.0], "score": 1.809}, {"time": 4, "action": 1, "current status": [0.021, 1.423, 0.332, 0.024, 0.003, 0.11, 0.0, 0.0], "score": 1.038}, {"time": 5, "action": 1, "current status": [0.025, 1.423, 0.323, -0.003, 0.011, 0.146, 0.0, 0.0], "score": 0.221}, {"time": 6, "action": 1, "current status": [0.028, 1.422, 0.312, -0.03, 0.02, 0.189, 0.0, 0.0], "score": 0.039}, {"time": 7, "action": 1,

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4
Número de instantes: 90. Tamaño de logs: 55
episode  1 score -233.267 avg score -2.621


2024-07-03 12:45:13,185 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_XEJdeQ1OtT6lVxpNjoNGctL8/messages "HTTP/1.1 200 OK"
2024-07-03 12:45:13,186 - INFO - Mejor resultado: Asistentente asst_IWiXcKSkXWwTtBvguf9N1XGP con puntuación -233.267
2024-07-03 12:45:13,433 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_6lXS5bEJWx9DfAyfgXLrgrLA/messages "HTTP/1.1 200 OK"
2024-07-03 12:45:13,726 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_OyFKkBI3jwxZSukSkfxlteFr/messages "HTTP/1.1 200 OK"
2024-07-03 12:45:13,727 - INFO - Iteration: 3
2024-07-03 12:45:13,729 - INFO - Asistente: asst_g80WvbiVksILFWSWqlrjR0oD
2024-07-03 12:45:14,000 - INFO - HTTP Request: GET https://api.openai.com/v1/assistants/asst_g80WvbiVksILFWSWqlrjR0oD "HTTP/1.1 200 OK"
2024-07-03 12:45:14,415 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_6lXS5bEJWx9DfAyfgXLrgrLA/runs "HTTP/1.1 200 OK"
2024-07-03 12:45:14,632 - INFO - HTTP Request: GET 

assistant: [TextContentBlock(text=Text(annotations=[], value='{"reasoning":"The latest code which achieved a score of -233.267 allows us to review its strategy:\\n\\n- The code focuses on correcting the horizontal position and velocity immediately if any threshold is crossed.\\n- Then, it checks for vertical velocity and ensures upward thrust if the spacecraft is descending too quickly.\\n- Lastly, the code focuses on correcting the angle, allowing action to be taken if even the angle or angular velocity exceeds thresholds.\\n- The conditions are ordered to prioritize horizontal stability, followed by vertical descent control, and finally angle correction. \\n\\nFrom the given results, it\'s evident that prioritizing horizontal adjustment initially, followed by vertical control and angle correction, leads to a more stable descent trajectory towards landing. \\n\\nLet\'s combine these identified strategies correctly to improve our code.\\n\\nUPDATED CODE:", "code":"import numpy as np\\n

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4



2024-07-03 12:45:51,603 - INFO - Compilación exitosa.        
2024-07-03 12:45:51,603 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 1, "current status": [0.011, 1.42, 0.362, 0.104, -0.007, -0.011, 0.0, 0.0], "score": 1.282}, {"time": 2, "action": 1, "current status": [0.015, 1.421, 0.353, 0.077, -0.006, 0.026, 0.0, 0.0], "score": 1.473}, {"time": 3, "action": 1, "current status": [0.018, 1.423, 0.341, 0.051, -0.002, 0.072, 0.0, 0.0], "score": 1.809}, {"time": 4, "action": 1, "current status": [0.021, 1.423, 0.332, 0.024, 0.003, 0.11, 0.0, 0.0], "score": 1.038}, {"time": 5, "action": 1, "current status": [0.025, 1.423, 0.323, -0.003, 0.011, 0.146, 0.0, 0.0], "score": 0.221}, {"time": 6, "action": 1, "current status": [0.028, 1.422, 0.312, -0.03, 0.02, 0.189, 0.0, 0.0], "score": 0.039}, {"time": 7, "action": 1,

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4
Número de instantes: 90. Tamaño de logs: 55
episode  1 score -233.267 avg score -2.621


2024-07-03 12:45:51,865 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_6lXS5bEJWx9DfAyfgXLrgrLA/messages "HTTP/1.1 200 OK"
2024-07-03 12:45:51,867 - INFO - Asistente: asst_1qm0ClDWY6sldqhJdjf5xafQ
2024-07-03 12:45:52,098 - INFO - HTTP Request: GET https://api.openai.com/v1/assistants/asst_1qm0ClDWY6sldqhJdjf5xafQ "HTTP/1.1 200 OK"
2024-07-03 12:45:52,514 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_OyFKkBI3jwxZSukSkfxlteFr/runs "HTTP/1.1 200 OK"
2024-07-03 12:45:52,756 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_OyFKkBI3jwxZSukSkfxlteFr/runs/run_SoC3aKAqvxQBGaj8uZVwhXla "HTTP/1.1 200 OK"
2024-07-03 12:45:52,759 - INFO - Status: queued
2024-07-03 12:45:52,986 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_OyFKkBI3jwxZSukSkfxlteFr/runs/run_SoC3aKAqvxQBGaj8uZVwhXla "HTTP/1.1 200 OK"
2024-07-03 12:46:02,989 - INFO - Status: queued
2024-07-03 12:46:03,278 - INFO - HTTP Request: GET https://api.openai.

assistant: [TextContentBlock(text=Text(annotations=[], value='{"reasoning": "The performance of the spacecraft in the latest logs indicates multiple inefficiencies in handling the descent and maintaining stability of movement. Further analysis shows that continuous vertical and horizontal thrusting was not optimal. A comparison between the provided code and mine reveals key insights and opportunities for improvement.\\n\\n**Comparison and Issues:**\\n1. **Horizontal Control:** Both implementations handle horizontal control using positions and velocities, but the provided code had simpler checks (`> 0.1 or < -0.1`) compared to my use of excess velocities (`> 0.1 or < -0.1`). Incorporating both checks helps more accurately account for drift and position.\\n2. **Vertical Control:** The provided code more simply manages descent speed. My implementation overused action `1` and failed to control `Y` and `angle` as tightly, leading to a crash. Maintaining thresholds and logical checks by comb

2024-07-03 12:46:16,768 - INFO - Compilación exitosa.        
2024-07-03 12:46:16,769 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 1, "current status": [0.011, 1.42, 0.362, 0.104, -0.007, -0.011, 0.0, 0.0], "score": 1.282}, {"time": 2, "action": 1, "current status": [0.015, 1.421, 0.353, 0.077, -0.006, 0.026, 0.0, 0.0], "score": 1.473}, {"time": 3, "action": 1, "current status": [0.018, 1.423, 0.341, 0.051, -0.002, 0.072, 0.0, 0.0], "score": 1.809}, {"time": 4, "action": 1, "current status": [0.021, 1.423, 0.332, 0.024, 0.003, 0.11, 0.0, 0.0], "score": 1.038}, {"time": 5, "action": 1, "current status": [0.025, 1.423, 0.323, -0.003, 0.011, 0.146, 0.0, 0.0], "score": 0.221}, {"time": 6, "action": 1, "current status": [0.028, 1.422, 0.312, -0.03, 0.02, 0.189, 0.0, 0.0], "score": 0.039}, {"time": 7, "action": 1,

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4
Número de instantes: 73. Tamaño de logs: 47
episode  1 score -612.950 avg score -8.513


2024-07-03 12:46:17,011 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_OyFKkBI3jwxZSukSkfxlteFr/messages "HTTP/1.1 200 OK"
2024-07-03 12:46:17,013 - INFO - Asistente: asst_IWiXcKSkXWwTtBvguf9N1XGP
2024-07-03 12:46:17,234 - INFO - HTTP Request: GET https://api.openai.com/v1/assistants/asst_IWiXcKSkXWwTtBvguf9N1XGP "HTTP/1.1 200 OK"
2024-07-03 12:46:17,618 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_XEJdeQ1OtT6lVxpNjoNGctL8/runs "HTTP/1.1 200 OK"
2024-07-03 12:46:17,871 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_XEJdeQ1OtT6lVxpNjoNGctL8/runs/run_dspnKUC0RQxIXSXwWn6vXYf3 "HTTP/1.1 200 OK"
2024-07-03 12:46:17,876 - INFO - Status: queued
2024-07-03 12:46:18,108 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_XEJdeQ1OtT6lVxpNjoNGctL8/runs/run_dspnKUC0RQxIXSXwWn6vXYf3 "HTTP/1.1 200 OK"
2024-07-03 12:46:28,111 - INFO - Status: queued
2024-07-03 12:46:28,398 - INFO - HTTP Request: GET https://api.openai.

assistant: [TextContentBlock(text=Text(annotations=[], value='{\n  "reasoning": "The logs indicate the spacecraft experienced escalating issues primarily with horizontal displacement, angle mismanagement, and rapid accumulation of multiple penalizing conditions resulting in poor scoring and eventual crash. Here’s a detailed analysis guiding through a refined approach:\\n\\n1. **Horizontal Movement Control**: If horizontal movement veers too far off-center or operates at high velocity, employ side engines to rectify.\\n2. **Vertical Descent Management**: Prioritize decelerating descent speed if it\'s excessively high; use main engines sparingly to avoid fuel penalties.\\n3. **Angular Correction**: Ensure minimal angle around zero for stable descent, adjusting with side engines if deviations occur.\\n4. **General Stability**: Continuously maintain velocity and angular stability, freezing actions (engines off) if conditions stabilize locally.\\n5. **Contact-Landing Handling**: Upon contac

2024-07-03 12:46:52,206 - INFO - Compilación exitosa.        
2024-07-03 12:46:52,208 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 1, "current status": [0.011, 1.42, 0.362, 0.104, -0.007, -0.011, 0.0, 0.0], "score": 1.282}, {"time": 2, "action": 1, "current status": [0.015, 1.421, 0.353, 0.077, -0.006, 0.026, 0.0, 0.0], "score": 1.473}, {"time": 3, "action": 1, "current status": [0.018, 1.423, 0.341, 0.051, -0.002, 0.072, 0.0, 0.0], "score": 1.809}, {"time": 4, "action": 1, "current status": [0.021, 1.423, 0.332, 0.024, 0.003, 0.11, 0.0, 0.0], "score": 1.038}, {"time": 5, "action": 1, "current status": [0.025, 1.423, 0.323, -0.003, 0.011, 0.146, 0.0, 0.0], "score": 0.221}, {"time": 6, "action": 1, "current status": [0.028, 1.422, 0.312, -0.03, 0.02, 0.189, 0.0, 0.0], "score": 0.039}, {"time": 7, "action": 1,

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4
Número de instantes: 73. Tamaño de logs: 47
episode  1 score -591.756 avg score -8.219


2024-07-03 12:46:52,459 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_XEJdeQ1OtT6lVxpNjoNGctL8/messages "HTTP/1.1 200 OK"
2024-07-03 12:46:52,461 - INFO - Mejor resultado: Asistentente asst_g80WvbiVksILFWSWqlrjR0oD con puntuación -233.267
2024-07-03 12:46:52,724 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_OyFKkBI3jwxZSukSkfxlteFr/messages "HTTP/1.1 200 OK"
2024-07-03 12:46:52,960 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_XEJdeQ1OtT6lVxpNjoNGctL8/messages "HTTP/1.1 200 OK"
2024-07-03 12:46:52,962 - INFO - Iteration: 4
2024-07-03 12:46:52,962 - INFO - Asistente: asst_g80WvbiVksILFWSWqlrjR0oD
2024-07-03 12:46:53,217 - INFO - HTTP Request: GET https://api.openai.com/v1/assistants/asst_g80WvbiVksILFWSWqlrjR0oD "HTTP/1.1 200 OK"
2024-07-03 12:46:53,824 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_6lXS5bEJWx9DfAyfgXLrgrLA/runs "HTTP/1.1 200 OK"
2024-07-03 12:46:54,044 - INFO - HTTP Request: GET 

assistant: [TextContentBlock(text=Text(annotations=[], value='{\n    "reasoning": "The highest-scoring code provided highlights several key decisions and modifications that guided the spacecraft with better results. Here is a detailed analysis of the main points and a revised version based on observations:\\n\\n1. **Immediate Horizontal Adjustment**: The code immediately adjusts for horizontal drift, either position or velocity exceeding certain thresholds. This is effective in keeping the spacecraft centered.\\n2. **Vertical Descent Control**: The vertical velocity is kept in check to prevent rapid descents that could lead to crashes. The action of pushing both engines upwards is correctly prioritized for reducing high negative Y velocities.\\n3. **Angle Stability**: The angle and angular velocity are controlled to ensure the spacecraft maintains a stable orientation, essential for a balanced descent.\\n4. **Reduced Oscillations**: By checking and responding to the conditions as they 

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4



2024-07-03 12:47:30,138 - INFO - Compilación exitosa.        
2024-07-03 12:47:30,139 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 1, "current status": [0.011, 1.42, 0.362, 0.104, -0.007, -0.011, 0.0, 0.0], "score": 1.282}, {"time": 2, "action": 1, "current status": [0.015, 1.421, 0.353, 0.077, -0.006, 0.026, 0.0, 0.0], "score": 1.473}, {"time": 3, "action": 1, "current status": [0.018, 1.423, 0.341, 0.051, -0.002, 0.072, 0.0, 0.0], "score": 1.809}, {"time": 4, "action": 1, "current status": [0.021, 1.423, 0.332, 0.024, 0.003, 0.11, 0.0, 0.0], "score": 1.038}, {"time": 5, "action": 1, "current status": [0.025, 1.423, 0.323, -0.003, 0.011, 0.146, 0.0, 0.0], "score": 0.221}, {"time": 6, "action": 1, "current status": [0.028, 1.422, 0.312, -0.03, 0.02, 0.189, 0.0, 0.0], "score": 0.039}, {"time": 7, "action": 1,

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4
Número de instantes: 90. Tamaño de logs: 55
episode  1 score -233.267 avg score -2.621


2024-07-03 12:47:30,354 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_6lXS5bEJWx9DfAyfgXLrgrLA/messages "HTTP/1.1 200 OK"
2024-07-03 12:47:30,356 - INFO - Asistente: asst_1qm0ClDWY6sldqhJdjf5xafQ
2024-07-03 12:47:30,544 - INFO - HTTP Request: GET https://api.openai.com/v1/assistants/asst_1qm0ClDWY6sldqhJdjf5xafQ "HTTP/1.1 200 OK"
2024-07-03 12:47:30,946 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_OyFKkBI3jwxZSukSkfxlteFr/runs "HTTP/1.1 200 OK"
2024-07-03 12:47:31,168 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_OyFKkBI3jwxZSukSkfxlteFr/runs/run_riMCUNmdGbxONJ12rRv9bAiS "HTTP/1.1 200 OK"
2024-07-03 12:47:31,171 - INFO - Status: queued
2024-07-03 12:47:31,380 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_OyFKkBI3jwxZSukSkfxlteFr/runs/run_riMCUNmdGbxONJ12rRv9bAiS "HTTP/1.1 200 OK"
2024-07-03 12:47:41,383 - INFO - Status: queued
2024-07-03 12:47:41,655 - INFO - HTTP Request: GET https://api.openai.

assistant: [TextContentBlock(text=Text(annotations=[], value='{"reasoning": "Let\'s carefully analyze logs and comparisons to generate a comprehensive understanding and guide code optimization toward performing well as expected.\\n\\n### Analysis of the Provided Code\\n**Key Strengths: **\\n1. **Consistent Checking:** The constants and structured observation indices enhance readability, ensuring the thresholds are applied efficiently without deviations or misinterpretations.\\n2. **Action Prioritization:** The code priorities are clear – by first focusing on horizontal and vertical positional/stability maneuvers and maintaining descent, it emphasizes control at key areas.\\n3. **Simplified Implementations:** By employing straightforward checks (`> 0.1 or < -0.1`), it avoids unnecessary complexity in adjustments, showing clarity in critical decision triggers.\\n\\n### Comparative Analysis and Potential Improvements\\n**Observations on Differences:**\\n1. **Horizontal Adjustments & Angle

2024-07-03 12:48:05,410 - INFO - Compilación exitosa.        
2024-07-03 12:48:05,410 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 1, "current status": [0.011, 1.42, 0.362, 0.104, -0.007, -0.011, 0.0, 0.0], "score": 1.282}, {"time": 2, "action": 1, "current status": [0.015, 1.421, 0.353, 0.077, -0.006, 0.026, 0.0, 0.0], "score": 1.473}, {"time": 3, "action": 1, "current status": [0.018, 1.423, 0.341, 0.051, -0.002, 0.072, 0.0, 0.0], "score": 1.809}, {"time": 4, "action": 1, "current status": [0.021, 1.423, 0.332, 0.024, 0.003, 0.11, 0.0, 0.0], "score": 1.038}, {"time": 5, "action": 1, "current status": [0.025, 1.423, 0.323, -0.003, 0.011, 0.146, 0.0, 0.0], "score": 0.221}, {"time": 6, "action": 1, "current status": [0.028, 1.422, 0.312, -0.03, 0.02, 0.189, 0.0, 0.0], "score": 0.039}, {"time": 7, "action": 1,

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4
Número de instantes: 90. Tamaño de logs: 55
episode  1 score -233.267 avg score -2.621


2024-07-03 12:48:05,674 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_OyFKkBI3jwxZSukSkfxlteFr/messages "HTTP/1.1 200 OK"
2024-07-03 12:48:05,676 - INFO - Asistente: asst_IWiXcKSkXWwTtBvguf9N1XGP
2024-07-03 12:48:05,889 - INFO - HTTP Request: GET https://api.openai.com/v1/assistants/asst_IWiXcKSkXWwTtBvguf9N1XGP "HTTP/1.1 200 OK"
2024-07-03 12:48:06,319 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_XEJdeQ1OtT6lVxpNjoNGctL8/runs "HTTP/1.1 200 OK"
2024-07-03 12:48:06,539 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_XEJdeQ1OtT6lVxpNjoNGctL8/runs/run_W7Oi1O6GaGikXrKrvMWEuxU3 "HTTP/1.1 200 OK"
2024-07-03 12:48:06,540 - INFO - Status: queued
2024-07-03 12:48:06,810 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_XEJdeQ1OtT6lVxpNjoNGctL8/runs/run_W7Oi1O6GaGikXrKrvMWEuxU3 "HTTP/1.1 200 OK"
2024-07-03 12:48:16,814 - INFO - Status: queued
2024-07-03 12:48:17,100 - INFO - HTTP Request: GET https://api.openai.

assistant: [TextContentBlock(text=Text(annotations=[], value='{\n  "reasoning": "After comparing the provided best result code with the previous attempt, the crucial differences are noted in structured parameter thresholds and a clear separation of decision logic for prioritizing stabilizations. Adjustments targeting distinct parameter observations and specific conditions ensure explicit control over horizontal, vertical, and angular dynamics. The current state needs adjustments to integrate lessons learned, ensuring responsive controls aligning with best practices.\\n\\nKey Adjustments Include:\\n1. **Explicit Threshold Definitions**: Incorporate constants for often-used values to streamline the threshold adjustments as required.\\n2. **Index Labels for Observations**: Define clear indices mapping observation arrays for improved maintainability and readability.\\n3. **Prioritized Condition Handling**: Sequentially address horizontal and vertical motions followed by angular corrections

2024-07-03 12:49:01,823 - INFO - Compilación exitosa.        
2024-07-03 12:49:01,824 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 1, "current status": [0.011, 1.42, 0.362, 0.104, -0.007, -0.011, 0.0, 0.0], "score": 1.282}, {"time": 2, "action": 1, "current status": [0.015, 1.421, 0.353, 0.077, -0.006, 0.026, 0.0, 0.0], "score": 1.473}, {"time": 3, "action": 1, "current status": [0.018, 1.423, 0.341, 0.051, -0.002, 0.072, 0.0, 0.0], "score": 1.809}, {"time": 4, "action": 1, "current status": [0.021, 1.423, 0.332, 0.024, 0.003, 0.11, 0.0, 0.0], "score": 1.038}, {"time": 5, "action": 1, "current status": [0.025, 1.423, 0.323, -0.003, 0.011, 0.146, 0.0, 0.0], "score": 0.221}, {"time": 6, "action": 1, "current status": [0.028, 1.422, 0.312, -0.03, 0.02, 0.189, 0.0, 0.0], "score": 0.039}, {"time": 7, "action": 1,

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4
Número de instantes: 90. Tamaño de logs: 55
episode  1 score -233.267 avg score -2.621


2024-07-03 12:49:02,132 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_XEJdeQ1OtT6lVxpNjoNGctL8/messages "HTTP/1.1 200 OK"
2024-07-03 12:49:02,136 - INFO - Mejor resultado: Asistentente asst_g80WvbiVksILFWSWqlrjR0oD con puntuación -233.267
2024-07-03 12:49:02,360 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_OyFKkBI3jwxZSukSkfxlteFr/messages "HTTP/1.1 200 OK"
2024-07-03 12:49:02,590 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_XEJdeQ1OtT6lVxpNjoNGctL8/messages "HTTP/1.1 200 OK"
2024-07-03 12:49:02,591 - INFO - Iteration: 5
2024-07-03 12:49:02,592 - INFO - Asistente: asst_g80WvbiVksILFWSWqlrjR0oD
2024-07-03 12:49:02,812 - INFO - HTTP Request: GET https://api.openai.com/v1/assistants/asst_g80WvbiVksILFWSWqlrjR0oD "HTTP/1.1 200 OK"
2024-07-03 12:49:03,239 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_6lXS5bEJWx9DfAyfgXLrgrLA/runs "HTTP/1.1 200 OK"
2024-07-03 12:49:03,461 - INFO - HTTP Request: GET 

assistant: [TextContentBlock(text=Text(annotations=[], value='{\n  "reasoning": "The latest performance with a total score of -233.267 indicates that the spacecraft crashes despite making efforts to control horizontal, vertical, and angular movements progressively.\\n\\nAnalyzing crashes and performance, the spacecraft’s consistent horizontal control at the start leads to better scores, but it fails to handle the vertical descent and angular stability effectively, leading to crashes.\\n\\n1. **Too Many Repeated Left Engine Pushes**: We have repeated pushes from the left engine which needed gradual progressions.\\n2. **Angular Control**: The angular adjustments came too late and persisted when unnecessary, causing compounding errors.\\n3. **Vertical Stability Mismanagement**: Both engine offs and upward pushes didn\'t fit effective progression, which worsened vertical handling.\\n\\n### Revised Strategy and Code\\n- Start with immediate adjustments for horizontal, followed by vertical, 

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_5.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_5.mp4\rl-video-episode-0.mp4



2024-07-03 12:49:27,532 - INFO - Compilación exitosa.        
2024-07-03 12:49:27,533 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 0, "current status": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], "score": 0.232}, {"time": 1, "action": 0, "current status": [0.011, 1.42, 0.38, 0.104, -0.013, -0.085, 0.0, 0.0], "score": 0.121}, {"time": 2, "action": 0, "current status": [0.015, 1.421, 0.38, 0.077, -0.017, -0.085, 0.0, 0.0], "score": 0.013}, {"time": 3, "action": 0, "current status": [0.019, 1.423, 0.38, 0.051, -0.021, -0.085, 0.0, 0.0], "score": -0.104}, {"time": 4, "action": 0, "current status": [0.023, 1.423, 0.38, 0.024, -0.026, -0.085, 0.0, 0.0], "score": -0.226}, {"time": 5, "action": 0, "current status": [0.026, 1.423, 0.38, -0.003, -0.03, -0.085, 0.0, 0.0], "score": -0.353}, {"time": 6, "action": 0, "current status": [0.03, 1.422, 0.38, -0.029, -0.034, -0.085, 0.0, 0.0], "score": -0.481}, {"time": 7, "acti

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_5.mp4\rl-video-episode-0.mp4
Número de instantes: 89. Tamaño de logs: 55
episode  1 score -486.602 avg score -5.530


2024-07-03 12:49:27,800 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_6lXS5bEJWx9DfAyfgXLrgrLA/messages "HTTP/1.1 200 OK"
2024-07-03 12:49:27,801 - INFO - Asistente: asst_1qm0ClDWY6sldqhJdjf5xafQ
2024-07-03 12:49:28,008 - INFO - HTTP Request: GET https://api.openai.com/v1/assistants/asst_1qm0ClDWY6sldqhJdjf5xafQ "HTTP/1.1 200 OK"
2024-07-03 12:49:28,477 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_OyFKkBI3jwxZSukSkfxlteFr/runs "HTTP/1.1 200 OK"
2024-07-03 12:49:28,715 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_OyFKkBI3jwxZSukSkfxlteFr/runs/run_bfGz4jIwyGU7MfgGyhUMioXm "HTTP/1.1 200 OK"
2024-07-03 12:49:28,728 - INFO - Status: queued
2024-07-03 12:49:28,942 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_OyFKkBI3jwxZSukSkfxlteFr/runs/run_bfGz4jIwyGU7MfgGyhUMioXm "HTTP/1.1 200 OK"
2024-07-03 12:49:38,947 - INFO - Status: queued
2024-07-03 12:49:39,224 - INFO - HTTP Request: GET https://api.openai.

assistant: [TextContentBlock(text=Text(annotations=[], value='{"reasoning": "Analyzing both the provided `best result` code and the latest implementation reveals several patterns and behaviors impacting the spacecraft’s descent and stability. To improve upon our approach, it’s crucial to identify what worked well and where it failed.\\n\\nIn the logs generated by the recent iterations, the spacecraft starts with correct maneuvers involving core vertical and horizontal adjustments but eventually becomes overly reliant on a single action, particularly pushing the left engine (Action 1). This causes the spacecraft to spiral out of control as it heavily tilts to one side and consistently fires to unstable angular movements in one direction.\\n\\nSteps to Improve:\\n1. **Balanced Action Deployment:** Alternate and balance engine thrusts to avoid overcompensation or a single directional tilt. Improve responsiveness while avoiding long sequences of similar actions.\\n2. **Decelerative vs. Cor

2024-07-03 12:50:03,583 - INFO - Compilación exitosa.         
2024-07-03 12:50:03,583 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 1, "current status": [0.011, 1.42, 0.362, 0.104, -0.007, -0.011, 0.0, 0.0], "score": 1.282}, {"time": 2, "action": 1, "current status": [0.015, 1.421, 0.353, 0.077, -0.006, 0.026, 0.0, 0.0], "score": 1.473}, {"time": 3, "action": 1, "current status": [0.018, 1.423, 0.341, 0.051, -0.002, 0.072, 0.0, 0.0], "score": 1.809}, {"time": 4, "action": 1, "current status": [0.021, 1.423, 0.332, 0.024, 0.003, 0.11, 0.0, 0.0], "score": 1.038}, {"time": 5, "action": 1, "current status": [0.025, 1.423, 0.323, -0.003, 0.011, 0.146, 0.0, 0.0], "score": 0.221}, {"time": 6, "action": 1, "current status": [0.028, 1.422, 0.312, -0.03, 0.02, 0.189, 0.0, 0.0], "score": 0.039}, {"time": 7, "action": 1

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_5.mp4\rl-video-episode-0.mp4
Número de instantes: 135. Tamaño de logs: 78
episode  1 score -46.801 avg score -0.349


2024-07-03 12:50:03,804 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_OyFKkBI3jwxZSukSkfxlteFr/messages "HTTP/1.1 200 OK"
2024-07-03 12:50:03,805 - INFO - Asistente: asst_IWiXcKSkXWwTtBvguf9N1XGP
2024-07-03 12:50:04,159 - INFO - HTTP Request: GET https://api.openai.com/v1/assistants/asst_IWiXcKSkXWwTtBvguf9N1XGP "HTTP/1.1 200 OK"
2024-07-03 12:50:04,588 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_XEJdeQ1OtT6lVxpNjoNGctL8/runs "HTTP/1.1 200 OK"
2024-07-03 12:50:04,822 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_XEJdeQ1OtT6lVxpNjoNGctL8/runs/run_nYtzuFTJVKCNgRXYRt1QBrR9 "HTTP/1.1 200 OK"
2024-07-03 12:50:04,824 - INFO - Status: queued
2024-07-03 12:50:05,044 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_XEJdeQ1OtT6lVxpNjoNGctL8/runs/run_nYtzuFTJVKCNgRXYRt1QBrR9 "HTTP/1.1 200 OK"
2024-07-03 12:50:15,047 - INFO - Status: queued
2024-07-03 12:50:15,313 - INFO - HTTP Request: GET https://api.openai.

assistant: [TextContentBlock(text=Text(annotations=[], value='{\n  "reasoning": "Upon comparison, it is clear that the best-performing code effectively sets thresholds and explicitly handles each observation. The specific thresholds and observation indices greatly aid in maintaining the control logic. The key takeaway is to manage control efficiency by handling X and Y movements, angle, and angular velocities separately and prioritize their corrections. The concise and clear logic structure helps in maintaining stability during descent and landing.\\n\\nThe main areas of improvement for the act function involve:\\n1. Managing horizontal movement effectively by adjusting to required thrust based on X position and velocity.\\n2. Controlling vertical velocity to manage descent speed effectively, especially under fast falling conditions.\\n3. Correcting the angle and minimizing angular velocity for a stable descent.\\n4. Implementing structured condition checks that sequentially handle the

2024-07-03 12:50:39,247 - INFO - Compilación exitosa.        
2024-07-03 12:50:39,248 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 1, "current status": [0.011, 1.42, 0.362, 0.104, -0.007, -0.011, 0.0, 0.0], "score": 1.282}, {"time": 2, "action": 1, "current status": [0.015, 1.421, 0.353, 0.077, -0.006, 0.026, 0.0, 0.0], "score": 1.473}, {"time": 3, "action": 1, "current status": [0.018, 1.423, 0.341, 0.051, -0.002, 0.072, 0.0, 0.0], "score": 1.809}, {"time": 4, "action": 1, "current status": [0.021, 1.423, 0.332, 0.024, 0.003, 0.11, 0.0, 0.0], "score": 1.038}, {"time": 5, "action": 1, "current status": [0.025, 1.423, 0.323, -0.003, 0.011, 0.146, 0.0, 0.0], "score": 0.221}, {"time": 6, "action": 1, "current status": [0.028, 1.422, 0.312, -0.03, 0.02, 0.189, 0.0, 0.0], "score": 0.039}, {"time": 7, "action": 1,

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_5.mp4\rl-video-episode-0.mp4
Número de instantes: 90. Tamaño de logs: 55
episode  1 score -233.267 avg score -2.621


2024-07-03 12:50:39,465 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_XEJdeQ1OtT6lVxpNjoNGctL8/messages "HTTP/1.1 200 OK"
2024-07-03 12:50:39,466 - INFO - Mejor resultado: Asistentente asst_1qm0ClDWY6sldqhJdjf5xafQ con puntuación -46.801
2024-07-03 12:50:39,700 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_6lXS5bEJWx9DfAyfgXLrgrLA/messages "HTTP/1.1 200 OK"
2024-07-03 12:50:39,943 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_XEJdeQ1OtT6lVxpNjoNGctL8/messages "HTTP/1.1 200 OK"
2024-07-03 12:50:40,152 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_XEJdeQ1OtT6lVxpNjoNGctL8/messages "HTTP/1.1 200 OK"


user: [TextContentBlock(text=Text(annotations=[], value='The best result for the population was -46.801 achieved with the following code: import numpy as np\n\n# Define constants for easier adjustments\nHORIZONTAL_THRESHOLD = 0.1\nVELOCITY_THRESHOLD = 0.1\nVERTICAL_VELOCITY_THRESHOLD = -0.3\nANGLE_THRESHOLD = 0.1\n\n# Indices for observation for clarity\nX_POS = 0\nY_POS = 1\nX_VEL = 2\nY_VEL = 3\nANGLE = 4\nANG_VEL = 5\nLEFT_CONTACT = 6\nRIGHT_CONTACT = 7\n\n\ndef act(observation):\n    \'\'\'\n    Function determining the required action for optimal landing.\n\n    Args:\n        observation (numpy.array):\n            - Description: State of the environment after the action is taken.\n            - Positions: {  \n                "0": X position,\n                "1": Y position,\n                "2": X velocity,\n                "3": Y velocity,\n                "4": Angle,\n                "5": Angular velocity,\n                "6": Left contact sensor,\n                "7": Righ

2024-07-03 12:50:40,811 - INFO - HTTP Request: DELETE https://api.openai.com/v1/assistants/asst_g80WvbiVksILFWSWqlrjR0oD "HTTP/1.1 200 OK"
2024-07-03 12:50:41,012 - INFO - HTTP Request: DELETE https://api.openai.com/v1/assistants/asst_1qm0ClDWY6sldqhJdjf5xafQ "HTTP/1.1 200 OK"
2024-07-03 12:50:41,239 - INFO - HTTP Request: DELETE https://api.openai.com/v1/assistants/asst_IWiXcKSkXWwTtBvguf9N1XGP "HTTP/1.1 200 OK"
2024-07-03 12:50:41,555 - INFO - HTTP Request: DELETE https://api.openai.com/v1/threads/thread_6lXS5bEJWx9DfAyfgXLrgrLA "HTTP/1.1 200 OK"
2024-07-03 12:50:41,792 - INFO - HTTP Request: DELETE https://api.openai.com/v1/threads/thread_OyFKkBI3jwxZSukSkfxlteFr "HTTP/1.1 200 OK"
2024-07-03 12:50:42,042 - INFO - HTTP Request: DELETE https://api.openai.com/v1/threads/thread_XEJdeQ1OtT6lVxpNjoNGctL8 "HTTP/1.1 200 OK"
2024-07-03 12:50:42,043 - INFO - 
Ejecución finalizada.




Asistente vaciado.
Run(id='run_nYtzuFTJVKCNgRXYRt1QBrR9', assistant_id='asst_IWiXcKSkXWwTtBvguf9N1XGP', cancelled_at=None, completed_at=1720003823, created_at=1720003804, expires_at=None, failed_at=None, incomplete_details=None, instructions='To complete the task you must follow the following steps and indications:\n1. Analyze and reason about the records received in the last landing attempts to learn how the spacecraft behaves in the environment. Keep in mind that conditions may vary.\n2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:\n    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).\n    - Increased/decreased the slower/faster the spacecraft is moving.\n    - Decreased the more the spacecraft is tilted.\n    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engine.\n    - Receive 100 points 

#### Function calling.

In [None]:
import traceback, importlib
import Action

Action = importlib.reload(Action)

def create_and_run_llm_loop(Incial_msg, logger, model="gpt-3.5-turbo", num_iterations=10):
    
    agente = AssistantOpenAI(ARCLABKEY_OPENAI)

    # Crea un asistente
    dic_population = init_population(agente, model, Incial_msg, logger, mode='fc', N=3)

    # Bucle de aprendizaje del asistente.
    for i in range(num_iterations):
        
        logger.info(f"Iteration: {i+1}")
        compiled = False
        resultados = [1, float("-inf")]
        # Ejecuta los asistentes al mismo tiempo.
        for asistente_id in dic_population.keys():
            logger.info(f"Asistente: {asistente_id}")
            hilo, msg = dic_population[asistente_id]
            asistente = agente.get_assistant(asistente_id)
            compiled = False
            
            # Si en la presente iteración no se ha compilado el código, se pide al asistente corregir los errores antes de iterar.
            while not compiled:
                # Ejecuta el hilo
                ejecucion = agente.run(hilo.id, asistente.id)
                response = agente.get_run(ejecucion.id, hilo.id)

                # Esperamos a que la respuesta esté lista.
                while response.status not in ["completed", "failed", "requires_action"]:
                    logger.info(f"Status: {ejecucion.status}")
                    response = agente.get_run(ejecucion.id, hilo.id)
                    time.sleep(20)

                # Descomponemos los elementos de la respuesta.
                logger.info(f"Status: {response.status}")
                if response.status == "completed":
                    return response
                tool_call = response.required_action.submit_tool_outputs.tool_calls
                print(f"Tool call: {tool_call}")

                # Convierte el string a un diccionario
                code_dict = json.loads(tool_call[0].function.arguments)
                logger.info(f"Arguments: {code_dict}")
                
                # Obtén el código Python de la llamada a la herramienta
                code = code_dict["code"]
                filename = code_dict["filename"]  
                reasoning = code_dict["reasoning"]

                logger.info(f"\nRazonamiento:\n{reasoning}")
                logger.info(f"\nCodigo generado:\n{code}")

                # Ejecuta el código Python
                try:
                    store_code_in_file(reasoning, code, filename)
                    time.sleep(1) # Pequeño retraso para que el sistema operativo pueda reflejar los cambios en el archivo
                    
                    # Hacemos commit de los cambios en el repositorio para analizar las modificaciones del agente.
                    commit_changes(r"C:\Users\adria\Repos\TFM__LLM_landing_self-refinement", f"4o genetic FC. Iteración {i+1}.") 
                    
                    importlib.reload(Action) # Recargamos el módulo de acciones para que se actualice con las modificaciones del agente.

                    # Ejecutamos el código generado.
                    logs = lunar_lander(n_games=1, display=True, recoder=True, video_filename=f"video/Asitente_{asistente_id}/iteration_{i+1}.mp4")
                    json_logs = json.loads(logs)
                    
                    if float(json_logs['total score']) > resultados[1]:
                        resultados = [asistente_id, float(json_logs["total score"]), code]
                    resultados.append(json_logs["total score"])
                    
                    # Devolvemos la respuesta al asistente.
                    for call in tool_call:
                        agente.devolver_respuesta(response.id, hilo.id, tool_outputs=[{"tool_call_id": call.id, "output": "Run successful."}])            
                    compiled = True
                    logger.info(f"Compilación exitosa.")
                    
                    # Esperamos a que el agente esté listo para recibir mensajes y le añadimos el resultado de la iteración.
                    while response.status not in ["completed", "failed", "expired"]:
                        logger.info(f"Status: {response.status}")
                        response = agente.get_run(response.id, hilo.id)
                        time.sleep(20)
                             
                    compiled = True
                    logger.info(f"Compilación exitosa.")
                    
                    msg = f"""These are the logs generated by your last code: {logs}. Analyze the performance of the spacecraft and how it differs from the desired result. Be guided by the scoring system. Identify the cause of errors in your code and modify it without fear of making major changes. Think deeply about the priorities of your code and how to order and combine them correctly to achieve success. Remember that the code is a reflection of your reasoning and the way you approach the problem."""
                    logger.info(msg)    
                    agente.add_message(hilo.id, role="user", content=msg)
                
                # Alimentamos el asistente con el error generado en la ejecución del código.    
                except Exception as e:
                    logger.exception("Error: %s", e)
                    error_trace = traceback.format_exc()
                    for call in tool_call:
                        agente.devolver_respuesta(response.id, hilo.id, tool_outputs=[{"tool_call_id": call.id, "output": "ERROR."}]) 
                    logger.error(f"Error: {e}.")
                    while response.status not in ["completed", "failed", "expired"]:
                        logger.info(f"Status: {response.status}")
                        response = agente.get_run(ejecucion.id, hilo.id)
                        time.sleep(30)
                    msg = f"The code generated has an error. Please, try again. Error: {e}. Trace: {error_trace}"   
                    logger.error(msg)   
                    agente.add_message(hilo.id, role="assistant", content=msg)
        logger.info(f"Mejor resultado: Asistentente {resultados[0]} con puntuación {resultados[1]}")
        # Después de determinar el asistente con la máxima puntuación y antes de finalizar el bucle de iteraciones
        for asistente_id in dic_population.keys():
            if asistente_id != resultados[0]:  # Verifica que no sea el asistente con la máxima puntuación
                hilo, _ = dic_population[asistente_id]
                # Aquí puedes añadir el resultado al asistente que no tiene la máxima puntuación
                # Por ejemplo, suponiendo que tienes una función para añadir mensajes o resultados:
                msg_resultado = f"The best result for the population was {resultados[1]} achieved with the following code: {resultados[2]}. \nCompare it with yours and draw conclusions. Re-generate the code step by step and think deeply to improve both results."
                agente.add_message(hilo.id, role="user", content=msg_resultado)
        
    
    agente.mostrar_mensajes(hilo.id)  
    vaciar_agente(agente)
    
    logger.info("\nEjecución finalizada.\n\n")       

    return response


def vaciar_agente(agente):
    for assistant in agente.assistants:
        agente.delete_assistant(assistant)

    for thread in agente.threads:
        agente.delete_thread(thread)
        
    print("Asistente vaciado.")

In [None]:
DESCRIPTION = "You are an expert spacecraft landing agent. Your specialty is to take the appropriate action at each instant of time based on the environment and state of the spacecraft. To achieve this, you put your knowledge base and the knowledge you acquire by analyzing each execution into a python function in charge of directing the landing."
INSTRUCTIONS = f"""To complete the task you must follow the following steps and indications:
1. Analyze and reason about the records received in the last landing attempts to learn how the spacecraft behaves in the environment. Keep in mind that conditions may vary.
2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:
    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).
    - Increased/decreased the slower/faster the spacecraft is moving.
    - Decreased the more the spacecraft is tilted.
    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engine.
    - Receive 100 points for a successful landing and lose them for crashing.
    - A succesful landing must exceed 200 points.
3. This is the log schema:\n {log_description}
4. Add your decision code to the 'act' method and locate errors in it if the landing is unsuccessful.
5. Provide output in valid JSON format. The data schema should be as follows:  {json.dumps(json_shcema_response)}.
6. Generate the code of the act function in the file 'Action.py'.
"""

# 6. Improve your results and correct also any programming error you may have generated in your last code if they exist.
NAME = "Spacecraft Landing Master"

initial_msg = f"This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}. What does each value and each change in the observations mean? What effects does each action have on the current state observations? What are the steps to follow to achieve a successful landing?. Take a deep breath and reason step-by-step. After reasoning analyze the results, learn and make better code."

logger = configura_log('Spacecraft_4o_genetic_FC.log')
response = create_and_run_llm_loop(initial_msg, logger, model="gpt-3.5-turbo", num_iterations=5)

print(response)

## Tasks
- Corregir rotación.
- Cantidad de logs.
- Partir de ejemplo exitoso para afinarlo. (y generalizar)
- Incluir take step back.
- Probar con 2 modelos. Uno encargado del razonamiento y otro de codificar. (O uno que retroalimente al otro con las conclusiones obtenidas)