#### Imports

In [1]:
import time
from openai import OpenAI
from Assistant import AssistantOpenAI
import logging
import os
import json
import gymnasium as gym
import random
import matplotlib.pyplot as plt
import os
import time
from git import Repo

import numpy as np
#from collections import deque, namedtuple

# For visualization
import gymnasium.wrappers.record_video as record_video

#### Parameters

In [2]:
num_episodes = 200
ENV_NAME = 'LunarLander-v2'
ARCLABKEY_OPENAI = "sk-proj-DvHDR3hpgbm2r3kCA9jKT3BlbkFJL57ABXkfaWAIYKBxdhM6"
ARCLABKEY_OPENAI = "sk-proj-GvaDXazpibWA2M1I5Pu2T3BlbkFJxDuKlr9AcoVG98ctJZ7Q"

#### Function calling.

Función encargada de almacenar el código generado mediante la opción de function calling del asistente de OpenAI.

In [3]:
def store_code_in_file(code, filename):
    """ Store code in a file
    
    Args:
        code: str: code to store
        filename: str: filename to store code in
    
    Returns:
        None
    """
    with open(filename, 'w') as f:
        f.write(code)
        
store_code_in_file_schema = {
    "name": "store_code_in_file",
    "description": "Store code in a file",
    "parameters": {
        "type": "object",
        "properties": {
            "code": {
                "type": "string",
                "description": "The Python code to store."
            },
            "filename": {
                "type": "string",
                "description": "The filename to store the code in."
            }
        },
        "required": ["code", "filename"]
    }
}

TOOLS = [{"type": "function", "function": store_code_in_file_schema}]

### Entorno Lunnar Lander

In [4]:
env = gym.make('LunarLander-v2')
print('State shape: ', env.observation_space.shape)
print('Number of actions: ', env.action_space.n)

State shape:  (8,)
Number of actions:  4


In [5]:
agent_initial_code = """
import random

def act(observation):
    '''
    The function that codifies the action to be taken in each instant of time.

    Args:
        observation (numpy.array):
            "description": "The state of the environment after the action is taken.",
            "positions": {  
                "0": "X position",
                "1": "Y position",
                "2": "X velocity",
                "3": "Y velocity",
                "4": "Angle",
                "5": "Angular velocity",
                "6": "Left contact sensor",
                "7": "Right contact sensor"
            },
            "min_values": [-1.5, -1.5, -5.0, -5.0, -3.14, -5.0, 0, 0],
            "max_values": [1.5, 1.5, 5.0, 5.0, 3.14, 5.0, 1, 1]

    Returns:
        Integer  : The action to be taken.
        "options": {
                '0' : "Switch off engines",
                '1' : "Push left engine",
                '2' : "Push both engines (upwards)",
                '3' : "Push right engine"
            }
    '''
    return random.randint(0, 3)
"""

#### Mapeador de logs a JSON

In [6]:
log_description = {
    "description": "Log data for each step of the spacecraft landing environment.",
    "landing attempt": {
        "type": "integer",
        "description": "The episode number."
    },
    "logs": {
        "time": {
            "type": "integer",
            "description": "The instant within the landing attempt where the current log is taken."
        },
        "action": {
            "type": "integer",
            "description": "The possible actions taken.",
            "options": {
                '0' : "Switch off engines",
                '1' : "Push left engine",
                '2' : "Push both engines (upwards)",
                '3' : "Push right engine"
            }
        },  
        "current status": {
            "type": "array",
            "description": "The state of the environment after the action is taken.",
            "positions": {  
                "0": {
                    "name": "X position",
                    "description": "The horizontal position of the spacecraft relative to the landing zone."
                },
                "1": {
                    "name": "Y position",
                    "description": "The vertical position of the spacecraft relative to the landing zone."
                },
                "2": {
                    "name": "X velocity",
                    "description": "The horizontal velocity of the spacecraft."
                },
                "3": {
                    "name": "Y velocity",
                    "description": "The vertical velocity of the spacecraft."
                },
                "4": {
                    "name": "Angle",
                    "description": "The angle of the spacecraft relative to the vertical (left negative, right positive)."
                },
                "5": {
                    "name": "Angular velocity",
                    "description": "The rate of change of the angle of the spacecraft."
                },
                "6": {
                    "name": "Left contact sensor with landing zone",
                    "description": "Indicates whether the left side of the spacecraft is in contact with the landing zone."
                },
                "7": {
                    "name": "Right contact sensor with landing zone",
                    "description": "Indicates whether the right side of the spacecraft is in contact with the landing zone."
                }
            },
            "min_values": [-1.5, -1.5, -5.0, -5.0, -3.14, -5.0, 0, 0],
            "max_values": [1.5, 1.5, 5.0, 5.0, 3.14, 5.0, 1, 1],
            
        }, 
        "score": {
            "type": "number",
            "description": "The score received for the action."
        },  
        "completed": {
            "type": "boolean",
            "description": "Whether the landing event has ended (landing or accident)."
        }
    },
    "total score": {
        "type": "number",
        "description": "The total score received for the landing attempt."
    }
}

### UTILS

In [7]:
import json

def format_step_data(instant, action, next_state, reward, terminated):
    """ Format the step data into a JSON string
    
    Args:
        instant: int: the instant within the landing attempt where the current log is taken
        action: int: the action taken
        next_state: np.array: the next state of the environment
        reward: float: the reward received
        terminated: bool: whether the landing event has ended
        
    Returns:
        str: the step data formatted as a JSON string
    """
    # Convertir el array numpy a una lista
    next_state_list = next_state.tolist()

    # Redondear los elementos de la lista a 4 decimales
    next_state_list_rounded = [round(x, 3) for x in next_state_list]

    step_data = {
        'time': instant,
        'action' : int(action),
        'current status': next_state_list_rounded,  # Convert numpy array to list
        'score': round(reward, 3),
        #'completed': terminated,
        #'truncated': truncated
        #'info': info
    }

    # Convert the dictionary to a JSON string
    #step_data_json = json.dumps(step_data)

    return step_data


def format_episode_logs(logs, episode, total_score):
    """ Format the logs into a JSON string
    
    Args:
        logs: list: the logs for each step of the environment
        episode: int: the episode number
        
    Returns:
        str: the logs formatted as a JSON string
    """
    logs_data = {
        'landing attempt': episode,
        'logs': logs,
        'total score': round(total_score, 3)
    }

    # Convert the dictionary to a JSON string
    logs_json = json.dumps(logs_data)

    return logs_json

In [8]:
def commit_changes(repo_path, commit_message):
    """ Commit changes to the repository.

    Args:
        repo_path (str): Path to the repository.
        commit_message (str): The commit message.
    """
    repo = Repo(repo_path)
    repo.git.add(update=True)
    repo.index.commit(commit_message)
    origin = repo.remote(name='origin')
    origin.push()

#### Código encargado de ejecutar los eventos en el entorno.

In [9]:
import Action

def lunar_lander(max_t=1000, n_games=1, display=False, seed=38, agent=None, recoder=False, video_filename='video/video.mp4'):
    """ Run the Lunar Lander environment
    
    Args:
        max_t: int: the maximum number of timesteps
        n_games: int: the number of games to play
        display: bool: whether to display the environment
        seed: int: the seed for the environment
        agent: object: the agent to use
        recoder: bool: whether to record the video
        video_filename: str: the filename for the video
        
    Returns:
        str: the logs for the landing attempts
    """
    
    # Incluimos la opción del entorno gráfico y la de grabación.
    if display:
        env = gym.make(ENV_NAME, render_mode='human')
        if recoder:
            env = gym.make(ENV_NAME, render_mode='rgb_array')
            env.reset() 
            video_recorder = record_video.RecordVideo(env, video_filename)
    else:
        env = gym.make(ENV_NAME)

    # Bucle principal de ejecución de los episodios.
    logs = []
    for episode in range(1, n_games+1):
        # Revisamos que la semilla sea un número entero o una lista de semillas.
        if isinstance(seed, list):
            semilla = seed[episode-1]
        else:
            semilla = seed
        print(f"Semilla {semilla}")    
        state = env.reset(seed=semilla) # Set a seed for the environment
        state = state[0] # Eliminamos el diccionario vacio y dejamos unicamente el estado de 8 elementos.
        score = 0
        instant = 0
        episode_actions = []
        if recoder: # En caso de que se quiera grabar el video, se inicia el grabador.
            video_recorder.start_video_recorder()
            
        for i in range(max_t): # Bucle de ejecución de los instantes de tiempo.
            
            # Seleccionamos el método de elegir la acción (agente exitoso o método de decisión del asistente)
            if agent: 
                action = agent.act(state)
            else:
                action = Action.act(state)
            
            # Avanzamos un instante de tiempo en el entorno en función de si se quiere grabar el video o no.
            if recoder:
                next_state, reward, terminated, truncated, info = video_recorder.step(action)
            else:
                next_state, reward, terminated, truncated, info = env.step(action)
            
            # Formateamos los datos del instante de tiempo y los alcenamos.
            json_logs = format_step_data(instant, action, next_state, reward, terminated)
            score += reward
            instant += 1
            if len(episode_actions) < 20 or len(episode_actions) % 2 == 0 or terminated:
                logs.append(json_logs)
            #logs.append(json_logs)
            episode_actions.append(action)
            state = next_state
            if terminated: # Condición de salida del bucle, si el episodio ha terminado.
                break
        json_episode_logs = format_episode_logs(logs, episode, score) 
        
        print(f"Número de instantes: {instant+1}. Tamaño de logs: {len(logs)}")
        print('episode ', episode, 'score %.3f' % float(score), 'avg score %.3f' % (float(score) / instant))
        
    if recoder:
        video_recorder.close()
    else:
        env.close()
        
    return json_episode_logs

#### Logs del código inicial

In [10]:
initial_logs = lunar_lander(n_games=1, display=True)
initial_logs

Semilla 38
Número de instantes: 102. Tamaño de logs: 61
episode  1 score -269.842 avg score -2.672


'{"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 1, "current status": [0.011, 1.42, 0.362, 0.104, -0.007, -0.011, 0.0, 0.0], "score": 1.282}, {"time": 2, "action": 3, "current status": [0.015, 1.421, 0.372, 0.077, -0.01, -0.053, 0.0, 0.0], "score": -0.85}, {"time": 3, "action": 2, "current status": [0.018, 1.424, 0.363, 0.103, -0.013, -0.064, 0.0, 0.0], "score": -0.525}, {"time": 4, "action": 2, "current status": [0.022, 1.426, 0.366, 0.106, -0.016, -0.061, 0.0, 0.0], "score": -1.259}, {"time": 5, "action": 3, "current status": [0.026, 1.428, 0.377, 0.079, -0.021, -0.104, 0.0, 0.0], "score": -1.137}, {"time": 6, "action": 2, "current status": [0.029, 1.43, 0.371, 0.092, -0.027, -0.112, 0.0, 0.0], "score": -0.744}, {"time": 7, "action": 2, "current status": [0.033, 1.433, 0.365, 0.125, -0.033, -0.119, 0.0, 0.0], "score": -1.597}, {"time": 8, "action": 3, "current stat

#### Logs exitosos
Utilizar otras semillas para que no memorice el entorno

In [11]:
import DuelingDQN

agent = DuelingDQN.Agent(num_observaciones=8, num_acciones=4, red_modelo=DuelingDQN.DuelingQNetwork, seed=0)
# Cargamos los pesos del agente entrenado.
agent.load_weights('checkpoint_Dueling.pth')

seeds = [130, 412]

success_logs = lunar_lander(n_games=len(seeds), display=True, seed=seeds, agent=agent)
success_logs

Semilla 130
Número de instantes: 242. Tamaño de logs: 131
episode  1 score 263.217 avg score 1.092
Semilla 412
Número de instantes: 242. Tamaño de logs: 262
episode  2 score 266.713 avg score 1.107


'{"landing attempt": 2, "logs": [{"time": 0, "action": 1, "current status": [0.006, 1.393, 0.31, -0.402, -0.005, -0.029, 0.0, 0.0], "score": -0.567}, {"time": 1, "action": 1, "current status": [0.009, 1.384, 0.301, -0.429, -0.005, 0.005, 0.0, 0.0], "score": -0.672}, {"time": 2, "action": 1, "current status": [0.012, 1.373, 0.291, -0.456, -0.003, 0.045, 0.0, 0.0], "score": -0.468}, {"time": 3, "action": 3, "current status": [0.015, 1.363, 0.303, -0.483, -0.003, -0.001, 0.0, 0.0], "score": -1.813}, {"time": 4, "action": 1, "current status": [0.018, 1.351, 0.291, -0.509, -0.0, 0.045, 0.0, 0.0], "score": -0.367}, {"time": 5, "action": 3, "current status": [0.021, 1.339, 0.299, -0.536, 0.0, 0.015, 0.0, 0.0], "score": -1.553}, {"time": 6, "action": 3, "current status": [0.024, 1.326, 0.308, -0.563, -0.001, -0.022, 0.0, 0.0], "score": -1.579}, {"time": 7, "action": 1, "current status": [0.027, 1.313, 0.3, -0.59, -0.0, 0.009, 0.0, 0.0], "score": -0.688}, {"time": 8, "action": 2, "current statu

#### Bucle iterativo

Sección principal del código encargada de conectar con el asistente de la API de OpenAI e iterar en la generación de código nuevo a partir de los registros del generado previamente.

In [12]:
import traceback, importlib
import Action

Action = importlib.reload(Action)

def create_and_run_llm_loop(Incial_msg, logger, model="gpt-3.5-turbo", num_iterations=10):
    
    agente = AssistantOpenAI(ARCLABKEY_OPENAI)

    # Crea un asistente
    asistente = agente.create_assistant(model=model, description=DESCRIPTION, instructions=INSTRUCTIONS, name=NAME, tools=TOOLS)
    
    # Crea un hilo
    hilo = agente.create_thread()
    
    # Añade un mensaje inicial al hilo.
    msg = agente.add_message(hilo.id, role="user", content=Incial_msg)

    # Bucle de aprendizaje del asistente.
    for i in range(num_iterations):
        
        logger.info(f"Iteration: {i+1}")
        compiled = False

        # Si en la presente iteración no se ha compilado el código, se pide al asistente corregir los errores antes de iterar.
        while not compiled:
            # Ejecuta el hilo
            ejecucion = agente.run(hilo.id, asistente.id, tool_choice='required')
            response = agente.get_run(ejecucion.id, hilo.id)

            # Esperamos a que la respuesta esté lista.
            while response.status not in ["completed", "failed", "requires_action"]:
                logger.info(f"Status: {ejecucion.status}")
                response = agente.get_run(ejecucion.id, hilo.id)
                time.sleep(20)

            # Descomponemos los elementos de la respuesta.
            logger.info(f"Status: {response.status}")
            if response.status == "completed":
                return response
            tool_call = response.required_action.submit_tool_outputs.tool_calls
            print(f"Tool call: {tool_call}")

            # Convierte el string a un diccionario
            code_dict = json.loads(tool_call[0].function.arguments)
            logger.info(f"Arguments: {code_dict}")
            
            # Obtén el código Python de la llamada a la herramienta
            code = code_dict["code"]
            filename = code_dict["filename"]  

            logger.info(f"\nCodigo generado:\n{code}")

            # Ejecuta el código Python
            try:
                store_code_in_file(code, filename)
                time.sleep(1) # Pequeño retraso para que el sistema operativo pueda reflejar los cambios en el archivo
                
                # Hacemos commit de los cambios en el repositorio para analizar las modificaciones del agente.
                commit_changes(r"C:\Users\adria\Repos\TFM__LLM_landing_self-refinement", f"4o. Iteración {i+1}.") 
                
                importlib.reload(Action) # Recargamos el módulo de acciones para que se actualice con las modificaciones del agente.

                # Ejecutamos el código generado.
                logs = lunar_lander(n_games=1, display=True, recoder=True, video_filename=f"video/iteration_{i+1}.mp4")
                
                # Devolvemos la respuesta al asistente.
                for call in tool_call:
                    agente.devolver_respuesta(response.id, hilo.id, tool_outputs=[{"tool_call_id": call.id, "output": "Run successful."}])            
                compiled = True
                logger.info(f"Compilación exitosa.")
                
                # Esperamos a que el agente esté listo para recibir mensajes y le añadimos el resultado de la iteración.
                while response.status not in ["completed", "failed", "expired"]:
                    logger.info(f"Status: {response.status}")
                    response = agente.get_run(response.id, hilo.id)
                    time.sleep(20)
                
                msg = f"""These are the logs generated by your last code: {logs}. Analyze the performance of the spacecraft and how it differs from the desired result. Be guided by the scoring system. Identify the cause of errors in your code and modify it without fear of making major changes. Think deeply about the priorities of your code and how to order and combine them correctly to achieve success."""
                logger.info(msg)    
                agente.add_message(hilo.id, role="user", content=msg)
            
            # Alimentamos el asistente con el error generado en la ejecución del código.    
            except Exception as e:
                logger.exception("Error: %s", e)
                error_trace = traceback.format_exc()
                for call in tool_call:
                    agente.devolver_respuesta(response.id, hilo.id, tool_outputs=[{"tool_call_id": call.id, "output": "ERROR."}]) 
                logger.error(f"Error: {e}.")
                while response.status not in ["completed", "failed", "expired"]:
                    logger.info(f"Status: {response.status}")
                    response = agente.get_run(ejecucion.id, hilo.id)
                    time.sleep(30)
                msg = f"The code generated has an error. Please, try again. Error: {e}. Trace: {error_trace}"   
                logger.error(msg)   
                agente.add_message(hilo.id, role="assistant", content=msg)
    
    agente.mostrar_mensajes(hilo.id)  
    vaciar_agente(agente)
    
    logger.info("\nEjecución finalizada.\n\n")       

    return response


def vaciar_agente(agente):
    for assistant in agente.assistants:
        agente.delete_assistant(assistant)

    for thread in agente.threads:
        agente.delete_thread(thread)
        
    print("Asistente vaciado.")


#### Logger

In [13]:
def configura_log(nombre_archivo):
    """ 
    Configura el logger para que guarde los logs en un archivo y los muestre en la consola.
    
    Args:
        nombre_archivo: str Nombre del archivo donde se guardarán los logs.
        
    Returns:
        logger: logging.Logger Objeto logger configurado.
    """
    # Crear la carpeta logs si no existe
    if not os.path.exists('logs'):
        os.makedirs('logs')
    # Configura el logger
    logging.basicConfig(filename=f'logs/{nombre_archivo}', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
    logger = logging.getLogger()
    
    # Añade un StreamHandler para mostrar los logs en la consola
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)
    
    return logger

### Ejecución con el asistente

Prompt inicial y mensajes del sistema para el asistente.

In [None]:
DESCRIPTION = "You are an expert programer in Pyhton. Your specialty is to generate the code responsible for making decisions about actions to be taken in various spacecraft landing environments.The objective is to land the spacecraft within a target zone in the shortest possible time and very gently. A scoring system is used to evaluate the landings, which must be maximized."
INSTRUCTIONS = f"""Your task is:
You have two modes of action depending on whether the use of function call is required. Reason deeply about what you are asked to do or code the requested task.
1. Analyze and reason about the logs received in the last landing attempts.
2. Your goal is to be able to make the correct decision based on what you have learned from the results of previous iterations. You must code the decision making based on your reasoning in a Python function.
3. IMPORTANT. Use the following tips in your reasoning to achieve a successful landing:
    - First you have to stabilize the falling ship (angle and location), keep falling under control and then land gently at the end.
    - It is mandatory to use all the elements of the array of observations received by parameter in your code when deciding what action to take at any given moment. Both position and velocities must be taken into account to know how the ship is doing and towards which states it is heading. All of these must be considered to achieve stability.
    - Learn how actions taken affect the future states of the spacecraft in the logs of past events so that you can take this into account when developing code to reach the landing zone.
    - The landing zone is in the central area of the x-axis.
    - Carefully choose the actions involving the lateral engines according to the rotation of the spacecraft.
4. You should analyze the performance that appear in the logs of the code you have generated. You should improve the code generated in the 'act' function in the last iteration without fear of making major changes, seeking to maximize the score received and generate a higher quality code.
5. Save the code of the act function in the file 'Action.py' using store_code_in_file function tool.
"""
# 6. Improve your results and correct also any programming error you may have generated in your last code if they exist.
NAME = "Spacecraft Landing Master"

initial_msg = f"This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. You have to be able to learn from it to land successfully with any other conditions. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}.Take a deep breath and reason step-by-step. After reasoning analyze the results, learn and make better code."

logger = configura_log('Spacecraft_4o_betterprompt.log')
response = create_and_run_llm_loop(initial_msg, logger, model="gpt-4o", num_iterations=5)

print(response)

2024-06-25 12:54:05,317 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-06-25 12:54:05,646 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-06-25 12:54:06,026 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_FjycrwXuowzQlfTMVUsY8mbR/messages "HTTP/1.1 200 OK"
2024-06-25 12:54:06,291 - INFO - Iteration: 1
2024-06-25 12:54:06,868 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_FjycrwXuowzQlfTMVUsY8mbR/runs "HTTP/1.1 200 OK"
2024-06-25 12:54:07,209 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_FjycrwXuowzQlfTMVUsY8mbR/runs/run_CdSA0VbpnhkVlAkUyMCR3kcT "HTTP/1.1 200 OK"
2024-06-25 12:54:07,216 - INFO - Status: queued
2024-06-25 12:54:07,427 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_FjycrwXuowzQlfTMVUsY8mbR/runs/run_CdSA0VbpnhkVlAkUyMCR3kcT "HTTP/1.1 200 OK"


KeyboardInterrupt: 

#### Prueba de cambio grande en el prompt.

In [34]:
DESCRIPTION = "You are an expert spacecraft landing agent. Your specialty is to take the appropriate action at each instant of time based on the environment and state of the spacecraft. To achieve this, you put your knowledge base and the knowledge you acquire by analyzing each execution into a python function in charge of directing the landing."
INSTRUCTIONS = f"""To complete the task you must follow the following steps and indications:
1. Analyze and reason about the records received in the last landing attempts to learn how the spacecraft behaves in the environment. Keep in mind that conditions may vary.
2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:
    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).
    - Increased/decreased the slower/faster the spacecraft is moving.
    - Decreased the more the spacecraft is tilted.
    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engine.
    - Receive 100 points for a successful landing and lose them for crashing.
3. IMPORTANT. Use the following tips in your reasoning to achieve a successful landing:
    - First you have to stabilize the falling ship (speed, angle and location), keep falling under control and then land gently at the end.
    - Use all the elements of the vector of observations, all are relevant to make the right decision.
    - Pay close attention to successful events.
    - Environment and ship conditions may change but your code must be effective for all cases.
    - Find a balance in your landing policy to maximize your score (must exceed 200 points).
    - If you are rotating to the left (negative values), you should fire the left engine and vice versa. But always prioritizing a controlled speed with the main engine.
4. Add your decision code to the 'act' method and locate errors in it if the landing is unsuccessful.
5. Save the code of the act function in the file 'Action.py' using store_code_in_file function.
"""

# 6. Improve your results and correct also any programming error you may have generated in your last code if they exist.
NAME = "Spacecraft Landing Master"

initial_msg = f"This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}.Before coding take a deep breath and reason step-by-step.What does each value and each change in the observations mean? What effects does each action have on the current state observations? What are the steps to follow to achieve a successful landing?"

logger = configura_log('Spacecraft_4o_Stepback.log')
response = create_and_run_llm_loop(initial_msg, logger, model="gpt-3.5-turbo", num_iterations=5)

print(response)

2024-06-26 22:05:26,232 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-06-26 22:05:26,232 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-06-26 22:05:26,425 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-06-26 22:05:26,425 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-06-26 22:05:26,702 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_oJDUYr2gw3it7bXZK7ld6jIQ/messages "HTTP/1.1 200 OK"
2024-06-26 22:05:26,702 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_oJDUYr2gw3it7bXZK7ld6jIQ/messages "HTTP/1.1 200 OK"
2024-06-26 22:05:26,846 - INFO - Iteration: 1
2024-06-26 22:05:26,846 - INFO - Iteration: 1
2024-06-26 22:05:27,366 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_oJDUYr2gw3it7bXZK7ld6jIQ/runs "HTTP/1.1 200 OK"
2024-06-26 22:05:27,366 - INFO - HTTP Request: POST htt

KeyboardInterrupt: 

#### Take a step back.
Abstraer primero el problema.

# Incluir supervisor encargado del stepback

In [14]:
def inicializar_agente(model, msg, description, instructions, name, tools):
    """ Inicializa un agente con un asistente y un hilo.
    
    Args:
        model: str: el modelo de lenguaje a utilizar
        msg: str: el mensaje inicial del hilo
        
    Returns:
        agente: AssistantOpenAI: el agente inicializado
        asistente: dict: el asistente creado
        hilo: dict: el hilo creado
    """
    
    agente = AssistantOpenAI(ARCLABKEY_OPENAI)
    # Crea un asistente
    asistente = agente.create_assistant(model=model, description=description, instructions=instructions, name=name, tools=tools)
    # Crea un hilo
    hilo = agente.create_thread()
    # Añade un mensaje inicial al hilo.
    msg = agente.add_message(hilo.id, role="user", content=msg)
    
    return agente, asistente, hilo


def run_message_assistant(agente, asistente, hilo, tool_choice="auto"):
    """ Ejecuta un mensaje en el asistente.
    
    Args:
        agente: AssistantOpenAI: el agente
        asistente: dict: el asistente
        hilo: dict: el hilo
        tool_choice: str: la elección de herramienta a utilizar
        
    Returns:
        ejecucion: dict: la ejecución del mensaje
        response: dict: la respuesta del asistente
    """
    # Ejecuta el hilo
    ejecucion = agente.run(hilo.id, asistente.id, tool_choice=tool_choice)
    response = agente.get_run(ejecucion.id, hilo.id)

            # Esperamos a que la respuesta esté lista.
    while response.status not in ["completed", "failed", "requires_action"]:
        logger.info(f"Status: {ejecucion.status}")
        response = agente.get_run(ejecucion.id, hilo.id)
        time.sleep(20)
    return ejecucion, response


In [14]:
import traceback, importlib
import Action

Action = importlib.reload(Action)

def create_and_run_llm_loop(Incial_msg, code_msg, logger, model="gpt-3.5-turbo", num_iterations=5):
    
    # Creamos el asistente.
    #agente, asistente, hilo = inicializar_agente(model, Incial_msg, DESCRIPTION, INSTRUCTIONS, NAME, TOOLS)
    agente = AssistantOpenAI(ARCLABKEY_OPENAI)
    # Crea un asistente
    asistente = agente.create_assistant(model=model, description=DESCRIPTION, instructions=INSTRUCTIONS, name=NAME, tools=TOOLS)
    # Crea un hilo
    hilo = agente.create_thread()
    # Añade un mensaje inicial al hilo.
    msg = agente.add_message(hilo.id, role="user", content=Incial_msg)
    
    # Primero ejecutamos el razonamiento de STEP BACK.
    #ejecucion, response = run_message_assistant(agent, asistente, hilo, tool_choice='none')
    # Ejecuta el hilo
    ejecucion = agente.run(hilo.id, asistente.id, tool_choice='none')
    response = agente.get_run(ejecucion.id, hilo.id)

            # Esperamos a que la respuesta esté lista.
    while response.status not in ["completed", "failed", "requires_action"]:
        logger.info(f"Status: {ejecucion.status}")
        response = agente.get_run(ejecucion.id, hilo.id)
        print("Waiting for response...")
        time.sleep(20)
    
    #print(response)
    msgs = agente.mostrar_mensajes(hilo.id)
    print(msgs[0].data[0].content[0].text.value)
    
    # Añadimos el mensaje de generación de código.
    msg = agente.add_message(hilo.id, role="user", content=code_msg)
    
    # Bucle de aprendizaje del asistente.
    for i in range(num_iterations):
        
        logger.info(f"Iteration: {i+1}")
        compiled = False

        # Si en la presente iteración no se ha compilado el código, se pide al asistente corregir los errores antes de iterar.
        while not compiled:
            # Ejecuta el hilo
            ejecucion = agente.run(hilo.id, asistente.id, tool_choice='required')
            response = agente.get_run(ejecucion.id, hilo.id)

            # Descomponemos los elementos de la respuesta.
            logger.info(f"Status: {response.status}")
            if response.status == "completed":
                return response
            print(response)
            tool_call = response.required_action.submit_tool_outputs.tool_calls
            print(f"Tool call: {tool_call}")

            # Convierte el string a un diccionario
            code_dict = json.loads(tool_call[0].function.arguments)
            logger.info(f"Arguments: {code_dict}")
            
            # Obtén el código Python de la llamada a la herramienta
            code = code_dict["code"]
            filename = code_dict["filename"]  

            logger.info(f"\nCodigo generado:\n{code}")

            # Ejecuta el código Python
            try:
                store_code_in_file(code, filename)
                time.sleep(1) # Pequeño retraso para que el sistema operativo pueda reflejar los cambios en el archivo
                
                # Hacemos commit de los cambios en el repositorio para analizar las modificaciones del agente.
                commit_changes(r"C:\Users\adria\Repos\TFM__LLM_landing_self-refinement", f"Stepback 4o. Iteración {i+1}.") 
                
                importlib.reload(Action) # Recargamos el módulo de acciones para que se actualice con las modificaciones del agente.

                # Ejecutamos el código generado.
                logs = lunar_lander(n_games=1, display=True, recoder=True, video_filename=f"video/iteration_{i+1}.mp4")
                
                # Devolvemos la respuesta al asistente.
                for call in tool_call:
                    agente.devolver_respuesta(response.id, hilo.id, tool_outputs=[{"tool_call_id": call.id, "output": "Run successful."}])            
                compiled = True
                logger.info(f"Compilación exitosa.")
                
                # Esperamos a que el agente esté listo para recibir mensajes y le añadimos el resultado de la iteración.
                while response.status not in ["completed", "failed", "expired"]:
                    logger.info(f"Status: {response.status}")
                    response = agente.get_run(response.id, hilo.id)
                    time.sleep(20)
                
                msg = f"""These are the logs generated by your last code: {logs}. Analyze the performance of the spacecraft and how it differs from the desired result. Be guided by the scoring system. Identify the cause of errors in your code and modify it without fear of making major changes. Think deeply about the priorities of your code and how to order and combine them correctly to achieve success."""
                logger.info(msg)    
                agente.add_message(hilo.id, role="user", content=msg)
            
            # Alimentamos el asistente con el error generado en la ejecución del código.    
            except Exception as e:
                logger.exception("Error: %s", e)
                error_trace = traceback.format_exc()
                for call in tool_call:
                    agente.devolver_respuesta(response.id, hilo.id, tool_outputs=[{"tool_call_id": call.id, "output": "ERROR."}]) 
                logger.error(f"Error: {e}.")
                while response.status not in ["completed", "failed", "expired"]:
                    logger.info(f"Status: {response.status}")
                    response = agente.get_run(ejecucion.id, hilo.id)
                    time.sleep(30)
                msg = f"The code generated has an error. Please, try again. Error: {e}. Trace: {error_trace}"   
                logger.error(msg)   
                agente.add_message(hilo.id, role="assistant", content=msg)
    
    agente.mostrar_mensajes(hilo.id)  
    vaciar_agente(agente)
    
    logger.info("\nEjecución finalizada.\n\n")       

    return response


def vaciar_agente(agente):
    for assistant in agente.assistants:
        agente.delete_assistant(assistant)

    for thread in agente.threads:
        agente.delete_thread(thread)
        
    print("Asistente vaciado.")


In [15]:
DESCRIPTION = "You are an expert spacecraft landing agent. Your specialty is to take the appropriate action at each instant of time based on the environment and state of the spacecraft. To achieve this, you put your knowledge base and the knowledge you acquire by analyzing each execution into a python function in charge of directing the landing."
INSTRUCTIONS = f"""To complete the task you must follow the following steps and indications:
1. Analyze and reason about the records received in the last landing attempts to learn how the spacecraft behaves in the environment. Keep in mind that conditions may vary.
2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:
    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).
    - Increased/decreased the slower/faster the spacecraft is moving.
    - Decreased the more the spacecraft is tilted.
    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engine.
    - Receive 100 points for a successful landing and lose them for crashing.
3. IMPORTANT. Use the following tips in your reasoning to achieve a successful landing:
    - First you have to stabilize the falling ship (speed, angle and location), keep falling under control and then land gently at the end.
    - Use all the elements of the vector of observations, all are relevant to make the right decision.
    - Pay close attention to successful events.
    - Environment and ship conditions may change but your code must be effective for all cases.
    - Find a balance in your landing policy to maximize your score (must exceed 200 points).
    - If you are rotating to the left (negative values), you should fire the left engine and vice versa. But always prioritizing a controlled speed with the main engine.
4. Add your decision code to the 'act' method and locate errors in it if the landing is unsuccessful.
5. Save the code of the act function in the file 'Action.py' using store_code_in_file function.
"""
# 6. Improve your results and correct also any programming error you may have generated in your last code if they exist.
NAME = "Spacecraft Landing Decision coder"


initial_msg = f"This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}. Take a deep breath and reason step-by-step.What does each value and each change in the observations mean? What effects does each action have on the current state observations? What are the steps to follow to achieve a successful landing? Do not implement any code."
code_msg = f"After the reasoning with the processes behind the decision making, code very carefully when structuring your code, keep in mind that certain actions can be left with starvation. Save the code of the act function in the file 'Action.py' using store_code_in_file function tool, function calling is required."



In [16]:
logger = configura_log('Spacecraft_4o_Stepback.log')
response = create_and_run_llm_loop(initial_msg, code_msg, logger, model="gpt-4o", num_iterations=5)

print(response)

2024-07-01 20:24:30,902 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07-01 20:24:31,107 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-07-01 20:24:31,353 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_Z2XFcAqd7BpvTGXwLKXUdXLi/messages "HTTP/1.1 200 OK"
2024-07-01 20:24:31,917 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_Z2XFcAqd7BpvTGXwLKXUdXLi/runs "HTTP/1.1 200 OK"
2024-07-01 20:24:32,130 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_Z2XFcAqd7BpvTGXwLKXUdXLi/runs/run_rBlWNoPJc2HIQNThxrjQvaiT "HTTP/1.1 200 OK"
2024-07-01 20:24:32,132 - INFO - Status: queued
2024-07-01 20:24:32,336 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_Z2XFcAqd7BpvTGXwLKXUdXLi/runs/run_rBlWNoPJc2HIQNThxrjQvaiT "HTTP/1.1 200 OK"


Waiting for response...


In [None]:
agente = AssistantOpenAI(ARCLABKEY_OPENAI)

# Crea un asistente
asistente = agente.create_assistant(model="gpt-3.5-turbo", description=DESCRIPTION, instructions=INSTRUCTIONS, name=NAME, tools=TOOLS)
    
# Crea un hilo
hilo = agente.create_thread()

initial_msg = f"This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}. Before coding take a deep breath and reason step-by-step.What does each value and each change in the observations mean? What effects does each action have on the current state observations? What are the steps to follow to achieve a successful landing?"
    
# Añade un mensaje inicial al hilo.
msg = agente.add_message(hilo.id, role="user", content=initial_msg)


# Ejecuta el hilo
ejecucion = agente.run(hilo.id, asistente.id, tool_choice='none')
response = agente.get_run(ejecucion.id, hilo.id)

# Esperamos a que la respuesta esté lista.
while response.status not in ["completed", "failed", "requires_action"]:
    #logger.info(f"Status: {ejecucion.status}")
    print("Waiting for response...")
    response = agente.get_run(ejecucion.id, hilo.id)
    time.sleep(20)
print(response)


agente.mostrar_mensajes(hilo.id)

Waiting for response...
Waiting for response...
Run(id='run_5ip2qIKcf1KIU1L1XCyvPVyK', assistant_id='asst_qqONwHALF6YCJlMyXttFXsnu', cancelled_at=None, completed_at=1719491913, created_at=1719491908, expires_at=None, failed_at=None, incomplete_details=None, instructions="To complete the task you must follow the following steps and indications:\n1. Analyze and reason about the records received in the last landing attempts to learn how the spacecraft behaves in the environment. Keep in mind that conditions may vary.\n2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:\n    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).\n    - Increased/decreased the slower/faster the spacecraft is moving.\n    - Decreased the more the spacecraft is tilted.\n    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engin

(SyncCursorPage[Message](data=[Message(id='msg_4CYNsLrcWXj8sEnvIm9EyV4G', assistant_id='asst_qqONwHALF6YCJlMyXttFXsnu', attachments=[], completed_at=None, content=[TextContentBlock(text=Text(annotations=[], value="Based on the successful landing record and the scoring system provided, we can infer some strategies to achieve a successful landing with a high score:\n\n1. Initially, the spacecraft should start with a gentle descent while maintaining a controlled speed. This helps in stabilizing the falling ship.\n2. As the spacecraft approaches the landing area, it should decrease the altitude gradually and adjust the horizontal position to align with the landing location.\n3. The spacecraft should minimize lateral movement and keep the speed under control to ensure a safe landing.\n4. Utilize the main engine for controlled descent and adjust the tilt angle to reduce lateral velocity.\n5. When close to the landing area, reduce the speed further and ensure the spacecraft is aligned properl

In [19]:
msgs, ids = agente.mostrar_mensajes(hilo.id)
print(msgs)
print(msgs.data[0].content[0].text.value)

assistant: [TextContentBlock(text=Text(annotations=[], value="Based on the successful landing record and the scoring system provided, we can infer some strategies to achieve a successful landing with a high score:\n\n1. Initially, the spacecraft should start with a gentle descent while maintaining a controlled speed. This helps in stabilizing the falling ship.\n2. As the spacecraft approaches the landing area, it should decrease the altitude gradually and adjust the horizontal position to align with the landing location.\n3. The spacecraft should minimize lateral movement and keep the speed under control to ensure a safe landing.\n4. Utilize the main engine for controlled descent and adjust the tilt angle to reduce lateral velocity.\n5. When close to the landing area, reduce the speed further and ensure the spacecraft is aligned properly for a gentle landing.\n6. Avoid unnecessary firing of the side engines as they decrease the overall score. Only use them for small adjustments if nece

TypeError: 'SyncCursorPage[Message]' object is not subscriptable

## Dos agentes.

In [14]:
import traceback, importlib
import Action

Action = importlib.reload(Action)

def create_and_run_llm_loop(Incial_msg, code_msg, logger, model="gpt-3.5-turbo", num_iterations=5):
    
    # Creamos el asistente.
    #agente, asistente, hilo = inicializar_agente(model, Incial_msg, DESCRIPTION, INSTRUCTIONS, NAME, TOOLS)
    agente = AssistantOpenAI(ARCLABKEY_OPENAI)
    # Crea un asistente supervisor y codificador.
    codificador = agente.create_assistant(model=model, description=DESCRIPTION, instructions=INSTRUCTIONS, name=NAME, tools=TOOLS)
    supervisor = agente.create_assistant(model=model, description=DESCRIPTION_SUPERVISOR, instructions=INSTRUCTIONS_SUPERVISOR, name=NAME_SUPERVISOR)
    # Crea un hilo
    hilo_supervisor = agente.create_thread()
    hilo_codificador = agente.create_thread()
    
    # Añade un mensaje inicial al hilo.
    msg = agente.add_message(hilo_supervisor.id, role="user", content=Incial_msg)
    
    # Primero ejecutamos el razonamiento de STEP BACK.
    #ejecucion, response = run_message_assistant(agent, asistente, hilo, tool_choice='none')
    # Ejecuta el hilo
    ejecucion = agente.run(hilo_supervisor.id, supervisor.id, tool_choice='none')
    response = agente.get_run(ejecucion.id, hilo_supervisor.id)

            # Esperamos a que la respuesta esté lista.
    while response.status not in ["completed", "failed", "requires_action"]:
        logger.info(f"Status: {ejecucion.status}")
        response = agente.get_run(ejecucion.id, hilo_supervisor.id)
        print("Waiting for response...")
        time.sleep(20)
    
    #print(response)
    msgs = agente.mostrar_mensajes(hilo_supervisor.id)
    logger.info(msgs[0].data[0].content[0].text.value)
    
    # Añadimos el mensaje de generación de código.
    msg = code_msg +  "\nSupervisor feedback: " + msgs[0].data[0].content[0].text.value
    print(msg)
    msg = agente.add_message(hilo_codificador.id, role="user", content=msg)
    
    # Bucle de aprendizaje del asistente.
    for i in range(num_iterations):
        
        logger.info(f"Iteration: {i+1}")
        compiled = False

        # Si en la presente iteración no se ha compilado el código, se pide al asistente corregir los errores antes de iterar.
        while not compiled:
            # Ejecuta el hilo
            ejecucion = agente.run(hilo_codificador.id, codificador.id, tool_choice='required')
            response = agente.get_run(ejecucion.id, hilo_codificador.id)

            # Descomponemos los elementos de la respuesta.
            logger.info(f"Status: {response.status}")
            if response.status == "completed":
                return response
            print(response)
            tool_call = response.required_action.submit_tool_outputs.tool_calls
            print(f"Tool call: {tool_call}")

            # Convierte el string a un diccionario
            code_dict = json.loads(tool_call[0].function.arguments)
            logger.info(f"Arguments: {code_dict}")
            
            # Obtén el código Python de la llamada a la herramienta
            code = code_dict["code"]
            filename = code_dict["filename"]  

            logger.info(f"\nCodigo generado:\n{code}")

            # Ejecuta el código Python
            try:
                store_code_in_file(code, filename)
                time.sleep(1) # Pequeño retraso para que el sistema operativo pueda reflejar los cambios en el archivo
                
                # Hacemos commit de los cambios en el repositorio para analizar las modificaciones del agente.
                commit_changes(r"C:\Users\adria\Repos\TFM__LLM_landing_self-refinement", f"Stepback 4o. Iteración {i+1}.") 
                
                importlib.reload(Action) # Recargamos el módulo de acciones para que se actualice con las modificaciones del agente.

                # Ejecutamos el código generado.
                logs = lunar_lander(n_games=1, display=True, recoder=True, video_filename=f"video/iteration_{i+1}.mp4")
                
                # Devolvemos la respuesta al asistente.
                for call in tool_call:
                    agente.devolver_respuesta(response.id, hilo_codificador.id, tool_outputs=[{"tool_call_id": call.id, "output": "Run successful."}])            
                compiled = True
                logger.info(f"Compilación exitosa.")
                
                # Esperamos a que el agente esté listo para recibir mensajes y le añadimos el resultado de la iteración.
                while response.status not in ["completed", "failed", "expired"]:
                    logger.info(f"Status: {response.status}")
                    response = agente.get_run(response.id, hilo_codificador.id)
                    time.sleep(20)
                
                msg = f"""These are the logs generated by your last code: {logs}. Analyze the performance of the spacecraft and how it differs from the desired result. Be guided by the scoring system. Identify the cause of errors in your code and modify it without fear of making major changes. Think deeply about the priorities of your code and how to order and combine them correctly to achieve success."""
                logger.info(msg)    
                agente.add_message(hilo_codificador.id, role="user", content=msg)
            
            # Alimentamos el asistente con el error generado en la ejecución del código.    
            except Exception as e:
                logger.exception("Error: %s", e)
                error_trace = traceback.format_exc()
                for call in tool_call:
                    agente.devolver_respuesta(response.id, hilo_codificador.id, tool_outputs=[{"tool_call_id": call.id, "output": "ERROR."}]) 
                logger.error(f"Error: {e}.")
                while response.status not in ["completed", "failed", "expired"]:
                    logger.info(f"Status: {response.status}")
                    response = agente.get_run(ejecucion.id, hilo_codificador.id)
                    time.sleep(30)
                msg = f"The code generated has an error. Please, try again. Error: {e}. Trace: {error_trace}"   
                logger.error(msg)   
                agente.add_message(hilo_codificador.id, role="assistant", content=msg)
    
    agente.mostrar_mensajes(hilo_codificador.id)  
    vaciar_agente(agente)
    
    logger.info("\nEjecución finalizada.\n\n")       

    return response


def vaciar_agente(agente):
    for assistant in agente.assistants:
        agente.delete_assistant(assistant)

    for thread in agente.threads:
        agente.delete_thread(thread)
        
    print("Asistente vaciado.")


In [15]:
DESCRIPTION = "You are an expert agent in spacecraft landing and decision making codificiation. Based on the landing records received and the indications received by a supervising agent, you must generate the code to land the spacecraft in the environment whatever the initial conditions are."
DESCRIPTION_SUPERVISOR = "You are an expert agent in spacecraft landing environments. Your mission is to analyze landing logs and the code generated by the agent specialized in coding, to guide him in improving the performance of his decision making code."
INSTRUCTIONS = f"""To complete the task you must follow the following steps and indications:
1. Analyze and reason about the records received in the last landing attempts to learn how the spacecraft behaves in the environment. Keep in mind that conditions may vary.
2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:
    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).
    - Increased/decreased the slower/faster the spacecraft is moving.
    - Decreased the more the spacecraft is tilted.
    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engine.
    - Receive 100 points for a successful landing and lose them for crashing.
    - A succesful landing must exceed 200 points.
3. This is the log schema:\n {log_description}
4. Add your decision code to the 'act' method and locate errors in it if the landing is unsuccessful.
5. Save the code of the act function in the file 'Action.py' using store_code_in_file function.
"""
INSTRUCTIONS_SUPERVISOR = f"""To complete the task you must follow the following steps and indications:
1. Revisa aterrizajes exitosos y fracasados para analizar el efecto de las acciones en cada una de las observaciones y decidir que decisión tomar en cada momento.
2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:
    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).
    - Increased/decreased the slower/faster the spacecraft is moving.
    - Decreased the more the spacecraft is tilted.
    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engine.
    - Receive 100 points for a successful landing and lose them for crashing.
    - A succesful landing must exceed 200 points.
3. This is the log schema:\n {log_description}
4. All the knowledge obtained must be expressed in the best possible way to the coding LLM agent so that it can improve the performance of the generated decision making code.
"""
# 6. Improve your results and correct also any programming error you may have generated in your last code if they exist.
NAME = "Spacecraft Landing Decision coder"
NAME_SUPERVISOR = "Spacecraft Landing Supervisor"


supervisor_msg = f"This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}. Take a deep breath and reason step-by-step.What does each value and each change in the observations mean? What effects does each action have on the current state observations? What are the steps to follow to achieve a successful landing? Do not implement any code."
code_msg = f"This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}. After the reasoning with the processes behind the decision making, code very carefully when structuring your code, keep in mind that certain actions can be left with starvation. Save the code of the act function in the file 'Action.py' using store_code_in_file function tool, function calling is required."



In [16]:
logger = configura_log('Spacecraft_4o_Stepback.log')
response = create_and_run_llm_loop(supervisor_msg, code_msg, logger, model="gpt-4o", num_iterations=5)

print(response)

2024-07-01 20:51:35,000 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07-01 20:51:35,277 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07-01 20:51:35,474 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-07-01 20:51:35,669 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-07-01 20:51:35,971 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_lBmbPXP2drYvy7NV15idXtq6/messages "HTTP/1.1 200 OK"
2024-07-01 20:51:36,555 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_lBmbPXP2drYvy7NV15idXtq6/runs "HTTP/1.1 200 OK"
2024-07-01 20:51:36,765 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_lBmbPXP2drYvy7NV15idXtq6/runs/run_6z4NoS6tNsDptxfZvK1rdWVu "HTTP/1.1 200 OK"
2024-07-01 20:51:36,768 - INFO - Status: queued
2024-07-01 20:51:36,980 - INFO - HTTP Request: GET https://api.openai.com/v

Waiting for response...


2024-07-01 20:51:56,985 - INFO - Status: queued
2024-07-01 20:51:57,250 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_lBmbPXP2drYvy7NV15idXtq6/runs/run_6z4NoS6tNsDptxfZvK1rdWVu "HTTP/1.1 200 OK"


Waiting for response...


2024-07-01 20:52:17,481 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_lBmbPXP2drYvy7NV15idXtq6/messages "HTTP/1.1 200 OK"
2024-07-01 20:52:17,731 - INFO - To achieve a successful landing, it is essential to understand and control not only the spacecraft's position and velocity but also its angular dynamics. Here's a detailed breakdown of the key elements in the observations and necessary actions:

### Key Observations and Actions

1. **Position (X, Y)**:
   - **X position**: Horizontal distance from the landing zone center.
   - **Y position**: Height above the landing area.

2. **Velocity (X velocity, Y velocity)**:
   - **X velocity**: Speed in the horizontal direction. It should ideally be close to zero to avoid horizontal skidding at landing.
   - **Y velocity**: Speed in the vertical direction. This also should be slow to ensure a gentle touchdown.

3. **Angle and Angular Velocity**:
   - **Angle**: The tilt of the spacecraft. Perfect vertical alignment is ze

assistant: [TextContentBlock(text=Text(annotations=[], value="To achieve a successful landing, it is essential to understand and control not only the spacecraft's position and velocity but also its angular dynamics. Here's a detailed breakdown of the key elements in the observations and necessary actions:\n\n### Key Observations and Actions\n\n1. **Position (X, Y)**:\n   - **X position**: Horizontal distance from the landing zone center.\n   - **Y position**: Height above the landing area.\n\n2. **Velocity (X velocity, Y velocity)**:\n   - **X velocity**: Speed in the horizontal direction. It should ideally be close to zero to avoid horizontal skidding at landing.\n   - **Y velocity**: Speed in the vertical direction. This also should be slow to ensure a gentle touchdown.\n\n3. **Angle and Angular Velocity**:\n   - **Angle**: The tilt of the spacecraft. Perfect vertical alignment is zero degrees. Any deviation should be corrected to avoid landing on an edge.\n   - **Angular velocity**:

2024-07-01 20:52:18,028 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_6JvqFTYrY690DzVJVR1NVZpd/messages "HTTP/1.1 200 OK"
2024-07-01 20:52:18,277 - INFO - Iteration: 1
2024-07-01 20:52:18,652 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_6JvqFTYrY690DzVJVR1NVZpd/runs "HTTP/1.1 200 OK"
2024-07-01 20:52:18,858 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_6JvqFTYrY690DzVJVR1NVZpd/runs/run_RJ5MLv0Zmx4H7N8xzxXjdAwK "HTTP/1.1 200 OK"
2024-07-01 20:52:18,862 - INFO - Status: in_progress


Run(id='run_RJ5MLv0Zmx4H7N8xzxXjdAwK', assistant_id='asst_fcjNgTQdjwyDKoWmIjDVbhWO', cancelled_at=None, completed_at=None, created_at=1719859937, expires_at=1719860537, failed_at=None, incomplete_details=None, instructions="To complete the task you must follow the following steps and indications:\n1. Analyze and reason about the records received in the last landing attempts to learn how the spacecraft behaves in the environment. Keep in mind that conditions may vary.\n2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:\n    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).\n    - Increased/decreased the slower/faster the spacecraft is moving.\n    - Decreased the more the spacecraft is tilted.\n    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engine.\n    - Receive 100 points for a successful la

AttributeError: 'NoneType' object has no attribute 'submit_tool_outputs'

### Tasks
- Corregir rotación.
- Cantidad de logs.
- Partir de ejemplo exitoso para afinarlo. (y generalizar)
- Incluir take step back.
- Probar con 2 modelos. Uno encargado del razonamiento y otro de codificar. (O uno que retroalimente al otro con las conclusiones obtenidas)