#### Imports

In [1]:
import time
from openai import OpenAI
from Assistant import AssistantOpenAI
import logging
import os
import json
import gymnasium as gym
import random
import matplotlib.pyplot as plt
import os
import time
from git import Repo

import numpy as np
#from collections import deque, namedtuple

# For visualization
import gymnasium.wrappers.record_video as record_video

#### Parameters

In [2]:
num_episodes = 200
ENV_NAME = 'LunarLander-v2'
ARCLABKEY_OPENAI = "sk-proj-DvHDR3hpgbm2r3kCA9jKT3BlbkFJL57ABXkfaWAIYKBxdhM6"
ARCLABKEY_OPENAI = "sk-proj-GvaDXazpibWA2M1I5Pu2T3BlbkFJxDuKlr9AcoVG98ctJZ7Q"

#### Function calling.

Función encargada de almacenar el código generado mediante la opción de function calling del asistente de OpenAI.

In [3]:
def store_code_in_file(reasoning, code, filename,):
    """ Store code in a file
    
    Args:
        reasoning: str: Pre-coding reasoning on log analysis and the influence of actions on observations and steps to follow to achieve a successful landing.
        code: str: code to store. Use the reasoning parameter to generate this code.
        filename: str: filename to store code in
    
    Returns:
        None
    """
    print(f"Model reasoning: {reasoning}")
    with open(filename, 'w') as f:
        f.write(code)
        
store_code_in_file_schema = {
    "name": "store_code_in_file",
    "description": "Store code in a file",
    "parameters": {
        "type": "object",
        "properties": {
            "reasoning": {
                "type": "string",
                "description": "Pre-coding deep reasoning on log analysis and the influence of actions on observations and steps to follow to achieve a successful landing. It is an expnasion but not the code itself."
            },
            "code": {
                "type": "string",
                "description": "The Python code to store. Use the reasoning parameter to generate this code."
            },
            "filename": {
                "type": "string",
                "description": "The filename to store the code in."
            }
        },
        "required": ["code", "filename"]
    }
}

TOOLS = [{"type": "function", "function": store_code_in_file_schema}]

### Entorno Lunnar Lander

In [4]:
env = gym.make('LunarLander-v2')
print('State shape: ', env.observation_space.shape)
print('Number of actions: ', env.action_space.n)

State shape:  (8,)
Number of actions:  4


In [5]:
agent_initial_code = """
import random

def act(observation):
    '''
    The function that codifies the action to be taken in each instant of time.

    Args:
        observation (numpy.array):
            "description": "The state of the environment after the action is taken.",
            "positions": {  
                "0": "X position",
                "1": "Y position",
                "2": "X velocity",
                "3": "Y velocity",
                "4": "Angle",
                "5": "Angular velocity",
                "6": "Left contact sensor",
                "7": "Right contact sensor"
            },
            "min_values": [-1.5, -1.5, -5.0, -5.0, -3.14, -5.0, 0, 0],
            "max_values": [1.5, 1.5, 5.0, 5.0, 3.14, 5.0, 1, 1]

    Returns:
        Integer  : The action to be taken.
        "options": {
                '0' : "Switch off engines",
                '1' : "Push left engine",
                '2' : "Push both engines (upwards)",
                '3' : "Push right engine"
            }
    '''
    return random.randint(0, 3)
"""

#### Mapeador de logs a JSON

In [6]:
log_description = {
    "description": "Log data for each step of the spacecraft landing environment.",
    "landing attempt": {
        "type": "integer",
        "description": "The episode number."
    },
    "logs": {
        "time": {
            "type": "integer",
            "description": "The instant within the landing attempt where the current log is taken."
        },
        "action": {
            "type": "integer",
            "description": "The possible actions taken.",
            "options": {
                '0' : "Switch off engines",
                '1' : "Push left engine",
                '2' : "Push both engines (upwards)",
                '3' : "Push right engine"
            }
        },  
        "current status": {
            "type": "array",
            "description": "The state of the environment after the action is taken.",
            "positions": {  
                "0": {
                    "name": "X position",
                    "description": "The horizontal position of the spacecraft relative to the landing zone."
                },
                "1": {
                    "name": "Y position",
                    "description": "The vertical position of the spacecraft relative to the landing zone."
                },
                "2": {
                    "name": "X velocity",
                    "description": "The horizontal velocity of the spacecraft."
                },
                "3": {
                    "name": "Y velocity",
                    "description": "The vertical velocity of the spacecraft."
                },
                "4": {
                    "name": "Angle",
                    "description": "The angle of the spacecraft relative to the vertical (left negative, right positive)."
                },
                "5": {
                    "name": "Angular velocity",
                    "description": "The rate of change of the angle of the spacecraft."
                },
                "6": {
                    "name": "Left contact sensor with landing zone",
                    "description": "Indicates whether the left side of the spacecraft is in contact with the landing zone."
                },
                "7": {
                    "name": "Right contact sensor with landing zone",
                    "description": "Indicates whether the right side of the spacecraft is in contact with the landing zone."
                }
            },
            "min_values": [-1.5, -1.5, -5.0, -5.0, -3.14, -5.0, 0, 0],
            "max_values": [1.5, 1.5, 5.0, 5.0, 3.14, 5.0, 1, 1],
            
        }, 
        "score": {
            "type": "number",
            "description": "The score received for the action."
        },  
        "completed": {
            "type": "boolean",
            "description": "Whether the landing event has ended (landing or accident)."
        }
    },
    "total score": {
        "type": "number",
        "description": "The total score received for the landing attempt."
    }
}

### UTILS

In [7]:
import json

def format_step_data(instant, action, next_state, reward, terminated):
    """ Format the step data into a JSON string
    
    Args:
        instant: int: the instant within the landing attempt where the current log is taken
        action: int: the action taken
        next_state: np.array: the next state of the environment
        reward: float: the reward received
        terminated: bool: whether the landing event has ended
        
    Returns:
        str: the step data formatted as a JSON string
    """
    # Convertir el array numpy a una lista
    next_state_list = next_state.tolist()

    # Redondear los elementos de la lista a 4 decimales
    next_state_list_rounded = [round(x, 3) for x in next_state_list]

    step_data = {
        'time': instant,
        'action' : int(action),
        'current status': next_state_list_rounded,  # Convert numpy array to list
        'score': round(reward, 3),
        #'completed': terminated,
        #'truncated': truncated
        #'info': info
    }

    # Convert the dictionary to a JSON string
    #step_data_json = json.dumps(step_data)

    return step_data


def format_episode_logs(logs, episode, total_score):
    """ Format the logs into a JSON string
    
    Args:
        logs: list: the logs for each step of the environment
        episode: int: the episode number
        
    Returns:
        str: the logs formatted as a JSON string
    """
    logs_data = {
        'landing attempt': episode,
        'logs': logs,
        'total score': round(total_score, 3)
    }

    # Convert the dictionary to a JSON string
    logs_json = json.dumps(logs_data)

    return logs_json

In [8]:
def commit_changes(repo_path, commit_message):
    """ Commit changes to the repository.

    Args:
        repo_path (str): Path to the repository.
        commit_message (str): The commit message.
    """
    repo = Repo(repo_path)
    repo.git.add(update=True)
    repo.index.commit(commit_message)
    origin = repo.remote(name='origin')
    origin.push()

#### Código encargado de ejecutar los eventos en el entorno.

In [9]:
import Action

def lunar_lander(max_t=1000, n_games=1, display=False, seed=38, agent=None, recoder=False, video_filename='video/video.mp4'):
    """ Run the Lunar Lander environment
    
    Args:
        max_t: int: the maximum number of timesteps
        n_games: int: the number of games to play
        display: bool: whether to display the environment
        seed: int: the seed for the environment
        agent: object: the agent to use
        recoder: bool: whether to record the video
        video_filename: str: the filename for the video
        
    Returns:
        str: the logs for the landing attempts
    """
    
    # Incluimos la opción del entorno gráfico y la de grabación.
    if display:
        env = gym.make(ENV_NAME, render_mode='human')
        if recoder:
            env = gym.make(ENV_NAME, render_mode='rgb_array')
            env.reset() 
            video_recorder = record_video.RecordVideo(env, video_filename)
    else:
        env = gym.make(ENV_NAME)

    # Bucle principal de ejecución de los episodios.
    logs = []
    for episode in range(1, n_games+1):
        # Revisamos que la semilla sea un número entero o una lista de semillas.
        if isinstance(seed, list):
            semilla = seed[episode-1]
        else:
            semilla = seed
        print(f"Semilla {semilla}")    
        state = env.reset(seed=semilla) # Set a seed for the environment
        state = state[0] # Eliminamos el diccionario vacio y dejamos unicamente el estado de 8 elementos.
        score = 0
        instant = 0
        episode_actions = []
        if recoder: # En caso de que se quiera grabar el video, se inicia el grabador.
            video_recorder.start_video_recorder()
            
        for i in range(max_t): # Bucle de ejecución de los instantes de tiempo.
            
            # Seleccionamos el método de elegir la acción (agente exitoso o método de decisión del asistente)
            if agent: 
                action = agent.act(state)
            else:
                action = Action.act(state)
            
            # Avanzamos un instante de tiempo en el entorno en función de si se quiere grabar el video o no.
            if recoder:
                next_state, reward, terminated, truncated, info = video_recorder.step(action)
            else:
                next_state, reward, terminated, truncated, info = env.step(action)
            
            # Formateamos los datos del instante de tiempo y los alcenamos.
            json_logs = format_step_data(instant, action, next_state, reward, terminated)
            score += reward
            instant += 1
            if len(episode_actions) < 20 or len(episode_actions) % 2 == 0 or terminated:
                logs.append(json_logs)
            #logs.append(json_logs)
            episode_actions.append(action)
            state = next_state
            if terminated: # Condición de salida del bucle, si el episodio ha terminado.
                break
        json_episode_logs = format_episode_logs(logs, episode, score) 
        
        print(f"Número de instantes: {instant+1}. Tamaño de logs: {len(logs)}")
        print('episode ', episode, 'score %.3f' % float(score), 'avg score %.3f' % (float(score) / instant))
        
    if recoder:
        video_recorder.close()
    else:
        env.close()
        
    return json_episode_logs

#### Logs del código inicial

In [10]:
initial_logs = lunar_lander(n_games=1, display=True)
initial_logs

Semilla 38
Número de instantes: 98. Tamaño de logs: 59
episode  1 score -120.058 avg score -1.238


'{"landing attempt": 1, "logs": [{"time": 0, "action": 3, "current status": [0.008, 1.417, 0.39, 0.131, -0.01, -0.123, 0.0, 0.0], "score": -0.871}, {"time": 1, "action": 1, "current status": [0.011, 1.42, 0.382, 0.104, -0.015, -0.09, 0.0, 0.0], "score": 0.85}, {"time": 2, "action": 2, "current status": [0.015, 1.423, 0.387, 0.125, -0.019, -0.086, 0.0, 0.0], "score": -2.125}, {"time": 3, "action": 2, "current status": [0.019, 1.426, 0.378, 0.151, -0.024, -0.096, 0.0, 0.0], "score": -1.163}, {"time": 4, "action": 3, "current status": [0.023, 1.429, 0.388, 0.124, -0.031, -0.138, 0.0, 0.0], "score": -1.05}, {"time": 5, "action": 0, "current status": [0.027, 1.431, 0.388, 0.097, -0.038, -0.137, 0.0, 0.0], "score": -0.186}, {"time": 6, "action": 2, "current status": [0.03, 1.433, 0.383, 0.11, -0.045, -0.145, 0.0, 0.0], "score": -1.1}, {"time": 7, "action": 0, "current status": [0.034, 1.435, 0.383, 0.083, -0.052, -0.145, 0.0, 0.0], "score": -0.271}, {"time": 8, "action": 0, "current status":

#### Logs exitosos
Utilizar otras semillas para que no memorice el entorno

In [11]:
import DuelingDQN

agent = DuelingDQN.Agent(num_observaciones=8, num_acciones=4, red_modelo=DuelingDQN.DuelingQNetwork, seed=0)
# Cargamos los pesos del agente entrenado.
agent.load_weights('checkpoint_Dueling.pth')

seeds = [130, 412]

success_logs = lunar_lander(n_games=len(seeds), display=True, seed=seeds, agent=agent)
success_logs

Semilla 130
Número de instantes: 242. Tamaño de logs: 131
episode  1 score 263.217 avg score 1.092
Semilla 412
Número de instantes: 242. Tamaño de logs: 262
episode  2 score 266.713 avg score 1.107


'{"landing attempt": 2, "logs": [{"time": 0, "action": 1, "current status": [0.006, 1.393, 0.31, -0.402, -0.005, -0.029, 0.0, 0.0], "score": -0.567}, {"time": 1, "action": 1, "current status": [0.009, 1.384, 0.301, -0.429, -0.005, 0.005, 0.0, 0.0], "score": -0.672}, {"time": 2, "action": 1, "current status": [0.012, 1.373, 0.291, -0.456, -0.003, 0.045, 0.0, 0.0], "score": -0.468}, {"time": 3, "action": 3, "current status": [0.015, 1.363, 0.303, -0.483, -0.003, -0.001, 0.0, 0.0], "score": -1.813}, {"time": 4, "action": 1, "current status": [0.018, 1.351, 0.291, -0.509, -0.0, 0.045, 0.0, 0.0], "score": -0.367}, {"time": 5, "action": 3, "current status": [0.021, 1.339, 0.299, -0.536, 0.0, 0.015, 0.0, 0.0], "score": -1.553}, {"time": 6, "action": 3, "current status": [0.024, 1.326, 0.308, -0.563, -0.001, -0.022, 0.0, 0.0], "score": -1.579}, {"time": 7, "action": 1, "current status": [0.027, 1.313, 0.3, -0.59, -0.0, 0.009, 0.0, 0.0], "score": -0.688}, {"time": 8, "action": 2, "current statu

#### Bucle iterativo

Sección principal del código encargada de conectar con el asistente de la API de OpenAI e iterar en la generación de código nuevo a partir de los registros del generado previamente.

In [12]:
import traceback, importlib
import Action

Action = importlib.reload(Action)

def create_and_run_llm_loop(Incial_msg, logger, model="gpt-3.5-turbo", num_iterations=10):
    
    agente = AssistantOpenAI(ARCLABKEY_OPENAI)

    # Crea un asistente
    asistente = agente.create_assistant(model=model, description=DESCRIPTION, instructions=INSTRUCTIONS, name=NAME, tools=TOOLS)
    
    # Crea un hilo
    hilo = agente.create_thread()
    
    # Añade un mensaje inicial al hilo.
    msg = agente.add_message(hilo.id, role="user", content=Incial_msg)

    # Bucle de aprendizaje del asistente.
    for i in range(num_iterations):
        
        logger.info(f"Iteration: {i+1}")
        compiled = False

        # Si en la presente iteración no se ha compilado el código, se pide al asistente corregir los errores antes de iterar.
        while not compiled:
            # Ejecuta el hilo
            ejecucion = agente.run(hilo.id, asistente.id, tool_choice='required')
            response = agente.get_run(ejecucion.id, hilo.id)

            # Esperamos a que la respuesta esté lista.
            while response.status not in ["completed", "failed", "requires_action"]:
                logger.info(f"Status: {ejecucion.status}")
                response = agente.get_run(ejecucion.id, hilo.id)
                time.sleep(20)

            # Descomponemos los elementos de la respuesta.
            logger.info(f"Status: {response.status}")
            if response.status == "completed":
                return response
            tool_call = response.required_action.submit_tool_outputs.tool_calls
            print(f"Tool call: {tool_call}")

            # Convierte el string a un diccionario
            code_dict = json.loads(tool_call[0].function.arguments)
            logger.info(f"Arguments: {code_dict}")
            
            # Obtén el código Python de la llamada a la herramienta
            code = code_dict["code"]
            filename = code_dict["filename"]  
            reasoning = code_dict["reasoning"]

            logger.info(f"\nRazonamiento:\n{reasoning}")
            logger.info(f"\nCodigo generado:\n{code}")

            # Ejecuta el código Python
            try:
                store_code_in_file(reasoning, code, filename)
                time.sleep(1) # Pequeño retraso para que el sistema operativo pueda reflejar los cambios en el archivo
                
                # Hacemos commit de los cambios en el repositorio para analizar las modificaciones del agente.
                commit_changes(r"C:\Users\adria\Repos\TFM__LLM_landing_self-refinement", f"4o Setpback. Iteración {i+1}.") 
                
                importlib.reload(Action) # Recargamos el módulo de acciones para que se actualice con las modificaciones del agente.

                # Ejecutamos el código generado.
                logs = lunar_lander(n_games=1, display=True, recoder=True, video_filename=f"video/iteration_{i+1}.mp4")
                
                # Devolvemos la respuesta al asistente.
                for call in tool_call:
                    agente.devolver_respuesta(response.id, hilo.id, tool_outputs=[{"tool_call_id": call.id, "output": "Run successful."}])            
                compiled = True
                logger.info(f"Compilación exitosa.")
                
                # Esperamos a que el agente esté listo para recibir mensajes y le añadimos el resultado de la iteración.
                while response.status not in ["completed", "failed", "expired"]:
                    logger.info(f"Status: {response.status}")
                    response = agente.get_run(response.id, hilo.id)
                    time.sleep(20)
                
                msg = f"""These are the logs generated by your last code: {logs}. Analyze the performance of the spacecraft and how it differs from the desired result. Be guided by the scoring system. Identify the cause of errors in your code and modify it without fear of making major changes. Think deeply about the priorities of your code and how to order and combine them correctly to achieve success. Remember that the code is a reflection of your reasoning and the way you approach the problem."""
                logger.info(msg)    
                agente.add_message(hilo.id, role="user", content=msg)
            
            # Alimentamos el asistente con el error generado en la ejecución del código.    
            except Exception as e:
                logger.exception("Error: %s", e)
                error_trace = traceback.format_exc()
                for call in tool_call:
                    agente.devolver_respuesta(response.id, hilo.id, tool_outputs=[{"tool_call_id": call.id, "output": "ERROR."}]) 
                logger.error(f"Error: {e}.")
                while response.status not in ["completed", "failed", "expired"]:
                    logger.info(f"Status: {response.status}")
                    response = agente.get_run(ejecucion.id, hilo.id)
                    time.sleep(30)
                msg = f"The code generated has an error. Please, try again. Error: {e}. Trace: {error_trace}"   
                logger.error(msg)   
                agente.add_message(hilo.id, role="assistant", content=msg)
    
    agente.mostrar_mensajes(hilo.id)  
    vaciar_agente(agente)
    
    logger.info("\nEjecución finalizada.\n\n")       

    return response


def vaciar_agente(agente):
    for assistant in agente.assistants:
        agente.delete_assistant(assistant)

    for thread in agente.threads:
        agente.delete_thread(thread)
        
    print("Asistente vaciado.")


#### Logger

In [13]:
def configura_log(nombre_archivo):
    """ 
    Configura el logger para que guarde los logs en un archivo y los muestre en la consola.
    
    Args:
        nombre_archivo: str Nombre del archivo donde se guardarán los logs.
        
    Returns:
        logger: logging.Logger Objeto logger configurado.
    """
    # Crear la carpeta logs si no existe
    if not os.path.exists('logs'):
        os.makedirs('logs')
    # Configura el logger
    logging.basicConfig(filename=f'logs/{nombre_archivo}', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
    logger = logging.getLogger()
    
    # Añade un StreamHandler para mostrar los logs en la consola
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)
    
    return logger

### Ejecución con el asistente

Prompt inicial y mensajes del sistema para el asistente.

In [None]:
DESCRIPTION = "You are an expert programer in Pyhton. Your specialty is to generate the code responsible for making decisions about actions to be taken in various spacecraft landing environments.The objective is to land the spacecraft within a target zone in the shortest possible time and very gently. A scoring system is used to evaluate the landings, which must be maximized."
INSTRUCTIONS = f"""Your task is:
You have two modes of action depending on whether the use of function call is required. Reason deeply about what you are asked to do or code the requested task.
1. Analyze and reason about the logs received in the last landing attempts.
2. Your goal is to be able to make the correct decision based on what you have learned from the results of previous iterations. You must code the decision making based on your reasoning in a Python function.
3. IMPORTANT. Use the following tips in your reasoning to achieve a successful landing:
    - First you have to stabilize the falling ship (angle and location), keep falling under control and then land gently at the end.
    - It is mandatory to use all the elements of the array of observations received by parameter in your code when deciding what action to take at any given moment. Both position and velocities must be taken into account to know how the ship is doing and towards which states it is heading. All of these must be considered to achieve stability.
    - Learn how actions taken affect the future states of the spacecraft in the logs of past events so that you can take this into account when developing code to reach the landing zone.
    - The landing zone is in the central area of the x-axis.
    - Carefully choose the actions involving the lateral engines according to the rotation of the spacecraft.
4. This is the schema of the landing logs: {log_description}.
5. You should analyze the performance that appear in the logs of the code you have generated. You should improve the code generated in the 'act' function in the last iteration without fear of making major changes, seeking to maximize the score received and generate a higher quality code.
6. Save the code of the act function in the file 'Action.py' using store_code_in_file function tool.
"""
# 6. Improve your results and correct also any programming error you may have generated in your last code if they exist.
NAME = "Spacecraft Landing Master"

initial_msg = f"This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. You have to be able to learn from it to land successfully with any other conditions. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}.Take a deep breath and reason step-by-step. After reasoning analyze the results, learn and make better code."

logger = configura_log('Spacecraft_4o_betterprompt.log')
response = create_and_run_llm_loop(initial_msg, logger, model="gpt-4o", num_iterations=5)

print(response)

2024-07-02 00:37:43,931 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07-02 00:37:43,931 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07-02 00:37:44,122 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-07-02 00:37:44,122 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-07-02 00:37:44,385 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_QoB6MCbRbYhGkgNXtosUNZOa/messages "HTTP/1.1 200 OK"
2024-07-02 00:37:44,385 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_QoB6MCbRbYhGkgNXtosUNZOa/messages "HTTP/1.1 200 OK"
2024-07-02 00:37:44,532 - INFO - Iteration: 1
2024-07-02 00:37:44,532 - INFO - Iteration: 1
2024-07-02 00:37:44,929 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_QoB6MCbRbYhGkgNXtosUNZOa/runs "HTTP/1.1 200 OK"
2024-07-02 00:37:44,929 - INFO - HTTP Request: POST htt

Tool call: [RequiredActionFunctionToolCall(id='call_aFYAf5toEx9JxbWztDqncIya', function=Function(arguments='{"code":"import numpy as np\\n\\ndef act(observation):\\n    \'\'\'\\n    The function that codifies the action to be taken in each instant of time.\\n\\n    Args:\\n        observation (numpy.array):\\n            \\"description\\": \\"The state of the environment after the action is taken.\\",\\n            \\"positions\\": {  \\n                \\"0\\": \\"X position\\",\\n                \\"1\\": \\"Y position\\",\\n                \\"2\\": \\"X velocity\\",\\n                \\"3\\": \\"Y velocity\\",\\n                \\"4\\": \\"Angle\\",\\n                \\"5\\": \\"Angular velocity\\",\\n                \\"6\\": \\"Left contact sensor\\",\\n                \\"7\\": \\"Right contact sensor\\"\\n            },\\n            \\"min_values\\": [-1.5, -1.5, -5.0, -5.0, -3.14, -5.0, 0, 0],\\n            \\"max_values\\": [1.5, 1.5, 5.0, 5.0, 3.14, 5.0, 1, 1]\\n\\n    Returns:

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4



2024-07-02 00:38:30,678 - ERROR - Error: Missing required arguments; Expected either ('thread_id' and 'tool_outputs') or ('thread_id', 'stream' and 'tool_outputs') arguments to be given
Traceback (most recent call last):
  File "C:\Users\adria\AppData\Local\Temp\ipykernel_19840\4102630422.py", line 69, in create_and_run_llm_loop
    agente.devolver_respuesta(response.id, hilo.id, tool_outputs=[{"tool_call_id": call.id, "output": "Run successful."}])
  File "c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\Assistant.py", line 171, in devolver_respuesta
    response = self.client.beta.threads.runs.submit_tool_outputs(run_id, thread_id=thread_id, **kwargs)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\adria\anaconda3\Lib\site-packages\openai\_utils\_utils.py", line 276, in wrapper
    raise TypeError(msg)
TypeError: Missing required arguments; Expected either ('thread_id' and 'tool_outputs') or ('thread_id', '

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4
Número de instantes: 73. Tamaño de logs: 47
episode  1 score -612.950 avg score -8.513


TypeError: Missing required arguments; Expected either ('thread_id' and 'tool_outputs') or ('thread_id', 'stream' and 'tool_outputs') arguments to be given

#### Prueba de cambio grande en el prompt.

In [14]:
DESCRIPTION = "You are an expert spacecraft landing agent. Your specialty is to take the appropriate action at each instant of time based on the environment and state of the spacecraft. To achieve this, you put your knowledge base and the knowledge you acquire by analyzing each execution into a python function in charge of directing the landing."
INSTRUCTIONS = f"""To complete the task you must follow the following steps and indications:
1. Analyze and reason about the records received in the last landing attempts to learn how the spacecraft behaves in the environment. Keep in mind that conditions may vary.
2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:
    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).
    - Increased/decreased the slower/faster the spacecraft is moving.
    - Decreased the more the spacecraft is tilted.
    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engine.
    - Receive 100 points for a successful landing and lose them for crashing.
3. IMPORTANT. Use the following tips in your reasoning to achieve a successful landing:
    - First you have to stabilize the falling ship (speed, angle and location), keep falling under control and then land gently at the end.
    - Use all the elements of the vector of observations, all are relevant to make the right decision.
    - Pay close attention to successful events.
    - Environment and ship conditions may change but your code must be effective for all cases.
    - Find a balance in your landing policy to maximize your score (must exceed 200 points).
    - If you are rotating to the left (negative values), you should fire the left engine and vice versa. But always prioritizing a controlled speed with the main engine.
4. This is the schema of the landing logs: {log_description}.
5. You should analyze the performance that appear in the logs of the code you have generated. You should improve the code generated in the 'act' function in the last iteration without fear of making major changes, seeking to maximize the score received and generate a higher quality code.
6. Save the code of the act function in the file 'Action.py' using store_code_in_file function tool.
"""

# 6. Improve your results and correct also any programming error you may have generated in your last code if they exist.
NAME = "Spacecraft Landing Master"

initial_msg = f"This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}. What does each value and each change in the observations mean? What effects does each action have on the current state observations? What are the steps to follow to achieve a successful landing?. Take a deep breath and reason step-by-step. After reasoning analyze the results, learn and make better code."

logger = configura_log('Spacecraft_4o_Stepback.log')
response = create_and_run_llm_loop(initial_msg, logger, model="gpt-4o", num_iterations=5)

print(response)

2024-07-02 01:28:50,965 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07-02 01:28:51,143 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-07-02 01:28:51,438 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/messages "HTTP/1.1 200 OK"
2024-07-02 01:28:51,571 - INFO - Iteration: 1
2024-07-02 01:28:51,986 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs "HTTP/1.1 200 OK"
2024-07-02 01:28:52,391 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_Pt7gqDwYum9j3Ux59bApeiz7 "HTTP/1.1 200 OK"
2024-07-02 01:28:52,394 - INFO - Status: queued
2024-07-02 01:28:52,595 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_Pt7gqDwYum9j3Ux59bApeiz7 "HTTP/1.1 200 OK"
2024-07-02 01:29:12,599 - INFO - Status: queued
2024-07-02 01:29:12,854

Tool call: [RequiredActionFunctionToolCall(id='call_09KGpk8yWlzAUJfv4sZ0RvbY', function=Function(arguments='{"reasoning": "### Analysis and Reasoning\\n\\n#### Observations and Actions:\\n1. **X and Y Position (`obs[0]`, `obs[1]`)**: These are the horizontal and vertical positions of the spacecraft. The goal is to bring the X position to 0 (centered on the landing pad) and the Y position to 0 (landed on the pad).\\n2. **X and Y Velocity (`obs[2]`, `obs[3]`)**: These are the velocities in the X and Y directions. We aim to reduce these to near zero for a soft landing.\\n3. **Angle (`obs[4]`)**: This is the tilt angle of the spacecraft. We aim to keep this at 0 (vertical) to avoid tipping over.\\n4. **Angular Velocity (`obs[5]`)**: This is the rate at which the angle is changing. This should also be minimized to ensure the spaceship is stable.\\n5. **Left and Right Contact Sensors (`obs[6]`, `obs[7]`)**: These indicate if the left or right side of the spacecraft is in contact with the gro

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4



                                                              

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4
Número de instantes: 145. Tamaño de logs: 83
episode  1 score -23.203 avg score -0.161


2024-07-02 01:29:37,623 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_Pt7gqDwYum9j3Ux59bApeiz7/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-02 01:29:37,625 - INFO - Compilación exitosa.
2024-07-02 01:29:37,626 - INFO - Status: requires_action
2024-07-02 01:29:37,836 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_Pt7gqDwYum9j3Ux59bApeiz7 "HTTP/1.1 200 OK"
2024-07-02 01:29:57,839 - INFO - Status: in_progress
2024-07-02 01:29:58,153 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_Pt7gqDwYum9j3Ux59bApeiz7 "HTTP/1.1 200 OK"
2024-07-02 01:30:18,157 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 0, "current status": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], "score": 0.232}, {"time": 1, "action": 0, "current status": [0.011, 1.42, 0.38, 0.104, -0.013, -0

Tool call: [RequiredActionFunctionToolCall(id='call_UinaJHyHMgLlfrraivUpESvO', function=Function(arguments='{"reasoning":"### Analysis of Performance\\n\\nBased on the latest log, we can observe the following issues and necessary adjustments:\\n\\n1. **Inaction During Flight**: For a significant portion of the logs, the spacecraft did not perform any action (action 0) despite requiring stabilization and velocity adjustments. This led to a continuous accumulation of negative scores.\\n2. **Angular Stabilization**: The initial steps correctly assessed no significant angular issues, but corrective actions continued to be minimal or late.\\n3. **Horizontal and Vertical Control**: Actions to manage horizontal and vertical velocities need to be more responsive and accountable for both current velocity and position.\\n4. **Boosters Activation**: The code needs a more effective control routine for when to use which engines to maximize stabilization and minimize unwanted tilts or velocities.\\n

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4
Número de instantes: 78. Tamaño de logs: 49
episode  1 score -116.178 avg score -1.509


2024-07-02 01:31:03,144 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_k50bHdiqmVqIjvXUI2M1HDLC/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-02 01:31:03,149 - INFO - Compilación exitosa.
2024-07-02 01:31:03,149 - INFO - Status: requires_action
2024-07-02 01:31:03,354 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_k50bHdiqmVqIjvXUI2M1HDLC "HTTP/1.1 200 OK"
2024-07-02 01:31:23,358 - INFO - Status: in_progress
2024-07-02 01:31:23,609 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_k50bHdiqmVqIjvXUI2M1HDLC "HTTP/1.1 200 OK"
2024-07-02 01:31:43,611 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 0, "current status": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], "score": 0.232}, {"time": 1, "action": 0, "current status": [0.011, 1.42, 0.38, 0.104, -0.013, -0

Tool call: [RequiredActionFunctionToolCall(id='call_Xt1NDiRvOUIFCBDveYWYRgeC', function=Function(arguments='{"reasoning":"### Analysis of Performance and Adjustments Needed\\n\\n1. **Inaction During Flight**: The spacecraft remains inactive (action 0) despite needing corrections in its state. This improper action leads to the build-up of negative scores as no stabilization or velocity adjustments are being made.\\n2. **Angular Stabilization and Velocity Correction**: Late or ineffective corrections when the spacecraft starts tilting or gaining excessive velocities (both angular and linear).\\n3. **Too Many Corrections of Single Type**: The spacecraft consistently uses action 3 without achieving the required adjustments.\\n4. **Combining Stabilization and Velocity Control**: Need to manage angular corrections in combination with linear velocity adjustments effectively.\\n\\n### Revised Strategy\\n\\n1. **Continuous Adjustment**: Ensure that adjustments are made continuously rather than 

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4
Número de instantes: 86. Tamaño de logs: 53
episode  1 score -116.692 avg score -1.373


2024-07-02 01:32:29,064 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_WXkvgqehu6HQvfvKUV4zAF8Z/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-02 01:32:29,065 - INFO - Compilación exitosa.
2024-07-02 01:32:29,066 - INFO - Status: requires_action
2024-07-02 01:32:29,273 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_WXkvgqehu6HQvfvKUV4zAF8Z "HTTP/1.1 200 OK"
2024-07-02 01:32:49,276 - INFO - Status: in_progress
2024-07-02 01:32:49,583 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_WXkvgqehu6HQvfvKUV4zAF8Z "HTTP/1.1 200 OK"
2024-07-02 01:33:09,588 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 0, "current status": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], "score": 0.232}, {"time": 1, "action": 0, "current status": [0.011, 1.42, 0.38, 0.104, -0.013, -0

Tool call: [RequiredActionFunctionToolCall(id='call_9SDGGnjEawlwGNfEa36MYxk1', function=Function(arguments='{"reasoning":"### Analysis of Recent Performance\\n\\nThe recent logs show that the spacecraft again failed to land successfully. The code didn\'t manage to correct the trajectory effectively, leading to unnecessary extended periods of inaction and incorrect sequential thrust applications. Notably, the code failed to properly balance horizontal velocity, vertical descent rate, and spacecraft stabilization.\\n\\n### Problems Identified:\\n1. **Extended Inaction**: No action was taken when corrections were evidently needed, leading to continuous increases in vertical and horizontal velocities.\\n2. **Sequential Thrusts**: Excessive repeated side thrusts were applied without balancing or correcting the overall motion effectively.\\n3. **Incorrect Prioritization**: Angular stabilization seemed to overshadow the need to manage descent rate and horizontal drift effectively.\\n\\n### Mo

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4
Número de instantes: 86. Tamaño de logs: 53
episode  1 score -116.692 avg score -1.373


2024-07-02 01:33:54,509 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_e1PASc5cGFVYMq46o9beOyxA/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-02 01:33:54,514 - INFO - Compilación exitosa.
2024-07-02 01:33:54,517 - INFO - Status: requires_action
2024-07-02 01:33:54,740 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_e1PASc5cGFVYMq46o9beOyxA "HTTP/1.1 200 OK"
2024-07-02 01:34:14,742 - INFO - Status: in_progress
2024-07-02 01:34:15,012 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_e1PASc5cGFVYMq46o9beOyxA "HTTP/1.1 200 OK"
2024-07-02 01:34:35,016 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 0, "current status": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], "score": 0.232}, {"time": 1, "action": 0, "current status": [0.011, 1.42, 0.38, 0.104, -0.013, -0

Tool call: [RequiredActionFunctionToolCall(id='call_T5Nlba0CpBh7SAAPOx0rHZCR', function=Function(arguments='{"reasoning":"### Reanalysis and Code Revision\\n\\nUpon carefully analyzing the recent logs and performance, the issue still lies in timely managing the spacecraft\'s trajectory and velocity corrections. The sequence of actions observed has shown prolonged execution of a single action type, leading to insufficient response to accumulated velocity, both horizontal and vertical. The spacecraft’s descent was not controlled properly, leading it to crash.\\n\\n### Identified issues\\n1. **Prolonged Inaction**: Multiple consecutive steps executed no action, making the spaceship quickly deviate further from the desired descent path.\\n2. **Insufficient Handling of Descent Speed**: Although vertical stabilization commands were triggered, they were neither appropriately timed nor frequent enough.\\n3. **Over-corrective Angular Adjustments**: Excessive thrust applications in one direction

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_5.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_5.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_5.mp4\rl-video-episode-0.mp4
Número de instantes: 86. Tamaño de logs: 53
episode  1 score -116.692 avg score -1.373


2024-07-02 01:35:19,698 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_nOjPMiKC59rz6dTUyDqrDpkW/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-02 01:35:19,701 - INFO - Compilación exitosa.
2024-07-02 01:35:19,702 - INFO - Status: requires_action
2024-07-02 01:35:19,905 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_nOjPMiKC59rz6dTUyDqrDpkW "HTTP/1.1 200 OK"
2024-07-02 01:35:39,910 - INFO - Status: in_progress
2024-07-02 01:35:40,156 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v/runs/run_nOjPMiKC59rz6dTUyDqrDpkW "HTTP/1.1 200 OK"
2024-07-02 01:36:00,161 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 0, "current status": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], "score": 0.232}, {"time": 1, "action": 0, "current status": [0.011, 1.42, 0.38, 0.104, -0.013, -0

user: [TextContentBlock(text=Text(annotations=[], value='These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 0, "current status": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], "score": 0.232}, {"time": 1, "action": 0, "current status": [0.011, 1.42, 0.38, 0.104, -0.013, -0.085, 0.0, 0.0], "score": 0.121}, {"time": 2, "action": 0, "current status": [0.015, 1.421, 0.38, 0.077, -0.017, -0.085, 0.0, 0.0], "score": 0.013}, {"time": 3, "action": 0, "current status": [0.019, 1.423, 0.38, 0.051, -0.021, -0.085, 0.0, 0.0], "score": -0.104}, {"time": 4, "action": 0, "current status": [0.023, 1.423, 0.38, 0.024, -0.026, -0.085, 0.0, 0.0], "score": -0.226}, {"time": 5, "action": 0, "current status": [0.026, 1.423, 0.38, -0.003, -0.03, -0.085, 0.0, 0.0], "score": -0.353}, {"time": 6, "action": 0, "current status": [0.03, 1.422, 0.38, -0.029, -0.034, -0.085, 0.0, 0.0], "score": -0.481}, {"time": 7, "action": 0, "current status": [0.034, 1.42

2024-07-02 01:36:01,593 - INFO - HTTP Request: DELETE https://api.openai.com/v1/assistants/asst_oz37cI4Jj4423yR963RqZPpk "HTTP/1.1 200 OK"
2024-07-02 01:36:01,926 - INFO - HTTP Request: DELETE https://api.openai.com/v1/threads/thread_L7ucr9hmSMXelhzWoyVcJn7v "HTTP/1.1 200 OK"
2024-07-02 01:36:01,927 - INFO - 
Ejecución finalizada.




Asistente vaciado.
Run(id='run_nOjPMiKC59rz6dTUyDqrDpkW', assistant_id='asst_oz37cI4Jj4423yR963RqZPpk', cancelled_at=None, completed_at=1719876926, created_at=1719876875, expires_at=None, failed_at=None, incomplete_details=None, instructions="To complete the task you must follow the following steps and indications:\n1. Analyze and reason about the records received in the last landing attempts to learn how the spacecraft behaves in the environment. Keep in mind that conditions may vary.\n2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:\n    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).\n    - Increased/decreased the slower/faster the spacecraft is moving.\n    - Decreased the more the spacecraft is tilted.\n    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engine.\n    - Receive 100 points 

In [14]:
DESCRIPTION = "You are an expert spacecraft landing agent. Your specialty is to take the appropriate action at each instant of time based on the environment and state of the spacecraft. To achieve this, you put your knowledge base and the knowledge you acquire by analyzing each execution into a python function in charge of directing the landing."
INSTRUCTIONS = f"""To complete the task you must follow the following steps and indications:
1. Analyze and reason about the records received in the last landing attempts to learn how the spacecraft behaves in the environment. Keep in mind that conditions may vary.
2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:
    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).
    - Increased/decreased the slower/faster the spacecraft is moving.
    - Decreased the more the spacecraft is tilted.
    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engine.
    - Receive 100 points for a successful landing and lose them for crashing.
    - A succesful landing must exceed 200 points.
3. This is the log schema:\n {log_description}
4. Add your decision code to the 'act' method and locate errors in it if the landing is unsuccessful.
5. Save the code of the act function in the file 'Action.py' using store_code_in_file function.
"""

# 6. Improve your results and correct also any programming error you may have generated in your last code if they exist.
NAME = "Spacecraft Landing Master"

initial_msg = f"This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}. What does each value and each change in the observations mean? What effects does each action have on the current state observations? What are the steps to follow to achieve a successful landing?. Take a deep breath and reason step-by-step. After reasoning analyze the results, learn and make better code."

logger = configura_log('Spacecraft_4o_Stepback.log')
response = create_and_run_llm_loop(initial_msg, logger, model="gpt-4o", num_iterations=5)

print(response)

2024-07-02 14:05:19,684 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07-02 14:05:19,895 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-07-02 14:05:20,208 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_nAWycSlJhSPbKx8L4uUfabyc/messages "HTTP/1.1 200 OK"
2024-07-02 14:05:20,348 - INFO - Iteration: 1
2024-07-02 14:05:20,737 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_nAWycSlJhSPbKx8L4uUfabyc/runs "HTTP/1.1 200 OK"
2024-07-02 14:05:21,016 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_nAWycSlJhSPbKx8L4uUfabyc/runs/run_oaZzUTE37ZSXobAQ80BIjqKQ "HTTP/1.1 200 OK"
2024-07-02 14:05:21,019 - INFO - Status: queued
2024-07-02 14:05:21,241 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_nAWycSlJhSPbKx8L4uUfabyc/runs/run_oaZzUTE37ZSXobAQ80BIjqKQ "HTTP/1.1 200 OK"
2024-07-02 14:05:41,243 - INFO - Status: queued
2024-07-02 14:05:41,514

Tool call: [RequiredActionFunctionToolCall(id='call_H1rYNYx5YwzRDtyqO8JsupnB', function=Function(arguments='{"reasoning": "### Reasoning and Analysis for Successful Landing ###\\n\\n#### Step-by-Step Analysis ####\\n\\n1. **Position and Velocity**\\n   - **X Position and Velocity:** Values \\n     - Goal: X Position should close to 0.0, X Velocity should ideally be 0.0 at landing.\\n   - **Y Position and Velocity:** Values\\n     - Goal: Y Position should reach 0.0 (landing surface), Y Velocity should be low negative value close to 0.0 (slow descent).\\n\\n2. **Angle and Angular Velocity**\\n   - **Angle:** Values\\n     - Goal: Angle should be as close to 0.0 as possible (vertical).\\n   - **Angular Velocity:** Values\\n     - Goal: Angular velocity should be close to 0.0 (stable).\\n\\n3. **Contact Sensors**\\n   - **Left and Right Contact Sensors:** Values\\n     - Goal: Both should switch from 0 to 1 upon touching down.\\n\\n4. **Scores and Penalties**\\n   - Scores are affected by

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4
Número de instantes: 90. Tamaño de logs: 55
episode  1 score -242.000 avg score -2.719


2024-07-02 14:06:06,546 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_nAWycSlJhSPbKx8L4uUfabyc/runs/run_oaZzUTE37ZSXobAQ80BIjqKQ/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-02 14:06:06,548 - INFO - Compilación exitosa.
2024-07-02 14:06:06,549 - INFO - Status: requires_action
2024-07-02 14:06:06,752 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_nAWycSlJhSPbKx8L4uUfabyc/runs/run_oaZzUTE37ZSXobAQ80BIjqKQ "HTTP/1.1 200 OK"
2024-07-02 14:06:26,756 - INFO - Status: in_progress
2024-07-02 14:06:27,008 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_nAWycSlJhSPbKx8L4uUfabyc/runs/run_oaZzUTE37ZSXobAQ80BIjqKQ "HTTP/1.1 200 OK"
2024-07-02 14:06:47,013 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 1, "current status": [0.011, 1.42, 0.362, 0.104, -0.007, -

Tool call: [RequiredActionFunctionToolCall(id='call_bZ1jKiXJun7sxzFrO2WblvmF', function=Function(arguments='{"reasoning":"### Performance Analysis and Identification of Errors ###\\n\\n#### Observations from Logs ####\\n1. **Initial Steps**:\\n   - **Action 1, Switching Left Engine**: Generated increasing oscillations in angular velocity and positive drift in X Velocity.\\n\\n2. **Mid-Flight Corrections**:\\n   - **Continuous Action 1**: Lead to increasing velocity and unstable pitch control.\\n\\n3. **Descent Phase**:\\n   - **Frequent Use of Left Engine (Action 1)**: Did not control vertical descent efficiently; angular velocity became increasingly unstable.\\n\\n4. **Crash Landing**:\\n   - **Action 3 Usage at End**: Aggressively corrected angles without stabilizing descent or maintaining low velocities.\\n   - Resulted in high angular velocities and significant pull sideways leading to a crash.\\n\\n#### Analysis-based Modifications ####\\n1. **Initial Stabilization**:\\n   - Optim

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4
Número de instantes: 90. Tamaño de logs: 55
episode  1 score -242.000 avg score -2.719


2024-07-02 14:07:31,931 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_nAWycSlJhSPbKx8L4uUfabyc/runs/run_2l2ayRwljpFxyEOYJXV942Y1/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-02 14:07:31,934 - INFO - Compilación exitosa.
2024-07-02 14:07:31,934 - INFO - Status: requires_action
2024-07-02 14:07:32,181 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_nAWycSlJhSPbKx8L4uUfabyc/runs/run_2l2ayRwljpFxyEOYJXV942Y1 "HTTP/1.1 200 OK"
2024-07-02 14:07:52,183 - INFO - Status: in_progress
2024-07-02 14:07:52,485 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_nAWycSlJhSPbKx8L4uUfabyc/runs/run_2l2ayRwljpFxyEOYJXV942Y1 "HTTP/1.1 200 OK"
2024-07-02 14:08:12,487 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 1, "current status": [0.011, 1.42, 0.362, 0.104, -0.007, -

Tool call: [RequiredActionFunctionToolCall(id='call_YipcCrC5e95MLAQAOfftTBOd', function=Function(arguments='{"reasoning":"### Performance Analysis and Correction ###\\n\\n#### Observations from Logs ####\\n1. **Initial Actions**\\n   - The spacecraft stabilizes its X velocity initially with left engine, leading to slight oscillations.\\n   - Without vertical stabilization, there’s no solid control over descent speed.\\n\\n2. **Middle Actions**\\n   - Constant Action 1 application leads to rising angular velocity.\\n   - Y velocity control delayed, leading to rapid descent and score penalties.\\n\\n3. **Final Phase**\\n   - Overuse of Right Engine in later stages; resulted in uncontrollable oscillations.\\n   - Diverge into rapid negative scoring with frequent angular misalignments.\\n\\n#### Plan for Modification ####\\n1. **Vertical Descent Prioritization**\\n   - Implement early and stronger center engine usage to provide consistent upward thrust.\\n\\n2. **Proportional Adjustment of

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4
Número de instantes: 90. Tamaño de logs: 55
episode  1 score -242.000 avg score -2.719


2024-07-02 14:08:58,148 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_nAWycSlJhSPbKx8L4uUfabyc/runs/run_MmeTtdwQ7i55nRvBTFV5YfHB/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-02 14:08:58,150 - INFO - Compilación exitosa.
2024-07-02 14:08:58,151 - INFO - Status: requires_action
2024-07-02 14:08:58,383 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_nAWycSlJhSPbKx8L4uUfabyc/runs/run_MmeTtdwQ7i55nRvBTFV5YfHB "HTTP/1.1 200 OK"
2024-07-02 14:09:18,386 - INFO - Status: in_progress
2024-07-02 14:09:18,648 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_nAWycSlJhSPbKx8L4uUfabyc/runs/run_MmeTtdwQ7i55nRvBTFV5YfHB "HTTP/1.1 200 OK"
2024-07-02 14:09:38,651 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 1, "current status": [0.011, 1.42, 0.362, 0.104, -0.007, -

Tool call: [RequiredActionFunctionToolCall(id='call_cuteTPNAWtS8VzG5F1J0bY6P', function=Function(arguments='{"reasoning":"### Data-Driven Adjustments for Improved Landing Function ###\\n\\n#### Observations from Logs ####\\n1. **Initial Stabilization Phase**:\\n   - Continuous Action 1 led to stable X velocity but negative Y control.\\n\\n2. **Descent to Mid-Flight Corrections**:\\n   - Delayed Y velocity correction with late stage Action 2.\\n\\n3. **Inconsistent Final Phase**:\\n   - Frequent Action 3 use excessively addressed angular adjustments, rapidly oscillating.\\n\\n#### Modification Plan ####\\n1. **Balanced Starting Sequence**:\\n   - Stronger early Action 2 usage to manage both X and Y velocities.\\n   - Interleaves proportional balancing engine applications for angular controls.\\n\\n2. **Dynamic Descent Management**:\\n   - Ensuring responsiveness towards X velocity shifts with minimal corrections.\\n   - Continual Action 2 use maintaining downward path decreasing.\\n\\n3

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4



                                                              

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4
Número de instantes: 108. Tamaño de logs: 64
episode  1 score -643.809 avg score -6.017


2024-07-02 14:10:24,318 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_nAWycSlJhSPbKx8L4uUfabyc/runs/run_pZjb4dkS24ScmS4GufoY5G0g/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-02 14:10:24,319 - INFO - Compilación exitosa.
2024-07-02 14:10:24,320 - INFO - Status: requires_action
2024-07-02 14:10:24,560 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_nAWycSlJhSPbKx8L4uUfabyc/runs/run_pZjb4dkS24ScmS4GufoY5G0g "HTTP/1.1 200 OK"
2024-07-02 14:10:44,563 - INFO - Status: in_progress
2024-07-02 14:10:45,514 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_nAWycSlJhSPbKx8L4uUfabyc/runs/run_pZjb4dkS24ScmS4GufoY5G0g "HTTP/1.1 200 OK"
2024-07-02 14:11:05,516 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 2, "current status": [0.007, 1.418, 0.377, 0.172, -0.009, -0.089, 0.0, 0.0], "score": -1.45}, {"time": 1, "action": 2, "current status": [0.011, 1.422, 0.389, 0.179, -0.013,

Tool call: [RequiredActionFunctionToolCall(id='call_oQQEBG1gp5mq7WmaBWA3wiUZ', function=Function(arguments='{"reasoning":"### Performance Review and Correction ###\\n\\n#### Problematic Trend in Logs ####\\n1. **Excessive Usage of Action 2**:\\n   - Overuse led to significant scores deduction without proper Y velocity or angle corrections.\\n\\n2. **Missed Angular Adjustment**:\\n   - Inconsistencies in angle corrections, resulting in significant angular drifts.\\n\\n3. **Vertical Velocity Inefficiency**:\\n   - Prolonged upward thrust increased velocity and disregarded safe descent.\\n\\n4. **Late-phase Rapid Descent and Scoring Deductions**:\\n   - The scoring system penalized rise in velocities leading to poor safe landing trajectory.\\n\\n### Refined Strategy ####\\n1. **Balanced Initial Approach**:\\n   - Use of engines for early-stage slow descent ensuring consistent vertical velocity management.\\n\\n2. **Proportional Angular Adjustments**:\\n   - Efficient angle stabilization w

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_5.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_5.mp4\rl-video-episode-0.mp4



                                                              

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_5.mp4\rl-video-episode-0.mp4
Número de instantes: 111. Tamaño de logs: 66
episode  1 score -200.300 avg score -1.821


2024-07-02 14:11:51,267 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_nAWycSlJhSPbKx8L4uUfabyc/runs/run_Q5la41ytAf7hN7t201W1KAG4/submit_tool_outputs "HTTP/1.1 200 OK"
2024-07-02 14:11:51,269 - INFO - Compilación exitosa.
2024-07-02 14:11:51,270 - INFO - Status: requires_action
2024-07-02 14:11:51,475 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_nAWycSlJhSPbKx8L4uUfabyc/runs/run_Q5la41ytAf7hN7t201W1KAG4 "HTTP/1.1 200 OK"
2024-07-02 14:12:11,478 - INFO - Status: in_progress
2024-07-02 14:12:11,801 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_nAWycSlJhSPbKx8L4uUfabyc/runs/run_Q5la41ytAf7hN7t201W1KAG4 "HTTP/1.1 200 OK"
2024-07-02 14:12:31,804 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 1, "current status": [0.011, 1.42, 0.362, 0.104, -0.007, -

user: [TextContentBlock(text=Text(annotations=[], value='These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 1, "current status": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], "score": 1.387}, {"time": 1, "action": 1, "current status": [0.011, 1.42, 0.362, 0.104, -0.007, -0.011, 0.0, 0.0], "score": 1.282}, {"time": 2, "action": 1, "current status": [0.015, 1.421, 0.353, 0.077, -0.006, 0.026, 0.0, 0.0], "score": 1.473}, {"time": 3, "action": 1, "current status": [0.018, 1.423, 0.341, 0.051, -0.002, 0.072, 0.0, 0.0], "score": 1.809}, {"time": 4, "action": 1, "current status": [0.021, 1.423, 0.332, 0.024, 0.003, 0.11, 0.0, 0.0], "score": 1.038}, {"time": 5, "action": 1, "current status": [0.025, 1.423, 0.323, -0.003, 0.011, 0.146, 0.0, 0.0], "score": 0.221}, {"time": 6, "action": 1, "current status": [0.028, 1.422, 0.312, -0.03, 0.02, 0.189, 0.0, 0.0], "score": 0.039}, {"time": 7, "action": 1, "current status": [0.031, 1.421, 0.30

2024-07-02 14:12:33,068 - INFO - HTTP Request: DELETE https://api.openai.com/v1/threads/thread_nAWycSlJhSPbKx8L4uUfabyc "HTTP/1.1 200 OK"
2024-07-02 14:12:33,070 - INFO - 
Ejecución finalizada.




Asistente vaciado.
Run(id='run_Q5la41ytAf7hN7t201W1KAG4', assistant_id='asst_DDs6L6pOUSh8Jtp1TuE8tVUc', cancelled_at=None, completed_at=1719922317, created_at=1719922265, expires_at=None, failed_at=None, incomplete_details=None, instructions="To complete the task you must follow the following steps and indications:\n1. Analyze and reason about the records received in the last landing attempts to learn how the spacecraft behaves in the environment. Keep in mind that conditions may vary.\n2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:\n    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).\n    - Increased/decreased the slower/faster the spacecraft is moving.\n    - Decreased the more the spacecraft is tilted.\n    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engine.\n    - Receive 100 points 

#### Take a step back.
Abstraer primero el problema.

## Incluir supervisor encargado del stepback

In [14]:
def inicializar_agente(model, msg, description, instructions, name, tools):
    """ Inicializa un agente con un asistente y un hilo.
    
    Args:
        model: str: el modelo de lenguaje a utilizar
        msg: str: el mensaje inicial del hilo
        
    Returns:
        agente: AssistantOpenAI: el agente inicializado
        asistente: dict: el asistente creado
        hilo: dict: el hilo creado
    """
    
    agente = AssistantOpenAI(ARCLABKEY_OPENAI)
    # Crea un asistente
    asistente = agente.create_assistant(model=model, description=description, instructions=instructions, name=name, tools=tools)
    # Crea un hilo
    hilo = agente.create_thread()
    # Añade un mensaje inicial al hilo.
    msg = agente.add_message(hilo.id, role="user", content=msg)
    
    return agente, asistente, hilo


def run_message_assistant(agente, asistente, hilo, tool_choice="auto"):
    """ Ejecuta un mensaje en el asistente.
    
    Args:
        agente: AssistantOpenAI: el agente
        asistente: dict: el asistente
        hilo: dict: el hilo
        tool_choice: str: la elección de herramienta a utilizar
        
    Returns:
        ejecucion: dict: la ejecución del mensaje
        response: dict: la respuesta del asistente
    """
    # Ejecuta el hilo
    ejecucion = agente.run(hilo.id, asistente.id, tool_choice=tool_choice)
    response = agente.get_run(ejecucion.id, hilo.id)

            # Esperamos a que la respuesta esté lista.
    while response.status not in ["completed", "failed", "requires_action"]:
        logger.info(f"Status: {ejecucion.status}")
        response = agente.get_run(ejecucion.id, hilo.id)
        time.sleep(20)
    return ejecucion, response


In [14]:
import traceback, importlib
import Action

Action = importlib.reload(Action)

def create_and_run_llm_loop(Incial_msg, code_msg, logger, model="gpt-3.5-turbo", num_iterations=5):
    
    # Creamos el asistente.
    #agente, asistente, hilo = inicializar_agente(model, Incial_msg, DESCRIPTION, INSTRUCTIONS, NAME, TOOLS)
    agente = AssistantOpenAI(ARCLABKEY_OPENAI)
    # Crea un asistente
    asistente = agente.create_assistant(model=model, description=DESCRIPTION, instructions=INSTRUCTIONS, name=NAME, tools=TOOLS)
    # Crea un hilo
    hilo = agente.create_thread()
    # Añade un mensaje inicial al hilo.
    msg = agente.add_message(hilo.id, role="user", content=Incial_msg)
    
    # Primero ejecutamos el razonamiento de STEP BACK.
    #ejecucion, response = run_message_assistant(agent, asistente, hilo, tool_choice='none')
    # Ejecuta el hilo
    ejecucion = agente.run(hilo.id, asistente.id, tool_choice='none')
    response = agente.get_run(ejecucion.id, hilo.id)

            # Esperamos a que la respuesta esté lista.
    while response.status not in ["completed", "failed", "requires_action"]:
        logger.info(f"Status: {ejecucion.status}")
        response = agente.get_run(ejecucion.id, hilo.id)
        print("Waiting for response...")
        time.sleep(20)
    
    #print(response)
    msgs = agente.mostrar_mensajes(hilo.id)
    print(msgs[0].data[0].content[0].text.value)
    
    # Añadimos el mensaje de generación de código.
    msg = agente.add_message(hilo.id, role="user", content=code_msg)
    
    # Bucle de aprendizaje del asistente.
    for i in range(num_iterations):
        
        logger.info(f"Iteration: {i+1}")
        compiled = False

        # Si en la presente iteración no se ha compilado el código, se pide al asistente corregir los errores antes de iterar.
        while not compiled:
            # Ejecuta el hilo
            ejecucion = agente.run(hilo.id, asistente.id, tool_choice='required')
            response = agente.get_run(ejecucion.id, hilo.id)

            # Descomponemos los elementos de la respuesta.
            logger.info(f"Status: {response.status}")
            if response.status == "completed":
                return response
            print(response)
            tool_call = response.required_action.submit_tool_outputs.tool_calls
            print(f"Tool call: {tool_call}")

            # Convierte el string a un diccionario
            code_dict = json.loads(tool_call[0].function.arguments)
            logger.info(f"Arguments: {code_dict}")
            
            # Obtén el código Python de la llamada a la herramienta
            code = code_dict["code"]
            filename = code_dict["filename"]  

            logger.info(f"\nCodigo generado:\n{code}")

            # Ejecuta el código Python
            try:
                store_code_in_file(code, filename)
                time.sleep(1) # Pequeño retraso para que el sistema operativo pueda reflejar los cambios en el archivo
                
                # Hacemos commit de los cambios en el repositorio para analizar las modificaciones del agente.
                commit_changes(r"C:\Users\adria\Repos\TFM__LLM_landing_self-refinement", f"Stepback 4o. Iteración {i+1}.") 
                
                importlib.reload(Action) # Recargamos el módulo de acciones para que se actualice con las modificaciones del agente.

                # Ejecutamos el código generado.
                logs = lunar_lander(n_games=1, display=True, recoder=True, video_filename=f"video/iteration_{i+1}.mp4")
                
                # Devolvemos la respuesta al asistente.
                for call in tool_call:
                    agente.devolver_respuesta(response.id, hilo.id, tool_outputs=[{"tool_call_id": call.id, "output": "Run successful."}])            
                compiled = True
                logger.info(f"Compilación exitosa.")
                
                # Esperamos a que el agente esté listo para recibir mensajes y le añadimos el resultado de la iteración.
                while response.status not in ["completed", "failed", "expired"]:
                    logger.info(f"Status: {response.status}")
                    response = agente.get_run(response.id, hilo.id)
                    time.sleep(20)
                
                msg = f"""These are the logs generated by your last code: {logs}. Analyze the performance of the spacecraft and how it differs from the desired result. Be guided by the scoring system. Identify the cause of errors in your code and modify it without fear of making major changes. Think deeply about the priorities of your code and how to order and combine them correctly to achieve success."""
                logger.info(msg)    
                agente.add_message(hilo.id, role="user", content=msg)
            
            # Alimentamos el asistente con el error generado en la ejecución del código.    
            except Exception as e:
                logger.exception("Error: %s", e)
                error_trace = traceback.format_exc()
                for call in tool_call:
                    agente.devolver_respuesta(response.id, hilo.id, tool_outputs=[{"tool_call_id": call.id, "output": "ERROR."}]) 
                logger.error(f"Error: {e}.")
                while response.status not in ["completed", "failed", "expired"]:
                    logger.info(f"Status: {response.status}")
                    response = agente.get_run(ejecucion.id, hilo.id)
                    time.sleep(30)
                msg = f"The code generated has an error. Please, try again. Error: {e}. Trace: {error_trace}"   
                logger.error(msg)   
                agente.add_message(hilo.id, role="assistant", content=msg)
    
    agente.mostrar_mensajes(hilo.id)  
    vaciar_agente(agente)
    
    logger.info("\nEjecución finalizada.\n\n")       

    return response


def vaciar_agente(agente):
    for assistant in agente.assistants:
        agente.delete_assistant(assistant)

    for thread in agente.threads:
        agente.delete_thread(thread)
        
    print("Asistente vaciado.")


In [15]:
DESCRIPTION = "You are an expert spacecraft landing agent. Your specialty is to take the appropriate action at each instant of time based on the environment and state of the spacecraft. To achieve this, you put your knowledge base and the knowledge you acquire by analyzing each execution into a python function in charge of directing the landing."
INSTRUCTIONS = f"""To complete the task you must follow the following steps and indications:
1. Analyze and reason about the records received in the last landing attempts to learn how the spacecraft behaves in the environment. Keep in mind that conditions may vary.
2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:
    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).
    - Increased/decreased the slower/faster the spacecraft is moving.
    - Decreased the more the spacecraft is tilted.
    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engine.
    - Receive 100 points for a successful landing and lose them for crashing.
3. IMPORTANT. Use the following tips in your reasoning to achieve a successful landing:
    - First you have to stabilize the falling ship (speed, angle and location), keep falling under control and then land gently at the end.
    - Use all the elements of the vector of observations, all are relevant to make the right decision.
    - Pay close attention to successful events.
    - Environment and ship conditions may change but your code must be effective for all cases.
    - Find a balance in your landing policy to maximize your score (must exceed 200 points).
    - If you are rotating to the left (negative values), you should fire the left engine and vice versa. But always prioritizing a controlled speed with the main engine.
4. Add your decision code to the 'act' method and locate errors in it if the landing is unsuccessful.
5. Save the code of the act function in the file 'Action.py' using store_code_in_file function.
"""
# 6. Improve your results and correct also any programming error you may have generated in your last code if they exist.
NAME = "Spacecraft Landing Decision coder"


initial_msg = f"This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}. Take a deep breath and reason step-by-step.What does each value and each change in the observations mean? What effects does each action have on the current state observations? What are the steps to follow to achieve a successful landing? Do not implement any code."
code_msg = f"After the reasoning with the processes behind the decision making, code very carefully when structuring your code, keep in mind that certain actions can be left with starvation. Save the code of the act function in the file 'Action.py' using store_code_in_file function tool, function calling is required."



In [16]:
logger = configura_log('Spacecraft_4o_Stepback.log')
response = create_and_run_llm_loop(initial_msg, code_msg, logger, model="gpt-4o", num_iterations=5)

print(response)

2024-07-01 20:24:30,902 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07-01 20:24:31,107 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-07-01 20:24:31,353 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_Z2XFcAqd7BpvTGXwLKXUdXLi/messages "HTTP/1.1 200 OK"
2024-07-01 20:24:31,917 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_Z2XFcAqd7BpvTGXwLKXUdXLi/runs "HTTP/1.1 200 OK"
2024-07-01 20:24:32,130 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_Z2XFcAqd7BpvTGXwLKXUdXLi/runs/run_rBlWNoPJc2HIQNThxrjQvaiT "HTTP/1.1 200 OK"
2024-07-01 20:24:32,132 - INFO - Status: queued
2024-07-01 20:24:32,336 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_Z2XFcAqd7BpvTGXwLKXUdXLi/runs/run_rBlWNoPJc2HIQNThxrjQvaiT "HTTP/1.1 200 OK"


Waiting for response...


In [None]:
agente = AssistantOpenAI(ARCLABKEY_OPENAI)

# Crea un asistente
asistente = agente.create_assistant(model="gpt-3.5-turbo", description=DESCRIPTION, instructions=INSTRUCTIONS, name=NAME, tools=TOOLS)
    
# Crea un hilo
hilo = agente.create_thread()

initial_msg = f"This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}. Before coding take a deep breath and reason step-by-step.What does each value and each change in the observations mean? What effects does each action have on the current state observations? What are the steps to follow to achieve a successful landing?"
    
# Añade un mensaje inicial al hilo.
msg = agente.add_message(hilo.id, role="user", content=initial_msg)


# Ejecuta el hilo
ejecucion = agente.run(hilo.id, asistente.id, tool_choice='none')
response = agente.get_run(ejecucion.id, hilo.id)

# Esperamos a que la respuesta esté lista.
while response.status not in ["completed", "failed", "requires_action"]:
    #logger.info(f"Status: {ejecucion.status}")
    print("Waiting for response...")
    response = agente.get_run(ejecucion.id, hilo.id)
    time.sleep(20)
print(response)


agente.mostrar_mensajes(hilo.id)

Waiting for response...
Waiting for response...
Run(id='run_5ip2qIKcf1KIU1L1XCyvPVyK', assistant_id='asst_qqONwHALF6YCJlMyXttFXsnu', cancelled_at=None, completed_at=1719491913, created_at=1719491908, expires_at=None, failed_at=None, incomplete_details=None, instructions="To complete the task you must follow the following steps and indications:\n1. Analyze and reason about the records received in the last landing attempts to learn how the spacecraft behaves in the environment. Keep in mind that conditions may vary.\n2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:\n    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).\n    - Increased/decreased the slower/faster the spacecraft is moving.\n    - Decreased the more the spacecraft is tilted.\n    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engin

(SyncCursorPage[Message](data=[Message(id='msg_4CYNsLrcWXj8sEnvIm9EyV4G', assistant_id='asst_qqONwHALF6YCJlMyXttFXsnu', attachments=[], completed_at=None, content=[TextContentBlock(text=Text(annotations=[], value="Based on the successful landing record and the scoring system provided, we can infer some strategies to achieve a successful landing with a high score:\n\n1. Initially, the spacecraft should start with a gentle descent while maintaining a controlled speed. This helps in stabilizing the falling ship.\n2. As the spacecraft approaches the landing area, it should decrease the altitude gradually and adjust the horizontal position to align with the landing location.\n3. The spacecraft should minimize lateral movement and keep the speed under control to ensure a safe landing.\n4. Utilize the main engine for controlled descent and adjust the tilt angle to reduce lateral velocity.\n5. When close to the landing area, reduce the speed further and ensure the spacecraft is aligned properl

In [19]:
msgs, ids = agente.mostrar_mensajes(hilo.id)
print(msgs)
print(msgs.data[0].content[0].text.value)

assistant: [TextContentBlock(text=Text(annotations=[], value="Based on the successful landing record and the scoring system provided, we can infer some strategies to achieve a successful landing with a high score:\n\n1. Initially, the spacecraft should start with a gentle descent while maintaining a controlled speed. This helps in stabilizing the falling ship.\n2. As the spacecraft approaches the landing area, it should decrease the altitude gradually and adjust the horizontal position to align with the landing location.\n3. The spacecraft should minimize lateral movement and keep the speed under control to ensure a safe landing.\n4. Utilize the main engine for controlled descent and adjust the tilt angle to reduce lateral velocity.\n5. When close to the landing area, reduce the speed further and ensure the spacecraft is aligned properly for a gentle landing.\n6. Avoid unnecessary firing of the side engines as they decrease the overall score. Only use them for small adjustments if nece

TypeError: 'SyncCursorPage[Message]' object is not subscriptable

## Dos agentes.

In [15]:
import traceback, importlib
import Action

Action = importlib.reload(Action)

def create_and_run_llm_loop(Incial_msg, code_msg, logger, model="gpt-3.5-turbo", num_iterations=5):
    
    # Creamos el asistente.
    #agente, asistente, hilo = inicializar_agente(model, Incial_msg, DESCRIPTION, INSTRUCTIONS, NAME, TOOLS)
    agente = AssistantOpenAI(ARCLABKEY_OPENAI)
    # Crea un asistente supervisor y codificador.
    codificador = agente.create_assistant(model=model, description=DESCRIPTION, instructions=INSTRUCTIONS, name=NAME, tools=TOOLS)
    supervisor = agente.create_assistant(model=model, description=DESCRIPTION_SUPERVISOR, instructions=INSTRUCTIONS_SUPERVISOR, name=NAME_SUPERVISOR)
    # Crea un hilo
    hilo_supervisor = agente.create_thread()
    hilo_codificador = agente.create_thread()
    
    # Añade un mensaje inicial al hilo.
    msg = agente.add_message(hilo_supervisor.id, role="user", content=Incial_msg)
    
    # Primero ejecutamos el razonamiento de STEP BACK.
    #ejecucion, response = run_message_assistant(agent, asistente, hilo, tool_choice='none')
    # Ejecuta el hilo
    ejecucion = agente.run(hilo_supervisor.id, supervisor.id, tool_choice='none')
    response = agente.get_run(ejecucion.id, hilo_supervisor.id)

            # Esperamos a que la respuesta esté lista.
    while response.status not in ["completed", "failed", "requires_action"]:
        logger.info(f"Status: {ejecucion.status}")
        response = agente.get_run(ejecucion.id, hilo_supervisor.id)
        print("Waiting for response...")
        time.sleep(20)
    
    #print(response)
    msgs = agente.mostrar_mensajes(hilo_supervisor.id)
    logger.info(msgs[0].data[0].content[0].text.value)
    
    # Añadimos el mensaje de generación de código.
    msg = code_msg +  "\nSupervisor feedback: " + msgs[0].data[0].content[0].text.value
    print(msg)
    msg = agente.add_message(hilo_codificador.id, role="user", content=msg)
    
    # Bucle de aprendizaje del asistente.
    for i in range(num_iterations):
        
        logger.info(f"Iteration: {i+1}")
        compiled = False

        # Si en la presente iteración no se ha compilado el código, se pide al asistente corregir los errores antes de iterar.
        while not compiled:
            # Ejecuta el hilo
            ejecucion = agente.run(hilo_codificador.id, codificador.id, tool_choice='required')
            response = agente.get_run(ejecucion.id, hilo_codificador.id)

            # Descomponemos los elementos de la respuesta.
            logger.info(f"Status: {response.status}")
            if response.status == "completed":
                return response
            print(response)
            tool_call = response.required_action.submit_tool_outputs.tool_calls
            print(f"Tool call: {tool_call}")

            # Convierte el string a un diccionario
            code_dict = json.loads(tool_call[0].function.arguments)
            logger.info(f"Arguments: {code_dict}")
            
            # Obtén el código Python de la llamada a la herramienta
            code = code_dict["code"]
            filename = code_dict["filename"]  

            logger.info(f"\nCodigo generado:\n{code}")

            # Ejecuta el código Python
            try:
                store_code_in_file(code, filename)
                time.sleep(1) # Pequeño retraso para que el sistema operativo pueda reflejar los cambios en el archivo
                
                # Hacemos commit de los cambios en el repositorio para analizar las modificaciones del agente.
                commit_changes(r"C:\Users\adria\Repos\TFM__LLM_landing_self-refinement", f"Stepback 4o. Iteración {i+1}.") 
                
                importlib.reload(Action) # Recargamos el módulo de acciones para que se actualice con las modificaciones del agente.

                # Ejecutamos el código generado.
                logs = lunar_lander(n_games=1, display=True, recoder=True, video_filename=f"video/iteration_{i+1}.mp4")
                
                # Devolvemos la respuesta al asistente.
                for call in tool_call:
                    agente.devolver_respuesta(response.id, hilo_codificador.id, tool_outputs=[{"tool_call_id": call.id, "output": "Run successful."}])            
                compiled = True
                logger.info(f"Compilación exitosa.")
                
                # Esperamos a que el agente esté listo para recibir mensajes y le añadimos el resultado de la iteración.
                while response.status not in ["completed", "failed", "expired"]:
                    logger.info(f"Status: {response.status}")
                    response = agente.get_run(response.id, hilo_codificador.id)
                    time.sleep(20)
                
                msg = f"""These are the logs generated by your last code: {logs}. Analyze the performance of the spacecraft and how it differs from the desired result. Be guided by the scoring system. Identify the cause of errors in your code and modify it without fear of making major changes. Think deeply about the priorities of your code and how to order and combine them correctly to achieve success."""
                logger.info(msg)    
                agente.add_message(hilo_codificador.id, role="user", content=msg)
            
            # Alimentamos el asistente con el error generado en la ejecución del código.    
            except Exception as e:
                logger.exception("Error: %s", e)
                error_trace = traceback.format_exc()
                for call in tool_call:
                    agente.devolver_respuesta(response.id, hilo_codificador.id, tool_outputs=[{"tool_call_id": call.id, "output": "ERROR."}]) 
                logger.error(f"Error: {e}.")
                while response.status not in ["completed", "failed", "expired"]:
                    logger.info(f"Status: {response.status}")
                    response = agente.get_run(ejecucion.id, hilo_codificador.id)
                    time.sleep(30)
                msg = f"The code generated has an error. Please, try again. Error: {e}. Trace: {error_trace}"   
                logger.error(msg)   
                agente.add_message(hilo_codificador.id, role="assistant", content=msg)
    
    agente.mostrar_mensajes(hilo_codificador.id)  
    vaciar_agente(agente)
    
    logger.info("\nEjecución finalizada.\n\n")       

    return response


def vaciar_agente(agente):
    for assistant in agente.assistants:
        agente.delete_assistant(assistant)

    for thread in agente.threads:
        agente.delete_thread(thread)
        
    print("Asistente vaciado.")


In [16]:
DESCRIPTION = "You are an expert agent in spacecraft landing and decision making codificiation. Based on the landing records received and the indications received by a supervising agent, you must generate the code to land the spacecraft in the environment whatever the initial conditions are."
DESCRIPTION_SUPERVISOR = "You are an expert agent in spacecraft landing environments. Your mission is to analyze landing logs and the code generated by the agent specialized in coding, to guide him in improving the performance of his decision making code."
INSTRUCTIONS = f"""To complete the task you must follow the following steps and indications:
1. Analyze and reason about the records received in the last landing attempts to learn how the spacecraft behaves in the environment. Keep in mind that conditions may vary.
2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:
    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).
    - Increased/decreased the slower/faster the spacecraft is moving.
    - Decreased the more the spacecraft is tilted.
    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engine.
    - Receive 100 points for a successful landing and lose them for crashing.
    - A succesful landing must exceed 200 points.
3. This is the log schema:\n {log_description}
4. Add your decision code to the 'act' method and locate errors in it if the landing is unsuccessful.
5. Save the code of the act function in the file 'Action.py' using store_code_in_file function.
"""
INSTRUCTIONS_SUPERVISOR = f"""To complete the task you must follow the following steps and indications:
1. Revisa aterrizajes exitosos y fracasados para analizar el efecto de las acciones en cada una de las observaciones y decidir que decisión tomar en cada momento.
2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:
    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).
    - Increased/decreased the slower/faster the spacecraft is moving.
    - Decreased the more the spacecraft is tilted.
    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engine.
    - Receive 100 points for a successful landing and lose them for crashing.
    - A succesful landing must exceed 200 points.
3. This is the log schema:\n {log_description}
4. All the knowledge obtained must be expressed in the best possible way to the coding LLM agent so that it can improve the performance of the generated decision making code.
"""
# 6. Improve your results and correct also any programming error you may have generated in your last code if they exist.
NAME = "Spacecraft Landing Decision coder"
NAME_SUPERVISOR = "Spacecraft Landing Supervisor"


supervisor_msg = f"This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}. Take a deep breath and reason step-by-step.What does each value and each change in the observations mean? What effects does each action have on the current state observations? What are the steps to follow to achieve a successful landing? Do not implement any code."
code_msg = f"This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}. After the reasoning with the processes behind the decision making, code very carefully when structuring your code, keep in mind that certain actions can be left with starvation. Save the code of the act function in the file 'Action.py' using store_code_in_file function tool, function calling is required."



In [17]:
logger = configura_log('Spacecraft_4o_Stepback.log')
response = create_and_run_llm_loop(supervisor_msg, code_msg, logger, model="gpt-4o", num_iterations=5)

print(response)

2024-07-02 14:30:55,570 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07-02 14:30:55,570 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07-02 14:30:55,839 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07-02 14:30:55,839 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07-02 14:30:56,029 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-07-02 14:30:56,029 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-07-02 14:30:56,242 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-07-02 14:30:56,242 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-07-02 14:30:56,538 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_6j2zlwyD27qqWLEFuNk6Ef5T/messages "HTTP/1.1 200 OK"
2024-07-02 

KeyboardInterrupt: 

## JSON MODE

In [14]:
json_shcema_response = {
    "reasoning": {
        "type": "string",
        "description": "Pre-coding deep reasoning on log analysis and the influence of actions on observations and steps to follow to achieve a successful landing. It is an expnasion but not the code itself."
    },
    "code": {
        "type": "string",
        "description": "The Python code to store. Use the reasoning parameter to generate this code."
    },
    "filename": {
        "type": "string",
        "description": "The filename to store the code in (Action.py)."
    }
}

In [15]:
import traceback, importlib
import Action

Action = importlib.reload(Action)

def create_and_run_llm_loop(Incial_msg, logger, model="gpt-3.5-turbo", num_iterations=10):
    
    agente = AssistantOpenAI(ARCLABKEY_OPENAI)

    # Crea un asistente
    asistente = agente.create_assistant_json_mode(model=model, description=DESCRIPTION, instructions=INSTRUCTIONS, name=NAME)
    
    # Crea un hilo
    hilo = agente.create_thread()
    
    # Añade un mensaje inicial al hilo.
    msg = agente.add_message(hilo.id, role="user", content=Incial_msg)

    # Bucle de aprendizaje del asistente.
    for i in range(num_iterations):
        
        logger.info(f"Iteration: {i+1}")
        compiled = False

        # Si en la presente iteración no se ha compilado el código, se pide al asistente corregir los errores antes de iterar.
        while not compiled:
            # Ejecuta el hilo
            ejecucion = agente.run(hilo.id, asistente.id)
            response = agente.get_run(ejecucion.id, hilo.id)

            # Esperamos a que la respuesta esté lista.
            while response.status not in ["completed", "failed", "requires_action"]:
                logger.info(f"Status: {ejecucion.status}")
                response = agente.get_run(ejecucion.id, hilo.id)
                time.sleep(10)
            
            print(response.response_format)
            msgs = agente.mostrar_mensajes(hilo.id)
            msg = msgs[0].data[0].content[0].text.value
            logger.info(msg)

            # # Descomponemos los elementos de la respuesta.
            # logger.info(f"Status: {response.status}")
            # if response.status == "completed":
            #     return response
            # tool_call = response.required_action.submit_tool_outputs.tool_calls
            # print(f"Tool call: {tool_call}")

            # Convierte el string a un diccionario
            code_dict = json.loads(msg)
            logger.info(f"Arguments: {code_dict}")
            
            # Obtén el código Python de la llamada a la herramienta
            code = code_dict["code"]
            filename = code_dict["filename"]  
            reasoning = code_dict["reasoning"]

            logger.info(f"\nRazonamiento:\n{reasoning}")
            logger.info(f"\nCodigo generado:\n{code}")

            # Ejecuta el código Python
            try:
                store_code_in_file(reasoning, code, filename)
                time.sleep(1) # Pequeño retraso para que el sistema operativo pueda reflejar los cambios en el archivo
                
                # Hacemos commit de los cambios en el repositorio para analizar las modificaciones del agente.
                commit_changes(r"C:\Users\adria\Repos\TFM__LLM_landing_self-refinement", f"4o JSONMODE. Iteración {i+1}.") 
                
                importlib.reload(Action) # Recargamos el módulo de acciones para que se actualice con las modificaciones del agente.

                # Ejecutamos el código generado.
                logs = lunar_lander(n_games=1, display=True, recoder=True, video_filename=f"video/iteration_{i+1}.mp4")
                
                # # Devolvemos la respuesta al asistente.
                # for call in tool_call:
                #     agente.devolver_respuesta(response.id, hilo.id, tool_outputs=[{"tool_call_id": call.id, "output": "Run successful."}])            
                # compiled = True
                # logger.info(f"Compilación exitosa.")
                
                # Esperamos a que el agente esté listo para recibir mensajes y le añadimos el resultado de la iteración.
                while response.status not in ["completed", "failed", "expired"]:
                    logger.info(f"Status: {response.status}")
                    response = agente.get_run(response.id, hilo.id)
                    time.sleep(20)
                
                msg = f"""These are the logs generated by your last code: {logs}. Analyze the performance of the spacecraft and how it differs from the desired result. Be guided by the scoring system. Identify the cause of errors in your code and modify it without fear of making major changes. Think deeply about the priorities of your code and how to order and combine them correctly to achieve success. Remember that the code is a reflection of your reasoning and the way you approach the problem."""
                logger.info(msg)    
                agente.add_message(hilo.id, role="user", content=msg)
            
            # Alimentamos el asistente con el error generado en la ejecución del código.    
            except Exception as e:
                logger.exception("Error: %s", e)
                error_trace = traceback.format_exc()
                # for call in tool_call:
                #     agente.devolver_respuesta(response.id, hilo.id, tool_outputs=[{"tool_call_id": call.id, "output": "ERROR."}]) 
                # logger.error(f"Error: {e}.")
                while response.status not in ["completed", "failed", "expired"]:
                    logger.info(f"Status: {response.status}")
                    response = agente.get_run(ejecucion.id, hilo.id)
                    time.sleep(30)
                msg = f"The code generated has an error. Please, try again. Error: {e}. Trace: {error_trace}"   
                logger.error(msg)   
                agente.add_message(hilo.id, role="assistant", content=msg)
    
    agente.mostrar_mensajes(hilo.id)  
    vaciar_agente(agente)
    
    logger.info("\nEjecución finalizada.\n\n")       

    return response


def vaciar_agente(agente):
    for assistant in agente.assistants:
        agente.delete_assistant(assistant)

    for thread in agente.threads:
        agente.delete_thread(thread)
        
    print("Asistente vaciado.")

In [16]:
DESCRIPTION = "You are an expert spacecraft landing agent. Your specialty is to take the appropriate action at each instant of time based on the environment and state of the spacecraft. To achieve this, you put your knowledge base and the knowledge you acquire by analyzing each execution into a python function in charge of directing the landing."
INSTRUCTIONS = f"""To complete the task you must follow the following steps and indications:
1. Analyze and reason about the records received in the last landing attempts to learn how the spacecraft behaves in the environment. Keep in mind that conditions may vary.
2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:
    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).
    - Increased/decreased the slower/faster the spacecraft is moving.
    - Decreased the more the spacecraft is tilted.
    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engine.
    - Receive 100 points for a successful landing and lose them for crashing.
    - A succesful landing must exceed 200 points.
3. This is the log schema:\n {log_description}
4. Add your decision code to the 'act' method and locate errors in it if the landing is unsuccessful.
5. Provide output in valid JSON format. The data schema should be as follows:  {json.dumps(json_shcema_response)}.
6. Generate the code of the act function in the file 'Action.py'.
"""

# 6. Improve your results and correct also any programming error you may have generated in your last code if they exist.
NAME = "Spacecraft Landing Master"

initial_msg = f"This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}. What does each value and each change in the observations mean? What effects does each action have on the current state observations? What are the steps to follow to achieve a successful landing?. Take a deep breath and reason step-by-step. After reasoning analyze the results, learn and make better code."

logger = configura_log('Spacecraft_4o_JsonMode.log')
response = create_and_run_llm_loop(initial_msg, logger, model="gpt-3.5-turbo", num_iterations=5)

print(response)

2024-07-02 18:15:13,144 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-07-02 18:15:13,369 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-07-02 18:15:13,654 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_Hs8IHuZKcweUovhEIneKtbDD/messages "HTTP/1.1 200 OK"
2024-07-02 18:15:13,794 - INFO - Iteration: 1
2024-07-02 18:15:14,194 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_Hs8IHuZKcweUovhEIneKtbDD/runs "HTTP/1.1 200 OK"
2024-07-02 18:15:14,426 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_Hs8IHuZKcweUovhEIneKtbDD/runs/run_9voCoIROrveH2ATtBzWa2Nld "HTTP/1.1 200 OK"
2024-07-02 18:15:14,427 - INFO - Status: queued
2024-07-02 18:15:14,632 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_Hs8IHuZKcweUovhEIneKtbDD/runs/run_9voCoIROrveH2ATtBzWa2Nld "HTTP/1.1 200 OK"
2024-07-02 18:15:24,636 - INFO - Status: queued
2024-07-02 18:15:24,894

AssistantResponseFormat(type='json_object')


2024-07-02 18:15:35,184 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_Hs8IHuZKcweUovhEIneKtbDD/messages "HTTP/1.1 200 OK"
2024-07-02 18:15:35,428 - INFO - {"reasoning": "Based on the analysis of the landing records from the example, it is crucial to take into consideration the spacecraft's position, velocity, angle, angular velocity, contact sensors, and the effect of actions on the total score. To achieve a successful landing, the actions need to be chosen carefully to minimize errors and maximize the final score. A strategy focusing on reducing velocity and angle while keeping the spacecraft near the landing area and avoiding continuous use of thrusters is important.", "code": "def act(curr_status):\n    x_pos, y_pos, x_vel, y_vel, angle, ang_vel, left_contact, right_contact = curr_status\n    if left_contact == 0 and right_contact == 0:\n        if x_pos < -0.1:\n            return 3  # Push right engine\n        elif x_pos > 0.1:\n            return 1  # Push 

assistant: [TextContentBlock(text=Text(annotations=[], value='{"reasoning": "Based on the analysis of the landing records from the example, it is crucial to take into consideration the spacecraft\'s position, velocity, angle, angular velocity, contact sensors, and the effect of actions on the total score. To achieve a successful landing, the actions need to be chosen carefully to minimize errors and maximize the final score. A strategy focusing on reducing velocity and angle while keeping the spacecraft near the landing area and avoiding continuous use of thrusters is important.", "code": "def act(curr_status):\\n    x_pos, y_pos, x_vel, y_vel, angle, ang_vel, left_contact, right_contact = curr_status\\n    if left_contact == 0 and right_contact == 0:\\n        if x_pos < -0.1:\\n            return 3  # Push right engine\\n        elif x_pos > 0.1:\\n            return 1  # Push left engine\\n        else:\\n            return 2  # Push both engines (upwards)\\n    else:\\n        retu

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4



2024-07-02 18:15:39,991 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": [{"time": 0, "action": 2, "current status": [0.007, 1.418, 0.377, 0.172, -0.009, -0.089, 0.0, 0.0], "score": -1.45}, {"time": 1, "action": 2, "current status": [0.011, 1.422, 0.389, 0.179, -0.013, -0.077, 0.0, 0.0], "score": -2.465}, {"time": 2, "action": 2, "current status": [0.015, 1.427, 0.395, 0.2, -0.016, -0.072, 0.0, 0.0], "score": -2.498}, {"time": 3, "action": 2, "current status": [0.019, 1.432, 0.385, 0.226, -0.02, -0.083, 0.0, 0.0], "score": -1.675}, {"time": 4, "action": 2, "current status": [0.023, 1.437, 0.389, 0.229, -0.024, -0.08, 0.0, 0.0], "score": -1.691}, {"time": 5, "action": 2, "current status": [0.027, 1.443, 0.398, 0.251, -0.028, -0.074, 0.0, 0.0], "score": -3.085}, {"time": 6, "action": 2, "current status": [0.031, 1.449, 0.392, 0.264, -0.032, -0.081, 0.0, 0.0], "score": -1.511}, {"time": 7, "action": 2, "current status": [0.035, 1.455, 0.387, 0.297, -

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4
Número de instantes: 117. Tamaño de logs: 69
episode  1 score -986.081 avg score -8.501


2024-07-02 18:15:40,232 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_Hs8IHuZKcweUovhEIneKtbDD/messages "HTTP/1.1 200 OK"
2024-07-02 18:15:40,635 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_Hs8IHuZKcweUovhEIneKtbDD/runs "HTTP/1.1 200 OK"
2024-07-02 18:15:40,911 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_Hs8IHuZKcweUovhEIneKtbDD/runs/run_Fe6Lp6TbZyaiOAG4X3EiZodU "HTTP/1.1 200 OK"
2024-07-02 18:15:40,912 - INFO - Status: queued
2024-07-02 18:15:41,190 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_Hs8IHuZKcweUovhEIneKtbDD/runs/run_Fe6Lp6TbZyaiOAG4X3EiZodU "HTTP/1.1 200 OK"
2024-07-02 18:15:51,192 - INFO - Status: queued
2024-07-02 18:15:51,502 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_Hs8IHuZKcweUovhEIneKtbDD/runs/run_Fe6Lp6TbZyaiOAG4X3EiZodU "HTTP/1.1 200 OK"


AssistantResponseFormat(type='json_object')


2024-07-02 18:16:01,753 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_Hs8IHuZKcweUovhEIneKtbDD/messages "HTTP/1.1 200 OK"
2024-07-02 18:16:01,991 - INFO - {"reasoning": "The spacecraft is consistently failing to land successfully due to a significant decrease in the score each time an action is taken. Actions have been persistently steering the spacecraft away from the landing zone as indicated by the X and Y position values deviating from zero. Additionally, the X velocity is increasing rapidly, worsening the situation. The actions performed (pushing the left engine continuously) are not appropriate for achieving a successful landing. To correct this, the actions need to be adjusted in such a way that the spacecraft is guided towards the landing area while reducing the X and Y velocity appropriately.", "code": "def act(data):\n    if data['logs'][-1]['current status'][0] > 0:\n        return 0  # Switch off engines\n    else:\n        return 1  # Push left engine

assistant: [TextContentBlock(text=Text(annotations=[], value='{"reasoning": "The spacecraft is consistently failing to land successfully due to a significant decrease in the score each time an action is taken. Actions have been persistently steering the spacecraft away from the landing zone as indicated by the X and Y position values deviating from zero. Additionally, the X velocity is increasing rapidly, worsening the situation. The actions performed (pushing the left engine continuously) are not appropriate for achieving a successful landing. To correct this, the actions need to be adjusted in such a way that the spacecraft is guided towards the landing area while reducing the X and Y velocity appropriately.", "code": "def act(data):\\n    if data[\'logs\'][-1][\'current status\'][0] > 0:\\n        return 0  # Switch off engines\\n    else:\\n        return 1  # Push left engine\\n\\n# You may further customize this function to consider other factors influencing a successful landing.

2024-07-02 18:16:05,010 - ERROR - Error: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices
Traceback (most recent call last):
  File "C:\Users\adria\AppData\Local\Temp\ipykernel_6036\835410671.py", line 72, in create_and_run_llm_loop
    logs = lunar_lander(n_games=1, display=True, recoder=True, video_filename=f"video/iteration_{i+1}.mp4")
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\adria\AppData\Local\Temp\ipykernel_6036\3374499487.py", line 52, in lunar_lander
    action = Action.act(state)
             ^^^^^^^^^^^^^^^^^
  File "c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\Action.py", line 2, in act
    if data['logs'][-1]['current status'][0] > 0:
       ~~~~^^^^^^^^
IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices
2024-07-02 18:16:05,011 - ERROR -

Semilla 38


2024-07-02 18:16:05,231 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_Hs8IHuZKcweUovhEIneKtbDD/messages "HTTP/1.1 200 OK"
  logger.warn("Unable to save last video! Did you call close()?")
2024-07-02 18:16:05,704 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_Hs8IHuZKcweUovhEIneKtbDD/runs "HTTP/1.1 200 OK"
2024-07-02 18:16:06,182 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_Hs8IHuZKcweUovhEIneKtbDD/runs/run_V7VHyerDywlmP2y7I0zFGSG8 "HTTP/1.1 200 OK"
2024-07-02 18:16:06,184 - INFO - Status: queued
2024-07-02 18:16:06,385 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_Hs8IHuZKcweUovhEIneKtbDD/runs/run_V7VHyerDywlmP2y7I0zFGSG8 "HTTP/1.1 200 OK"
2024-07-02 18:16:16,387 - INFO - Status: queued
2024-07-02 18:16:16,639 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_Hs8IHuZKcweUovhEIneKtbDD/runs/run_V7VHyerDywlmP2y7I0zFGSG8 "HTTP/1.1 200 OK"


AssistantResponseFormat(type='json_object')


2024-07-02 18:16:26,921 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_Hs8IHuZKcweUovhEIneKtbDD/messages "HTTP/1.1 200 OK"
2024-07-02 18:16:27,174 - INFO - {"reasoning": {"type": "string", "description": "The spacecraft is consistently failing to land successfully due to a significant decrease in the score each time an action is taken. Actions have been persistently steering the spacecraft away from the landing zone as indicated by the X and Y position values deviating from zero. Additionally, the X velocity is increasing rapidly, worsening the situation. The actions performed (pushing the left engine continuously) are not appropriate for achieving a successful landing. To correct this, the actions need to be adjusted in such a way that the spacecraft is guided towards the landing area while reducing the X and Y velocity appropriately."}, "code": {"type": "string", "description": "def act(data):\n    if data['logs'][-1]['current status'][0] > 0:\n        return 0  

assistant: [TextContentBlock(text=Text(annotations=[], value='{"reasoning": {"type": "string", "description": "The spacecraft is consistently failing to land successfully due to a significant decrease in the score each time an action is taken. Actions have been persistently steering the spacecraft away from the landing zone as indicated by the X and Y position values deviating from zero. Additionally, the X velocity is increasing rapidly, worsening the situation. The actions performed (pushing the left engine continuously) are not appropriate for achieving a successful landing. To correct this, the actions need to be adjusted in such a way that the spacecraft is guided towards the landing area while reducing the X and Y velocity appropriately."}, "code": {"type": "string", "description": "def act(data):\\n    if data[\'logs\'][-1][\'current status\'][0] > 0:\\n        return 0  # Switch off engines\\n    else:\\n        return 2  # Push both engines (upwards)\\n\\n# You may further cus

KeyError: 'filename'

## Tasks
- Corregir rotación.
- Cantidad de logs.
- Partir de ejemplo exitoso para afinarlo. (y generalizar)
- Incluir take step back.
- Probar con 2 modelos. Uno encargado del razonamiento y otro de codificar. (O uno que retroalimente al otro con las conclusiones obtenidas)