#### Imports

In [1]:
import time
from openai import OpenAI
from Assistant import AssistantOpenAI
import logging
import os
import json
import gymnasium as gym
import random
import matplotlib.pyplot as plt
import os
import time
from git import Repo

import numpy as np
#from collections import deque, namedtuple

# For visualization
import gymnasium.wrappers.record_video as record_video

#### Parameters

In [2]:
num_episodes = 200
ENV_NAME = 'LunarLander-v2'
ARCLABKEY_OPENAI = "sk-proj-DvHDR3hpgbm2r3kCA9jKT3BlbkFJL57ABXkfaWAIYKBxdhM6"
ARCLABKEY_OPENAI = "sk-proj-GvaDXazpibWA2M1I5Pu2T3BlbkFJxDuKlr9AcoVG98ctJZ7Q"

#### Function calling.

Función encargada de almacenar el código generado mediante la opción de function calling del asistente de OpenAI.

In [3]:
def store_code_in_file(code, filename):
    """ Store code in a file
    
    Args:
        code: str: code to store
        filename: str: filename to store code in
    
    Returns:
        None
    """
    with open(filename, 'w') as f:
        f.write(code)
        
store_code_in_file_schema = {
    "name": "store_code_in_file",
    "description": "Store code in a file",
    "parameters": {
        "type": "object",
        "properties": {
            "code": {
                "type": "string",
                "description": "The Python code to store."
            },
            "filename": {
                "type": "string",
                "description": "The filename to store the code in."
            }
        },
        "required": ["code", "filename"]
    }
}

TOOLS = [{"type": "function", "function": store_code_in_file_schema}]

### Entorno Lunnar Lander

In [4]:
env = gym.make('LunarLander-v2')
print('State shape: ', env.observation_space.shape)
print('Number of actions: ', env.action_space.n)

State shape:  (8,)
Number of actions:  4


In [5]:
agent_initial_code = """
import random

def act(observation):
    '''
    The function that codifies the action to be taken in each instant of time.

    Args:
        observation (numpy.array):
            "description": "The state of the environment after the action is taken.",
            "positions": {  
                "0": "X position",
                "1": "Y position",
                "2": "X velocity",
                "3": "Y velocity",
                "4": "Angle",
                "5": "Angular velocity",
                "6": "Left contact sensor",
                "7": "Right contact sensor"
            },
            "min_values": [-1.5, -1.5, -5.0, -5.0, -3.14, -5.0, 0, 0],
            "max_values": [1.5, 1.5, 5.0, 5.0, 3.14, 5.0, 1, 1]

    Returns:
        Integer  : The action to be taken.
    '''
    return random.randint(0, 3)
"""

#### Mapeador de logs a JSON

In [6]:
log_description = {
    "description": "Log data for each step of the spacecraft landing environment.",
    "landing attempt": {
        "type": "integer",
        "description": "The episode number."
    },
    "logs": {
        "instant": {
            "type": "integer",
            "description": "The instant within the landing attempt where the current log is taken."
        },
        "action": {
            "type": "integer",
            "description": "The possible actions taken.",
            "options": {
                1 : "Switch off engines",
                2 : "Push left engine",
                3 : "Push both engines (upwards)",
                4 : "Push right engine"
            }
        },  
        "current status": {
            "type": "array",
            "description": "The state of the environment after the action is taken.",
            "positions": {  
                "0": "X position",
                "1": "Y position",
                "2": "X velocity",
                "3": "Y velocity",
                "4": "Angle",
                "5": "Angular velocity",
                "6": "Left contact sensor with landing zone",
                "7": "Right contact sensor with landing zone"
            },
            "min_values": [-1.5, -1.5, -5.0, -5.0, -3.14, -5.0, 0, 0],
            "max_values": [1.5, 1.5, 5.0, 5.0, 3.14, 5.0, 1, 1] 
        }, 
        "score": {
            "type": "number",
            "description": "The score received for the action."
        },  
        "completed": {
            "type": "boolean",
            "description": "Whether the landing event has ended (landing or accident)."
        }
    },
    "total score": {
        "type": "number",
        "description": "The total score received for the landing attempt."
    }
}

### UTILS

In [7]:
import json

def format_step_data(instant, action, next_state, reward, terminated):
    """ Format the step data into a JSON string
    
    Args:
        instant: int: the instant within the landing attempt where the current log is taken
        action: int: the action taken
        next_state: np.array: the next state of the environment
        reward: float: the reward received
        terminated: bool: whether the landing event has ended
        
    Returns:
        str: the step data formatted as a JSON string
    """
    # Convertir el array numpy a una lista
    next_state_list = next_state.tolist()

    # Redondear los elementos de la lista a 4 decimales
    next_state_list_rounded = [round(x, 3) for x in next_state_list]

    step_data = {
        'instant': instant,
        'action' : int(action),
        'current status': next_state_list_rounded,  # Convert numpy array to list
        'score': round(reward, 3),
        'completed': terminated,
        #'truncated': truncated
        #'info': info
    }

    # Convert the dictionary to a JSON string
    step_data_json = json.dumps(step_data)

    return step_data_json


def format_episode_logs(logs, episode, total_score):
    """ Format the logs into a JSON string
    
    Args:
        logs: list: the logs for each step of the environment
        episode: int: the episode number
        
    Returns:
        str: the logs formatted as a JSON string
    """
    logs_data = {
        'landing attempt': episode,
        'logs': logs,
        'total score': total_score
    }

    # Convert the dictionary to a JSON string
    logs_json = json.dumps(logs_data)

    return logs_json

In [8]:
def commit_changes(repo_path, commit_message):
    """ Commit changes to the repository.

    Args:
        repo_path (str): Path to the repository.
        commit_message (str): The commit message.
    """
    repo = Repo(repo_path)
    repo.git.add(update=True)
    repo.index.commit(commit_message)
    origin = repo.remote(name='origin')
    origin.push()

#### Código encargado de ejecutar los eventos en el entorno.

In [9]:
import Action

def lunar_lander(max_t=1000, n_games=1, display=False, seed=38, agent=None, recoder=False, video_filename='video/video.mp4'):
    """ Run the Lunar Lander environment
    
    Args:
        max_t: int: the maximum number of timesteps
        n_games: int: the number of games to play
        display: bool: whether to display the environment
        seed: int: the seed for the environment
        agent: object: the agent to use
        recoder: bool: whether to record the video
        video_filename: str: the filename for the video
        
    Returns:
        str: the logs for the landing attempts
    """
    
    # Incluimos la opción del entorno gráfico y la de grabación.
    if display:
        env = gym.make(ENV_NAME, render_mode='human')
        if recoder:
            env = gym.make(ENV_NAME, render_mode='rgb_array')
            env.reset() 
            video_recorder = record_video.RecordVideo(env, video_filename)
    else:
        env = gym.make(ENV_NAME)

    # Bucle principal de ejecución de los episodios.
    logs = []
    for episode in range(1, n_games+1):
        state = env.reset(seed=seed) # Set a seed for the environment
        state = state[0] # Eliminamos el diccionario vacio y dejamos unicamente el estado de 8 elementos.
        score = 0
        instant = 0
        episode_actions = []
        if recoder: # En caso de que se quiera grabar el video, se inicia el grabador.
            video_recorder.start_video_recorder()
            
        for i in range(max_t): # Bucle de ejecución de los instantes de tiempo.
            
            # Seleccionamos el método de elegir la acción (agente exitoso o método de decisión del asistente)
            if agent: 
                action = agent.act(state)
            else:
                action = Action.act(state)
            
            # Avanzamos un instante de tiempo en el entorno en función de si se quiere grabar el video o no.
            if recoder:
                next_state, reward, terminated, truncated, info = video_recorder.step(action)
            else:
                next_state, reward, terminated, truncated, info = env.step(action)
            
            # Formateamos los datos del instante de tiempo y los alcenamos.
            json_logs = format_step_data(instant, action, next_state, reward, terminated)
            score += reward
            instant += 1
            if len(episode_actions) == 0 or len(episode_actions) % 2 == 0 or terminated:
                logs.append(json_logs)
            episode_actions.append(action)
            state = next_state
            if terminated: # Condición de salida del bucle, si el episodio ha terminado.
                break
        json_episode_logs = format_episode_logs(logs, episode, score) 
        
        print(f"Número de instantes: {instant+1}. Tamaño de logs: {len(logs)}")
        print('episode ', episode, 'score %.3f' % float(score), 'avg score %.3f' % (float(score) / instant))
        
    if recoder:
        video_recorder.close()
    else:
        env.close()
        
    return json_episode_logs

#### Logs del código inicial

In [10]:
initial_logs = lunar_lander(n_games=1, display=True)
initial_logs

Número de instantes: 107. Tamaño de logs: 54
episode  1 score -451.053 avg score -4.255


'{"landing attempt": 1, "logs": ["{\\"instant\\": 0, \\"action\\": 1, \\"current status\\": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], \\"score\\": 1.387, \\"completed\\": false}", "{\\"instant\\": 2, \\"action\\": 1, \\"current status\\": [0.015, 1.423, 0.373, 0.111, -0.008, 0.005, 0.0, 0.0], \\"score\\": 1.451, \\"completed\\": false}", "{\\"instant\\": 4, \\"action\\": 1, \\"current status\\": [0.022, 1.429, 0.353, 0.11, -0.007, 0.032, 0.0, 0.0], \\"score\\": 1.653, \\"completed\\": false}", "{\\"instant\\": 6, \\"action\\": 0, \\"current status\\": [0.029, 1.432, 0.364, 0.057, -0.008, -0.011, 0.0, 0.0], \\"score\\": 0.313, \\"completed\\": false}", "{\\"instant\\": 8, \\"action\\": 0, \\"current status\\": [0.037, 1.432, 0.353, 0.004, -0.004, 0.033, 0.0, 0.0], \\"score\\": 0.277, \\"completed\\": false}", "{\\"instant\\": 10, \\"action\\": 3, \\"current status\\": [0.044, 1.431, 0.365, -0.049, -0.003, -0.013, 0.0, 0.0], \\"score\\": -1.395, \\"completed\\": false}", "{\

#### Logs exitosos
Utilizar otras semillas para que no memorice el entorno

In [11]:
import DuelingDQN

agent = DuelingDQN.Agent(num_observaciones=8, num_acciones=4, red_modelo=DuelingDQN.DuelingQNetwork, seed=0)
# Cargamos los pesos del agente entrenado.
agent.load_weights('checkpoint_Dueling.pth')

success_logs = lunar_lander(n_games=1, display=True, seed=42, agent=agent)
success_logs

Número de instantes: 294. Tamaño de logs: 147
episode  1 score 250.956 avg score 0.857


'{"landing attempt": 1, "logs": ["{\\"instant\\": 0, \\"action\\": 1, \\"current status\\": [0.004, 1.425, 0.22, 0.295, -0.003, -0.004, 0.0, 0.0], \\"score\\": 2.08, \\"completed\\": false}", "{\\"instant\\": 2, \\"action\\": 0, \\"current status\\": [0.009, 1.436, 0.22, 0.242, -0.003, -0.004, 0.0, 0.0], \\"score\\": 1.453, \\"completed\\": false}", "{\\"instant\\": 4, \\"action\\": 0, \\"current status\\": [0.013, 1.445, 0.22, 0.188, -0.004, -0.004, 0.0, 0.0], \\"score\\": 1.354, \\"completed\\": false}", "{\\"instant\\": 6, \\"action\\": 1, \\"current status\\": [0.018, 1.452, 0.211, 0.135, -0.002, 0.032, 0.0, 0.0], \\"score\\": 2.092, \\"completed\\": false}", "{\\"instant\\": 8, \\"action\\": 0, \\"current status\\": [0.022, 1.456, 0.211, 0.081, 0.001, 0.032, 0.0, 0.0], \\"score\\": 0.866, \\"completed\\": false}", "{\\"instant\\": 10, \\"action\\": 0, \\"current status\\": [0.026, 1.458, 0.211, 0.028, 0.004, 0.032, 0.0, 0.0], \\"score\\": 0.283, \\"completed\\": false}", "{\\"inst

#### Bucle iterativo

Sección principal del código encargada de conectar con el asistente de la API de OpenAI e iterar en la generación de código nuevo a partir de los registros del generado previamente.

In [12]:
import traceback, importlib
import Action

Action = importlib.reload(Action)

def create_and_run_llm_loop(Incial_msg, logger, model="gpt-3.5-turbo", num_iterations=10):
    
    agente = AssistantOpenAI(ARCLABKEY_OPENAI)

    # Crea un asistente
    asistente = agente.create_assistant(model=model, description=DESCRIPTION, instructions=INSTRUCTIONS, name=NAME, tools=TOOLS)
    
    # Crea un hilo
    hilo = agente.create_thread()
    
    # Añade un mensaje inicial al hilo.
    msg = agente.add_message(hilo.id, role="user", content=Incial_msg)

    # Bucle de aprendizaje del asistente.
    for i in range(num_iterations):
        
        logger.info(f"Iteration: {i+1}")
        compiled = False

        # Si en la presente iteración no se ha compilado el código, se pide al asistente corregir los errores antes de iterar.
        while not compiled:
            # Ejecuta el hilo
            ejecucion = agente.run(hilo.id, asistente.id)
            response = agente.get_run(ejecucion.id, hilo.id)

            # Esperamos a que la respuesta esté lista.
            while response.status not in ["completed", "failed", "requires_action"]:
                logger.info(f"Status: {ejecucion.status}")
                response = agente.get_run(ejecucion.id, hilo.id)
                time.sleep(20)

            # Descomponemos los elementos de la respuesta.
            logger.info(f"Status: {response.status}")
            if response.status == "completed":
                return response
            tool_call = response.required_action.submit_tool_outputs.tool_calls
            print(f"Tool call: {tool_call}")

            # Convierte el string a un diccionario
            code_dict = json.loads(tool_call[0].function.arguments)
            logger.info(f"Arguments: {code_dict}")
            
            # Obtén el código Python de la llamada a la herramienta
            code = code_dict["code"]
            filename = code_dict["filename"]  

            logger.info(f"\nCodigo generado:\n{code}")

            # Ejecuta el código Python
            try:
                store_code_in_file(code, filename)
                time.sleep(1) # Pequeño retraso para que el sistema operativo pueda reflejar los cambios en el archivo
                
                # Hacemos commit de los cambios en el repositorio para analizar las modificaciones del agente.
                commit_changes(r"C:\Users\adria\Repos\TFM__LLM_landing_self-refinement", f"Fourth 4o execution. Upgrade prompt. Iteration {i+1}.") 
                
                importlib.reload(Action) # Recargamos el módulo de acciones para que se actualice con las modificaciones del agente.

                # Ejecutamos el código generado.
                logs = lunar_lander(n_games=1, display=True, recoder=True, video_filename=f"video/iteration_{i+1}.mp4")
                
                # Devolvemos la respuesta al asistente.
                for call in tool_call:
                    agente.devolver_respuesta(response.id, hilo.id, tool_outputs=[{"tool_call_id": call.id, "output": "Run successful."}])            
                compiled = True
                logger.info(f"Compilación exitosa.")
                
                # Esperamos a que el agente esté listo para recibir mensajes y le añadimos el resultado de la iteración.
                while response.status not in ["completed", "failed", "expired"]:
                    logger.info(f"Status: {response.status}")
                    response = agente.get_run(response.id, hilo.id)
                    time.sleep(20)
                
                msg = f"""These are the logs generated in the last execution{logs}. You have to improve the last code generated to maximize the reward. Please, make it more efficient, faster and more robust."""
                logger.info(msg)    
                agente.add_message(hilo.id, role="user", content=msg)
            
            # Alimentamos el asistente con el error generado en la ejecución del código.    
            except Exception as e:
                logger.exception("Error: %s", e)
                error_trace = traceback.format_exc()
                for call in tool_call:
                    agente.devolver_respuesta(response.id, hilo.id, tool_outputs=[{"tool_call_id": call.id, "output": "ERROR."}]) 
                logger.error(f"Error: {e}.")
                while response.status not in ["completed", "failed", "expired"]:
                    logger.info(f"Status: {response.status}")
                    response = agente.get_run(ejecucion.id, hilo.id)
                    time.sleep(30)
                msg = f"The code generated has an error. Please, try again. Error: {e}. Trace: {error_trace}"   
                logger.error(msg)   
                agente.add_message(hilo.id, role="assistant", content=msg)
    
    agente.mostrar_mensajes(hilo.id)  
    vaciar_agente(agente)
    
    logger.info("\nEjecución finalizada.\n\n")       

    return response


def vaciar_agente(agente):
    for assistant in agente.assistants:
        agente.delete_assistant(assistant)

    for thread in agente.threads:
        agente.delete_thread(thread)
        
    print("Asistente vaciado.")


#### Logger

In [13]:
def configura_log(nombre_archivo):
    """ 
    Configura el logger para que guarde los logs en un archivo y los muestre en la consola.
    
    Args:
        nombre_archivo: str Nombre del archivo donde se guardarán los logs.
        
    Returns:
        logger: logging.Logger Objeto logger configurado.
    """
    # Crear la carpeta logs si no existe
    if not os.path.exists('logs'):
        os.makedirs('logs')
    # Configura el logger
    logging.basicConfig(filename=f'logs/{nombre_archivo}', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
    logger = logging.getLogger()
    
    # Añade un StreamHandler para mostrar los logs en la consola
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)
    
    return logger

### Ejecución con el asistente

Prompt inicial y mensajes del sistema para el asistente.

In [14]:
DESCRIPTION = "You are an expert programer in Pyhton. Your specialty is to generate the code responsible for making decisions about actions to be taken in various spacecraft landing environments.The objective is to land the spacecraft within a target zone in the shortest possible time and very gently. A scoring system is used to evaluate the landings, which must be maximized."
INSTRUCTIONS = f"""Your task is:
1. Analyze and reason about the results received in the last landing attempts.
2. Your goal is to be able to make the appropriate decision based on the results of previous iterations. You must code the decision making based on your reasoning in a Python function.
3. Use these tips to get a good landing:
    - First you have to stabilize the falling ship and then land gently.
    - Use all observations when deciding what action to take at any given moment. Both position and velocities must be taken into account to know how the ship is doing and towards which states it is heading. All of these must be considered to achieve stability.
    - Learn how actions taken affect the future states of the spacecraft in the logs of past events so that you can take this into account when developing code to reach the landing zone.
4. Based on the logs you should improve the code generated in the 'act' function in the last iteration, seeking to maximize the score received and generate a higher quality code.
5. Save the code of the act function in the file 'Action.py' using the tools given.
6. Improve your results and correct any errors you may have generated in your last code if they exist.
"""
NAME = "Spacecraft Landing Master"

initial_msg = f"Take a deep breath and think step by step. This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}. Analyze the results and improve the code."

logger = configura_log('Spacecraft_4o_commits_recorder.log')
response = create_and_run_llm_loop(initial_msg, logger, model="gpt-4o", num_iterations=5)

print(response)

2024-06-12 18:37:12,280 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-06-12 18:37:12,498 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-06-12 18:37:12,713 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_5MmuoXtkwmZF3qDOCfD7gyrL/messages "HTTP/1.1 200 OK"
2024-06-12 18:37:12,852 - INFO - Iteration: 1
2024-06-12 18:37:13,264 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_5MmuoXtkwmZF3qDOCfD7gyrL/runs "HTTP/1.1 200 OK"
2024-06-12 18:37:13,485 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_5MmuoXtkwmZF3qDOCfD7gyrL/runs/run_jSNerXzBISZtWdteEq2IKmmT "HTTP/1.1 200 OK"
2024-06-12 18:37:13,499 - INFO - Status: queued
2024-06-12 18:37:14,669 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_5MmuoXtkwmZF3qDOCfD7gyrL/runs/run_jSNerXzBISZtWdteEq2IKmmT "HTTP/1.1 200 OK"
2024-06-12 18:37:34,684 - INFO - Status: queued
2024-06-12 18:37:34,964

Tool call: [RequiredActionFunctionToolCall(id='call_XTuDlY3zurzq9tmEDjsEWQPJ', function=Function(arguments='{"code": "import numpy as np\\n\\ndef act(observation):\\n    \'\'\'\\n    The function that codifies the action to be taken in each instant of time.\\n\\n    Args:\\n        observation (numpy.array):\\n            \\"description\\": \\"The state of the environment after the action is taken.\\",\\n            \\"positions\\": {  \\n                \\"0\\": \\"X position\\",\\n                \\"1\\": \\"Y position\\",\\n                \\"2\\": \\"X velocity\\",\\n                \\"3\\": \\"Y velocity\\",\\n                \\"4\\": \\"Angle\\",\\n                \\"5\\": \\"Angular velocity\\",\\n                \\"6\\": \\"Left contact sensor\\",\\n                \\"7\\": \\"Right contact sensor\\"\\n            },\\n            \\"min_values\\": [-1.5, -1.5, -5.0, -5.0, -3.14, -5.0, 0, 0],\\n            \\"max_values\\": [1.5, 1.5, 5.0, 5.0, 3.14, 5.0, 1, 1]\\n\\n    Returns

  logger.warn(


Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4



                                                                

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4
Número de instantes: 688. Tamaño de logs: 344
episode  1 score 151.808 avg score 0.221


2024-06-12 18:38:02,527 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_5MmuoXtkwmZF3qDOCfD7gyrL/runs/run_jSNerXzBISZtWdteEq2IKmmT/submit_tool_outputs "HTTP/1.1 200 OK"
2024-06-12 18:38:02,531 - INFO - Compilación exitosa.
2024-06-12 18:38:02,533 - INFO - Status: requires_action
2024-06-12 18:38:02,810 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_5MmuoXtkwmZF3qDOCfD7gyrL/runs/run_jSNerXzBISZtWdteEq2IKmmT "HTTP/1.1 200 OK"
2024-06-12 18:38:22,814 - INFO - Status: in_progress
2024-06-12 18:38:23,083 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_5MmuoXtkwmZF3qDOCfD7gyrL/runs/run_jSNerXzBISZtWdteEq2IKmmT "HTTP/1.1 200 OK"
2024-06-12 18:38:43,089 - INFO - These are the logs generated in the last execution{"landing attempt": 1, "logs": ["{\"instant\": 0, \"action\": 1, \"current status\": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], \"score\": 1.387, \"completed\": false}", "{\"instant\": 2, \"action\": 1, \"current 

Tool call: [RequiredActionFunctionToolCall(id='call_ayCy6PjGl49en9bR4Z6RtST3', function=Function(arguments='{\n  "code": "import numpy as np \\n\\ndef act(observation): \\n    \'\'\' \\n    The function that codifies the action to be taken in each instant of time. \\n\\n    Args: \\n        observation (numpy.array): \\n            \\"description\\": \\"The state of the environment after the action is taken.\\", \\n            \\"positions\\": {   \\n                \\"0\\": \\"X position\\", \\n                \\"1\\": \\"Y position\\", \\n                \\"2\\": \\"X velocity\\", \\n                \\"3\\": \\"Y velocity\\", \\n                \\"4\\": \\"Angle\\", \\n                \\"5\\": \\"Angular velocity\\", \\n                \\"6\\": \\"Left contact sensor\\", \\n                \\"7\\": \\"Right contact sensor\\" \\n            }, \\n            \\"min_values\\": [-1.5, -1.5, -5.0, -5.0, -3.14, -5.0, 0, 0], \\n            \\"max_values\\": [1.5, 1.5, 5.0, 5.0, 3.14, 5.0, 

  logger.warn(


Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4



                                                                

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4
Número de instantes: 690. Tamaño de logs: 345
episode  1 score 151.580 avg score 0.220


2024-06-12 18:39:50,021 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_5MmuoXtkwmZF3qDOCfD7gyrL/runs/run_OMhZ0lpjDLOlm5B7lRRiEXQY/submit_tool_outputs "HTTP/1.1 200 OK"
2024-06-12 18:39:50,022 - INFO - Compilación exitosa.
2024-06-12 18:39:50,023 - INFO - Status: requires_action
2024-06-12 18:39:50,280 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_5MmuoXtkwmZF3qDOCfD7gyrL/runs/run_OMhZ0lpjDLOlm5B7lRRiEXQY "HTTP/1.1 200 OK"
2024-06-12 18:40:10,283 - INFO - Status: in_progress
2024-06-12 18:40:10,534 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_5MmuoXtkwmZF3qDOCfD7gyrL/runs/run_OMhZ0lpjDLOlm5B7lRRiEXQY "HTTP/1.1 200 OK"
2024-06-12 18:40:30,538 - INFO - These are the logs generated in the last execution{"landing attempt": 1, "logs": ["{\"instant\": 0, \"action\": 1, \"current status\": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], \"score\": 1.387, \"completed\": false}", "{\"instant\": 2, \"action\": 1, \"current 

Tool call: [RequiredActionFunctionToolCall(id='call_zeiwKVTjOv3IaA2h1JFDHtOw', function=Function(arguments='{\n  "code": "import numpy as np \\n\\ndef act(observation): \\n    \'\'\' \\n    The function that codifies the action to be taken in each instant of time. \\n\\n    Args: \\n        observation (numpy.array): \\n            \\"description\\": \\"The state of the environment after the action is taken.\\", \\n            \\"positions\\": {   \\n                \\"0\\": \\"X position\\", \\n                \\"1\\": \\"Y position\\", \\n                \\"2\\": \\"X velocity\\", \\n                \\"3\\": \\"Y velocity\\", \\n                \\"4\\": \\"Angle\\", \\n                \\"5\\": \\"Angular velocity\\", \\n                \\"6\\": \\"Left contact sensor\\", \\n                \\"7\\": \\"Right contact sensor\\" \\n            }, \\n            \\"min_values\\": [-1.5, -1.5, -5.0, -5.0, -3.14, -5.0, 0, 0], \\n            \\"max_values\\": [1.5, 1.5, 5.0, 5.0, 3.14, 5.0, 

  logger.warn(


Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4



                                                                

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4
Número de instantes: 392. Tamaño de logs: 196
episode  1 score 227.470 avg score 0.582


2024-06-12 18:41:36,905 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_5MmuoXtkwmZF3qDOCfD7gyrL/runs/run_20LW3u0ciiWldIDFYEAF2mT8/submit_tool_outputs "HTTP/1.1 200 OK"
2024-06-12 18:41:36,908 - INFO - Compilación exitosa.
2024-06-12 18:41:36,910 - INFO - Status: requires_action
2024-06-12 18:41:37,148 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_5MmuoXtkwmZF3qDOCfD7gyrL/runs/run_20LW3u0ciiWldIDFYEAF2mT8 "HTTP/1.1 200 OK"
2024-06-12 18:41:57,152 - INFO - Status: in_progress
2024-06-12 18:41:57,421 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_5MmuoXtkwmZF3qDOCfD7gyrL/runs/run_20LW3u0ciiWldIDFYEAF2mT8 "HTTP/1.1 200 OK"
2024-06-12 18:42:17,425 - INFO - Status: in_progress
2024-06-12 18:42:17,695 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_5MmuoXtkwmZF3qDOCfD7gyrL/runs/run_20LW3u0ciiWldIDFYEAF2mT8 "HTTP/1.1 200 OK"
2024-06-12 18:42:37,699 - INFO - These are the logs generated in the last execution{"l

Tool call: [RequiredActionFunctionToolCall(id='call_g3b28zxcuCYVgJcU9G53gGyk', function=Function(arguments='{\n  "code": "import numpy as np \\n\\ndef act(observation): \\n    \'\'\' \\n    The function that codifies the action to be taken in each instant of time. \\n\\n    Args: \\n        observation (numpy.array): \\n            \\"description\\": \\"The state of the environment after the action is taken.\\", \\n            \\"positions\\": {   \\n                \\"0\\": \\"X position\\", \\n                \\"1\\": \\"Y position\\", \\n                \\"2\\": \\"X velocity\\", \\n                \\"3\\": \\"Y velocity\\", \\n                \\"4\\": \\"Angle\\", \\n                \\"5\\": \\"Angular velocity\\", \\n                \\"6\\": \\"Left contact sensor\\", \\n                \\"7\\": \\"Right contact sensor\\" \\n            }, \\n            \\"min_values\\": [-1.5, -1.5, -5.0, -5.0, -3.14, -5.0, 0, 0], \\n            \\"max_values\\": [1.5, 1.5, 5.0, 5.0, 3.14, 5.0, 

  logger.warn(


Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4



                                                                

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4
Número de instantes: 481. Tamaño de logs: 241
episode  1 score 186.358 avg score 0.388


2024-06-12 18:43:44,019 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_5MmuoXtkwmZF3qDOCfD7gyrL/runs/run_heyvp3Z8arDeUgDRqtZxMmJV/submit_tool_outputs "HTTP/1.1 200 OK"
2024-06-12 18:43:44,023 - INFO - Compilación exitosa.
2024-06-12 18:43:44,024 - INFO - Status: requires_action
2024-06-12 18:43:44,245 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_5MmuoXtkwmZF3qDOCfD7gyrL/runs/run_heyvp3Z8arDeUgDRqtZxMmJV "HTTP/1.1 200 OK"
2024-06-12 18:44:04,250 - INFO - Status: in_progress
2024-06-12 18:44:04,489 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_5MmuoXtkwmZF3qDOCfD7gyrL/runs/run_heyvp3Z8arDeUgDRqtZxMmJV "HTTP/1.1 200 OK"
2024-06-12 18:44:24,492 - INFO - Status: in_progress
2024-06-12 18:44:24,777 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_5MmuoXtkwmZF3qDOCfD7gyrL/runs/run_heyvp3Z8arDeUgDRqtZxMmJV "HTTP/1.1 200 OK"
2024-06-12 18:44:44,782 - INFO - These are the logs generated in the last execution{"l

Tool call: [RequiredActionFunctionToolCall(id='call_fhMU6TjiyH115Qhwfe1JeiPG', function=Function(arguments='{\n  "code": "import numpy as np\\n\\ndef act(observation):\\n    \'\'\'\\n    The function that codifies the action to be taken in each instant of time.\\n\\n    Args:\\n        observation (numpy.array):\\n            \\"description\\": \\"The state of the environment after the action is taken.\\",\\n            \\"positions\\": {  \\n                \\"0\\": \\"X position\\",\\n                \\"1\\": \\"Y position\\",\\n                \\"2\\": \\"X velocity\\",\\n                \\"3\\": \\"Y velocity\\",\\n                \\"4\\": \\"Angle\\",\\n                \\"5\\": \\"Angular velocity\\",\\n                \\"6\\": \\"Left contact sensor\\",\\n                \\"7\\": \\"Right contact sensor\\"\\n            },\\n            \\"min_values\\": [-1.5, -1.5, -5.0, -5.0, -3.14, -5.0, 0, 0],\\n            \\"max_values\\": [1.5, 1.5, 5.0, 5.0, 3.14, 5.0, 1, 1]\\n\\n    Ret

  logger.warn(


Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_5.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_5.mp4\rl-video-episode-0.mp4



                                                                

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_5.mp4\rl-video-episode-0.mp4
Número de instantes: 481. Tamaño de logs: 241
episode  1 score 186.358 avg score 0.388


2024-06-12 18:45:51,069 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_5MmuoXtkwmZF3qDOCfD7gyrL/runs/run_vuAi3hwPPf05kfkHWSoSC12v/submit_tool_outputs "HTTP/1.1 200 OK"
2024-06-12 18:45:51,072 - INFO - Compilación exitosa.
2024-06-12 18:45:51,074 - INFO - Status: requires_action
2024-06-12 18:45:51,291 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_5MmuoXtkwmZF3qDOCfD7gyrL/runs/run_vuAi3hwPPf05kfkHWSoSC12v "HTTP/1.1 200 OK"
2024-06-12 18:46:11,296 - INFO - Status: in_progress
2024-06-12 18:46:11,598 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_5MmuoXtkwmZF3qDOCfD7gyrL/runs/run_vuAi3hwPPf05kfkHWSoSC12v "HTTP/1.1 200 OK"
2024-06-12 18:46:31,603 - INFO - Status: in_progress
2024-06-12 18:46:31,929 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_5MmuoXtkwmZF3qDOCfD7gyrL/runs/run_vuAi3hwPPf05kfkHWSoSC12v "HTTP/1.1 200 OK"
2024-06-12 18:46:51,934 - INFO - These are the logs generated in the last execution{"l

user: [TextContentBlock(text=Text(annotations=[], value='These are the logs generated in the last execution{"landing attempt": 1, "logs": ["{\\"instant\\": 0, \\"action\\": 1, \\"current status\\": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], \\"score\\": 1.387, \\"completed\\": false}", "{\\"instant\\": 2, \\"action\\": 1, \\"current status\\": [0.015, 1.421, 0.353, 0.077, -0.006, 0.026, 0.0, 0.0], \\"score\\": 1.473, \\"completed\\": false}", "{\\"instant\\": 4, \\"action\\": 1, \\"current status\\": [0.021, 1.423, 0.332, 0.024, 0.003, 0.11, 0.0, 0.0], \\"score\\": 1.038, \\"completed\\": false}", "{\\"instant\\": 6, \\"action\\": 1, \\"current status\\": [0.028, 1.422, 0.332, -0.03, 0.012, 0.11, 0.0, 0.0], \\"score\\": 0.442, \\"completed\\": false}", "{\\"instant\\": 8, \\"action\\": 1, \\"current status\\": [0.035, 1.419, 0.329, -0.083, 0.022, 0.119, 0.0, 0.0], \\"score\\": 0.066, \\"completed\\": false}", "{\\"instant\\": 10, \\"action\\": 1, \\"current status\\": [0.04

2024-06-12 18:46:53,385 - INFO - HTTP Request: DELETE https://api.openai.com/v1/assistants/asst_I930QBrx2hc2ctc7nqBHOjtn "HTTP/1.1 200 OK"
2024-06-12 18:46:53,601 - INFO - HTTP Request: DELETE https://api.openai.com/v1/threads/thread_5MmuoXtkwmZF3qDOCfD7gyrL "HTTP/1.1 200 OK"
2024-06-12 18:46:53,602 - INFO - 
Ejecución finalizada.




Asistente vaciado.
Run(id='run_vuAi3hwPPf05kfkHWSoSC12v', assistant_id='asst_I930QBrx2hc2ctc7nqBHOjtn', cancelled_at=None, completed_at=1718210786, created_at=1718210682, expires_at=None, failed_at=None, incomplete_details=None, instructions="Your task is:\n1. Analyze and reason about the results received in the last landing attempts.\n2. Your goal is to be able to make the appropriate decision based on the results of previous iterations. You must code the decision making based on your reasoning in a Python function.\n3. Use these tips to get a good landing:\n    - First you have to stabilize the falling ship and then land gently.\n    - Use all observations when deciding what action to take at any given moment. Both position and velocities must be taken into account to know how the ship is doing and towards which states it is heading. All of these must be considered to achieve stability.\n    - Learn how actions taken affect the future states of the spacecraft in the logs of past even

## Tasks

* Incluir guardado de vídeo.
* Mejorar el prompt inicial.
* Incluir ejemplos positivos.
* Incluir cambios en el código con GIT.