#### Imports

In [1]:
import time
from openai import OpenAI
from Assistant import AssistantOpenAI
import logging
import os
import json
import gymnasium as gym
import random
import matplotlib.pyplot as plt
import os
import time
from git import Repo

import numpy as np
#from collections import deque, namedtuple

# For visualization
import gymnasium.wrappers.record_video as record_video

#### Parameters

In [2]:
num_episodes = 200
ENV_NAME = 'LunarLander-v2'
ARCLABKEY_OPENAI = "sk-proj-DvHDR3hpgbm2r3kCA9jKT3BlbkFJL57ABXkfaWAIYKBxdhM6"
ARCLABKEY_OPENAI = "sk-proj-GvaDXazpibWA2M1I5Pu2T3BlbkFJxDuKlr9AcoVG98ctJZ7Q"

#### Function calling.

Función encargada de almacenar el código generado mediante la opción de function calling del asistente de OpenAI.

In [3]:
def store_code_in_file(code, filename):
    """ Store code in a file
    
    Args:
        code: str: code to store
        filename: str: filename to store code in
    
    Returns:
        None
    """
    with open(filename, 'w') as f:
        f.write(code)
        
store_code_in_file_schema = {
    "name": "store_code_in_file",
    "description": "Store code in a file",
    "parameters": {
        "type": "object",
        "properties": {
            "code": {
                "type": "string",
                "description": "The Python code to store."
            },
            "filename": {
                "type": "string",
                "description": "The filename to store the code in."
            }
        },
        "required": ["code", "filename"]
    }
}

TOOLS = [{"type": "function", "function": store_code_in_file_schema}]

### Entorno Lunnar Lander

In [4]:
env = gym.make('LunarLander-v2')
print('State shape: ', env.observation_space.shape)
print('Number of actions: ', env.action_space.n)

State shape:  (8,)
Number of actions:  4


In [5]:
agent_initial_code = """
import random

def act(observation):
    '''
    The function that codifies the action to be taken in each instant of time.

    Args:
        observation (numpy.array):
            "description": "The state of the environment after the action is taken.",
            "positions": {  
                "0": "X position",
                "1": "Y position",
                "2": "X velocity",
                "3": "Y velocity",
                "4": "Angle",
                "5": "Angular velocity",
                "6": "Left contact sensor",
                "7": "Right contact sensor"
            },
            "min_values": [-1.5, -1.5, -5.0, -5.0, -3.14, -5.0, 0, 0],
            "max_values": [1.5, 1.5, 5.0, 5.0, 3.14, 5.0, 1, 1]

    Returns:
        Integer  : The action to be taken.
        "options": {
                '0' : "Switch off engines",
                '1' : "Push left engine",
                '2' : "Push both engines (upwards)",
                '3' : "Push right engine"
            }
    '''
    return random.randint(0, 3)
"""

#### Mapeador de logs a JSON

In [6]:
log_description = {
    "description": "Log data for each step of the spacecraft landing environment.",
    "landing attempt": {
        "type": "integer",
        "description": "The episode number."
    },
    "logs": {
        "time": {
            "type": "integer",
            "description": "The instant within the landing attempt where the current log is taken."
        },
        "action": {
            "type": "integer",
            "description": "The possible actions taken.",
            "options": {
                '0' : "Switch off engines",
                
                '1' : "Push left engine",
                '2' : "Push both engines (upwards)",
                '3' : "Push right engine"
            }
        },  
        "current status": {
            "type": "array",
            "description": "The state of the environment after the action is taken.",
            "positions": {  
                "0": {
                    "name": "X position",
                    "description": "The horizontal position of the spacecraft relative to the landing zone."
                },
                "1": {
                    "name": "Y position",
                    "description": "The vertical position of the spacecraft relative to the landing zone."
                },
                "2": {
                    "name": "X velocity",
                    "description": "The horizontal velocity of the spacecraft."
                },
                "3": {
                    "name": "Y velocity",
                    "description": "The vertical velocity of the spacecraft."
                },
                "4": {
                    "name": "Angle",
                    "description": "The angle of the spacecraft relative to the vertical."
                },
                "5": {
                    "name": "Angular velocity",
                    "description": "The rate of change of the angle of the spacecraft."
                },
                "6": {
                    "name": "Left contact sensor with landing zone",
                    "description": "Indicates whether the left side of the spacecraft is in contact with the landing zone."
                },
                "7": {
                    "name": "Right contact sensor with landing zone",
                    "description": "Indicates whether the right side of the spacecraft is in contact with the landing zone."
                }
            },
            "min_values": [-1.5, -1.5, -5.0, -5.0, -3.14, -5.0, 0, 0],
            "max_values": [1.5, 1.5, 5.0, 5.0, 3.14, 5.0, 1, 1],
            
        }, 
        "score": {
            "type": "number",
            "description": "The score received for the action."
        },  
        "completed": {
            "type": "boolean",
            "description": "Whether the landing event has ended (landing or accident)."
        }
    },
    "total score": {
        "type": "number",
        "description": "The total score received for the landing attempt."
    }
}

### UTILS

In [7]:
import json

def format_step_data(instant, action, next_state, reward, terminated):
    """ Format the step data into a JSON string
    
    Args:
        instant: int: the instant within the landing attempt where the current log is taken
        action: int: the action taken
        next_state: np.array: the next state of the environment
        reward: float: the reward received
        terminated: bool: whether the landing event has ended
        
    Returns:
        str: the step data formatted as a JSON string
    """
    # Convertir el array numpy a una lista
    next_state_list = next_state.tolist()

    # Redondear los elementos de la lista a 4 decimales
    next_state_list_rounded = [round(x, 3) for x in next_state_list]

    step_data = {
        'time': instant,
        'action' : int(action),
        'current status': next_state_list_rounded,  # Convert numpy array to list
        'score': round(reward, 3),
        'completed': terminated,
        #'truncated': truncated
        #'info': info
    }

    # Convert the dictionary to a JSON string
    step_data_json = json.dumps(step_data)

    return step_data_json


def format_episode_logs(logs, episode, total_score):
    """ Format the logs into a JSON string
    
    Args:
        logs: list: the logs for each step of the environment
        episode: int: the episode number
        
    Returns:
        str: the logs formatted as a JSON string
    """
    logs_data = {
        'landing attempt': episode,
        'logs': logs,
        'total score': total_score
    }

    # Convert the dictionary to a JSON string
    logs_json = json.dumps(logs_data)

    return logs_json

In [8]:
def commit_changes(repo_path, commit_message):
    """ Commit changes to the repository.

    Args:
        repo_path (str): Path to the repository.
        commit_message (str): The commit message.
    """
    repo = Repo(repo_path)
    repo.git.add(update=True)
    repo.index.commit(commit_message)
    origin = repo.remote(name='origin')
    origin.push()

#### Código encargado de ejecutar los eventos en el entorno.

In [9]:
import Action

def lunar_lander(max_t=1000, n_games=1, display=False, seed=38, agent=None, recoder=False, video_filename='video/video.mp4'):
    """ Run the Lunar Lander environment
    
    Args:
        max_t: int: the maximum number of timesteps
        n_games: int: the number of games to play
        display: bool: whether to display the environment
        seed: int: the seed for the environment
        agent: object: the agent to use
        recoder: bool: whether to record the video
        video_filename: str: the filename for the video
        
    Returns:
        str: the logs for the landing attempts
    """
    
    # Incluimos la opción del entorno gráfico y la de grabación.
    if display:
        env = gym.make(ENV_NAME, render_mode='human')
        if recoder:
            env = gym.make(ENV_NAME, render_mode='rgb_array')
            env.reset() 
            video_recorder = record_video.RecordVideo(env, video_filename)
    else:
        env = gym.make(ENV_NAME)

    # Bucle principal de ejecución de los episodios.
    logs = []
    for episode in range(1, n_games+1):
        # Revisamos que la semilla sea un número entero o una lista de semillas.
        if isinstance(seed, list):
            semilla = seed[episode-1]
        else:
            semilla = seed
        print(f"Semilla {semilla}")    
        state = env.reset(seed=semilla) # Set a seed for the environment
        state = state[0] # Eliminamos el diccionario vacio y dejamos unicamente el estado de 8 elementos.
        score = 0
        instant = 0
        episode_actions = []
        if recoder: # En caso de que se quiera grabar el video, se inicia el grabador.
            video_recorder.start_video_recorder()
            
        for i in range(max_t): # Bucle de ejecución de los instantes de tiempo.
            
            # Seleccionamos el método de elegir la acción (agente exitoso o método de decisión del asistente)
            if agent: 
                action = agent.act(state)
            else:
                action = Action.act(state)
            
            # Avanzamos un instante de tiempo en el entorno en función de si se quiere grabar el video o no.
            if recoder:
                next_state, reward, terminated, truncated, info = video_recorder.step(action)
            else:
                next_state, reward, terminated, truncated, info = env.step(action)
            
            # Formateamos los datos del instante de tiempo y los alcenamos.
            json_logs = format_step_data(instant, action, next_state, reward, terminated)
            score += reward
            instant += 1
            if len(episode_actions) == 0 or len(episode_actions) % 2 == 0 or terminated:
                logs.append(json_logs)
            episode_actions.append(action)
            state = next_state
            if terminated: # Condición de salida del bucle, si el episodio ha terminado.
                break
        json_episode_logs = format_episode_logs(logs, episode, score) 
        
        print(f"Número de instantes: {instant+1}. Tamaño de logs: {len(logs)}")
        print('episode ', episode, 'score %.3f' % float(score), 'avg score %.3f' % (float(score) / instant))
        
    if recoder:
        video_recorder.close()
    else:
        env.close()
        
    return json_episode_logs

#### Logs del código inicial

In [13]:
initial_logs = lunar_lander(n_games=1, display=True)
initial_logs

Semilla 38
Número de instantes: 78. Tamaño de logs: 39
episode  1 score -116.799 avg score -1.517


'{"landing attempt": 1, "logs": ["{\\"time\\": 0, \\"action\\": 3, \\"current status\\": [0.008, 1.417, 0.39, 0.131, -0.01, -0.123, 0.0, 0.0], \\"score\\": -0.871, \\"completed\\": false}", "{\\"time\\": 2, \\"action\\": 3, \\"current status\\": [0.016, 1.421, 0.412, 0.077, -0.03, -0.211, 0.0, 0.0], \\"score\\": -1.707, \\"completed\\": false}", "{\\"time\\": 4, \\"action\\": 3, \\"current status\\": [0.024, 1.423, 0.431, 0.023, -0.056, -0.286, 0.0, 0.0], \\"score\\": -2.308, \\"completed\\": false}", "{\\"time\\": 6, \\"action\\": 3, \\"current status\\": [0.032, 1.422, 0.45, -0.032, -0.091, -0.364, 0.0, 0.0], \\"score\\": -2.782, \\"completed\\": false}", "{\\"time\\": 8, \\"action\\": 1, \\"current status\\": [0.041, 1.419, 0.448, -0.085, -0.128, -0.356, 0.0, 0.0], \\"score\\": -0.974, \\"completed\\": false}", "{\\"time\\": 10, \\"action\\": 1, \\"current status\\": [0.049, 1.414, 0.43, -0.137, -0.158, -0.283, 0.0, 0.0], \\"score\\": -1.074, \\"completed\\": false}", "{\\"time\\": 

#### Logs exitosos
Utilizar otras semillas para que no memorice el entorno

In [18]:
import DuelingDQN

agent = DuelingDQN.Agent(num_observaciones=8, num_acciones=4, red_modelo=DuelingDQN.DuelingQNetwork, seed=0)
# Cargamos los pesos del agente entrenado.
agent.load_weights('checkpoint_Dueling.pth')

seeds = [130, 412]

success_logs = lunar_lander(n_games=len(seeds), display=True, seed=seeds, agent=agent)
success_logs

Semilla 130
Número de instantes: 242. Tamaño de logs: 121
episode  1 score 263.217 avg score 1.092
Semilla 412
Número de instantes: 242. Tamaño de logs: 242
episode  2 score 266.713 avg score 1.107


'{"landing attempt": 2, "logs": ["{\\"time\\": 0, \\"action\\": 1, \\"current status\\": [0.006, 1.393, 0.31, -0.402, -0.005, -0.029, 0.0, 0.0], \\"score\\": -0.567, \\"completed\\": false}", "{\\"time\\": 2, \\"action\\": 1, \\"current status\\": [0.012, 1.373, 0.291, -0.456, -0.003, 0.045, 0.0, 0.0], \\"score\\": -0.468, \\"completed\\": false}", "{\\"time\\": 4, \\"action\\": 1, \\"current status\\": [0.018, 1.351, 0.291, -0.509, -0.0, 0.045, 0.0, 0.0], \\"score\\": -0.367, \\"completed\\": false}", "{\\"time\\": 6, \\"action\\": 3, \\"current status\\": [0.024, 1.326, 0.308, -0.563, -0.001, -0.022, 0.0, 0.0], \\"score\\": -1.579, \\"completed\\": false}", "{\\"time\\": 8, \\"action\\": 2, \\"current status\\": [0.03, 1.3, 0.291, -0.582, -0.0, 0.001, 0.0, 0.0], \\"score\\": 2.115, \\"completed\\": false}", "{\\"time\\": 10, \\"action\\": 2, \\"current status\\": [0.036, 1.276, 0.3, -0.527, 0.001, 0.009, 0.0, 0.0], \\"score\\": 3.931, \\"completed\\": false}", "{\\"time\\": 12, \\"ac

#### Bucle iterativo

Sección principal del código encargada de conectar con el asistente de la API de OpenAI e iterar en la generación de código nuevo a partir de los registros del generado previamente.

In [19]:
import traceback, importlib
import Action

Action = importlib.reload(Action)

def create_and_run_llm_loop(Incial_msg, logger, model="gpt-3.5-turbo", num_iterations=10):
    
    agente = AssistantOpenAI(ARCLABKEY_OPENAI)

    # Crea un asistente
    asistente = agente.create_assistant(model=model, description=DESCRIPTION, instructions=INSTRUCTIONS, name=NAME, tools=TOOLS)
    
    # Crea un hilo
    hilo = agente.create_thread()
    
    # Añade un mensaje inicial al hilo.
    msg = agente.add_message(hilo.id, role="user", content=Incial_msg)

    # Bucle de aprendizaje del asistente.
    for i in range(num_iterations):
        
        logger.info(f"Iteration: {i+1}")
        compiled = False

        # Si en la presente iteración no se ha compilado el código, se pide al asistente corregir los errores antes de iterar.
        while not compiled:
            # Ejecuta el hilo
            ejecucion = agente.run(hilo.id, asistente.id)
            response = agente.get_run(ejecucion.id, hilo.id)

            # Esperamos a que la respuesta esté lista.
            while response.status not in ["completed", "failed", "requires_action"]:
                logger.info(f"Status: {ejecucion.status}")
                response = agente.get_run(ejecucion.id, hilo.id)
                time.sleep(20)

            # Descomponemos los elementos de la respuesta.
            logger.info(f"Status: {response.status}")
            if response.status == "completed":
                return response
            tool_call = response.required_action.submit_tool_outputs.tool_calls
            print(f"Tool call: {tool_call}")

            # Convierte el string a un diccionario
            code_dict = json.loads(tool_call[0].function.arguments)
            logger.info(f"Arguments: {code_dict}")
            
            # Obtén el código Python de la llamada a la herramienta
            code = code_dict["code"]
            filename = code_dict["filename"]  

            logger.info(f"\nCodigo generado:\n{code}")

            # Ejecuta el código Python
            try:
                store_code_in_file(code, filename)
                time.sleep(1) # Pequeño retraso para que el sistema operativo pueda reflejar los cambios en el archivo
                
                # Hacemos commit de los cambios en el repositorio para analizar las modificaciones del agente.
                commit_changes(r"C:\Users\adria\Repos\TFM__LLM_landing_self-refinement", f"Nuevo prompt. Iteración {i+1}.") 
                
                importlib.reload(Action) # Recargamos el módulo de acciones para que se actualice con las modificaciones del agente.

                # Ejecutamos el código generado.
                logs = lunar_lander(n_games=1, display=True, recoder=True, video_filename=f"video/iteration_{i+1}.mp4")
                
                # Devolvemos la respuesta al asistente.
                for call in tool_call:
                    agente.devolver_respuesta(response.id, hilo.id, tool_outputs=[{"tool_call_id": call.id, "output": "Run successful."}])            
                compiled = True
                logger.info(f"Compilación exitosa.")
                
                # Esperamos a que el agente esté listo para recibir mensajes y le añadimos el resultado de la iteración.
                while response.status not in ["completed", "failed", "expired"]:
                    logger.info(f"Status: {response.status}")
                    response = agente.get_run(response.id, hilo.id)
                    time.sleep(20)
                
                msg = f"""These are the logs generated by your last code: {logs}. Analyze the performance of the spacecraft and how it differs from the desired result. Be guided by the scoring system. Identify the cause of errors in your code and modify it without fear of making major changes. Think deeply about the priorities of your code and how to order and combine them correctly to achieve success."""
                logger.info(msg)    
                agente.add_message(hilo.id, role="user", content=msg)
            
            # Alimentamos el asistente con el error generado en la ejecución del código.    
            except Exception as e:
                logger.exception("Error: %s", e)
                error_trace = traceback.format_exc()
                for call in tool_call:
                    agente.devolver_respuesta(response.id, hilo.id, tool_outputs=[{"tool_call_id": call.id, "output": "ERROR."}]) 
                logger.error(f"Error: {e}.")
                while response.status not in ["completed", "failed", "expired"]:
                    logger.info(f"Status: {response.status}")
                    response = agente.get_run(ejecucion.id, hilo.id)
                    time.sleep(30)
                msg = f"The code generated has an error. Please, try again. Error: {e}. Trace: {error_trace}"   
                logger.error(msg)   
                agente.add_message(hilo.id, role="assistant", content=msg)
    
    agente.mostrar_mensajes(hilo.id)  
    vaciar_agente(agente)
    
    logger.info("\nEjecución finalizada.\n\n")       

    return response


def vaciar_agente(agente):
    for assistant in agente.assistants:
        agente.delete_assistant(assistant)

    for thread in agente.threads:
        agente.delete_thread(thread)
        
    print("Asistente vaciado.")


#### Logger

In [20]:
def configura_log(nombre_archivo):
    """ 
    Configura el logger para que guarde los logs en un archivo y los muestre en la consola.
    
    Args:
        nombre_archivo: str Nombre del archivo donde se guardarán los logs.
        
    Returns:
        logger: logging.Logger Objeto logger configurado.
    """
    # Crear la carpeta logs si no existe
    if not os.path.exists('logs'):
        os.makedirs('logs')
    # Configura el logger
    logging.basicConfig(filename=f'logs/{nombre_archivo}', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
    logger = logging.getLogger()
    
    # Añade un StreamHandler para mostrar los logs en la consola
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)
    
    return logger

### Ejecución con el asistente

Prompt inicial y mensajes del sistema para el asistente.

In [14]:
DESCRIPTION = "You are an expert programer in Pyhton. Your specialty is to generate the code responsible for making decisions about actions to be taken in various spacecraft landing environments.The objective is to land the spacecraft within a target zone in the shortest possible time and very gently. A scoring system is used to evaluate the landings, which must be maximized."
INSTRUCTIONS = f"""Your task is:
1. Analyze and reason about the logs received in the last landing attempts.
2. Your goal is to be able to make the correct decision based on what you have learned from the results of previous iterations. You must code the decision making based on your reasoning in a Python function.
3. IMPORTANT. Use the following tips in your reasoning to achieve a successful landing:
    - First you have to stabilize the falling ship (angle and location), keep falling under control and then land gently at the end.
    - It is mandatory to use all the elements of the array of observations received by parameter in your code when deciding what action to take at any given moment. Both position and velocities must be taken into account to know how the ship is doing and towards which states it is heading. All of these must be considered to achieve stability.
    - Learn how actions taken affect the future states of the spacecraft in the logs of past events so that you can take this into account when developing code to reach the landing zone.
    - The landing zone is in the central area of the x-axis.
    - Carefully choose the actions involving the lateral engines according to the rotation of the spacecraft.
4. You should analyze the performance that appear in the logs of the code you have generated. You should improve the code generated in the 'act' function in the last iteration without fear of making major changes, seeking to maximize the score received and generate a higher quality code.
5. Save the code of the act function in the file 'Action.py' using store_code_in_file function.
"""
# 6. Improve your results and correct also any programming error you may have generated in your last code if they exist.
NAME = "Spacecraft Landing Master"

initial_msg = f"This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. You have to be able to learn from it to land successfully with any other conditions. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}.Take a deep breath and reason step-by-step. After reasoning analyze the results, learn and make better code."

logger = configura_log('Spacecraft_4o_betterprompt.log')
response = create_and_run_llm_loop(initial_msg, logger, model="gpt-4o", num_iterations=5)

print(response)

2024-06-14 18:53:13,248 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-06-14 18:53:13,551 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-06-14 18:53:13,860 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_1e4PP44mCYLDwQguTEhGiInC/messages "HTTP/1.1 200 OK"
2024-06-14 18:53:13,981 - INFO - Iteration: 1
2024-06-14 18:53:14,396 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_1e4PP44mCYLDwQguTEhGiInC/runs "HTTP/1.1 200 OK"
2024-06-14 18:53:14,679 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_1e4PP44mCYLDwQguTEhGiInC/runs/run_2RsqPE6OCJJ0Nr6JJ9Zuhcx2 "HTTP/1.1 200 OK"
2024-06-14 18:53:14,683 - INFO - Status: queued
2024-06-14 18:53:14,959 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_1e4PP44mCYLDwQguTEhGiInC/runs/run_2RsqPE6OCJJ0Nr6JJ9Zuhcx2 "HTTP/1.1 200 OK"
2024-06-14 18:53:34,966 - INFO - Status: queued
2024-06-14 18:53:35,363

Tool call: [RequiredActionFunctionToolCall(id='call_E2RJKgSSV9UiguWfORbHo4lk', function=Function(arguments='{"code": "def act(observation):\\n    \'\'\'\\n    The function that codifies the action to be taken in each instant of time.\\n\\n    Args:\\n        observation (numpy.array):\\n            \\"description\\": \\"The state of the environment after the action is taken.\\",\\n            \\"positions\\": {  \\n                \\"0\\": \\"X position\\",\\n                \\"1\\": \\"Y position\\",\\n                \\"2\\": \\"X velocity\\",\\n                \\"3\\": \\"Y velocity\\",\\n                \\"4\\": \\"Angle\\",\\n                \\"5\\": \\"Angular velocity\\",\\n                \\"6\\": \\"Left contact sensor\\",\\n                \\"7\\": \\"Right contact sensor\\"\\n            },\\n            \\"min_values\\": [-1.5, -1.5, -5.0, -5.0, -3.14, -5.0, 0, 0],\\n            \\"max_values\\": [1.5, 1.5, 5.0, 5.0, 3.14, 5.0, 1, 1]\\n\\n    Returns:\\n        Integer  : T

  logger.warn(


Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4



                                                              

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4
Número de instantes: 141. Tamaño de logs: 71
episode  1 score -25.140 avg score -0.180


2024-06-14 18:54:00,651 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_1e4PP44mCYLDwQguTEhGiInC/runs/run_2RsqPE6OCJJ0Nr6JJ9Zuhcx2/submit_tool_outputs "HTTP/1.1 200 OK"
2024-06-14 18:54:00,657 - INFO - Compilación exitosa.
2024-06-14 18:54:00,659 - INFO - Status: requires_action
2024-06-14 18:54:00,969 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_1e4PP44mCYLDwQguTEhGiInC/runs/run_2RsqPE6OCJJ0Nr6JJ9Zuhcx2 "HTTP/1.1 200 OK"
2024-06-14 18:54:20,973 - INFO - Status: in_progress
2024-06-14 18:54:21,343 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_1e4PP44mCYLDwQguTEhGiInC/runs/run_2RsqPE6OCJJ0Nr6JJ9Zuhcx2 "HTTP/1.1 200 OK"
2024-06-14 18:54:41,349 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": ["{\"instant\": 0, \"action\": 0, \"current status\": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], \"score\": 0.232, \"completed\": false}", "{\"instant\": 2, \"action\": 0, \"current st

Tool call: [RequiredActionFunctionToolCall(id='call_cWammYO8OV37kBKNB0MY7Syx', function=Function(arguments='{"code":"def act(observation):\\n    \'\'\'\\n    The function that codifies the action to be taken in each instant of time.\\n\\n    Args:\\n        observation (numpy.array):\\n            \\"description\\": \\"The state of the environment after the action is taken.\\",\\n            \\"positions\\": {  \\n                \\"0\\": \\"X position\\",\\n                \\"1\\": \\"Y position\\",\\n                \\"2\\": \\"X velocity\\",\\n                \\"3\\": \\"Y velocity\\",\\n                \\"4\\": \\"Angle\\",\\n                \\"5\\": \\"Angular velocity\\",\\n                \\"6\\": \\"Left contact sensor\\",\\n                \\"7\\": \\"Right contact sensor\\"\\n            },\\n            \\"min_values\\": [-1.5, -1.5, -5.0, -5.0, -3.14, -5.0, 0, 0],\\n            \\"max_values\\": [1.5, 1.5, 5.0, 5.0, 3.14, 5.0, 1, 1]\\n\\n    Returns:\\n        Integer  : Th

  logger.warn(


Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4



                                                               

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4
Número de instantes: 154. Tamaño de logs: 77
episode  1 score -15.174 avg score -0.099


2024-06-14 18:55:28,210 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_1e4PP44mCYLDwQguTEhGiInC/runs/run_YKOvfwJVt5KD8FVCuXYAT2c1/submit_tool_outputs "HTTP/1.1 200 OK"
2024-06-14 18:55:28,215 - INFO - Compilación exitosa.
2024-06-14 18:55:28,217 - INFO - Status: requires_action
2024-06-14 18:55:28,516 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_1e4PP44mCYLDwQguTEhGiInC/runs/run_YKOvfwJVt5KD8FVCuXYAT2c1 "HTTP/1.1 200 OK"
2024-06-14 18:55:48,522 - INFO - Status: in_progress
2024-06-14 18:55:48,895 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_1e4PP44mCYLDwQguTEhGiInC/runs/run_YKOvfwJVt5KD8FVCuXYAT2c1 "HTTP/1.1 200 OK"
2024-06-14 18:56:08,900 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": ["{\"instant\": 0, \"action\": 0, \"current status\": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], \"score\": 0.232, \"completed\": false}", "{\"instant\": 2, \"action\": 0, \"current st

Tool call: [RequiredActionFunctionToolCall(id='call_oXGd0cXXIdkYNOebWg8fnkwp', function=Function(arguments='{"code":"def act(observation):\\n    \'\'\'\\n    The function that codifies the action to be taken in each instant of time.\\n\\n    Args:\\n        observation (numpy.array):\\n            \\"description\\": \\"The state of the environment after the action is taken.\\",\\n            \\"positions\\": {  \\n                \\"0\\": \\"X position\\",\\n                \\"1\\": \\"Y position\\",\\n                \\"2\\": \\"X velocity\\",\\n                \\"3\\": \\"Y velocity\\",\\n                \\"4\\": \\"Angle\\",\\n                \\"5\\": \\"Angular velocity\\",\\n                \\"6\\": \\"Left contact sensor\\",\\n                \\"7\\": \\"Right contact sensor\\"\\n            },\\n            \\"min_values\\": [-1.5, -1.5, -5.0, -5.0, -3.14, -5.0, 0, 0],\\n            \\"max_values\\": [1.5, 1.5, 5.0, 5.0, 3.14, 5.0, 1, 1]\\n\\n    Returns:\\n        Integer  : Th

  logger.warn(


Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4



                                                               

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4
Número de instantes: 215. Tamaño de logs: 108
episode  1 score -46.319 avg score -0.216


2024-06-14 18:56:56,992 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_1e4PP44mCYLDwQguTEhGiInC/runs/run_Dhus7fwLTEl5ObcyJyutPEM6/submit_tool_outputs "HTTP/1.1 200 OK"
2024-06-14 18:56:56,996 - INFO - Compilación exitosa.
2024-06-14 18:56:56,997 - INFO - Status: requires_action
2024-06-14 18:56:57,298 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_1e4PP44mCYLDwQguTEhGiInC/runs/run_Dhus7fwLTEl5ObcyJyutPEM6 "HTTP/1.1 200 OK"
2024-06-14 18:57:17,303 - INFO - Status: in_progress
2024-06-14 18:57:17,676 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_1e4PP44mCYLDwQguTEhGiInC/runs/run_Dhus7fwLTEl5ObcyJyutPEM6 "HTTP/1.1 200 OK"
2024-06-14 18:57:37,681 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": ["{\"instant\": 0, \"action\": 0, \"current status\": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], \"score\": 0.232, \"completed\": false}", "{\"instant\": 2, \"action\": 0, \"current st

Tool call: [RequiredActionFunctionToolCall(id='call_Jocr3dYwOc5GZwK9Khj2BQAg', function=Function(arguments='{"code":"def act(observation):\\n    \'\'\'\\n    The function that codifies the action to be taken in each instant of time.\\n\\n    Args:\\n        observation (numpy.array):\\n            \\"description\\": \\"The state of the environment after the action is taken.\\",\\n            \\"positions\\": {  \\n                \\"0\\": \\"X position\\",\\n                \\"1\\": \\"Y position\\",\\n                \\"2\\": \\"X velocity\\",\\n                \\"3\\": \\"Y velocity\\",\\n                \\"4\\": \\"Angle\\",\\n                \\"5\\": \\"Angular velocity\\",\\n                \\"6\\": \\"Left contact sensor\\",\\n                \\"7\\": \\"Right contact sensor\\"\\n            },\\n            \\"min_values\\": [-1.5, -1.5, -5.0, -5.0, -3.14, -5.0, 0, 0],\\n            \\"max_values\\": [1.5, 1.5, 5.0, 5.0, 3.14, 5.0, 1, 1]\\n\\n    Returns:\\n        Integer  : Th

  logger.warn(


Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4



                                                               

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4
Número de instantes: 203. Tamaño de logs: 102
episode  1 score -44.060 avg score -0.218


2024-06-14 18:58:25,367 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_1e4PP44mCYLDwQguTEhGiInC/runs/run_bQkWf8ZOlKoi8hUCtWsh8pRi/submit_tool_outputs "HTTP/1.1 200 OK"
2024-06-14 18:58:25,370 - INFO - Compilación exitosa.
2024-06-14 18:58:25,371 - INFO - Status: requires_action
2024-06-14 18:58:25,669 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_1e4PP44mCYLDwQguTEhGiInC/runs/run_bQkWf8ZOlKoi8hUCtWsh8pRi "HTTP/1.1 200 OK"
2024-06-14 18:58:45,674 - INFO - Status: in_progress
2024-06-14 18:58:46,046 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_1e4PP44mCYLDwQguTEhGiInC/runs/run_bQkWf8ZOlKoi8hUCtWsh8pRi "HTTP/1.1 200 OK"
2024-06-14 18:59:06,051 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": ["{\"instant\": 0, \"action\": 0, \"current status\": [0.008, 1.417, 0.38, 0.131, -0.009, -0.085, 0.0, 0.0], \"score\": 0.232, \"completed\": false}", "{\"instant\": 2, \"action\": 0, \"current st

Tool call: [RequiredActionFunctionToolCall(id='call_5eS5zUeh3eryqHFlhyd0Ohmj', function=Function(arguments='{"code":"def act(observation):\\n    \'\'\'\\n    The function that codifies the action to be taken in each instant of time.\\n\\n    Args:\\n        observation (numpy.array):\\n            \\"description\\": \\"The state of the environment after the action is taken.\\",\\n            \\"positions\\": {  \\n                \\"0\\": \\"X position\\",\\n                \\"1\\": \\"Y position\\",\\n                \\"2\\": \\"X velocity\\",\\n                \\"3\\": \\"Y velocity\\",\\n                \\"4\\": \\"Angle\\",\\n                \\"5\\": \\"Angular velocity\\",\\n                \\"6\\": \\"Left contact sensor\\",\\n                \\"7\\": \\"Right contact sensor\\"\\n            },\\n            \\"min_values\\": [-1.5, -1.5, -5.0, -5.0, -3.14, -5.0, 0, 0],\\n            \\"max_values\\": [1.5, 1.5, 5.0, 5.0, 3.14, 5.0, 1, 1]\\n\\n    Returns:\\n        Integer  : Th

  logger.warn(


Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_5.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_5.mp4\rl-video-episode-0.mp4



                                                               

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_5.mp4\rl-video-episode-0.mp4
Número de instantes: 318. Tamaño de logs: 159
episode  1 score -108.456 avg score -0.342


2024-06-14 18:59:55,215 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_1e4PP44mCYLDwQguTEhGiInC/runs/run_qIUa5OlXlo7N6Xn1yuI4jWOB/submit_tool_outputs "HTTP/1.1 200 OK"
2024-06-14 18:59:55,219 - INFO - Compilación exitosa.
2024-06-14 18:59:55,221 - INFO - Status: requires_action
2024-06-14 18:59:55,474 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_1e4PP44mCYLDwQguTEhGiInC/runs/run_qIUa5OlXlo7N6Xn1yuI4jWOB "HTTP/1.1 200 OK"
2024-06-14 19:00:15,481 - INFO - Status: in_progress
2024-06-14 19:00:15,772 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_1e4PP44mCYLDwQguTEhGiInC/runs/run_qIUa5OlXlo7N6Xn1yuI4jWOB "HTTP/1.1 200 OK"
2024-06-14 19:00:35,785 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": ["{\"instant\": 0, \"action\": 1, \"current status\": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], \"score\": 1.387, \"completed\": false}", "{\"instant\": 2, \"action\": 0, \"current st

user: [TextContentBlock(text=Text(annotations=[], value='These are the logs generated by your last code: {"landing attempt": 1, "logs": ["{\\"instant\\": 0, \\"action\\": 1, \\"current status\\": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], \\"score\\": 1.387, \\"completed\\": false}", "{\\"instant\\": 2, \\"action\\": 0, \\"current status\\": [0.015, 1.421, 0.37, 0.077, -0.011, -0.044, 0.0, 0.0], \\"score\\": 0.237, \\"completed\\": false}", "{\\"instant\\": 4, \\"action\\": 0, \\"current status\\": [0.022, 1.423, 0.37, 0.024, -0.015, -0.044, 0.0, 0.0], \\"score\\": -0.012, \\"completed\\": false}", "{\\"instant\\": 6, \\"action\\": 0, \\"current status\\": [0.03, 1.422, 0.37, -0.029, -0.02, -0.044, 0.0, 0.0], \\"score\\": -0.276, \\"completed\\": false}", "{\\"instant\\": 8, \\"action\\": 0, \\"current status\\": [0.037, 1.419, 0.37, -0.083, -0.024, -0.044, 0.0, 0.0], \\"score\\": -0.534, \\"completed\\": false}", "{\\"instant\\": 10, \\"action\\": 0, \\"current status\\": 

2024-06-14 19:00:37,150 - INFO - HTTP Request: DELETE https://api.openai.com/v1/assistants/asst_PYZJAwtwCaHOqgCRaqe86KlD "HTTP/1.1 200 OK"
2024-06-14 19:00:37,458 - INFO - HTTP Request: DELETE https://api.openai.com/v1/threads/thread_1e4PP44mCYLDwQguTEhGiInC "HTTP/1.1 200 OK"
2024-06-14 19:00:37,462 - INFO - 
Ejecución finalizada.




Asistente vaciado.
Run(id='run_qIUa5OlXlo7N6Xn1yuI4jWOB', assistant_id='asst_PYZJAwtwCaHOqgCRaqe86KlD', cancelled_at=None, completed_at=1718384405, created_at=1718384342, expires_at=None, failed_at=None, incomplete_details=None, instructions="Your task is:\n1. Analyze and reason about the logs received in the last landing attempts.\n2. Your goal is to be able to make the correct decision based on what you have learned from the results of previous iterations. You must code the decision making based on your reasoning in a Python function.\n3. IMPORTANT. Use the following tips in your reasoning to achieve a successful landing:\n    - First you have to stabilize the falling ship (angle and location), keep falling under control and then land gently at the end.\n    - It is mandatory to use all the elements of the array of observations received by parameter in your code when deciding what action to take at any given moment. Both position and velocities must be taken into account to know how 

#### Prueba de cambio grande en el prompt.

In [21]:
DESCRIPTION = "You are an expert spacecraft landing agent. Your specialty is to take the appropriate action at each instant of time based on the environment and state of the spacecraft. To achieve this, you put your knowledge base and the knowledge you acquire by analyzing each execution into a python function in charge of directing the landing."
INSTRUCTIONS = f"""To complete the task you must follow the following steps and indications:
1. Analyze and reason about the records received in the last landing attempts to learn how the spacecraft behaves in the environment. Keep in mind that conditions may vary.
2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:
    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).
    - Increased/decreased the slower/faster the spacecraft is moving.
    - Decreased the more the spacecraft is tilted.
    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engine.
    - Receive 100 points for a successful landing and lose them for crashing.
3. IMPORTANT. Use the following tips in your reasoning to achieve a successful landing:
    - First you have to stabilize the falling ship (angle, speed and location), keep falling under control and then land gently at the end.
    - Use all the elements of the vector of observations, all are relevant to make the right decision.
    - Environment and ship conditions may change but your code must be effective for all cases.
    - Find a balance in your landing policy to maximize your score (must exceed 200 points).
4. Add your decision code to the 'act' method and locate errors in it if the landing is unsuccessful.
5. Save the code of the act function in the file 'Action.py' using store_code_in_file function.
"""
# 6. Improve your results and correct also any programming error you may have generated in your last code if they exist.
NAME = "Spacecraft Landing Master"

initial_msg = f"This is the record of an example of a successful landing in this environment, but under other conditions: {success_logs}. You have to be able to learn from it to land successfully with any other conditions. This is the code of the initial function: {agent_initial_code} and these are the execution logs of one landing attempt: {initial_logs}.Take a deep breath and reason step-by-step. After reasoning analyze the results, learn and make better code."

logger = configura_log('Spacecraft_4o_NuevoPrompt.log')
response = create_and_run_llm_loop(initial_msg, logger, model="gpt-4o", num_iterations=5)

print(response)

2024-06-18 14:52:44,360 - INFO - HTTP Request: POST https://api.openai.com/v1/assistants "HTTP/1.1 200 OK"
2024-06-18 14:52:44,575 - INFO - HTTP Request: POST https://api.openai.com/v1/threads "HTTP/1.1 200 OK"
2024-06-18 14:52:44,864 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_0ivJ9fv4WQAHoR5OQ7OoAUNd/messages "HTTP/1.1 200 OK"
2024-06-18 14:52:45,123 - INFO - Iteration: 1
2024-06-18 14:52:45,541 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_0ivJ9fv4WQAHoR5OQ7OoAUNd/runs "HTTP/1.1 200 OK"
2024-06-18 14:52:45,791 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_0ivJ9fv4WQAHoR5OQ7OoAUNd/runs/run_N7tHCHgszoiazq5XcqbeSyEp "HTTP/1.1 200 OK"
2024-06-18 14:52:45,792 - INFO - Status: queued
2024-06-18 14:52:46,047 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_0ivJ9fv4WQAHoR5OQ7OoAUNd/runs/run_N7tHCHgszoiazq5XcqbeSyEp "HTTP/1.1 200 OK"
2024-06-18 14:53:06,052 - INFO - Status: queued
2024-06-18 14:53:06,448

Tool call: [RequiredActionFunctionToolCall(id='call_sIOOfk2F40FBSUg260Ppbbk5', function=Function(arguments='{"code":"def act(observation):\\n    \'\'\'\\n    The function that codifies the action to be taken in each instant of time.\\n\\n    Args:\\n        observation (numpy.array):\\n            \\"description\\": \\"The state of the environment after the action is taken.\\",\\n            \\"positions\\": {  \\n                \\"0\\": \\"X position\\",\\n                \\"1\\": \\"Y position\\",\\n                \\"2\\": \\"X velocity\\",\\n                \\"3\\": \\"Y velocity\\",\\n                \\"4\\": \\"Angle\\",\\n                \\"5\\": \\"Angular velocity\\",\\n                \\"6\\": \\"Left contact sensor\\",\\n                \\"7\\": \\"Right contact sensor\\"\\n            },\\n            \\"min_values\\": [-1.5, -1.5, -5.0, -5.0, -3.14, -5.0, 0, 0],\\n            \\"max_values\\": [1.5, 1.5, 5.0, 5.0, 3.14, 5.0, 1, 1]\\n\\n    Returns:\\n        Integer  : Th

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_1.mp4\rl-video-episode-0.mp4
Número de instantes: 73. Tamaño de logs: 37
episode  1 score -612.950 avg score -8.513


2024-06-18 14:53:30,045 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_0ivJ9fv4WQAHoR5OQ7OoAUNd/runs/run_N7tHCHgszoiazq5XcqbeSyEp/submit_tool_outputs "HTTP/1.1 200 OK"
2024-06-18 14:53:30,047 - INFO - Compilación exitosa.
2024-06-18 14:53:30,047 - INFO - Status: requires_action
2024-06-18 14:53:30,296 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_0ivJ9fv4WQAHoR5OQ7OoAUNd/runs/run_N7tHCHgszoiazq5XcqbeSyEp "HTTP/1.1 200 OK"
2024-06-18 14:53:50,298 - INFO - Status: in_progress
2024-06-18 14:53:50,562 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_0ivJ9fv4WQAHoR5OQ7OoAUNd/runs/run_N7tHCHgszoiazq5XcqbeSyEp "HTTP/1.1 200 OK"
2024-06-18 14:54:10,566 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": ["{\"time\": 0, \"action\": 1, \"current status\": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], \"score\": 1.387, \"completed\": false}", "{\"time\": 2, \"action\": 1, \"current status\"

Tool call: [RequiredActionFunctionToolCall(id='call_Q1w7kzBnN6NghtIrQA4rXOvc', function=Function(arguments='{"code":"def act(observation):\\n    \'\'\'\\n    The function that codifies the action to be taken in each instant of time.\\n\\n    Args:\\n        observation (numpy.array):\\n            \\"description\\": \\"The state of the environment after the action is taken.\\",\\n            \\"positions\\": {  \\n                \\"0\\": \\"X position\\",\\n                \\"1\\": \\"Y position\\",\\n                \\"2\\": \\"X velocity\\",\\n                \\"3\\": \\"Y velocity\\",\\n                \\"4\\": \\"Angle\\",\\n                \\"5\\": \\"Angular velocity\\",\\n                \\"6\\": \\"Left contact sensor\\",\\n                \\"7\\": \\"Right contact sensor\\"\\n            },\\n            \\"min_values\\": [-1.5, -1.5, -5.0, -5.0, -3.14, -5.0, 0, 0],\\n            \\"max_values\\": [1.5, 1.5, 5.0, 5.0, 3.14, 5.0, 1, 1]\\n\\n    Returns:\\n        Integer  : Th

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_2.mp4\rl-video-episode-0.mp4
Número de instantes: 80. Tamaño de logs: 40
episode  1 score -267.002 avg score -3.380


2024-06-18 14:54:55,628 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_0ivJ9fv4WQAHoR5OQ7OoAUNd/runs/run_tDewnALqH8lZdS4FBZQtK0Ib/submit_tool_outputs "HTTP/1.1 200 OK"
2024-06-18 14:54:55,631 - INFO - Compilación exitosa.
2024-06-18 14:54:55,632 - INFO - Status: requires_action
2024-06-18 14:54:55,838 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_0ivJ9fv4WQAHoR5OQ7OoAUNd/runs/run_tDewnALqH8lZdS4FBZQtK0Ib "HTTP/1.1 200 OK"
2024-06-18 14:55:15,842 - INFO - Status: in_progress
2024-06-18 14:55:16,173 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_0ivJ9fv4WQAHoR5OQ7OoAUNd/runs/run_tDewnALqH8lZdS4FBZQtK0Ib "HTTP/1.1 200 OK"
2024-06-18 14:55:36,178 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": ["{\"time\": 0, \"action\": 1, \"current status\": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], \"score\": 1.387, \"completed\": false}", "{\"time\": 2, \"action\": 1, \"current status\"

Tool call: [RequiredActionFunctionToolCall(id='call_AHOJtnGy84Bh19VDB0O8ZXJt', function=Function(arguments='{"code":"def act(observation):\\n    \'\'\'\\n    The function that codifies the action to be taken in each instant of time.\\n\\n    Args:\\n        observation (numpy.array):\\n            \\"description\\": \\"The state of the environment after the action is taken.\\",\\n            \\"positions\\": {  \\n                \\"0\\": \\"X position\\",\\n                \\"1\\": \\"Y position\\",\\n                \\"2\\": \\"X velocity\\",\\n                \\"3\\": \\"Y velocity\\",\\n                \\"4\\": \\"Angle\\",\\n                \\"5\\": \\"Angular velocity\\",\\n                \\"6\\": \\"Left contact sensor\\",\\n                \\"7\\": \\"Right contact sensor\\"\\n            },\\n            \\"min_values\\": [-1.5, -1.5, -5.0, -5.0, -3.14, -5.0, 0, 0],\\n            \\"max_values\\": [1.5, 1.5, 5.0, 5.0, 3.14, 5.0, 1, 1]\\n\\n    Returns:\\n        Integer  : Th

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_3.mp4\rl-video-episode-0.mp4
Número de instantes: 73. Tamaño de logs: 37
episode  1 score -612.950 avg score -8.513


2024-06-18 14:56:21,650 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_0ivJ9fv4WQAHoR5OQ7OoAUNd/runs/run_kPm522eA5oVZYowjUsa4xWBy/submit_tool_outputs "HTTP/1.1 200 OK"
2024-06-18 14:56:21,652 - INFO - Compilación exitosa.
2024-06-18 14:56:21,652 - INFO - Status: requires_action
2024-06-18 14:56:21,905 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_0ivJ9fv4WQAHoR5OQ7OoAUNd/runs/run_kPm522eA5oVZYowjUsa4xWBy "HTTP/1.1 200 OK"
2024-06-18 14:56:41,907 - INFO - Status: in_progress
2024-06-18 14:56:42,216 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_0ivJ9fv4WQAHoR5OQ7OoAUNd/runs/run_kPm522eA5oVZYowjUsa4xWBy "HTTP/1.1 200 OK"
2024-06-18 14:57:02,220 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": ["{\"time\": 0, \"action\": 1, \"current status\": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], \"score\": 1.387, \"completed\": false}", "{\"time\": 2, \"action\": 1, \"current status\"

Tool call: [RequiredActionFunctionToolCall(id='call_oIBfvJEsCkpSOOaHOgPonj3m', function=Function(arguments='{"code":"def act(observation):\\n    \'\'\'\\n    The function that codifies the action to be taken in each instant of time.\\n\\n    Args:\\n        observation (numpy.array):\\n            \\"description\\": \\"The state of the environment after the action is taken.\\",\\n            \\"positions\\": {  \\n                \\"0\\": \\"X position\\",\\n                \\"1\\": \\"Y position\\",\\n                \\"2\\": \\"X velocity\\",\\n                \\"3\\": \\"Y velocity\\",\\n                \\"4\\": \\"Angle\\",\\n                \\"5\\": \\"Angular velocity\\",\\n                \\"6\\": \\"Left contact sensor\\",\\n                \\"7\\": \\"Right contact sensor\\"\\n            },\\n            \\"min_values\\": [-1.5, -1.5, -5.0, -5.0, -3.14, -5.0, 0, 0],\\n            \\"max_values\\": [1.5, 1.5, 5.0, 5.0, 3.14, 5.0, 1, 1]\\n\\n    Returns:\\n        Integer  : Th

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_4.mp4\rl-video-episode-0.mp4
Número de instantes: 73. Tamaño de logs: 37
episode  1 score -612.950 avg score -8.513


2024-06-18 14:57:47,179 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_0ivJ9fv4WQAHoR5OQ7OoAUNd/runs/run_8O42QO6O1VziCpUc9aYAhxNr/submit_tool_outputs "HTTP/1.1 200 OK"
2024-06-18 14:57:47,180 - INFO - Compilación exitosa.
2024-06-18 14:57:47,181 - INFO - Status: requires_action
2024-06-18 14:57:47,395 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_0ivJ9fv4WQAHoR5OQ7OoAUNd/runs/run_8O42QO6O1VziCpUc9aYAhxNr "HTTP/1.1 200 OK"
2024-06-18 14:58:07,399 - INFO - Status: in_progress
2024-06-18 14:58:07,726 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_0ivJ9fv4WQAHoR5OQ7OoAUNd/runs/run_8O42QO6O1VziCpUc9aYAhxNr "HTTP/1.1 200 OK"
2024-06-18 14:58:27,729 - INFO - These are the logs generated by your last code: {"landing attempt": 1, "logs": ["{\"time\": 0, \"action\": 1, \"current status\": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], \"score\": 1.387, \"completed\": false}", "{\"time\": 2, \"action\": 1, \"current status\"

Tool call: [RequiredActionFunctionToolCall(id='call_xHyywOpKxaPs6fkgZKFBI8cF', function=Function(arguments='{"code":"def act(observation):\\n    \'\'\'\\n    The function that codifies the action to be taken in each instant of time.\\n\\n    Args:\\n        observation (numpy.array):\\n            \\"description\\": \\"The state of the environment after the action is taken.\\",\\n            \\"positions\\": {  \\n                \\"0\\": \\"X position\\",\\n                \\"1\\": \\"Y position\\",\\n                \\"2\\": \\"X velocity\\",\\n                \\"3\\": \\"Y velocity\\",\\n                \\"4\\": \\"Angle\\",\\n                \\"5\\": \\"Angular velocity\\",\\n                \\"6\\": \\"Left contact sensor\\",\\n                \\"7\\": \\"Right contact sensor\\"\\n            },\\n            \\"min_values\\": [-1.5, -1.5, -5.0, -5.0, -3.14, -5.0, 0, 0],\\n            \\"max_values\\": [1.5, 1.5, 5.0, 5.0, 3.14, 5.0, 1, 1]\\n\\n    Returns:\\n        Integer  : Th

  logger.warn(


Semilla 38
Moviepy - Building video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_5.mp4\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_5.mp4\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\adria\Repos\TFM__LLM_landing_self-refinement\video\iteration_5.mp4\rl-video-episode-0.mp4
Número de instantes: 73. Tamaño de logs: 37
episode  1 score -613.255 avg score -8.517


2024-06-18 14:59:32,949 - INFO - HTTP Request: POST https://api.openai.com/v1/threads/thread_0ivJ9fv4WQAHoR5OQ7OoAUNd/runs/run_Q58lgB9pPlOnfn1rWFtZKVzn/submit_tool_outputs "HTTP/1.1 200 OK"
2024-06-18 14:59:32,952 - INFO - Compilación exitosa.
2024-06-18 14:59:32,953 - INFO - Status: requires_action
2024-06-18 14:59:33,199 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_0ivJ9fv4WQAHoR5OQ7OoAUNd/runs/run_Q58lgB9pPlOnfn1rWFtZKVzn "HTTP/1.1 200 OK"
2024-06-18 14:59:53,200 - INFO - Status: in_progress
2024-06-18 14:59:53,489 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_0ivJ9fv4WQAHoR5OQ7OoAUNd/runs/run_Q58lgB9pPlOnfn1rWFtZKVzn "HTTP/1.1 200 OK"
2024-06-18 15:00:13,492 - INFO - Status: in_progress
2024-06-18 15:00:13,790 - INFO - HTTP Request: GET https://api.openai.com/v1/threads/thread_0ivJ9fv4WQAHoR5OQ7OoAUNd/runs/run_Q58lgB9pPlOnfn1rWFtZKVzn "HTTP/1.1 200 OK"
2024-06-18 15:00:33,793 - INFO - These are the logs generated by your last code: {"lan

user: [TextContentBlock(text=Text(annotations=[], value='These are the logs generated by your last code: {"landing attempt": 1, "logs": ["{\\"time\\": 0, \\"action\\": 1, \\"current status\\": [0.007, 1.417, 0.37, 0.131, -0.007, -0.044, 0.0, 0.0], \\"score\\": 1.387, \\"completed\\": false}", "{\\"time\\": 2, \\"action\\": 1, \\"current status\\": [0.015, 1.421, 0.353, 0.077, -0.006, 0.026, 0.0, 0.0], \\"score\\": 1.473, \\"completed\\": false}", "{\\"time\\": 4, \\"action\\": 1, \\"current status\\": [0.021, 1.423, 0.332, 0.024, 0.003, 0.11, 0.0, 0.0], \\"score\\": 1.038, \\"completed\\": false}", "{\\"time\\": 6, \\"action\\": 1, \\"current status\\": [0.028, 1.422, 0.312, -0.03, 0.02, 0.189, 0.0, 0.0], \\"score\\": 0.039, \\"completed\\": false}", "{\\"time\\": 8, \\"action\\": 1, \\"current status\\": [0.034, 1.419, 0.29, -0.084, 0.046, 0.277, 0.0, 0.0], \\"score\\": -0.807, \\"completed\\": false}", "{\\"time\\": 10, \\"action\\": 1, \\"current status\\": [0.04, 1.414, 0.272, -0.1

2024-06-18 15:00:34,909 - INFO - HTTP Request: DELETE https://api.openai.com/v1/assistants/asst_7talA5FLrzwGWLbSVt9BReLW "HTTP/1.1 200 OK"
2024-06-18 15:00:35,132 - INFO - HTTP Request: DELETE https://api.openai.com/v1/threads/thread_0ivJ9fv4WQAHoR5OQ7OoAUNd "HTTP/1.1 200 OK"
2024-06-18 15:00:35,135 - INFO - 
Ejecución finalizada.




Asistente vaciado.
Run(id='run_Q58lgB9pPlOnfn1rWFtZKVzn', assistant_id='asst_7talA5FLrzwGWLbSVt9BReLW', cancelled_at=None, completed_at=1718715597, created_at=1718715508, expires_at=None, failed_at=None, incomplete_details=None, instructions="To complete the task you must follow the following steps and indications:\n1. Analyze and reason about the records received in the last landing attempts to learn how the spacecraft behaves in the environment. Keep in mind that conditions may vary.\n2. To measure how good a landing is, a scoring system that appears in the logs is used, your role is to maximize it. To do so, keep the following in mind:\n    - Increases/decreases the closer/further the spacecraft is from the landing area (both axes).\n    - Increased/decreased the slower/faster the spacecraft is moving.\n    - Decreased the more the spacecraft is tilted.\n    - Decreased by 0.03 points each frame a side engine is firing and 0.3 points for the center engine.\n    - Receive 100 points 