# Configuración e imports

In [1]:
import sys
import os

In [2]:
# Make the project packages importable from this notebook.
# NOTE: this is the *parent* of the current working directory (one level above
# the folder the notebook runs in), not the notebook's own folder.
notebook_dir, _cwd_leaf = os.path.split(os.getcwd())
# Register the directory only once so re-running the cell does not pile up
# duplicate sys.path entries.
already_registered = notebook_dir in sys.path
if not already_registered:
    sys.path.append(notebook_dir)

In [3]:
import pandas as pd
from environment.sailing_env import SailingEnv
from environment.wind_field import MultiDayWindField
from environment.random_wind_wrapper import RandomizedWindWrapper
from environment.polar_diagram import PolarDiagram
from agents.ddpg import DDPGAgent
from training.train_drl import train_ddpg

2025-06-27 22:20:07.302686: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-06-27 22:20:07.320199: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1751062807.342079 1734888 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1751062807.349302 1734888 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1751062807.365563 1734888 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [4]:
import tensorflow as tf

# Report the installed TensorFlow version so the run is traceable.
print(f"Versión de TensorFlow: {tf.__version__}")

Versión de TensorFlow: 2.19.0


In [5]:
import tensorflow as tf

# Ask TensorFlow which physical GPUs it can see and report the first one;
# when the list is empty, print the CPU warning instead.
gpus = tf.config.list_physical_devices('GPU')
if not gpus:
    print("⚠️ No se ha detectado GPU. El entrenamiento usará CPU.")
else:
    print(f"✅ GPU detectada: {gpus[0].name}")

✅ GPU detectada: /physical_device:GPU:0


# Carga de datos

In [6]:
# Folder with the instructor-provided route CSVs (consumed later by the
# imitation-learning cell).
folder_path = "../../data/expert_trajectories"

# Polar diagram loaded from its CSV file.
polar = PolarDiagram('../../data/polar_diagram.csv')

In [7]:
# Definir rutas por fecha (ajusta con tus archivos)
csv_paths = {
    "2025-05-04": "../../data/processed/nodes_bathy_wind20250504.csv",
    "2025-05-04": "../../data/processed/nodes_bathy_wind20250507.csv",
    "2025-05-09": "../../data/processed/nodes_bathy_wind20250509.csv",
    "2025-05-11": "../../data/processed/nodes_bathy_wind20250511.csv",
    "2025-05-13": "../../data/processed/nodes_bathy_wind20250513.csv",
    "2025-05-15": "../../data/processed/nodes_bathy_wind20250515.csv",
    "2025-05-17": "../../data/processed/nodes_bathy_wind20250517.csv",
    "2025-05-19": "../../data/processed/nodes_bathy_wind20250519.csv",
    "2025-05-23": "../../data/processed/nodes_bathy_wind20250523.csv",
    "2025-05-25": "../../data/processed/nodes_bathy_wind20250525.csv",
    "2025-05-27": "../../data/processed/nodes_bathy_wind20250527.csv",
    "2025-05-28": "../../data/processed/nodes_bathy_wind20250528.csv",
    "2025-05-30": "../../data/processed/nodes_bathy_wind20250530.csv",
    "2025-06-01": "../../data/processed/nodes_bathy_wind20250601.csv"
}

# Build the multi-day wind field from the day -> CSV path mapping in `csv_paths`.
wind_field = MultiDayWindField(csv_paths)

# Inicialización del entorno

- El barco empieza y termina dentro del área navegable.
- El episodio dura como máximo 24 horas.
- Las consultas de viento están cubiertas por tu CSV de Open-Meteo.

In [8]:
# Base configuration handed to the simulated environment.
# NOTE(review): `start`/`goal` look like [lat, lon] pairs and `dt` like a time
# step, but the units are defined by SailingEnv — confirm there.
config_base = dict(
    start=[38.5, 1.0],
    goal=[40.0, 4.5],
    dt=10,
    max_steps=135,       # also read by the evaluation loop as its step limit
    polar_diagram=polar,
    debug=True,
)

In [9]:
# Create the environment whose wind conditions change from episode to episode.
# The wrapper receives the SailingEnv class itself (not an instance) together
# with the base config and the multi-day wind field.
env = RandomizedWindWrapper(SailingEnv, config_base, wind_field)

[RANDOMIZED WIND] Episodio con condiciones del día: 2025-05-11
[WIND FIELD] Usando datos de viento para el día: 2025-05-11


  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


# Creación del agente

In [10]:
# Instantiate the DDPG agent for the randomized-wind environment.
agent = DDPGAgent(env)

I0000 00:00:1751062812.385143 1734888 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 45463 MB memory:  -> device: 0, name: NVIDIA RTX A6000, pci bus id: 0000:5b:00.0, compute capability: 8.6


# Entrenamiento

## Entrenamiento con aprendizaje por imitación

In [None]:
from agents.ddpg import get_actor
from training.imitation_learning import train_actor_supervised

# Create an untrained actor.
# NOTE(review): input_shape=6 and action_bounds=[360, 20] are hard-coded here;
# their meaning is defined by get_actor (360 suggests a heading in degrees) —
# TODO confirm they match the environment's observation/action spaces.
actor_model = get_actor(input_shape=6, action_bounds=[360, 20])

# Train the actor (supervised, 200 epochs) on the routes found in `folder_path`.
# NOTE(review): `trained_actor` is not referenced by any later cell visible in
# this notebook, and `agent` was created before this cell — confirm whether the
# pretrained actor is meant to be loaded into the agent before RL training.
trained_actor = train_actor_supervised(actor_model, folder_path, epochs=200)

## Entrenamiento por refuerzo (Entorno simulado)

In [None]:
# Run DDPG training for 700 episodes; the returned value is plotted below as
# one reward per episode.
rewards = train_ddpg(agent, env, episodes=700)

# Visualización

In [None]:
import matplotlib.pyplot as plt

# Learning curve: one point per training episode, drawn through the explicit
# figure/axes interface.
fig, ax = plt.subplots()
ax.plot(rewards)
ax.set_title("Recompensa por episodio")
ax.set_xlabel("Episodio")
ax.set_ylabel("Reward total")
ax.grid()
plt.show()


# (opcional) Evaluación

In [None]:
# Roll out one episode with the actor's raw output (no exploration noise is
# added in this cell) and accumulate the reward.
total_reward = 0
state = env.reset()

for _step in range(env.config['max_steps']):
    # The actor is called on a batch, so wrap the single state in a list and
    # take row 0 of the result.
    batched_state = tf.convert_to_tensor([state], dtype=tf.float32)
    actor_output = agent.actor_model(batched_state)
    action = actor_output[0].numpy()

    state, reward, done, _ = env.step(action)
    total_reward = total_reward + reward
    env.render()

    # Stop as soon as the environment reports the episode is over.
    if done:
        break

print(f"Recompensa total: {total_reward:.2f}")