# Configuración e imports

In [1]:
import sys
import os

In [2]:
# Put the parent of the current working directory on the import path.
# Note this is one level ABOVE the folder the kernel runs in, not the
# notebook's own folder (presumably where the project packages live — confirm).
working_dir = os.getcwd()
notebook_dir = os.path.dirname(working_dir)
if notebook_dir not in sys.path:
    # Guarded so re-running the cell does not add duplicate entries.
    sys.path.append(notebook_dir)

In [3]:
import pandas as pd
from environment.sailing_env import SailingEnv
from environment.wind_model import WindField
from environment.polar_diagram import PolarDiagram
from agents.ddpg import DDPGAgent
from training.train_drl import train_ddpg


In [4]:
# Import TensorFlow and print the installed version.
import tensorflow as tf
print("Versión de TensorFlow:", tf.__version__)

Versión de TensorFlow: 2.19.0


In [5]:
import tensorflow as tf

# Ask TensorFlow which GPU devices it can see; an empty list means none.
gpus = tf.config.list_physical_devices('GPU')
if not gpus:
    print("⚠️ No se ha detectado GPU. El entrenamiento usará CPU.")
else:
    # Only the first detected device is reported.
    print(f"✅ GPU detectada: {gpus[0].name}")

⚠️ No se ha detectado GPU. El entrenamiento usará CPU.


# Carga de datos

In [6]:
# Open-Meteo wind data, read into a DataFrame.
wind_csv = "../../data/graphs/nodes_bathy_wind20250504.csv"
df_wind = pd.read_csv(wind_csv)

# Polar diagram, loaded from its CSV file.
polar_csv = '../../data/graphs/polar_diagram.csv'
polar = PolarDiagram(polar_csv)

# Wind model built from the wind DataFrame.
wind = WindField(df_wind)

# Folder holding the teacher's (expert) route CSV files.
folder_path = "../../data/expert_trajectories"

# Inicialización del entorno

- El barco empieza y termina dentro del área navegable.
- El episodio dura como máximo 24 horas (con la configuración de abajo: 135 pasos × 10 minutos = 22 h 30 min).
- Las consultas de viento están cubiertas por tu CSV de Open-Meteo.

In [7]:
# Environment settings, collected in one dict and handed to SailingEnv.
# NOTE(review): if start/goal are [lat, lon] in degrees, the straight-line
# distance is roughly 185 nm, i.e. ~8 kn average over 22.5 h; the step logs
# recorded in this notebook show 3-7 kn — confirm the goal is reachable
# within max_steps.
env_config = {
    'start': [38.5, 1.0],
    'goal': [40.0, 4.5],
    'goal_threshold': 0.01,
    'dt': 10,
    'max_steps': 135,  # 135 x 10 = 1350 minutes = 22 h 30 min
    'continuous': True,
    'polar_diagram': polar,
    'wind': wind,
    # presumably enables the per-step "[STEP LOG]" lines — verify
    'debug': True,
}
env = SailingEnv(env_config)


  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


# Creación del agente

In [8]:
# Build the DDPG agent for the environment created above.
agent = DDPGAgent(env)

# Entrenamiento

## Entrenamiento con aprendizaje por imitación

In [9]:
from agents.ddpg import get_actor
from training.imitation_learning import train_actor_supervised

# Build an untrained actor network.
actor_input_shape = 6
actor_action_bounds = [360, 20]
actor_model = get_actor(
    input_shape=actor_input_shape,
    action_bounds=actor_action_bounds,
)

# Fit the actor, supervised, on all the expert routes found in folder_path.
# NOTE(review): no later cell shown here reads trained_actor; the RL stage
# trains `agent` instead — confirm whether these weights are meant to
# initialise agent.actor_model.
imitation_epochs = 50
trained_actor = train_actor_supervised(
    actor_model,
    folder_path,
    epochs=imitation_epochs,
)

[INFO] Cargando 20 archivos de datos de experto desde: ../../data/expert_trajectories
Epoch 1/50 - Loss: 0.7968
Epoch 2/50 - Loss: 0.7967
Epoch 3/50 - Loss: 0.7967
Epoch 4/50 - Loss: 0.7967
Epoch 5/50 - Loss: 0.7959
Epoch 6/50 - Loss: 0.7357
Epoch 7/50 - Loss: 0.5078
Epoch 8/50 - Loss: 0.5230
Epoch 9/50 - Loss: 0.5230
Epoch 10/50 - Loss: 0.5230
Epoch 11/50 - Loss: 0.5230
Epoch 12/50 - Loss: 0.5230
Epoch 13/50 - Loss: 0.5230
Epoch 14/50 - Loss: 0.5230
Epoch 15/50 - Loss: 0.5230
Epoch 16/50 - Loss: 0.5230
Epoch 17/50 - Loss: 0.5230
Epoch 18/50 - Loss: 0.5230
Epoch 19/50 - Loss: 0.5230
Epoch 20/50 - Loss: 0.5230
Epoch 21/50 - Loss: 0.5230
Epoch 22/50 - Loss: 0.5230
Epoch 23/50 - Loss: 0.5230
Epoch 24/50 - Loss: 0.5230
Epoch 25/50 - Loss: 0.5230
Epoch 26/50 - Loss: 0.5230
Epoch 27/50 - Loss: 0.5230
Epoch 28/50 - Loss: 0.5230
Epoch 29/50 - Loss: 0.5230
Epoch 30/50 - Loss: 0.5230
Epoch 31/50 - Loss: 0.5230
Epoch 32/50 - Loss: 0.5230
Epoch 33/50 - Loss: 0.5230
Epoch 34/50 - Loss: 0.5230
Epoch

## Entrenamiento por refuerzo (Entorno simulado)

In [None]:
# Train the DDPG agent on the environment for 100 episodes.
# The return value is plotted later as the reward per episode.
rewards = train_ddpg(agent, env, episodes=100)

[STEP LOG] dist=3.8041, heel=0.09°, angle_diff=65.9°, speed=3.30 kn
[STEP LOG] dist=3.8001, heel=0.11°, angle_diff=65.1°, speed=3.30 kn
[STEP LOG] dist=3.7958, heel=0.27°, angle_diff=64.3°, speed=3.47 kn
[STEP LOG] dist=3.7913, heel=0.30°, angle_diff=63.5°, speed=3.47 kn
[STEP LOG] dist=3.7854, heel=0.20°, angle_diff=62.7°, speed=4.41 kn
[STEP LOG] dist=3.7794, heel=0.17°, angle_diff=61.9°, speed=4.41 kn
[STEP LOG] dist=3.7731, heel=0.13°, angle_diff=61.2°, speed=4.41 kn
[STEP LOG] dist=3.7667, heel=0.06°, angle_diff=60.3°, speed=4.35 kn
[STEP LOG] dist=3.7600, heel=0.10°, angle_diff=59.5°, speed=4.35 kn
[STEP LOG] dist=3.7532, heel=0.14°, angle_diff=58.7°, speed=4.35 kn
[STEP LOG] dist=3.7453, heel=0.45°, angle_diff=57.9°, speed=4.91 kn
[STEP LOG] dist=3.7373, heel=0.36°, angle_diff=57.2°, speed=4.87 kn
[STEP LOG] dist=3.7292, heel=0.23°, angle_diff=56.4°, speed=4.74 kn
[STEP LOG] dist=3.7210, heel=0.20°, angle_diff=55.7°, speed=4.74 kn
[STEP LOG] dist=3.7126, heel=0.16°, angle_diff=5



[STEP LOG] dist=2.5520, heel=3.56°, angle_diff=114.8°, speed=7.16 kn
[STEP LOG] dist=2.5596, heel=3.56°, angle_diff=116.4°, speed=7.16 kn
Episode 30: Reward = -439.15, Buffer = 4050
[INFO] Modelos guardados en episodio 30
[STEP LOG] dist=3.8041, heel=0.09°, angle_diff=65.7°, speed=3.30 kn
[STEP LOG] dist=3.8000, heel=0.13°, angle_diff=64.7°, speed=3.30 kn
[STEP LOG] dist=3.7956, heel=0.29°, angle_diff=63.7°, speed=3.47 kn
[STEP LOG] dist=3.7910, heel=0.32°, angle_diff=62.7°, speed=3.47 kn
[STEP LOG] dist=3.7849, heel=0.17°, angle_diff=61.8°, speed=4.41 kn
[STEP LOG] dist=3.7785, heel=0.12°, angle_diff=60.8°, speed=4.41 kn
[STEP LOG] dist=3.7719, heel=0.08°, angle_diff=59.9°, speed=4.41 kn
[STEP LOG] dist=3.7652, heel=0.12°, angle_diff=58.9°, speed=4.35 kn
[STEP LOG] dist=3.7582, heel=0.16°, angle_diff=57.9°, speed=4.35 kn
[STEP LOG] dist=3.7509, heel=0.20°, angle_diff=56.9°, speed=4.35 kn
[STEP LOG] dist=3.7426, heel=0.32°, angle_diff=55.9°, speed=4.87 kn
[STEP LOG] dist=3.7339, heel=0

KeyboardInterrupt: 

# Visualización

In [None]:
import matplotlib.pyplot as plt

# Draw the training rewards on the current axes (created on demand),
# using the Axes methods that the pyplot helpers delegate to.
ax = plt.gca()
ax.plot(rewards)
ax.set_title("Recompensa por episodio")
ax.set_xlabel("Episodio")
ax.set_ylabel("Reward total")
ax.grid()
plt.show()


# Evaluación (opcional)

In [None]:
# Roll out the agent's actor for one episode and sum the rewards.
state = env.reset()
total_reward = 0

# Throwaway values get explicit names instead of `_`: the original reused `_`
# for both the loop index and the step info, and assigning `_` in an IPython
# kernel shadows the last-output shortcut.
for _step in range(env.config['max_steps']):
    # Wrap the state in a list to add a leading dimension, then take
    # element 0 of the actor output as the action.
    tf_state = tf.convert_to_tensor([state], dtype=tf.float32)
    action = agent.actor_model(tf_state)[0].numpy()
    # step() is unpacked as a 4-tuple; the last item is not used here.
    state, reward, done, _info = env.step(action)
    total_reward += reward
    env.render()
    if done:
        break

print(f"Recompensa total: {total_reward:.2f}")