# Configuración e imports

In [10]:
import sys
import os

In [11]:
# Take the parent of the current working directory (one level above the
# folder the kernel is running in) and make it importable.
# NOTE(review): assumes the kernel's cwd is the notebook folder — confirm.
notebook_dir = os.path.split(os.getcwd())[0]
# Append only when absent so that re-running this cell does not add duplicates.
if sys.path.count(notebook_dir) == 0:
    sys.path.append(notebook_dir)

In [12]:
import pandas as pd
from environment.sailing_env import SailingEnv
from environment.wind_model import WindField
from environment.polar_diagram import PolarDiagram
from agents.ddpg import DDPGAgent
from training.train_drl import train_ddpg


# Carga de datos

In [None]:
# NOTE(review): both paths are relative to the kernel's working directory —
# confirm that `data/` is located there.

# Polar diagram, built from the CSV file path.
polar = PolarDiagram("data/polar/conrad1200.csv")

# Open-Meteo wind forecast table, then the wind model built on top of it.
df_wind = pd.read_csv("data/wind_forecast.csv")
wind = WindField(df_wind)

# Inicialización del entorno

In [None]:
# Gather the environment settings in one dictionary, then build the environment.
# NOTE(review): units of 'start'/'goal', 'goal_threshold' and 'dt' are not
# visible from this notebook — confirm against SailingEnv.
env_config = {
    'start': [43.0, 7.0],
    'goal': [43.2, 7.5],
    'goal_threshold': 0.01,
    'dt': 10,
    'max_steps': 200,           # also read back later via env.config['max_steps']
    'polar_diagram': polar,     # PolarDiagram loaded in the data cell
    'wind': wind,               # WindField loaded in the data cell
    'continuous': True,
    'grid': None,
}
env = SailingEnv(env_config)

# Creación del agente

In [None]:
# Build the DDPG agent from the environment created above.
agent = DDPGAgent(env)

# Entrenamiento

In [None]:
# Train the agent on the environment for 300 episodes; the returned value is
# plotted against the episode index in the visualization cell.
# NOTE(review): no random seed is set anywhere in the visible notebook, so
# runs are presumably not reproducible — confirm whether train_ddpg seeds itself.
rewards = train_ddpg(agent, env, episodes=300)

# Visualización

In [None]:
import matplotlib.pyplot as plt

# Draw the training rewards on an explicit figure/axes pair.
fig, ax = plt.subplots()
ax.plot(rewards)
ax.set(
    title="Recompensa por episodio",
    xlabel="Episodio",
    ylabel="Reward total",
)
ax.grid()
plt.show()


# (opcional) Evaluación

In [None]:
# TensorFlow is imported here because this cell uses `tf` and no other visible
# cell imports it; without this line a fresh kernel fails with NameError.
import tensorflow as tf

# Roll out one episode, taking the actor network's output as the action at
# every step, and accumulate the reward.
state = env.reset()
total_reward = 0

# Named loop/discard variables are used instead of `_` so that IPython's
# last-output variable `_` is not overwritten.
for _step in range(env.config['max_steps']):
    # The actor is called on a batch holding a single state; [0] takes that
    # single result back out before converting it to a NumPy array.
    tf_state = tf.convert_to_tensor([state], dtype=tf.float32)
    action = agent.actor_model(tf_state)[0].numpy()
    # The fourth element returned by step() is not used here.
    state, reward, done, _info = env.step(action)
    total_reward += reward
    env.render()
    if done:
        break

print(f"Recompensa total: {total_reward:.2f}")