In [None]:
from pyproj import datadir
datadir.set_data_dir("/home/jupyter-daniela/.conda/envs/peru_environment/share/proj")
from pathlib import Path
import matplotlib.pyplot as plt
import xarray as xr
import numpy as np
import pandas as pd
import geopandas as gpd
import cmocean
import calendar

In [None]:

# path_features = Path("/home/jupyter-daniela/suyana/peru_production/features/")

# archivos = [
#     f for f in path_features.glob("hycom_*.nc")
#     if re.match(r"hycom_(1ra|2da)-\d{4}\.nc$", f.name)
# ]

# def orden_temporada(f):
#     m = re.match(r"hycom_(1ra|2da)-(\d{4})\.nc$", f.name)
#     if m:
#         temporada, anio = m.groups()
#         orden_t = 0 if temporada == "1ra" else 1
#         return int(anio), orden_t
#     return (9999, 9)

# archivos_ordenados = sorted(archivos, key=orden_temporada)


## Gráfico de calas y salinidad por temporada

In [None]:
class HycomVisualizer:
    """Plot HYCOM salinity maps overlaid with fishing-set ("calas") records for one season.

    Each instance is bound to a single year/season pair. On construction it
    reads the boundary geometry and the calas CSV, and looks inside
    ``path_features`` for the file ``hycom_{temporada}-{anio}.nc``.

    Attributes:
        anio: Year of the season.
        temporada: Season label used in the HYCOM file name and matched
            (case-insensitively) against the ``temporada`` column of the calas.
        path_features: Directory searched for the HYCOM NetCDF file.
        path_output: Directory where the PNG figures are written.
        boundary: Geometry read from ``boundary_path`` and drawn as outline.
        df_calas: Calas table returned by :meth:`_load_calas`.
        fname: Path of the matching HYCOM file, or ``None`` if there is none.
    """

    # Reference salinity (psu): subtracted to build the anomaly field and drawn
    # as the black contour on absolute-salinity maps.
    SALINIDAD_REFERENCIA = 35.1

    def __init__(self, anio, temporada, path_features, path_output, boundary_path, calas_path):
        self.anio = anio
        self.temporada = temporada
        self.path_features = Path(path_features)
        self.path_output = Path(path_output)
        self.boundary = gpd.read_file(boundary_path)
        self.df_calas = self._load_calas(calas_path)
        # First match of the season file, or None when the directory has none.
        self.fname = next(
            iter(self.path_features.glob(f"hycom_{temporada}-{anio}.nc")), None
        )

    def _load_calas(self, calas_path):
        """Read the calas CSV and add the derived columns used by the plots.

        Added columns:
            ``anio``: first four-digit number found in ``temporada``, as int.
            ``empresa_normalizada``: ``empresa`` with known spelling variants
                mapped to one canonical name; unknown values pass through.
            ``mes``: month number parsed from ``fecha_cala``.

        A header spelled ``"latitud "`` (trailing space) is renamed to
        ``"latitud"``.

        Args:
            calas_path: Path of the CSV file.

        Returns:
            The loaded ``pandas.DataFrame``.
        """
        empresas_equivalentes = {
            "AUSTRAL GROUP SAA": "AUSTRAL GROUP SAA",
            "CFG-COPEINCA": "COPEINCA",
            "HAYDUK": "HAYDUK",
            "TASA": "TASA",
            "TASA ": "TASA",
            "PESQUERA CENTINELA S.A.C": "CENTINELA S.A.C",
            "CENTINELA": "CENTINELA S.A.C",
            "PESQUERA DIAMANTE S.A.": "DIAMANTE S.A.",
            "DIAMANTE": "DIAMANTE S.A.",
            "Pesquera Diamante S.A.": "DIAMANTE S.A.",
            "PESQUERA EXALMAR S.A.A.": "EXALMAR S.A.A.",
            "CAPRICORNIO": "CAPRICORNIO",
            "INVERSIONES QUIAZA SAC": "INVERSIONES QUIAZA SAC",
            "QUIAZA": "INVERSIONES QUIAZA SAC",
            "PESQ. ISA": "ISA",
            "LOS HALCONES SA": "LOS HALCONES SA",
            "LOS HALCONES": "LOS HALCONES SA",
            "Inversiones Eccola": "INVERSIONES ECCOLA",
            "INVERSIONES ECCOLA": "INVERSIONES ECCOLA",
            "PESQ. NINFAS DEL MAR": "NINFAS DEL MAR",
            "PESQ. MAJAT": "MAJAT",
            "PESQUERA LUCIANA SAC": "LUCIANA SAC",
            "PESQUERA SKAVOS S.A.C.": "SKAVOS S.A.C."
        }

        df = pd.read_csv(calas_path, low_memory=False)
        # rename() ignores keys that are absent, so no existence check is needed.
        df = df.rename(columns={"latitud ": "latitud"})
        # expand=False yields a Series instead of a one-column DataFrame.
        df["anio"] = df["temporada"].str.extract(r"(\d{4})", expand=False).astype(int)
        df["empresa_normalizada"] = df["empresa"].replace(empresas_equivalentes)
        df["mes"] = pd.to_datetime(df["fecha_cala"]).dt.month

        return df

    def _calas_temporada(self, mes=None):
        """Return the calas of this instance's year and season.

        Args:
            mes: Optional month number; when given, only rows whose ``mes``
                column equals it are kept.

        Returns:
            The filtered rows of ``self.df_calas`` (possibly empty).
        """
        calas = self.df_calas
        mask = (calas["anio"] == self.anio) & (
            calas["temporada"].str.contains(self.temporada, case=False)
        )
        if mes is not None:
            mask = mask & (calas["mes"] == mes)
        return calas[mask]

    @staticmethod
    def _estilo_salinidad(compute_anomaly):
        """Return ``(cmap, levels, ticks, label)`` for anomaly or absolute maps."""
        if compute_anomaly:
            return (
                cmocean.cm.curl,
                np.linspace(-1.0, 1.0, 32),
                np.arange(-1.0, 1.1, 0.2),
                "Salinity anomaly (psu)",
            )
        return (
            cmocean.cm.haline,
            np.linspace(34, 35.5, 32),
            np.arange(34, 35.5, 0.2),
            "Salinity (psu)",
        )

    def procesar_salinidad(self, compute_anomaly=False):
        """Load the salinity field of the season's HYCOM file.

        Longitudes above 180 are wrapped into the [-180, 180) range and the
        dataset is re-sorted by longitude. The data are read into memory and
        the file is closed before returning.

        Args:
            compute_anomaly: When true, ``SALINIDAD_REFERENCIA`` is subtracted
                from the salinity.

        Returns:
            ``None`` when no HYCOM file was found for this season, otherwise
            the tuple ``(lon, lat, sal)`` taken from the dataset.
        """
        if self.fname is None:
            return None
        # The context manager closes the file handle; everything returned is
        # loaded first so it stays usable afterwards.
        with xr.open_dataset(self.fname) as abierto:
            ds = abierto
            if (ds.lon > 180).any():
                ds = ds.assign_coords(lon=(((ds.lon + 180) % 360) - 180))
                ds = ds.sortby(ds.lon)
            sal = ds["salinity"].load()
            lon = ds["lon"].load()
            lat = ds["lat"].load()
        if compute_anomaly:
            sal = sal - self.SALINIDAD_REFERENCIA
        return lon, lat, sal

    def plot_mapa_salinidad(self, lon, lat, campo, cmap, levels, ticks, label, mes=None, vmin=None, vmax=None, df_temp=None):
        """Draw one salinity map with the calas on top and save it as PNG.

        The figure is written to ``path_output`` (created if missing) under a
        name built from ``label``, the season, the year and, when given, the
        two-digit month. The figure is always closed, even if drawing fails.

        Args:
            lon, lat: Coordinates passed to ``contourf``/``contour``.
            campo: Two-dimensional field to contour.
            cmap: Colormap of the filled contours.
            levels: Levels of the filled contours.
            ticks: Tick positions of the salinity colorbar.
            label: Colorbar label; also decides the black reference contour
                (0 when it contains ``"anom"``, else ``SALINIDAD_REFERENCIA``).
            mes: Optional month number added to the title and file name.
            vmin, vmax: Color limits of the calas scatter.
            df_temp: Calas to draw; defaults to all calas of the season.
        """
        if df_temp is None:
            df_temp = self._calas_temporada()
        hay_calas = not df_temp.empty
        nivel_referencia = 0 if "anom" in label else self.SALINIDAD_REFERENCIA

        fig = plt.figure(figsize=(6, 8))
        try:
            ax = fig.add_axes([0.05, 0.05, 0.85, 0.9])
            cf = ax.contourf(lon, lat, campo, levels=levels, cmap=cmap, extend="both")
            ax.contour(lon, lat, campo, levels=[nivel_referencia], colors="black", linewidths=1.2)
            self.boundary.plot(ax=ax, facecolor="none", edgecolor="black", linewidth=0.5)
            if hay_calas:
                sc = ax.scatter(
                    df_temp["longitud"], df_temp["latitud"],
                    c=df_temp["declarado_tm"], cmap="YlOrRd", alpha=0.5, s=30,
                    vmin=vmin, vmax=vmax
                )
            ax.set_xlim([-83, -74.5])
            ax.set_ylim([-16, -4])
            title = f"{self.temporada} {self.anio}" if mes is None else f"{self.temporada} {self.anio} - {calendar.month_abbr[mes]}"
            ax.set_title(title, fontsize=11, loc="left")

            # Upper colorbar: salinity; lower colorbar: declared catch of the calas.
            cax1 = fig.add_axes([0.92, 0.5, 0.02, 0.4])
            fig.colorbar(cf, cax=cax1, ticks=ticks, label=label)
            if hay_calas:
                cax2 = fig.add_axes([0.92, 0.05, 0.02, 0.4])
                fig.colorbar(sc, cax=cax2, label="Toneladas métricas (calas)")

            suffix = f"_{self.temporada}_{self.anio}" if mes is None else f"_{self.temporada}_{self.anio}_{mes:02d}"
            fname = label.replace(" ", "_").replace("(", "").replace(")", "")
            self.path_output.mkdir(parents=True, exist_ok=True)
            fig.savefig(self.path_output / f"{fname}{suffix}.png", bbox_inches="tight", dpi=400)
        finally:
            plt.close(fig)

    def plot_temporada(self, compute_anomaly=False):
        """Plot the time-mean salinity of the whole season with all its calas.

        Does nothing when there is no HYCOM file for this season.

        Args:
            compute_anomaly: Plot the anomaly instead of absolute salinity.
        """
        resultado = self.procesar_salinidad(compute_anomaly)
        if resultado is None:
            return
        lon, lat, sal = resultado
        campo = sal.mean("time")
        # Color scale of the calas is capped at the 95th percentile of all records.
        vmax = self.df_calas["declarado_tm"].quantile(0.95)
        cmap, levels, ticks, label = self._estilo_salinidad(compute_anomaly)
        self.plot_mapa_salinidad(lon, lat, campo, cmap, levels, ticks, label, vmax=vmax)

    def plot_monthly(self, compute_anomaly=False):
        """Plot one map per calendar month present in the season's HYCOM file.

        Each map shows the mean salinity of that month and only the calas of
        the same month. Does nothing when there is no HYCOM file.

        Args:
            compute_anomaly: Plot the anomaly instead of absolute salinity.
        """
        resultado = self.procesar_salinidad(compute_anomaly)
        if resultado is None:
            return
        lon, lat, sal = resultado
        sal_monthly = sal.groupby("time.month").mean()
        # Same color limit on every monthly map so they are comparable.
        vmax = self.df_calas["declarado_tm"].quantile(0.95)
        cmap, levels, ticks, label = self._estilo_salinidad(compute_anomaly)
        for mes in sal_monthly.month.values:
            mes = int(mes)
            campo = sal_monthly.sel(month=mes)
            df_mes = self._calas_temporada(mes)
            self.plot_mapa_salinidad(lon, lat, campo, cmap, levels, ticks, label, mes=mes, vmax=vmax, df_temp=df_mes)

    def duracion_temporada(self):
        """Print and return the span between the first and last cala of the season.

        Returns:
            ``None`` when the season has no calas, otherwise a dict with the
            keys ``anio``, ``temporada``, ``inicio``, ``fin`` (dates) and
            ``duracion_dias`` (whole days between both dates).
        """
        df_temp = self._calas_temporada()
        if df_temp.empty:
            print(f"Sin datos de calas para {self.temporada} {self.anio}")
            return None
        fechas = pd.to_datetime(df_temp["fecha_cala"])
        primera, ultima = fechas.min(), fechas.max()
        fecha_inicio = primera.date()
        fecha_fin = ultima.date()
        duracion_dias = (ultima - primera).days
        print(f"Temporada {self.temporada} {self.anio}: {duracion_dias} días ({fecha_inicio} a {fecha_fin})")
        return {"anio": self.anio, "temporada": self.temporada, "inicio": fecha_inicio, "fin": fecha_fin, "duracion_dias": duracion_dias}



In [None]:

# Input and output locations used by every HycomVisualizer built below.
path_features = "/home/jupyter-daniela/suyana/peru_production/features/"
path_output = "/home/jupyter-daniela/suyana/peru_production/displays/"
boundary_path = "/home/jupyter-daniela/suyana/geometries/ne_10m_admin_0_countries/ne_10m_admin_0_countries.shp"
calas_path = "/home/jupyter-daniela/suyana/peru_production/outputs/calas_all_data.csv"

# Seasons to process: "1ra" and "2da" of every year from 2015 through 2024.
anios = list(range(2015, 2025))
temporadas = ["1ra", "2da"]

# NOTE: each HycomVisualizer re-reads the boundary file and the calas CSV in
# its constructor, so this loop pays that cost once per year/season pair.
for anio in anios:
    for temporada in temporadas:
        viz = HycomVisualizer(anio, temporada, path_features, path_output, boundary_path, calas_path)
        if viz.fname is None:
            # No HYCOM file for this season: report it and move on.
            print(f"Sin archivo HYCOM para {temporada} {anio}, se omite.")
            continue
        viz.plot_monthly(compute_anomaly=True)
        viz.duracion_temporada()

