In [1]:
import pandas as pd 
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import geopandas as gpd
from pyproj import datadir
import rioxarray

import xarray as xr
# Point pyproj at an explicit PROJ data directory inside the
# `peru_environment` conda env.
# NOTE(review): presumably needed so CRS operations can locate the PROJ
# database on this machine; the absolute path is user-specific — confirm
# before running elsewhere.
datadir.set_data_dir("/home/jupyter-daniela/.conda/envs/peru_environment/share/proj")



  _set_context_ca_bundle_path(ca_bundle_path)


In [2]:
# Input locations: root folder of the HYCOM NetCDF files (one sub-folder is
# globbed per entry further down) and the shapefile whose polygons are used
# for clipping.
base_path = Path("/home/jupyter-daniela/suyana/sources/hycom")
shapefile_peru = Path("/home/jupyter-daniela/suyana/geometries/areas_pesca_peru/areas_pesca_peru.shp")

# Load the polygons into a GeoDataFrame.
gdf_peru = gpd.read_file(shapefile_peru)

In [3]:

# Departments grouped under their macro-region label.
_dptos_por_region = {
    'norte': ['TUMBES', 'PIURA', 'LAMBAYEQUE'],
    'centro': ['LA LIBERTAD', 'ANCASH', 'LIMA', 'ICA'],
    'sur': ['AREQUIPA', 'MOQUEGUA', 'TACNA'],
}

# Flatten into the department -> macro-region lookup used by `.map` below.
mapa_regiones = {
    dpto: region
    for region, dptos in _dptos_por_region.items()
    for dpto in dptos
}

# Departments missing from the lookup get NaN in `region_macro`.
gdf_peru["region_macro"] = gdf_peru["DPTO"].map(mapa_regiones)

In [None]:

# Build one long table with the daily mean `water_temp` and `salinity` of every
# department polygon, for every HYCOM file found under `base_path/<folder>/`.
registros = []

# The department polygons do not change between files, so wrap each one in a
# single-row GeoDataFrame once instead of rebuilding it for every file.
departamentos = [
    (
        row["DPTO"],
        row["region_macro"],
        gpd.GeoDataFrame(index=[0], geometry=[row.geometry], crs=gdf_peru.crs),
    )
    for _, row in gdf_peru.iterrows()
]

for year_folder in sorted(base_path.glob("*")):
    if not year_folder.is_dir():
        continue
    for archivo in sorted(year_folder.glob("hycom_*.nc")):
        # The context manager releases the NetCDF file handle even when the
        # file is skipped or an exception is raised; all reductions below are
        # materialised (via `to_dataframe`) before the block exits.
        with xr.open_dataset(archivo) as ds:
            if "time" not in ds:
                continue

            # Re-express longitudes above 180 as negative values and restore
            # ascending order so they line up with the EPSG:4326 polygons.
            if ds["lon"].max() > 180:
                ds["lon"] = xr.where(ds["lon"] > 180, ds["lon"] - 360, ds["lon"])
                ds = ds.sortby("lon")

            # rioxarray expects the spatial dimensions to be named x / y.
            ds = ds.rename({"lon": "x", "lat": "y"})
            ds = ds.rio.write_crs("EPSG:4326")

            # Clip to the union of all polygons first so the daily resample
            # only runs on the cells that are actually needed.
            ds_crop = ds.rio.clip(gdf_peru.geometry, gdf_peru.crs)
            ds_crop = ds_crop.sortby("time")
            ds_diario = ds_crop.resample(time="1D").mean()

            for dpto, region, geom in departamentos:
                sub_ds = ds_diario.rio.clip(geom.geometry, geom.crs)
                # Spatial mean over the polygon, ignoring masked (NaN) cells.
                temp = sub_ds["water_temp"].mean(dim=["y", "x"], skipna=True)
                salt = sub_ds["salinity"].mean(dim=["y", "x"], skipna=True)
                df_temp = temp.to_dataframe(name="temperatura").reset_index()
                df_salt = salt.to_dataframe(name="salinidad").reset_index()
                df_merge = pd.merge(df_temp, df_salt, on="time", how="outer")
                df_merge["DPTO"] = dpto
                df_merge["region_macro"] = region
                registros.append(df_merge)

# Fail with an actionable message instead of pandas' generic
# "No objects to concatenate" when nothing was read.
if not registros:
    raise ValueError(
        f"No HYCOM files with a 'time' variable were found under {base_path}"
    )

df_final = pd.concat(registros, ignore_index=True)
df_final = df_final.rename(columns={"time": "fecha"})

In [None]:
# Keep only the analysis columns, in this fixed order.
columnas = ["fecha", "temperatura", "salinidad", "DPTO", "region_macro"]
df_final = df_final[columnas]

In [None]:
# Fixed salinity baseline used for the `anom_sal_ref35` column.
SALINIDAD_REF = 35.1

# Drop climatology columns left by a previous execution of this cell.
# Without this, re-running the cell makes the merge below emit
# `temp_clim_x` / `temp_clim_y` (and the same for `sal_clim`), and the
# anomaly lines then fail with a KeyError. `drop` returns a new frame, so
# the earlier `df_final` object is not mutated.
df_final = df_final.drop(columns=["temp_clim", "sal_clim"], errors="ignore")

# Parse the dates once and derive both calendar fields from the same Series.
fechas = pd.to_datetime(df_final["fecha"])
df_final["anio"] = fechas.dt.year
df_final["dia_juliano"] = fechas.dt.dayofyear

# Climatology: mean temperature and salinity per macro-region and day of year,
# averaged over every row (all departments and years) that falls in the group.
clim = (
    df_final.groupby(["region_macro", "dia_juliano"])[["temperatura", "salinidad"]]
    .mean()
    .rename(columns={"temperatura": "temp_clim", "salinidad": "sal_clim"})
    .reset_index()
)

# Left merge keeps every observation and attaches its group's climatology.
df_final = df_final.merge(clim, on=["region_macro", "dia_juliano"], how="left")

# Anomalies relative to the climatology, plus salinity relative to the fixed baseline.
df_final["anom_temp"] = df_final["temperatura"] - df_final["temp_clim"]
df_final["anom_sal"] = df_final["salinidad"] - df_final["sal_clim"]
df_final["anom_sal_ref35"] = df_final["salinidad"] - SALINIDAD_REF


In [None]:
import matplotlib.pyplot as plt

# Collapse the departments into a single mean value per date and macro-region.
df_plot = (
    df_final
    .groupby(["fecha", "region_macro"])[["temperatura", "salinidad"]]
    .mean()
    .reset_index()
)

variables = ["temperatura", "salinidad"]

# The set of regions is the same for every variable, so look it up once.
regiones = df_plot["region_macro"].unique()
n_paneles = len(regiones)

# One figure per variable, with one stacked panel per macro-region.
for var in variables:
    # squeeze=False always returns a 2-D array of axes, so the single-region
    # case needs no special handling; take the only column.
    fig, grid = plt.subplots(
        n_paneles, 1, figsize=(12, 3 * n_paneles), sharex=True, squeeze=False
    )
    axes = grid[:, 0]
    for ax, region in zip(axes, regiones):
        serie = df_plot[df_plot["region_macro"] == region]
        ax.plot(serie["fecha"], serie[var], lw=1.2)
        ax.set_title(f"{region} - {var}")
        ax.grid(True, linestyle=":", alpha=0.6)
    # The x axis is shared, so only the bottom panel is labelled.
    axes[-1].set_xlabel("Fecha")
    fig.suptitle(f"Serie diaria promedio por región - {var}", y=0.93)
    # Reserve the top 5% of the figure for the suptitle.
    plt.tight_layout(rect=[0, 0, 1, 0.95])
    plt.show()
