In [1]:
import pandas as pd
from pathlib import Path
import warnings
warnings.filterwarnings("ignore")

In [2]:
output_path = Path("/home/jupyter-daniela/suyana/peru_production/outputs/")

df_sal_temp = pd.read_csv(output_path / "salinity_temperature_peru_daily.csv", parse_dates=["fecha"])
df_calas = pd.read_csv(output_path / "calas_serie_diaria.csv", parse_dates=["fecha_cala"])


df_calas["fecha_cala"] = pd.to_datetime(df_calas["fecha_cala"]).dt.tz_localize(None)
df_sal_temp["fecha"] = pd.to_datetime(df_sal_temp["fecha"]).dt.tz_localize(None)

df_merge = pd.merge(df_calas, df_sal_temp, left_on="fecha_cala", right_on="fecha", how="inner")


In [3]:
df_calas

Unnamed: 0,fecha_cala,total_pescado_tm,DPTO,temporada,region_macro
0,2016-06-18,2830.0,ANCASH,1ra 2016,centro
1,2016-06-19,9410.0,ANCASH,1ra 2016,centro
2,2016-06-20,11090.0,ANCASH,1ra 2016,centro
3,2016-06-21,2965.0,ANCASH,1ra 2016,centro
4,2016-06-22,5320.0,ANCASH,1ra 2016,centro
...,...,...,...,...,...
6028,2023-01-30,,PIURA,2da 2022,norte
6029,2023-01-31,1450.0,PIURA,2da 2022,norte
6030,2023-02-01,,PIURA,2da 2022,norte
6031,2023-02-02,30.0,PIURA,2da 2022,norte


In [4]:
df_sal_temp

Unnamed: 0,fecha,temperatura,salinidad,DPTO,region_macro,anio,dia_juliano,temp_clim,sal_clim,anom_temp,anom_sal,anom_sal_ref35
0,2015-01-01,21.275284,35.112797,TACNA,sur,2015,1,21.666075,35.093830,-0.390791,0.018967,0.012798
1,2015-01-02,21.351906,35.104282,TACNA,sur,2015,2,21.623285,35.094368,-0.271379,0.009914,0.004284
2,2015-01-03,21.451557,35.107770,TACNA,sur,2015,3,21.618841,35.093853,-0.167284,0.013916,0.007771
3,2015-01-04,21.267560,35.110350,TACNA,sur,2015,4,21.623861,35.094790,-0.356302,0.015560,0.010353
4,2015-01-05,21.283667,35.109562,TACNA,sur,2015,5,21.690758,35.095802,-0.407091,0.013760,0.009563
...,...,...,...,...,...,...,...,...,...,...,...,...
35325,2024-09-01,20.301203,34.802900,TUMBES,norte,2024,245,19.215267,34.899166,1.085936,-0.096268,-0.297100
35326,2024-09-02,20.331703,34.664955,TUMBES,norte,2024,246,19.247590,34.878258,1.084114,-0.213303,-0.435043
35327,2024-09-03,20.342840,34.540880,TUMBES,norte,2024,247,19.182440,34.870132,1.160400,-0.329254,-0.559120
35328,2024-09-04,20.378141,34.507572,TUMBES,norte,2024,248,19.203117,34.859140,1.175024,-0.351566,-0.592426


In [5]:
cols_drop = [
    "DPTO_y", "region_macro_y", "fecha", "anio", "dia_juliano",
    "temp_clim", "sal_clim"
]

df_merge = (
    df_merge
    .drop(columns=cols_drop, errors="ignore")
    .rename(columns={"fecha_cala": "fecha", "DPTO_x": "DPTO", "region_macro_x": "region_macro"})
)

In [6]:
df_merge

Unnamed: 0,fecha,total_pescado_tm,DPTO,temporada,region_macro,temperatura,salinidad,anom_temp,anom_sal,anom_sal_ref35
0,2016-06-18,2830.0,ANCASH,1ra 2016,centro,19.101004,35.064500,0.783262,0.031097,-0.035500
1,2016-06-18,2830.0,ANCASH,1ra 2016,centro,18.869705,35.043457,0.551964,0.010056,-0.056541
2,2016-06-18,2830.0,ANCASH,1ra 2016,centro,18.641376,35.065740,0.323635,0.032337,-0.034260
3,2016-06-18,2830.0,ANCASH,1ra 2016,centro,18.170180,35.107826,-0.941936,-0.034996,0.007828
4,2016-06-18,2830.0,ANCASH,1ra 2016,centro,19.226192,35.165535,0.114077,0.022713,0.065536
...,...,...,...,...,...,...,...,...,...,...
60175,2023-02-03,90.0,PIURA,2da 2022,norte,20.734848,35.118244,-1.432270,0.008141,0.018246
60176,2023-02-03,90.0,PIURA,2da 2022,norte,20.857475,35.035290,-1.309643,-0.074814,-0.064709
60177,2023-02-03,90.0,PIURA,2da 2022,norte,21.389566,34.877080,-2.302939,0.282509,-0.222919
60178,2023-02-03,90.0,PIURA,2da 2022,norte,24.138838,34.409252,0.446333,-0.185318,-0.690746


In [7]:
from linearmodels.panel import PanelOLS
import pandas as pd
import statsmodels.api as sm

df_model = df_merge.copy()
df_model["fecha"] = pd.to_datetime(df_model["fecha"])
df_model = df_model.set_index(["region_macro", "fecha"])

y = df_model["total_pescado_tm"]
X = df_model[["anom_temp", "anom_sal_ref35"]]
X = sm.add_constant(X)

mod = PanelOLS(y, X, entity_effects=True)
res = mod.fit(cov_type="clustered", cluster_entity=True)
print(res.summary)


                          PanelOLS Estimation Summary                           
Dep. Variable:       total_pescado_tm   R-squared:                        0.0014
Estimator:                   PanelOLS   R-squared (Between):             -0.9148
No. Observations:               38120   R-squared (Within):               0.0014
Date:                Fri, Oct 17 2025   R-squared (Overall):              0.0016
Time:                        19:58:08   Log-likelihood                -3.937e+05
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      26.974
Entities:                           3   P-value                           0.0000
Avg Obs:                    1.271e+04   Distribution:                 F(2,38115)
Min Obs:                       140.00                                           
Max Obs:                    3.136e+04   F-statistic (robust):          1.922e+04
                            

In [8]:
import numpy as np

df_model = df_merge.copy()
df_model["fecha"] = pd.to_datetime(df_model["fecha"])
df_model = df_model.set_index(["region_macro", "fecha"])

df_model = df_model[df_model["total_pescado_tm"] > 0]  # evitar log de cero o negativo
df_model["log_pescado"] = np.log(df_model["total_pescado_tm"])

y = df_model["log_pescado"]
X = df_model[["anom_temp", "anom_sal_ref35"]]
X = sm.add_constant(X)

mod = PanelOLS(y, X, entity_effects=True)
res = mod.fit(cov_type="clustered", cluster_entity=True)
print(res.summary)

                          PanelOLS Estimation Summary                           
Dep. Variable:            log_pescado   R-squared:                        0.0032
Estimator:                   PanelOLS   R-squared (Between):             -0.8604
No. Observations:               38120   R-squared (Within):               0.0032
Date:                Fri, Oct 17 2025   R-squared (Overall):              0.0033
Time:                        19:58:08   Log-likelihood                -7.562e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      60.839
Entities:                           3   P-value                           0.0000
Avg Obs:                    1.271e+04   Distribution:                 F(2,38115)
Min Obs:                       140.00                                           
Max Obs:                    3.136e+04   F-statistic (robust):          2.807e+04
                            

In [9]:
df_model = df_merge.copy()
df_model["fecha"] = pd.to_datetime(df_model["fecha"])
df_model = df_model[df_model["total_pescado_tm"] > 0]
df_model = df_model.loc[df_model["region_macro"] != 3]

def resumir_3d(g):
    g = g.sort_values("fecha")
    num_cols = g.select_dtypes(include=[np.number]).columns
    g_mean = g.resample("3D", on="fecha")[num_cols].mean()
    return g_mean

df_3d = (
    df_model
    .groupby("region_macro", group_keys=True)
    .apply(resumir_3d)
    .dropna(subset=["total_pescado_tm"])
)

df_3d = df_3d.reset_index().set_index(["region_macro", "fecha"])

df_3d["log_pescado"] = np.log(df_3d["total_pescado_tm"])

y = df_3d["log_pescado"]
X = df_3d[["anom_temp", "anom_sal_ref35"]]
X = sm.add_constant(X)

mod = PanelOLS(y, X, entity_effects=True)
res = mod.fit(cov_type="clustered", cluster_entity=True)
print(res.summary)

                          PanelOLS Estimation Summary                           
Dep. Variable:            log_pescado   R-squared:                        0.0115
Estimator:                   PanelOLS   R-squared (Between):             -0.3163
No. Observations:                 640   R-squared (Within):               0.0115
Date:                Fri, Oct 17 2025   R-squared (Overall):              0.0127
Time:                        19:58:08   Log-likelihood                   -1050.2
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      3.6815
Entities:                           3   P-value                           0.0257
Avg Obs:                       213.33   Distribution:                   F(2,635)
Min Obs:                       8.0000                                           
Max Obs:                       405.00   F-statistic (robust):             775.06
                            

In [11]:

df_model = df_merge.copy()
df_model["fecha"] = pd.to_datetime(df_model["fecha"])
df_model = df_model.set_index(["DPTO", "fecha"])

df_model = df_model[df_model["total_pescado_tm"] > 0]
df_model["log_pescado"] = np.log(df_model["total_pescado_tm"])

y = df_model["log_pescado"]
X = df_model[["anom_temp", "anom_sal_ref35"]]
X = sm.add_constant(X)

mod = PanelOLS(y, X, entity_effects=True)
res = mod.fit(cov_type="clustered", cluster_entity=True)
print(res.summary)

                          PanelOLS Estimation Summary                           
Dep. Variable:            log_pescado   R-squared:                        0.0031
Estimator:                   PanelOLS   R-squared (Between):             -0.1884
No. Observations:               38120   R-squared (Within):               0.0031
Date:                Fri, Oct 17 2025   R-squared (Overall):              0.0033
Time:                        20:03:19   Log-likelihood                -7.506e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      59.344
Entities:                           7   P-value                           0.0000
Avg Obs:                       5445.7   Distribution:                 F(2,38111)
Min Obs:                       140.00                                           
Max Obs:                       9330.0   F-statistic (robust):             3.3861
                            

In [None]:
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(18,6))

def anio_pesquero(ts):
    return ts.year if ts.month >= 3 else ts.year - 1

df_plot = df_model.copy()
df_plot["anio_pesquero"] = df_plot["fecha"].apply(anio_pesquero)

base_cmap = plt.colormaps.get_cmap("tab20")

for region, dfg in df_plot.groupby("region_macro"):
    aps = sorted(dfg["anio_pesquero"].unique())
    colors = [base_cmap(i / max(1, len(aps) - 1)) for i in range(len(aps))]
    color_map = {ap: colors[i] for i, ap in enumerate(aps)}
    for ap, df in dfg.groupby("anio_pesquero"):
        df = df.sort_values("fecha")
        ax.plot(
            df["fecha"], df["total_pescado_tm"],
            lw=1.2, color=color_map[ap],
            marker="o", markersize=2,
            label=f"{region} {ap}"
        )

ax.set_xlabel("Fecha")
ax.set_ylabel("Toneladas totales")
ax.set_title("Pesca diaria total por región (colores por año pesquero marzo–febrero)")
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)
ax.grid(axis="x", linestyle="--", alpha=0.6)
plt.legend(frameon=False, ncol=3, fontsize=8)
plt.tight_layout()
plt.show()
