In [1]:
import pandas as pd
from pathlib import Path
import warnings
warnings.filterwarnings("ignore")

In [2]:
output_path = Path("/home/jupyter-daniela/suyana/peru_production/outputs/")

df_sal_temp = pd.read_csv(output_path / "salinity_temperature_peru_daily.csv", parse_dates=["fecha"])
df_calas = pd.read_csv(output_path / "calas_serie_diaria.csv", parse_dates=["fecha_cala"])


df_calas["fecha_cala"] = pd.to_datetime(df_calas["fecha_cala"]).dt.tz_localize(None)
df_sal_temp["fecha"] = pd.to_datetime(df_sal_temp["fecha"]).dt.tz_localize(None)

df_merge = pd.merge(df_calas, df_sal_temp, left_on="fecha_cala", right_on="fecha", how="inner")


In [3]:
cols_drop = [
    "DPTO_y", "region_macro_y", "fecha", "anio", "dia_juliano",
    "temp_clim", "sal_clim"
]

df_merge = (
    df_merge
    .drop(columns=cols_drop, errors="ignore")
    .rename(columns={"fecha_cala": "fecha", "DPTO_x": "DPTO", "region_macro_x": "region_macro"})
)

In [4]:
df_merge

Unnamed: 0,fecha,total_pescado_tm,DPTO,temporada,region_macro,temperatura,salinidad,anom_temp,anom_sal,anom_sal_ref35
0,2016-06-18,2830.0,ANCASH,1ra 2016,centro,19.101004,35.064500,0.783262,0.031097,-0.035500
1,2016-06-18,2830.0,ANCASH,1ra 2016,centro,18.869705,35.043457,0.551964,0.010056,-0.056541
2,2016-06-18,2830.0,ANCASH,1ra 2016,centro,18.641376,35.065740,0.323635,0.032337,-0.034260
3,2016-06-18,2830.0,ANCASH,1ra 2016,centro,18.170180,35.107826,-0.941936,-0.034996,0.007828
4,2016-06-18,2830.0,ANCASH,1ra 2016,centro,19.226192,35.165535,0.114077,0.022713,0.065536
...,...,...,...,...,...,...,...,...,...,...
60175,2023-02-03,90.0,PIURA,2da 2022,norte,20.734848,35.118244,-1.432270,0.008141,0.018246
60176,2023-02-03,90.0,PIURA,2da 2022,norte,20.857475,35.035290,-1.309643,-0.074814,-0.064709
60177,2023-02-03,90.0,PIURA,2da 2022,norte,21.389566,34.877080,-2.302939,0.282509,-0.222919
60178,2023-02-03,90.0,PIURA,2da 2022,norte,24.138838,34.409252,0.446333,-0.185318,-0.690746


In [5]:
from linearmodels.panel import PanelOLS
import pandas as pd
import statsmodels.api as sm

df_model = df_merge.copy()
df_model["fecha"] = pd.to_datetime(df_model["fecha"])
df_model = df_model.set_index(["region_macro", "fecha"])

y = df_model["total_pescado_tm"]
X = df_model[["anom_temp", "anom_sal_ref35"]]
X = sm.add_constant(X)

mod = PanelOLS(y, X, entity_effects=True)
res = mod.fit(cov_type="clustered", cluster_entity=True)
print(res.summary)


                          PanelOLS Estimation Summary                           
Dep. Variable:       total_pescado_tm   R-squared:                        0.0014
Estimator:                   PanelOLS   R-squared (Between):             -0.9148
No. Observations:               38120   R-squared (Within):               0.0014
Date:                Fri, Oct 17 2025   R-squared (Overall):              0.0016
Time:                        11:42:37   Log-likelihood                -3.937e+05
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      26.974
Entities:                           3   P-value                           0.0000
Avg Obs:                    1.271e+04   Distribution:                 F(2,38115)
Min Obs:                       140.00                                           
Max Obs:                    3.136e+04   F-statistic (robust):          1.922e+04
                            

In [7]:

df_model = df_merge.copy()
df_model["fecha"] = pd.to_datetime(df_model["fecha"])
df_model = df_model.set_index(["region_macro", "fecha"])

y = df_model["total_pescado_tm"]
X = df_model[["anom_temp", "anom_sal"]]
X = sm.add_constant(X)

mod = PanelOLS(y, X, entity_effects=True)
res = mod.fit(cov_type="clustered", cluster_entity=True)
print(res.summary)


                          PanelOLS Estimation Summary                           
Dep. Variable:       total_pescado_tm   R-squared:                        0.0020
Estimator:                   PanelOLS   R-squared (Between):             -0.8555
No. Observations:               38120   R-squared (Within):               0.0020
Date:                Fri, Oct 17 2025   R-squared (Overall):              0.0022
Time:                        12:26:12   Log-likelihood                -3.937e+05
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      38.674
Entities:                           3   P-value                           0.0000
Avg Obs:                    1.271e+04   Distribution:                 F(2,38115)
Min Obs:                       140.00                                           
Max Obs:                    3.136e+04   F-statistic (robust):             593.10
                            