In [23]:
## IMPORTS

import numpy as np
import pandas as pd
import xarray as xr

import warnings
warnings.filterwarnings('ignore')

In [24]:
# SST anomaly cube; longitudes are re-expressed in the [-180, 180) convention.
ds_anom = xr.open_dataset('../data/MODIS/processed/sst_anomaly_daily_2002_2025.nc')
ds_anom = ds_anom.assign_coords(lon=((ds_anom['lon'] + 180) % 360) - 180)

# Landings table: the 'date' column is renamed to 'time' and parsed to datetimes
# so it can later be matched against the SST time axis.
df_prod = (
    pd.read_csv('../data/imarpe/processed/df_tons_by_group.csv')
    .rename(columns={'date': 'time'})
    .assign(time=lambda frame: pd.to_datetime(frame['time']))
)

# Every column except the time/season bookkeeping columns and Groups 5-6
# is treated as a landing series to correlate against SST.
columns_to_correlate = df_prod.columns.drop(['time', 'season', 'Group 5', 'Group 6'])


In [25]:
# Wrap each landing column in a 1-D DataArray indexed by time, then bundle
# them into a single Dataset.
landing_arrays = {}
for col in columns_to_correlate:
    landing_arrays[col] = xr.DataArray(
        df_prod[col].values,
        coords={'time': df_prod['time']},
        dims='time',
    )
landing_ds = xr.Dataset(landing_arrays)

# Keep only the timestamps present in BOTH datasets so the two time axes
# line up exactly for the correlations below.
common_times = np.intersect1d(landing_ds.time.values, ds_anom.time.values)
landing_ds = landing_ds.sel({'time': common_times})
ds_anom = ds_anom.sel({'time': common_times})

In [26]:
# Upwelling masks for the winter ("invierno") and summer ("verano") files.
upwelling_mask_pesca_invierno_da = xr.open_dataset("../data/ocean_data_sst/processed/upwelling_mask_pesca_invierno.nc")
upwelling_mask_pesca_verano_da = xr.open_dataset("../data/ocean_data_sst/processed/upwelling_mask_pesca_verano.nc")

# Re-express both longitude axes in the [-180, 180) convention (in place).
for _mask_ds in (upwelling_mask_pesca_invierno_da, upwelling_mask_pesca_verano_da):
    _mask_ds['lon'] = ((_mask_ds['lon'] + 180) % 360) - 180


In [27]:
# Resample the winter mask onto the SST grid with nearest-neighbour lookup.
mask_interp = upwelling_mask_pesca_invierno_da.interp(
    coords={'lon': ds_anom['lon'], 'lat': ds_anom['lat']},
    method='nearest',
)

# Keep SST only where the resampled mask has a value; everything else becomes NaN.
inside_winter_mask = mask_interp['upwelling_mask_pesca_invierno'].notnull()
ds_masked = ds_anom.where(inside_winter_mask)


In [None]:
# 1. Per-grid-cell correlation along time between the masked SST field and
#    each landing series, computed on the original (unsmoothed) variables.
sst_correlations = {
    group: xr.corr(da_a=ds_masked['sst'], da_b=landing_ds[group], dim='time')
    for group in columns_to_correlate
}

# One data variable per landing series.
sst_corr_ds = xr.Dataset(data_vars=sst_correlations)



In [29]:
# Persist the correlation maps computed above.
sst_corr_ds.to_netcdf(path='../data/outputs/correlation_sst_desembarques_trial.nc')

In [21]:
# Smooth both datasets with a centred 5-sample rolling mean along time.
# NOTE(review): the window counts samples, not calendar days — if the shared
# time axis has gaps this is not strictly a "5-day" mean; confirm.
landing_ds_rolling = landing_ds.rolling(dim={'time': 5}, center=True).mean()
ds_anom_rolling = ds_anom.rolling(dim={'time': 5}, center=True).mean()

# NOTE(review): this uses the unmasked `ds_anom`, whereas the unsmoothed
# correlation uses `ds_masked` — confirm that is intentional.
# This rebinds `sst_correlations` / `sst_corr_ds` from the unsmoothed analysis.
sst_correlations = {
    group: xr.corr(da_a=ds_anom_rolling['sst'], da_b=landing_ds_rolling[group], dim='time')
    for group in columns_to_correlate
}
sst_corr_ds = xr.Dataset(data_vars=sst_correlations)

sst_corr_ds.to_netcdf(path='../data/outputs/correlation_sst_desembarques_5day_rolling.nc')

In [22]:
# Boolean month-of-year masks over the smoothed time axis.
months_mask_1 = landing_ds_rolling.time.dt.month.isin([4, 5, 6, 7])  # April to July
months_mask_2 = landing_ds_rolling.time.dt.month.isin([11, 12, 1])   # November to January


def _seasonal_correlation(season_mask):
    """Subset both smoothed datasets with ``season_mask`` and correlate them.

    Parameters
    ----------
    season_mask : boolean DataArray along ``time`` selecting the timestamps to keep.

    Returns
    -------
    tuple
        ``(landings_subset, sst_subset, correlations)`` where ``correlations``
        maps each name in ``columns_to_correlate`` to the along-time
        correlation between the subset SST and that landing series.
    """
    landings_subset = landing_ds_rolling.sel(time=season_mask)
    sst_subset = ds_anom_rolling.sel(time=season_mask)
    correlations = {
        group: xr.corr(sst_subset['sst'], landings_subset[group], dim='time')
        for group in columns_to_correlate
    }
    return landings_subset, sst_subset, correlations


# Same computation for each season; only the month mask differs.
landing_ds_apr_jul, ds_anom_apr_jul, sst_correlations_apr_jul = _seasonal_correlation(months_mask_1)
landing_ds_nov_jan, ds_anom_nov_jan, sst_correlations_nov_jan = _seasonal_correlation(months_mask_2)

sst_corr_ds_apr_jul = xr.Dataset(sst_correlations_apr_jul)
sst_corr_ds_nov_jan = xr.Dataset(sst_correlations_nov_jan)

sst_corr_ds_apr_jul.to_netcdf('../data/outputs/correlation_sst_desembarques_5day_rolling_apr_jul.nc')
sst_corr_ds_nov_jan.to_netcdf('../data/outputs/correlation_sst_desembarques_5day_rolling_nov_jan.nc')