In [1]:
import os, glob
import numpy as np
import pandas as pd
import xarray as xr
import regionmask

# HDF5 locking issues (Rockfish/HPC): set once per process if needed.
# The first variable is only set when absent; the second is always forced.
os.environ.setdefault("HDF5_USE_FILE_LOCKING", "FALSE")
os.environ["NETCDF_HDF5_FILE_LOCKING"] = "FALSE"

# NOTE: `module load nco` and `set -euo pipefail` used to be issued here via
# os.system(). Each os.system() call runs its command in a separate subshell
# that exits immediately, so neither call could change this kernel's
# environment or shell options. If the NCO tools are needed, run
# `module load nco` in the shell / job script *before* launching Jupyter.

#xr.set_options(file_cache_maxsize=1)  # avoid netCDF4 cache bloat
xr.set_options(display_style="text")

# --path--
era5land_hourly_data = "/vast/bzaitch1/trp_climate_model_data/era5land_1970_2024_hourly/"
output_directory  = "/vast/bzaitch1/trp_climate_model_data/out"
# Create the output directory up front; a no-op when it already exists.
os.makedirs(output_directory, exist_ok=True)


# xr.open_dataset(era5land_hourly_data + "1970_01.nc")
# Short variable codes mapped to their descriptive long names.
var_aliases = dict(
    tp='total precipitation',
    t2m='2 metre temperature',
    e='evaporation',
)

# Open the country workbook once so individual sheets can be read from it later.
# NOTE: `df` is an ExcelFile handle here, not a DataFrame.
country_workbook_path = "./df_country_data_climate.xlsx"
df = pd.ExcelFile(country_workbook_path)

# The sheet names are used as the list of countries.
list_countries = df.sheet_names

Assume the `Q-DEC` quarterly convention: the fiscal year ends in December, so Q1 = Jan–Mar and Q4 = Oct–Dec (this is the most common convention).

In [None]:
# Quarterly frequency alias; change if your macro data uses a different fiscal year-end.
QUARTER_RULE = "Q-DEC"

# Read one country's sheet from the already-open workbook.
econ = pd.read_excel(df, sheet_name='Argentina')

# Interpret the 'Unnamed: 0' column as quarterly periods, then stamp each row
# with the last instant of its quarter.
quarter_periods = pd.PeriodIndex(econ['Unnamed: 0'], freq=QUARTER_RULE)
econ["time"] = quarter_periods.to_timestamp(how="end")


In [19]:
# Display the quarter-end timestamps stored in the "time" column.
econ["time"]

0     1979-06-30 23:59:59.999999999
1     1979-09-30 23:59:59.999999999
2     1979-12-31 23:59:59.999999999
3     1980-03-31 23:59:59.999999999
4     1980-06-30 23:59:59.999999999
                   ...             
158   2018-12-31 23:59:59.999999999
159   2019-03-31 23:59:59.999999999
160   2019-06-30 23:59:59.999999999
161   2019-09-30 23:59:59.999999999
162   2019-12-31 23:59:59.999999999
Name: time, Length: 163, dtype: datetime64[ns]

In [2]:
# Open a single monthly file from the hourly archive as a sample dataset.
sample_file = f"{era5land_hourly_data}1970_01.nc"
ds = xr.open_mfdataset(sample_file)

In [3]:
# Display the dataset's summary representation.
ds

In [5]:
# Close the dataset opened with xr.open_mfdataset.
# NOTE(review): the following cell still reads `ds['t2m']` after this close —
# confirm the ordering is intended, or move this cell to the end of the notebook.
ds.close()

In [None]:
# --- select the t2m variable and rename dims to standard names ---
dim_renames = {"valid_time": "time", "latitude": "lat", "longitude": "lon"}
da = ds['t2m'].rename(dim_renames)

# --- rechunk for faster resampling & masking (tune to your RAM/cluster) ---
# 24 * 31 hourly steps per time chunk, i.e. roughly one month.
chunk_sizes = {"time": 24 * 31, "lat": 200, "lon": 300}
da = da.chunk(chunk_sizes)




: 

In [None]:
# --- fix longitude from 0..360 to -180..180 and sort ---
if float(da.lon.max()) > 180:
    lon_new = ((da.lon + 180) % 360) - 180
    da = da.assign_coords(lon=lon_new).sortby("lon")

# --- make latitude ascending (optional but helpful for weights) ---
if da.lat[0] > da.lat[-1]:
    da = da.sortby("lat")

# --- convert K -> °C ---
if str(da.attrs.get("units", "")).lower() in ("k", "kelvin"):
    da = da - 273.15
    da.attrs["units"] = "degC"

In [None]:
def detrend_dim(da, dim, deg=1):
    """Subtract a least-squares polynomial fit along ``dim`` from ``da``.

    Parameters
    ----------
    da : object providing ``polyfit`` whose result exposes
        ``polyfit_coefficients`` (e.g. an xarray DataArray)
    dim : name of the dimension to fit along
    deg : polynomial degree; the default of 1 removes a linear trend

    Returns
    -------
    ``da`` minus the fitted polynomial evaluated on ``da[dim]``.
    """
    fit_params = da.polyfit(dim=dim, deg=deg)
    coefficients = fit_params.polyfit_coefficients
    trend = xr.polyval(da[dim], coefficients)
    residual = da - trend
    return residual

def det_data(ds):
    """Remove the mean monthly cycle from ``ds``, then detrend along ``time``.

    The input is grouped by calendar month (``time.month``); each month's
    time-mean is subtracted from the members of that month, and the resulting
    anomalies are passed to ``detrend_dim`` with its default degree (linear).

    NOTE(review): ``detrend_dim`` reads ``.polyfit_coefficients`` from the fit
    result; confirm that callers pass a DataArray rather than a Dataset,
    despite the parameter name.
    """
    by_month = ds.groupby("time.month")
    monthly_mean = by_month.mean("time")      # seasonal cycle
    deseasonalized = by_month - monthly_mean  # anomalies w.r.t. that cycle
    # (disabled) spatial mean: deseasonalized.mean(dim=['lat', 'lon'])
    return detrend_dim(deseasonalized, 'time')