In [1]:
import xarray as xr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [4]:
# Define climatology and sst files

file_clim = '/p/project1/training2533/datasets/era5_climatology/era5_sfc_climatology_1deg.nc'
file_real = '/p/project1/training2533/zampieri2/data/example_enso/era5_fields_2010-2013_1deg.nc'

ds_clim = xr.open_dataset(file_clim)
ds_real = xr.open_dataset(file_real)

  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return np.asarray(self.get_duck_array(), dtype=dtype)


In [5]:
print(ds_clim)
print(ds_real)

<xarray.Dataset> Size: 569MB
Dimensions:  (time: 366, lon: 360, lat: 180)
Coordinates:
  * time     (time) object 3kB -25527-01-01 00:00:00 ... -25527-12-31 00:00:00
  * lon      (lon) float64 3kB 0.0 1.0 2.0 3.0 4.0 ... 356.0 357.0 358.0 359.0
  * lat      (lat) float64 1kB -89.5 -88.5 -87.5 -86.5 ... 86.5 87.5 88.5 89.5
Data variables:
    var31    (time, lat, lon) float32 95MB ...
    var34    (time, lat, lon) float32 95MB ...
    var165   (time, lat, lon) float32 95MB ...
    var166   (time, lat, lon) float32 95MB ...
    var167   (time, lat, lon) float32 95MB ...
    var207   (time, lat, lon) float32 95MB ...
Attributes:
    CDI:          Climate Data Interface version 2.4.4 (https://mpimet.mpg.de...
    Conventions:  CF-1.6
    institution:  European Centre for Medium-Range Weather Forecasts
    history:      Fri Aug 22 16:24:51 2025: cdo -f nc4 -remapdis,r360x180 -se...
    CDO:          Climate Data Operators version 2.4.4 (https://mpimet.mpg.de...
<xarray.Dataset> Size: 379MB


In [9]:
# --------------------
# INPUTS (rename to match your variables)
#   ds_clim: 366-day daily climatology dataset (your first dataset, var34)
#   ds_real: real daily SST dataset with real dates (your second dataset, var34)

# Use 'sst' as a consistent variable name
if 'sst' not in ds_clim:
    ds_clim = ds_clim.rename({'var34': 'sst'})
if 'sst' not in ds_real:
    ds_real = ds_real.rename({'var34': 'sst'})
    
# --------------------
# CONFIG
# --------------------
start_year = 2011          # inclusive
end_year   = 2012          # inclusive
lat_bounds = (-5, 5)       # Niño 3.4 box lat
# Niño 3.4 longitudes: 170°W–120°W
# If lon is 0..359, that's 190..240E; if lon is -180..180, that's -170..-120
lon_bounds_east = (190, 240)
# lon_bounds_west = (-170, -120)

# --------------------
# PREP: drop Feb-29 so we have a 365-day DoY in both datasets
# --------------------
is_feb29 = lambda t: (t.dt.month == 2) & (t.dt.day == 29)

# Climatology → drop Feb 29 and collapse to a 365-value Series keyed by dayofyear
clim365 = ds_clim.sel(time=~is_feb29(ds_clim['time']))
clim_doy = clim365['sst'].groupby('time.dayofyear').first()  # length 365 (DoY = 1..365)

# Real data → select analysis window and drop Feb 29
dsr = ds_real.sel(time=slice(f'{start_year}-01-01', f'{end_year}-12-31'))
dsr = dsr.sel(time=~is_feb29(dsr['time']))

# --------------------
# DAILY ANOMALIES ALIGNED BY DAY-OF-YEAR
# --------------------
# xarray aligns by the group key: subtracts the 365-element clim_doy from each day with matching DoY
sst_anom = dsr['sst'].groupby('time.dayofyear') - clim_doy

# --------------------
# SELECT NIÑO 3.4 REGION AND AREA-WEIGHTED MEAN
# --------------------
# Latitude slice
sst_anom_lat = sst_anom.sel(lat=slice(lat_bounds[0], lat_bounds[1]))

# Longitude slice depending on grid
if float(sst_anom_lat['lon'].max()) > 180:
    # 0..360 grid
    sst_box = sst_anom_lat.sel(lon=slice(lon_bounds_east[0], lon_bounds_east[1]))
else:
    # -180..180 grid
    sst_box = sst_anom_lat.sel(lon=slice(lon_bounds_west[0], lon_bounds_west[1]))

# Cosine-latitude weights
weights = np.cos(np.deg2rad(sst_box['lat']))
n34_daily = sst_box.weighted(weights).mean(dim=('lat', 'lon'))  # DataArray (time)

# --------------------
# MONTHLY MEANS + 3-MONTH CENTERED MEAN (ONI-STYLE)
# --------------------
n34_monthly = n34_daily.resample(time='MS').mean()                 # monthly anomaly (°C)
n34_3mo     = n34_monthly.rolling(time=3, center=True).mean()      # 3-month centered mean

# --------------------
# Tidy outputs (optional)
# --------------------
n34_df = xr.Dataset(
    {
        'nino34_monthly': n34_monthly,
        'nino34_3mo': n34_3mo
    }
).to_dataframe()
print(n34_df)

# Example: write CSV
# n34_df.to_csv('nino34_2011_2012.csv')  # uncomment to save

            nino34_monthly  nino34_3mo
time                                  
2011-01-01       -1.693282         NaN
2011-02-01       -1.207254   -1.305232
2011-03-01       -1.015161   -0.962079
2011-04-01       -0.663821   -0.711862
2011-05-01       -0.456603   -0.478725
2011-06-01       -0.315751   -0.396849
2011-07-01       -0.418192   -0.484423
2011-08-01       -0.719325   -0.640911
2011-09-01       -0.785216   -0.845373
2011-10-01       -1.031579   -1.030585
2011-11-01       -1.274959   -1.132185
2011-12-01       -1.090017   -1.146266
2012-01-01       -1.073821   -0.971754
2012-02-01       -0.751422   -0.813999
2012-03-01       -0.616754   -0.588702
2012-04-01       -0.397929   -0.398133
2012-05-01       -0.179717   -0.138297
2012-06-01        0.162754    0.117079
2012-07-01        0.368200    0.401885
2012-08-01        0.674701    0.490233
2012-09-01        0.427797    0.445431
2012-10-01        0.233794    0.284517
2012-11-01        0.191959    0.086996
2012-12-01       -0.16476

In [12]:
# --- robust plotting that works in headless/HPC and notebooks ---
import matplotlib
import matplotlib.pyplot as plt

# Force a non-interactive backend (safe on clusters/headless)
# If you're in Jupyter it'll still save the PNG; showing inline depends on the kernel.
matplotlib.use("Agg", force=True)

fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(n34_monthly['time'].values, n34_monthly.values,
        label='Monthly Niño 3.4 anomaly (°C)', linewidth=1.5, alpha=0.85)
ax.plot(n34_3mo['time'].values, n34_3mo.values,
        label='3-mo centered mean', linewidth=2)
ax.axhline(0, linewidth=0.8)
ax.set_title('Niño 3.4 SST anomaly — 2011–2012')
ax.set_xlabel('Time'); ax.set_ylabel('°C'); ax.legend()
fig.tight_layout()

# Always save to file (reliable everywhere)
out_png = 'nino34_2011_2012.png'
fig.savefig(out_png, dpi=150, bbox_inches='tight')
print(f"Saved plot to {out_png}")

Saved plot to nino34_2011_2012.png
