In [1]:
import numpy as np
import pandas as pd
import xarray as xr
import glob
from matplotlib import pyplot as plt




In [2]:
def compute_climatology(datasettemp, window_length=30, smoothing_window=60,
                        smoothing_min_periods=29):
    """Compute a rolling day-of-year climatology (mean and std) of ``datasettemp.Z``.

    For every calendar year present on the ``time`` coordinate, the samples
    whose year lies in ``[year - window_length // 2, year + window_length // 2]``
    (both ends inclusive, so 31 calendar years for the default of 30; fewer
    near the ends of the record, where the slice simply returns what exists)
    are grouped by day of year. The per-day mean and standard deviation are
    written to every time step of that year with the matching day of year.
    Both fields are finally smoothed with a centered rolling mean along
    ``time``.

    Parameters
    ----------
    datasettemp : xarray.Dataset
        Dataset with a variable ``Z`` that has a ``time`` dimension whose
        values can be converted to a ``pandas.DatetimeIndex``.
    window_length : int, optional
        Nominal window length in years; the window spans
        ``window_length // 2`` years on each side of the target year.
    smoothing_window : int, optional
        Number of time steps in the final centered rolling mean.
    smoothing_min_periods : int, optional
        Minimum number of valid steps required by the rolling mean.

    Returns
    -------
    (xarray.Dataset, xarray.Dataset)
        Smoothed climatological mean and smoothed climatological standard
        deviation. In BOTH datasets the variable is named ``Z_climo``.
    """
    data = datasettemp.Z

    def _nan_like_data():
        # Use the DataArray's own dims/coords. The Dataset-level ``dims`` is a
        # mapping whose key order is not guaranteed to match Z's axis order,
        # and Dataset-level coords may include coordinates that do not belong
        # to Z.
        return xr.DataArray(
            np.full(data.shape, np.nan, dtype=data.dtype),
            coords=data.coords,
            dims=data.dims,
        )

    climatology_rolling = _nan_like_data()
    climatology_rolling_std = _nan_like_data()

    # Calendar information for every time step, computed once up front.
    time_index = pd.DatetimeIndex(data['time'].values)
    years = np.asarray(time_index.year)
    doys = np.asarray(time_index.dayofyear)
    half_window = window_length // 2

    for year in np.unique(years):
        # Multi-year window centered on the current year.
        start_year = year - half_window
        end_year = year + half_window
        window_data = data.sel(time=slice(f'{start_year}-01-01', f'{end_year}-12-31'))

        # Day-of-year statistics over the window; the grouped dimension is
        # named 'dayofyear'.
        grouped = window_data.groupby('time.dayofyear')
        climatology_doy = grouped.mean(dim='time')
        climatology_doy_std = grouped.std(dim='time')

        # Only visit day-of-year values that actually occur in this year.
        # The current year is always part of its own window, so each of them
        # is guaranteed to exist in the grouped statistics.
        in_year = years == year
        for doy in np.unique(doys[in_year]):
            positions = np.flatnonzero(in_year & (doys == doy))
            target_times = data['time'].isel(time=positions)
            climatology_rolling.loc[{'time': target_times}] = (
                climatology_doy.sel(dayofyear=doy)
            )
            climatology_rolling_std.loc[{'time': target_times}] = (
                climatology_doy_std.sel(dayofyear=doy)
            )

    def _smooth(field):
        # Centered rolling mean along time, packaged as a Dataset whose
        # variable is called 'Z_climo' (callers read that name).
        smoothed = field.rolling(
            time=smoothing_window,
            center=True,
            min_periods=smoothing_min_periods,
        ).mean()
        return smoothed.to_dataset(name='Z_climo')

    return _smooth(climatology_rolling), _smooth(climatology_rolling_std)

def fourierfilter(dataarray, cutoff_period=10, sample_spacing=1):
    """Low-pass filter along the first axis by zeroing short-period FFT components.

    The data are transformed with an FFT along axis 0, every component whose
    period is strictly shorter than ``cutoff_period`` is set to zero, and the
    result is transformed back. Components with period greater than or equal
    to ``cutoff_period`` (including the mean, whose period is infinite) are
    kept unchanged.

    Parameters
    ----------
    dataarray : xarray.DataArray or array-like
        Input with the axis to be filtered first (axis 0). Any number of
        trailing dimensions is accepted.
    cutoff_period : float, optional
        Shortest period to keep, in the same units as ``sample_spacing``.
    sample_spacing : float, optional
        Spacing between consecutive samples along axis 0 (1 = one unit per
        sample, e.g. one day for daily data).

    Returns
    -------
    xarray.DataArray or numpy.ndarray
        Filtered real-valued data with the same shape as the input. If the
        input exposes ``dims`` (an xarray DataArray), a DataArray with the
        same dims, coords and attrs is returned; otherwise a plain ndarray.
    """
    values = np.asarray(dataarray)

    # Forward transform along the first axis and the matching frequencies.
    spectrum = np.fft.fft(values, axis=0)
    freqs = np.fft.fftfreq(values.shape[0], d=sample_spacing)

    # The zero-frequency bin has an infinite period; silence the expected
    # divide-by-zero warning instead of letting it reach the notebook output.
    with np.errstate(divide='ignore'):
        periods = np.abs(1 / freqs)

    # Components faster than the cutoff are discarded (low-pass behaviour).
    discard_mask = periods < cutoff_period
    spectrum[discard_mask, ...] = 0

    # Back to the time domain; the imaginary part is numerical noise only
    # when the input is real.
    filtered_data = np.fft.ifft(spectrum, axis=0).real

    if not hasattr(dataarray, 'dims'):
        # Plain array input: nothing to re-label.
        return filtered_data

    return xr.DataArray(
        filtered_data,
        dims=dataarray.dims,
        coords=dataarray.coords,
        attrs=dataarray.attrs,
    )

def compute_anoms_experiment_complete(name_reanalysis):
    """Compute, filter and save standardized anomalies for one experiment.

    NOTE(review): the ``name_reanalysis`` parameter is never read. The
    experiment is instead picked from the module-level names
    ``unique_names_experiments`` and ``id_experiment``.

    NOTE(review): this function also relies on ``names_experiments_all``,
    ``filenames``, ``extractz500_several_files`` and ``path_outputs_anoms``.
    None of these names is defined in the part of the notebook visible here,
    so calling the function on a fresh kernel presumably raises ``NameError``
    -- confirm whether it was carried over from another notebook and is still
    needed.

    The output is written to
    ``f'{path_outputs_anoms}anoms_{name_experiment}.nc'`` with the variable
    named ``Z_anoms``. Nothing is returned.
    """
    # Experiment name comes from globals, not from the function argument.
    name_experiment = unique_names_experiments[id_experiment]
    print(f'Started {name_experiment}')
    # Positions of all files whose experiment label matches the selected one.
    where_files = np.where(names_experiments_all==name_experiment)[0]
    files_temp = filenames[where_files]
    dataset_temp = extractz500_several_files(files_temp)

    climatology_temp, climatology_std_temp = compute_climatology(dataset_temp)

    # NOTE(review): compute_climatology reads ``datasettemp.Z`` while this line
    # reads ``dataset_temp.Z500`` -- the dataset would need both variables for
    # this to work; verify the variable name produced by the loader.
    anoms = (dataset_temp.Z500 - climatology_temp.Z_climo)/climatology_std_temp.Z_climo
    # Drop the references to the climatology datasets before filtering.
    del(climatology_temp)
    del(climatology_std_temp)
    filtered_anoms = fourierfilter(anoms)

    filtered_anoms = filtered_anoms.to_dataset(name='Z_anoms')
    filtered_anoms.to_netcdf(f'{path_outputs_anoms}anoms_{name_experiment}.nc')
    print(f'Experiment {name_experiment} complete')

# ERA5 Example

In [11]:
# Worked example for a single reanalysis (ERA5): standardized, filtered
# anomalies of Z are written next to the input file.
name_reanalysis = 'ERA5'
path_origins = '/glade/derecho/scratch/jhayron/Data4WRsClimateChange/ProcessedDataReanalyses/'
path_output_anoms = f'{path_origins}Z500Anoms_{name_reanalysis}.nc'

dataset = xr.open_dataset(f'{path_origins}Z500_{name_reanalysis}.nc')

# Rolling day-of-year mean and standard deviation (both stored as 'Z_climo').
climatology_temp, climatology_std_temp = compute_climatology(dataset)

# Standardize, filter along time, and store under the name 'Z_anoms'.
anoms = (dataset.Z - climatology_temp.Z_climo) / climatology_std_temp.Z_climo
filtered_anoms = fourierfilter(anoms).to_dataset(name='Z_anoms')
filtered_anoms.to_netcdf(path_output_anoms)

# All the others

In [3]:
def compute_anoms_reanalysis(
    name_reanalysis,
    path_origins='/glade/derecho/scratch/jhayron/Data4WRsClimateChange/ProcessedDataReanalyses/',
):
    """Compute and save standardized, filtered anomalies of ``Z`` for one reanalysis.

    Reads ``{path_origins}Z500_{name_reanalysis}.nc``, subtracts the rolling
    climatological mean and divides by the rolling climatological standard
    deviation returned by ``compute_climatology``, passes the result through
    ``fourierfilter`` and writes it to
    ``{path_origins}Z500Anoms_{name_reanalysis}.nc`` under the variable name
    ``Z_anoms``.

    Parameters
    ----------
    name_reanalysis : str
        Label used in the input and output file names (e.g. ``'ERA5'``).
    path_origins : str, optional
        Directory prefix, including the trailing separator, holding the input
        file and receiving the output file.

    Returns
    -------
    None
    """
    path_input = f'{path_origins}Z500_{name_reanalysis}.nc'
    path_output_anoms = f'{path_origins}Z500Anoms_{name_reanalysis}.nc'

    # The context manager closes the input file handle even if a later step
    # fails; everything that reads from the dataset stays inside the block.
    with xr.open_dataset(path_input) as dataset:
        climatology_temp, climatology_std_temp = compute_climatology(dataset)
        anoms = (dataset.Z - climatology_temp.Z_climo) / climatology_std_temp.Z_climo
        filtered_anoms = fourierfilter(anoms).to_dataset(name='Z_anoms')
        filtered_anoms.to_netcdf(path_output_anoms)

In [26]:
# Reads Z500_JRA3Q.nc and writes Z500Anoms_JRA3Q.nc in the reanalysis directory.
compute_anoms_reanalysis('JRA3Q')

  periods = np.abs(1 / freqs)


In [4]:
# Reads Z500_MERRA2.nc and writes Z500Anoms_MERRA2.nc in the reanalysis directory.
compute_anoms_reanalysis('MERRA2')

  periods = np.abs(1 / freqs)


In [5]:
# Reads Z500_NCEP_NCAR.nc and writes Z500Anoms_NCEP_NCAR.nc in the reanalysis directory.
compute_anoms_reanalysis('NCEP_NCAR')

  periods = np.abs(1 / freqs)


In [6]:
# Reads Z500_NCEP_DOE.nc and writes Z500Anoms_NCEP_DOE.nc in the reanalysis directory.
compute_anoms_reanalysis('NCEP_DOE')

  periods = np.abs(1 / freqs)
