In [3]:
import numpy as np
import os
import xarray as xr
import glob
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.colors import BoundaryNorm
import matplotlib.colors as mcolors

In [4]:
# Root directory; each variable's NetCDF files live in a sub-folder below it
base_path = '/data/rong4/Data/ERA5/3hourly/quvw_US'

# Years of interest (1979 through 2018 inclusive), kept as strings so they
# can be matched against file paths
years = list(map(str, range(1979, 2019)))

def get_files(folder, component, root=None, wanted_years=None):
    """Return the NetCDF files of one variable that belong to the wanted years.

    Parameters
    ----------
    folder : str
        Sub-directory (relative to the root directory) that holds the files.
    component : str
        Variable name embedded in the file names; files are looked up with
        the pattern ``era5.<component>.*.nc``.
    root : str, optional
        Directory containing ``folder``. Defaults to the module-level
        ``base_path``.
    wanted_years : iterable of str, optional
        Year strings to keep. Defaults to the module-level ``years``.

    Returns
    -------
    list of str
        Sorted paths whose file name contains at least one wanted year.
    """
    if root is None:
        root = base_path
    if wanted_years is None:
        wanted_years = years
    wanted_years = list(wanted_years)

    pattern = os.path.join(root, folder, f"era5.{component}.*.nc")

    # Test the year against the file name only: matching against the whole
    # path would let a year-like substring in a directory name select every
    # file. Sorting makes the result independent of directory listing order.
    return sorted(
        path
        for path in glob.glob(pattern)
        if any(year in os.path.basename(path) for year in wanted_years)
    )

# Collect the files of each variable (zonal wind, meridional wind, specific humidity)
u_files = get_files('u_component_of_wind', 'u_component_of_wind')
v_files = get_files('v_component_of_wind', 'v_component_of_wind')
q_files = get_files('specific_humidity', 'specific_humidity')

all_files = u_files + v_files + q_files

# Open every file as one dataset, combined along its coordinates.
# Opened exactly once: scanning all files is slow, so do not repeat this call.
era5 = xr.open_mfdataset(all_files, combine='by_coords')

# Converting to central time: shift the time axis by -6 h (fixed offset, no DST handling)
era5['time'] = era5['time'] - pd.Timedelta(hours=6)

# Regional subset. The latitude bounds are given north-to-south (39 -> 30),
# which presumes the latitude coordinate is stored in descending order -- TODO confirm.
era5sgp = era5.sel(latitude=slice(39.0, 30.0), longitude=slice(-105.0, -95.0))

KeyboardInterrupt: 

In [None]:
## time subsets for fig 2c) and 2f) 
# (the composites built here are consumed by the "Figure 6 c) & f) reproduction" cell below)

# Calendar year 2011 and the full 1979-2018 period used as climatology
era2011 = era5sgp.sel(time=slice('2011-01-01', '2011-12-31'))
eraclim = era5sgp.sel(time=slice('1979-01-01', '2018-12-31'))

# The time axis was shifted by -6 h when the data was loaded, so the hour
# filters below are in local time:
# daytime composites -> averages over 18 UTC (12 LST) and 0 UTC (18 LST)
# nighttime composites -> averages over 6 UTC (0 LST) and 12 UTC (6 LST)
NIGHT_HOURS = [0, 6]
DAY_HOURS = [12, 18]

_2011night = era2011.sel(time=era2011['time'].dt.hour.isin(NIGHT_HOURS))
_2011day = era2011.sel(time=era2011['time'].dt.hour.isin(DAY_HOURS))

# One mean field per month of 2011
_2011night_monthly = _2011night.resample(time='ME').mean()
_2011day_monthly = _2011day.resample(time='ME').mean()

# Climatological composites are taken from `eraclim` (the previous code
# referenced an undefined name, `era5clim`, and raised NameError).
_climnight = eraclim.sel(time=eraclim['time'].dt.hour.isin(NIGHT_HOURS))
_climday = eraclim.sel(time=eraclim['time'].dt.hour.isin(DAY_HOURS))

# One mean field per calendar month, averaged over all years
_climnight_monthly = _climnight.groupby('time.month').mean('time')
_climday_monthly = _climday.groupby('time.month').mean('time')

# Composites keyed by the name used when storing and plotting the results
_all_times = {
    '2011_night_monthly': _2011night_monthly,
    '2011_day_monthly': _2011day_monthly,
    'clim_night_monthly': _climnight_monthly,
    'clim_day_monthly': _climday_monthly
}

In [None]:
def moisture_budget_profiles(u, v, q, lat, lon):
    """Horizontal moisture advection on every pressure level of a lat/lon grid.

    Parameters
    ----------
    u, v : array-like, shape (level, lat, lon)
        Zonal and meridional wind.
    q : array-like, shape (level, lat, lon)
        Specific humidity.
    lat, lon : array-like, 1-D
        Latitudes and longitudes in degrees, ordered like the second and
        third axes of the fields. Each needs at least two points.

    Returns
    -------
    zonal_advection, meridional_advection : ndarray, shape (level, lat, lon)
        ``-u * dq/dx`` and ``-v * dq/dy``. Units are 1/s when the winds are in
        m/s and ``q`` is dimensionless (kg/kg).

    Notes
    -----
    On a sphere of radius R the gradient of a scalar is
    ``dq/dx = 1/(R cos(lat)) * dq/dlon`` and ``dq/dy = 1/R * dq/dlat``.
    The ``cos(lat)`` weighting inside the meridional derivative belongs to
    the divergence of a vector field only; applying it to ``q`` makes a
    horizontally uniform humidity field produce non-zero advection.
    """
    R = 6371000  # Radius of Earth (meters)

    # Work in float64 so integer or float32 input does not truncate the derivatives
    u = np.asarray(u, dtype=np.float64)
    v = np.asarray(v, dtype=np.float64)
    q = np.asarray(q, dtype=np.float64)
    lat_rad = np.radians(np.asarray(lat, dtype=np.float64))
    lon_rad = np.radians(np.asarray(lon, dtype=np.float64))

    # cos(latitude) as a (lat, 1) column so it broadcasts over (level, lat, lon)
    cos_lat = np.cos(lat_rad)[:, np.newaxis]

    # np.gradient uses central differences in the interior and one-sided
    # differences at the two edges, for all levels at once. Passing the
    # coordinates gives each direction its own grid spacing.
    dq_dlon = np.gradient(q, lon_rad, axis=-1)
    dq_dlat = np.gradient(q, lat_rad, axis=-2)

    # Convert angular derivatives to derivatives per meter
    dqdx = dq_dlon / (R * cos_lat)
    dqdy = dq_dlat / R

    zonal_advection = -(u * dqdx)
    meridional_advection = -(v * dqdy)

    # Units --> 1/s
    return zonal_advection, meridional_advection



# Vertically integrated horizontal moisture advection and its per-layer contributions
def vertically_integrated_mfc(u_levels, v_levels, q_levels, pressure_levels, latitude, longitude):
    """Integrate zonal and meridional moisture advection over pressure.

    Parameters
    ----------
    u_levels, v_levels, q_levels : array-like, shape (pressure, lat, lon)
        Wind components and specific humidity on pressure levels.
    pressure_levels : array-like, 1-D
        Pressure of each level in Pascals (Pa), typically ordered from top
        to surface. Must have one entry per level of the fields.
    latitude, longitude : array-like, 1-D
        Grid coordinates in degrees, passed on to ``moisture_budget_profiles``.

    Returns
    -------
    zonal_adv_integrated, meridional_adv_integrated : ndarray, shape (lat, lon)
        Column-integrated advection in mm/day.
    zonal_adv_profile, merid_adv_profile : ndarray, shape (pressure - 1, lat, lon)
        Contribution of each layer between adjacent pressure levels, in
        mm/day; summing over the first axis gives the integrated fields.

    Raises
    ------
    ValueError
        If ``pressure_levels`` is not 1-D or its length differs from the
        number of levels in ``u_levels``.
    """
    g = 9.81  # gravitational acceleration in m/s^2
    rho_water = 1000  # kg/m^3
    scale_factor = 1000 * 60 * 60 * 24  # to convert from m/s --> mm/day in final computation

    pressure_levels = np.asarray(pressure_levels, dtype=np.float64)
    n_levels = np.shape(u_levels)[0]
    if pressure_levels.ndim != 1 or pressure_levels.size != n_levels:
        # A mismatched pressure axis would otherwise integrate silently over
        # the wrong layers (or fail later with an opaque IndexError).
        raise ValueError(
            "pressure_levels must be 1-D with one entry per field level "
            f"(got shape {pressure_levels.shape}, fields have {n_levels} levels)"
        )

    zonal_adv_levels, meridional_adv_levels = moisture_budget_profiles(
        u_levels, v_levels, q_levels, latitude, longitude
    )

    # Layer thicknesses (positive when integrating from top to surface)
    dp = np.diff(pressure_levels)

    def _layer_terms(adv_levels):
        # Trapezoidal rule: mean of the two bounding levels times the layer thickness
        adv_levels = np.asarray(adv_levels, dtype=np.float64)
        return 0.5 * (adv_levels[:-1] + adv_levels[1:]) * dp[:, np.newaxis, np.newaxis]

    zonal_layers = _layer_terms(zonal_adv_levels)
    merid_layers = _layer_terms(meridional_adv_levels)

    # Divide by g and the density of water, then rescale to mm/day
    zonal_adv_integrated = zonal_layers.sum(axis=0) * (1/g) * (1/rho_water) * scale_factor
    meridional_adv_integrated = merid_layers.sum(axis=0) * (1/g) * (1/rho_water) * scale_factor

    zonal_adv_profile = zonal_layers * (1/g) * (1/rho_water) * scale_factor
    merid_adv_profile = merid_layers * (1/g) * (1/rho_water) * scale_factor

    # Units --> mm/day
    return zonal_adv_integrated, meridional_adv_integrated, zonal_adv_profile, merid_adv_profile


Figure 6 c) & f) reproduction 

In [None]:
# Domain-mean, vertically integrated advection per month for every composite
results = {}

# Loop through the datasets
for df_name, df in _all_times.items():
    print(df_name)
    # Load each field into memory as a NumPy array and reverse its latitude
    # axis (axis 2); the latitude coordinate is reversed the same way below.
    u = df['u'].load().data[:, :, ::-1, :]
    v = df['v'].load().data[:, :, ::-1, :]
    q = df['q'].load().data[:, :, ::-1, :]
    levels = df['level'].data * 100  # convert to Pa
    latitude = df['latitude'].data[::-1]
    longitude = df['longitude'].data

    zonals = []
    merids = []

    # Loop through each month
    for i in range(u.shape[0]):
        # vertically_integrated_mfc returns four values (two column integrals
        # followed by two per-layer profiles); only the integrals are used here.
        zonal_adv_int, meridional_adv_int, _, _ = vertically_integrated_mfc(
            u[i], v[i], q[i], levels, latitude, longitude
        )
        # Average the column integrals over the whole domain
        zonals.append(zonal_adv_int.mean())
        merids.append(meridional_adv_int.mean())

    # One list of monthly domain means per advection component
    results[df_name] = {
        'zonal': zonals,
        'meridional': merids
    }


In [None]:
# Monthly line plots of the domain-mean advection: one figure for the zonal
# component, one for the meridional component

months = ["Jan", "Feb", "March", "April", "May", "June", "July", "Aug", "Sept", "Oct", "Nov", "Dec"]

# (results key, colour, extra line keywords, legend label) for the four curves
curve_specs = [
    ('2011_night_monthly', 'red', {'linestyle': '--'}, '2011-night'),
    ('2011_day_monthly', 'red', {}, '2011-day'),
    ('clim_night_monthly', 'blue', {'linestyle': '--'}, 'Climatology-night'),
    ('clim_day_monthly', 'blue', {}, 'Climatology-day'),
]

# (component key, figure title, fixed y-limits or None to autoscale)
panel_specs = [
    ('zonal', "Vertically Integrated Zonal Advection", (-5, 2)),
    ('meridional', "Vertically Integrated Meridional Advection", None),
]

for component, panel_title, y_limits in panel_specs:
    fig, ax = plt.subplots(figsize=(8, 4))

    for result_key, colour, line_kwargs, legend_label in curve_specs:
        ax.plot(months, results[result_key][component], color=colour, label=legend_label, **line_kwargs)

    ax.axhline(0, color='grey', linewidth=2)
    ax.set_ylabel("mm/day")
    ax.set_title(panel_title)
    if y_limits is not None:
        ax.set_ylim(*y_limits)
    ax.legend()
    plt.show()

Figure 6 a), b), d) and e) reproduction 

In [None]:
# Keep only the 0, 6, 12 and 18 h time steps of 2011, then average them
# into one field per month
is_composite_hour = era2011['time'].dt.hour.isin([0, 6, 12, 18])
era2011_night_plus_day = era2011.sel(time=is_composite_hour)
era2011monthly = era2011_night_plus_day.resample(time='ME').mean()

# Index that reverses axis 2 (latitude) of a 4-D field and keeps the others
flip_lat = (slice(None), slice(None), slice(None, None, -1), slice(None))

# Load the fields into memory with the latitude axis reversed
u_2011 = era2011monthly['u'].load().data[flip_lat]
v_2011 = era2011monthly['v'].load().data[flip_lat]
q_2011 = era2011monthly['q'].load().data[flip_lat]

# Coordinates: levels scaled by 100, latitude reversed to match the fields
levels = era2011monthly['level'].data * 100
latitude = era2011monthly['latitude'].data[::-1]
longitude = era2011monthly['longitude'].data

In [None]:
# Per-layer advection contributions for every month of 2011
zonal_profile, merid_profile = [], []

for month_idx, u_month in enumerate(u_2011):
    # The first two return values (column integrals) are not needed here
    _, _, _2011_zonal_adv, _2011_merid_adv = vertically_integrated_mfc(
        u_month, v_2011[month_idx], q_2011[month_idx], levels, latitude, longitude
    )
    zonal_profile.append(_2011_zonal_adv)
    merid_profile.append(_2011_merid_adv)

In [None]:
# Average each month's per-layer field over its two horizontal axes (1 and 2),
# leaving one vertical profile per month
all_months_zonal_profiles = [np.mean(month, axis=(1, 2)) for month in zonal_profile]
all_months_merid_profiles = [np.mean(month, axis=(1, 2)) for month in merid_profile]

Figure 6b) and e)

In [None]:
advection_profiles = [all_months_zonal_profiles, all_months_merid_profiles]
titles = ['Zonal', 'Meridional']

# Tick labels for the vertical axis: all levels except the first two
pressure_levels = era2011monthly['level'].data[2:]

# Colour-bin edges. Deliberately not named `levels`: the pressure-level array
# used by the integration cells has that name, and overwriting it here would
# break a re-run of those cells.
color_levels = np.array([-0.5, -0.2, -0.1, -0.05, -0.01, 0.01, 0.05, 0.1, 0.2, 0.5])

# Create a custom colormap with a white segment between -0.01 and 0.01
colors = ['#67001f', '#b2182b', '#d6604d', '#f4a582', 'white', 'white', '#92c5de', '#4393c3', '#2166ac', '#053061']
cmap = mcolors.LinearSegmentedColormap.from_list("custom_coolwarm", colors, N=len(color_levels)-1)

# Normalize the data based on the defined bin edges
norm = mcolors.BoundaryNorm(color_levels, ncolors=len(color_levels)-1, clip=True)

for i, arr in enumerate(advection_profiles):  # Use enumerate to get both index and array

    # Drop the first two layers of every monthly profile
    # (original note: "subsetting only for pressure levels up to 100" -- TODO confirm)
    hovmoller_data = np.array(arr)[:, 2:]
    print(np.min(hovmoller_data), np.max(hovmoller_data))

    plt.figure(figsize=(10, 5))  # width, height

    # Transposed so months run along x and layers along y
    plt.imshow(hovmoller_data.T, aspect='auto', origin='lower', cmap=cmap, norm=norm, interpolation='bilinear')

    plt.ylabel('hPa')

    plt.xticks(np.arange(12), ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])

    # One tick per label, whatever the number of levels in the dataset.
    # NOTE(review): each profile entry is a layer between two adjacent levels,
    # so there is one more label than data rows -- confirm the intended labelling.
    plt.yticks(np.arange(len(pressure_levels)), pressure_levels)

    plt.gca().invert_yaxis()

    cbar = plt.colorbar(label='mm/day', ticks=color_levels)
    cbar.ax.set_yticklabels([f'{level:.2f}' for level in color_levels])

    # Show the plot with the correct title
    plt.title('2011 {} Advection'.format(titles[i]))
    plt.show()


In [None]:
# Climatological mean field for each calendar month over the whole period
clim = eraclim.groupby('time.month').mean('time')

# Index that reverses axis 2 (latitude) of a 4-D field and keeps the others
flip_lat = (slice(None), slice(None), slice(None, None, -1), slice(None))

# Load the fields into memory with the latitude axis reversed
u_clim = clim['u'].load().data[flip_lat]
v_clim = clim['v'].load().data[flip_lat]
q_clim = clim['q'].load().data[flip_lat]

# Coordinates: levels scaled by 100, latitude reversed to match the fields
levels = clim['level'].data * 100
latitude = clim['latitude'].data[::-1]
longitude = clim['longitude'].data

In [None]:
# Per-layer advection contributions for every climatological month
zonal_profile_clim, merid_profile_clim = [], []

for month_idx, u_month in enumerate(u_clim):
    # The first two return values (column integrals) are not needed here
    _, _, clim_zonal_adv, clim_merid_adv = vertically_integrated_mfc(
        u_month, v_clim[month_idx], q_clim[month_idx], levels, latitude, longitude
    )
    zonal_profile_clim.append(clim_zonal_adv)
    merid_profile_clim.append(clim_merid_adv)

In [None]:
# Average each month's per-layer field over its two horizontal axes (1 and 2),
# leaving one vertical profile per climatological month
all_months_zonal_profiles_clim = [np.mean(month, axis=(1, 2)) for month in zonal_profile_clim]
all_months_merid_profiles_clim = [np.mean(month, axis=(1, 2)) for month in merid_profile_clim]

Figure 6a) and d)

In [None]:
advection_profiles = [all_months_zonal_profiles_clim, all_months_merid_profiles_clim]
titles = ['Zonal', 'Meridional']

# Tick labels for the vertical axis: all levels except the first two
pressure_levels = clim['level'].data[2:]

# Colour-bin edges. Deliberately not named `levels`: the pressure-level array
# used by the integration cells has that name, and overwriting it here would
# break a re-run of those cells.
color_levels = np.array([-0.3, -0.2, -0.1, -0.05, -0.01, 0.01, 0.05, 0.1, 0.2, 0.4])

# Create a custom colormap with a white segment between -0.01 and 0.01
colors = ['#67001f', '#b2182b', '#d6604d', '#f4a582', 'white', 'white', '#92c5de', '#4393c3', '#2166ac', '#053061']
cmap = mcolors.LinearSegmentedColormap.from_list("custom_coolwarm", colors, N=len(color_levels)-1)

# Normalize the data based on the defined bin edges
norm = mcolors.BoundaryNorm(color_levels, ncolors=len(color_levels)-1, clip=True)

for i, arr in enumerate(advection_profiles):  # Use enumerate to get both index and array

    # Drop the first two layers of every monthly profile
    hovmoller_data = np.array(arr)[:, 2:]
    print(np.min(hovmoller_data), np.max(hovmoller_data))

    plt.figure(figsize=(10, 5))  # width, height

    # Transposed so months run along x and layers along y
    plt.imshow(hovmoller_data.T, aspect='auto', origin='lower', cmap=cmap, norm=norm, interpolation='bilinear')

    plt.ylabel('hPa')

    plt.xticks(np.arange(12), ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])

    # One tick per label, whatever the number of levels in the dataset.
    # NOTE(review): each profile entry is a layer between two adjacent levels,
    # so there is one more label than data rows -- confirm the intended labelling.
    plt.yticks(np.arange(len(pressure_levels)), pressure_levels)

    plt.gca().invert_yaxis()

    cbar = plt.colorbar(label='mm/day', ticks=color_levels)
    cbar.ax.set_yticklabels([f'{level:.2f}' for level in color_levels])

    # Title intentionally left disabled, as in the original cell
#     plt.title('Hovmöller Diagram of Climatology {} Advection'.format(titles[i]))
    plt.show()


In [None]:
# mfcs = []
# advs = []
# dyns = []

# results = {}

# for df_name, df in _all_times.items():
#     identifier = df_name
#     u = df['u'].load().data[:,:,::-1,:] # m/s
#     v = df['v'].load().data[:,:,::-1,:] # m/s
#     q = df['q'].load().data[:,:,::-1,:] # kg/kg
#     levels = monthly_means['level'].load().data * 100 # convert to Pa
#     latitude = monthly_means['latitude'].load().data[::-1] # deg
#     longitude = monthly_means['longitude'].load().data # deg
    
#     zonals = []
#     merids = []
    
#     for i in range(u.shape[0]):   
#         zonal_adv_int, meridional_adv_int = vertically_integrated_mfc(u[i], v[i], q[i], levels, latitude, longitude)
#         zonals.append(np.mean(zonal_adv_int))
#         merids.append(np.mean(meridional_adv_int))
        
#     results[identifier] = {
#         'zonal': np.array(zonals),
#         'meridional': np.array(merids)
#     }


In [None]:
# # selecting a single time dimension for now 
# time = 100044
# print(era5sgp['time'].data[time])
# u_levels = era5sgp['u'][time].load().data[:,::-1,:]
# v_levels = era5sgp['v'][time].load().data[:,::-1,:]
# q_levels = era5sgp['q'][time].load().data[:,::-1,:]
# levels = era5sgp['level'].load().data
# latitude = era5sgp['latitude'].load().data[::-1]
# longitude = era5sgp['longitude'].load().data

# # Calculate vertically integrated MFC, dynamical convergence, and moisture advection
# mfc, dyn, adv = vertically_integrated_mfc(u_levels, v_levels, q_levels, levels, latitude, longitude)

# plt.hist(mfc)
# print(np.max(mfc))
# print(np.max(dyn))
# print(np.max(adv))
# plt.show()

In [None]:
# # manually checking at grid point (30,-105) -(v * dqdy)

# v = monthly_means['v'].data[:,:,::-1,:]

# q = monthly_means['q'].data[:,::-1,:]

# v = v[0][0][0][0]

# R = 6371000

# cos_factor = 1/(R*np.cos(np.radians(30)))

# dist = 0.00436

# dqdy = ((np.cos(np.radians(lats[1])))*q[0][1][0] - (np.cos(np.radians(lats[0])))*q[0][0][0])/dist

# adv = -(cos_factor)*(v*dqdy)

# q[0][1][0]