In [1]:
import numpy as np
import xarray as xr
import pandas as pd
from metpy.units import units
import metpy.constants as mpconst
#import metpy.calc as mpcalc
from glob import glob
import gc
import holoviews as hv
import hvplot.xarray
import cartopy.crs as ccrs
import geoviews as gv
import matplotlib.ticker as tkr
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
hv.extension('bokeh')
#%matplotlib inline

### Column integrate a variable that has nan values

In [2]:
def mass_weighted_vertical_integral_w_nan(variable_to_integrate, pressure_model_level_midpoint_Pa, pressure_model_level_interface_Pa, max_pressure_integral_array_Pa, min_pressure_integral_array_Pa):
    
    # Accepts both integers and arrays as min and max pressure limits
    
    # Define constants
    
    g = 9.8 # [m s^-2]
    
    # Set all model interfaces less than minimum pressure equal to minimum pressure, and more than maximum pressure to maximum pressure. This way, when you calculate "dp", these layers will not have mass.
    
    pressure_model_level_interface_Pa = pressure_model_level_interface_Pa.where(pressure_model_level_interface_Pa < max_pressure_integral_array_Pa, other = max_pressure_integral_array_Pa)
    pressure_model_level_interface_Pa = pressure_model_level_interface_Pa.where(pressure_model_level_interface_Pa > min_pressure_integral_array_Pa, other = min_pressure_integral_array_Pa)

    # Calculate delta pressure for each model level
    
    dp = pressure_model_level_midpoint_Pa.copy()
    dp.values = xr.DataArray(pressure_model_level_interface_Pa.isel(ilev = slice(1, len(pressure_model_level_interface_Pa.ilev))).values - pressure_model_level_interface_Pa.isel(ilev = slice(0, -1)).values) # Slice indexing is (inclusive start, exclusive stop)
    
    # Set dp = nan at levels missing data so mass of those levels not included in calculation of dp_total
    
    dp = dp.where(~xr.ufuncs.isnan(variable_to_integrate), drop=False, other=np.nan)

    # Mass weight each layer
    
    ci_variable = variable_to_integrate * dp / g
    
    # Integrate over levels
    
    ci_variable = ci_variable.sum('lev', min_count=1)
    dp_total = dp.sum('lev', min_count=1)
    
    # Set ci_variable to nan wherever dp_total is zero or nan
    
    ci_variable = ci_variable.where(~(dp_total==0), drop = False, other=np.nan)
    ci_variable = ci_variable.where(~xr.ufuncs.isnan(dp_total), drop = False, other=np.nan)
    
    # Calculate mass weighted vertical average over layer integrated over
    
    mwa_variable = ci_variable * g / dp_total
    
    return ci_variable, dp_total, mwa_variable

In [6]:
def haversine(lat1, lon1, lat2, lon2, R=6371.0):
    lat1, lon1, lat2, lon2 = np.radians([lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat / 2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2)**2
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    distance = R * c  # Earth radius in kilometers
    return distance * 1000  # Convert to meters


# Define input directories and file names
<br>
h0:  monthly mean  (1 time step per monthly file)
<br>
h1:  daily mean  (30 time steps per 30-day file)
<br>
h2:  6-hourly  (120 time steps per 30-day file)
<br>
h3:  3-hourly  (240 time steps per 30-day file)
<br>
h4:  hourly  (240 time steps per 10-day file)

In [3]:
# Years to analyze
start_year = (1980)
end_year = (1980)

################
###.  ERA5.  ###
################

# Atmosphere

input_file_string_specific_humidity = '/Projects/era5_regrid/2p5/shum.2p5.*.nc' # Specific humidity
input_file_string_temperature = '/Projects/era5_regrid/2p5/air.2p5.*.nc' # Temperature
input_file_string_surface_pressure = '/Projects/era5_regrid/2p5/pres.sfc.2p5.*.nc' # Surface Pressure
input_file_string_v_wnd = '/Projects/era5_regrid/2p5/vwnd.2p5.*.nc' # v wind
input_file_string_u_wnd = '/Projects/era5_regrid/2p5/uwnd.2p5.*.nc' # u wind
input_file_string_hgt = '/Projects/era5_regrid/2p5/hgt.2p5.*.nc' # geopotential height
input_file_string_omega = '/Projects/era5_regrid/2p5/omega.2p5.*.nc' #vertical velocity

# Land
input_file_string_land_frac = '/glade/work/bwolding/Datasets/Data_for_Glade/ERAi/land_sea_mask.erai.2p5.nc' # Land Fraction 

### Load in data and calculations

In [42]:
# Define constants

g = 9.8 #[m s^-2]
L = 2.26e6 #[J/kg]
cp = 1005 #[J/kg-K]

#########################################
# Define paths of files we wish to load #
#########################################
    
# glob expands paths with * to a list of files, like the unix shell #

paths_specific_humidity = glob(input_file_string_specific_humidity)
paths_temperature = glob(input_file_string_temperature)
paths_surface_pressure = glob(input_file_string_surface_pressure)
paths_v_wnd = glob(input_file_string_v_wnd)
paths_u_wnd = glob(input_file_string_u_wnd)
paths_hgt = glob(input_file_string_hgt)
paths_omega = glob(input_file_string_omega)


for year in range(start_year, end_year + 1):
        
    print(year)
    
    # Define year strings #
        
    current_year_string = str(year)
            
    # Limit paths #
        
    year_limited_paths_specific_humidity = []
    year_limited_paths_temperature = []
    year_limited_paths_u_wnd = []
    year_limited_paths_v_wnd = []
    year_limited_paths_hgt = []
    year_limited_paths_omega = []
        
    for string in paths_specific_humidity:
                        
        if (current_year_string in string):
                
            year_limited_paths_specific_humidity += [string]
            
    for string in paths_temperature:
                        
        if (current_year_string in string):
                
            year_limited_paths_temperature += [string]
            
    for string in paths_u_wnd:
                        
        if (current_year_string in string):
                
            year_limited_paths_u_wnd += [string]

    for string in paths_v_wnd:
                        
        if (current_year_string in string):
                
            year_limited_paths_v_wnd += [string]

    for string in paths_hgt:
                        
        if (current_year_string in string):
                
            year_limited_paths_hgt += [string]

    for string in paths_omega:
                        
        if (current_year_string in string):
                
            year_limited_paths_omega += [string]




#####################
####  Load Data  ####
#####################

# Data is "lazy loaded", nothing is actually loaded until we "look" at data in some way #

dataset_specific_humidity = xr.open_mfdataset(year_limited_paths_specific_humidity, combine="by_coords")
dataset_temperature = xr.open_mfdataset(year_limited_paths_temperature, combine="by_coords")
dataset_u_wnd = xr.open_mfdataset(year_limited_paths_u_wnd, combine="by_coords")
dataset_v_wnd = xr.open_mfdataset(year_limited_paths_v_wnd, combine="by_coords")
dataset_hgt = xr.open_mfdataset(year_limited_paths_hgt, combine="by_coords")
dataset_omega = xr.open_mfdataset(year_limited_paths_omega, combine="by_coords")

#####################
####  Load Data  ####
#####################
              
# Make data arrays, loading only the year of interest #
full_lat = dataset_temperature['lat']
full_lon = dataset_temperature['lon']


#PS = dataset_surface_pressure['pres'].sel(time = slice(str(year)+'-01-01', str(year)+'-12-31'), lat = slice(15, -15)) # [Pa]
Q = dataset_specific_humidity['shum'].sel(time = slice(str(year)+'-01-01', str(year)+'-12-31'),lat = slice(15, -15), level = slice(70, 1000)) # [Kg/Kg]
T = dataset_temperature['air'].sel(time = slice(str(year)+'-01-01', str(year)+'-12-31'),lat = slice(15, -15), level = slice(70, 1000)) # [K]
U_wnd = dataset_u_wnd['uwnd'].sel(time = slice(str(year)+'-01-01', str(year)+'-12-31'),lat = slice(15, -15), level = slice(70, 1000)) # [K]
V_wnd = dataset_v_wnd['vwnd'].sel(time = slice(str(year)+'-01-01', str(year)+'-12-31'),lat = slice(15, -15), level = slice(70, 1000)) # [K]
HGT = dataset_hgt['hgt'].sel(time = slice(str(year)+'-01-01', str(year)+'-12-31'),lat = slice(15, -15), level = slice(70, 1000)) # [K]
OMEGA = dataset_omega['omega'].sel(time = slice(str(year)+'-01-01', str(year)+'-12-31'),lat = slice(15, -15), level = slice(70, 1000)) # [K]

# Actually load data #

Q.load()
T.load()
U_wnd.load()
V_wnd.load()
HGT.load()
OMEGA.load()




1980


In [43]:
MSE = cp*T + L*Q + g*HGT

MSE

In [44]:
latitudes = MSE['lat'].values
longitudes = MSE['lon'].values

dMSE_dlat = MSE.differentiate('lat') / (5*100000)
dMSE_dlon = MSE.differentiate('lon') / (5*100000)

udMSE_dx = U_wnd*dMSE_dlon
vdMSE_dy = V_wnd*dMSE_dlat

HADV = udMSE_dx + vdMSE_dy

In [45]:
dMSE_dp = MSE.differentiate(coord= 'level')/100

VADV = OMEGA*dMSE_dp

VADV



In [46]:
# Specify the dimension representing pressure levels
pressure_dim = 'level'  # Replace with your dataset's dimension name

# Calculate the pressure spacing
pressure_levels = MSE[pressure_dim].values
pressure_spacing = np.diff(pressure_levels)*100/g  # Calculate the differences between adjacent pressure levels
pressure_spacing = np.insert(pressure_spacing, 0, pressure_spacing[0])  # Add the first spacing back for consistent dimensions

# Broadcast pressure_spacing to match the dimensions of data_variable
pressure_spacing_broadcasted = xr.DataArray(pressure_spacing, dims=pressure_dim)


# Compute the mass-weighted vertical integral
col_MSE = (MSE* pressure_spacing_broadcasted).sum(dim=pressure_dim)
col_HADV = (HADV* pressure_spacing_broadcasted).sum(dim=pressure_dim)
col_VADV = (VADV* pressure_spacing_broadcasted).sum(dim=pressure_dim)

pressure_spacing_broadcasted, col_HADV

#commenting to see what happens



(<xarray.DataArray (level: 28)>
 array([306.12244898, 306.12244898, 255.10204082, 255.10204082,
        255.10204082, 255.10204082, 255.10204082, 255.10204082,
        510.20408163, 510.20408163, 510.20408163, 510.20408163,
        510.20408163, 510.20408163, 510.20408163, 510.20408163,
        510.20408163, 510.20408163, 255.10204082, 255.10204082,
        255.10204082, 255.10204082, 255.10204082, 255.10204082,
        255.10204082, 255.10204082, 255.10204082, 255.10204082])
 Dimensions without coordinates: level,
 <xarray.DataArray (time: 2928, lat: 13, lon: 144)>
 array([[[ 1.92705103e+01,  1.85617379e+01,  1.76468774e+01, ...,
           2.27996492e+00,  1.50200624e+00,  2.42494688e+01],
         [ 7.12486895e+00,  1.07667083e+01,  3.50620849e+01, ...,
           2.85639535e+01,  6.37411305e-02,  1.17396188e+01],
         [ 8.90755057e+01,  4.96179581e+01,  2.41543765e+01, ...,
           2.62351826e+01,  5.10775991e+01,  6.11050492e+01],
         ...,
         [-8.85790585e+00, -1

In [28]:
filepath='/Projects/era5_regrid/2p5/omega.2p5.198001.nc'

test = xr.open_dataset(filepath)
test

MSE