In [27]:
import numpy as np
import xarray as xr
import pandas as pd
#import metpy.calc as mpcalc
from glob import glob
import holoviews as hv
hv.extension('bokeh')
#%matplotlib inline

import matplotlib.pyplot as plt
import matplotlib.colors as colors
from matplotlib.colors import LinearSegmentedColormap

import cartopy.crs as ccrs
import cartopy.feature as cfeature

import sys
sys.path.append('/home/vmaithel')

from tropical_PODs.PODs.POD_utils import limit_files_to_select_years

# Define constants

g = 9.8 #[m s^-2] gravitational acceleration
# NOTE(review): 2.26e6 J/kg is the latent heat of vaporization near 100 degC; values near
# 2.5e6 J/kg are more common for atmospheric MSE budgets — confirm the intended value.
L = 2.26e6 #[J/kg] latent heat of vaporization
cp = 1005 #[J/kg-K] specific heat of dry air at constant pressure
R_e = 6.378e6 #[m] Earth radius
pi = np.pi # full double precision; the previous 22/7 was only good to ~3 significant figures


In [28]:
def load_in_data(input_file, xarray_name, start_year, end_year, lat_lbound, lat_ubound, upper_level, lower_level):
    """Open a multi-file dataset and load one variable, subset in time, latitude and level.

    Parameters
    ----------
    input_file : str
        Glob pattern matching the input files.
    xarray_name : str
        Name of the variable to extract from the dataset.
    start_year, end_year : int
        First and last year to keep (both inclusive).
    lat_lbound, lat_ubound : float
        Bounds of the latitude slice (latitude is sorted ascending first).
    upper_level, lower_level : float
        Bounds of the 'level' slice; only applied when the dataset has a
        'level' dimension.

    Returns
    -------
    dataset : xarray.Dataset
        The full (lazily opened) dataset, sorted by ascending latitude.
    data_var : xarray.DataArray
        The selected variable, loaded into memory.

    Raises
    ------
    FileNotFoundError
        If no file matches ``input_file``.
    """
    # Sort so the file order does not depend on the filesystem's listing order
    paths = sorted(glob(input_file))
    if not paths:
        # Fail early with a clear message instead of an opaque error further down
        raise FileNotFoundError(f"No files match the pattern {input_file!r}")

    year_limited_paths = limit_files_to_select_years(paths, range(start_year, end_year + 1))
    dataset = xr.open_mfdataset(year_limited_paths, combine="by_coords")
    dataset = dataset.sortby('lat', ascending=True)

    # Build the selection once; the level slice only applies to multi-level fields
    selection = dict(
        time=slice(str(start_year) + '-01-01', str(end_year) + '-12-31'),
        lat=slice(lat_lbound, lat_ubound),
    )
    if 'level' in dataset.dims:
        selection['level'] = slice(upper_level, lower_level)

    data_var = dataset[xarray_name].sel(**selection)

    # Pull the subset into memory so later arithmetic does not re-read the files
    data_var.load()

    return dataset, data_var


In [29]:
def save_output_monthly_chunks(start_year, end_year, var_data, output_dir, output_name):
    """Write ``var_data`` to one netCDF file per calendar month.

    Parameters
    ----------
    start_year, end_year : int
        First and last year to write (both inclusive).
    var_data : array-like or xarray.DataArray
        Data with a 'time' coordinate; it is wrapped in a DataArray named
        ``output_name`` before writing.
    output_dir : str
        Prefix prepended to every file name (it is concatenated as-is, so it
        must end with a path separator to act as a directory).
    output_name : str
        Variable name and file-name stem.

    Notes
    -----
    Files are named ``<output_dir><output_name>.<YYYY><MM>.nc``. A file is
    written for every month in the range, even if the selection is empty.
    """
    var_data = xr.DataArray(var_data, name = output_name)

    for year in range(start_year, end_year + 1):

        for month in range(1, 13):

            month_stamp = str(month).zfill(2)

            # A partial-date string on both ends of the slice selects the whole
            # calendar month, including every timestamp on its last day. The
            # earlier version stopped at 12:00 on the last day for Jan-Nov and
            # silently dropped any later (sub-daily) samples.
            month_label = str(year) + '-' + month_stamp
            tem = var_data.sel(time = slice(month_label, month_label))

            output_file = output_dir + output_name + '.' + str(year) + month_stamp + '.nc'
            tem.to_netcdf(output_file)

### Column-integrate a variable that contains NaN values

In [30]:
def mass_weighted_vertical_integral_w_nan(variable_to_integrate, pressure_model_level_midpoint_Pa, pressure_model_level_interface_Pa, max_pressure_integral_array_Pa, min_pressure_integral_array_Pa):
    """Mass-weighted vertical integral and average of a variable that may contain NaNs.

    Parameters
    ----------
    variable_to_integrate : xarray.DataArray
        Variable defined on the 'lev' dimension; NaN marks missing levels.
    pressure_model_level_midpoint_Pa : xarray.DataArray
        Pressure at level midpoints; used as the template (shape and
        coordinates) for the layer thickness ``dp``.
    pressure_model_level_interface_Pa : xarray.DataArray
        Pressure at level interfaces, on the 'ilev' dimension (one more entry
        than there are midpoint levels).
    max_pressure_integral_array_Pa, min_pressure_integral_array_Pa : scalar or xarray.DataArray
        Upper and lower pressure limits of the integral. Accepts both scalars
        and arrays.

    Returns
    -------
    ci_variable : xarray.DataArray
        Sum over 'lev' of ``variable * dp / g``; NaN where no layer contributes.
    dp_total : xarray.DataArray
        Total pressure thickness of the layers that contributed.
    mwa_variable : xarray.DataArray
        Mass-weighted vertical average, ``ci_variable * g / dp_total``.
    """
    # Define constants

    g = 9.8 # [m s^-2]

    # Clamp the interfaces to [min, max] pressure. Layers entirely outside the
    # limits then get dp == 0 and carry no mass.

    clipped_interface_Pa = pressure_model_level_interface_Pa.where(pressure_model_level_interface_Pa < max_pressure_integral_array_Pa, other = max_pressure_integral_array_Pa)
    clipped_interface_Pa = clipped_interface_Pa.where(clipped_interface_Pa > min_pressure_integral_array_Pa, other = min_pressure_integral_array_Pa)

    # Calculate delta pressure for each model level: difference between the two
    # interfaces bounding it. Values are assigned positionally onto a copy of
    # the midpoint array so dp carries the 'lev' dimension and coordinates.

    dp = pressure_model_level_midpoint_Pa.copy()
    dp.values = clipped_interface_Pa.isel(ilev = slice(1, None)).values - clipped_interface_Pa.isel(ilev = slice(0, -1)).values

    # Set dp = nan at levels missing data so the mass of those levels is not
    # included in dp_total. np.isnan dispatches to xarray objects, which avoids
    # depending on the xr.ufuncs namespace (absent in many xarray releases).

    dp = dp.where(~np.isnan(variable_to_integrate))

    # Mass weight each layer

    ci_variable = variable_to_integrate * dp / g

    # Integrate over levels (min_count=1: all-NaN columns stay NaN instead of 0)

    ci_variable = ci_variable.sum('lev', min_count=1)
    dp_total = dp.sum('lev', min_count=1)

    # Set ci_variable to nan wherever dp_total is zero or nan

    ci_variable = ci_variable.where(dp_total.notnull() & (dp_total != 0))

    # Calculate mass weighted vertical average over the layer integrated over

    mwa_variable = ci_variable * g / dp_total

    return ci_variable, dp_total, mwa_variable

# Define input directories and file names
<br>
h0:  monthly mean  (1 time step per monthly file)
<br>
h1:  daily mean  (30 time steps per 30-day file)
<br>
h2:  6-hourly  (120 time steps per 30-day file)
<br>
h3:  3-hourly  (240 time steps per 30-day file)
<br>
h4:  hourly  (240 time steps per 10-day file)

In [31]:
# Years to analyze (both inclusive)
start_year = 2005
end_year = 2015

# Latitude band to keep (passed to load_in_data as a latitude slice)
lat_lbound = -15
lat_ubound = 15

# Pressure-level range to keep (passed to load_in_data as a level slice);
# presumably hPa — TODO confirm against the files' 'level' coordinate
upper_level = 100
lower_level = 1000

# Selection arguments shared by every load_in_data call below
selection_args = (start_year, end_year, lat_lbound, lat_ubound, upper_level, lower_level)

################
###.  ERA5.  ###
################

# Every input lives in the same directory; build all glob patterns from it
era5_daily_dir = '/Projects/era5_regrid/2p5_benedict/ERA5_2.5deg_daily/'

# Atmosphere

input_file_string_specific_humidity = era5_daily_dir + 'shum.2p5.*.daily.nc' # Specific humidity
input_file_string_temperature = era5_daily_dir + 'air.2p5.*.daily.nc' # Temperature
input_file_string_surface_pressure = era5_daily_dir + 'pres.sfc.2p5.*.daily.nc' # Surface Pressure
input_file_string_v_wnd = era5_daily_dir + 'vwnd.2p5.*.daily.nc' # v wind
input_file_string_u_wnd = era5_daily_dir + 'uwnd.2p5.*.daily.nc' # u wind
input_file_string_hgt = era5_daily_dir + 'hgt.2p5.*.daily.nc' # geopotential height
input_file_string_omega = era5_daily_dir + 'omega.2p5.*.daily.nc' #vertical velocity

# Surface and top-of-atmosphere fluxes
input_file_string_latent_flux = era5_daily_dir + 'mslhf.2p5.*.daily.nc'
input_file_string_sensible_flux = era5_daily_dir + 'msshf.2p5.*.daily.nc'
input_file_string_longwave_flux_surface = era5_daily_dir + 'msnlwrf.2p5.*.daily.nc'
input_file_string_longwave_flux_top = era5_daily_dir + 'mtnlwrf.2p5.*.daily.nc'
input_file_string_shortwave_flux_surface = era5_daily_dir + 'msnswrf.2p5.*.daily.nc'
input_file_string_shortwave_flux_top = era5_daily_dir + 'mtnswrf.2p5.*.daily.nc'

#Load in data

dataset_specific_humidity, Q = load_in_data(input_file_string_specific_humidity, 'shum', *selection_args)
dataset_temperature, T = load_in_data(input_file_string_temperature, 'air', *selection_args)
dataset_u_wnd, U_wnd = load_in_data(input_file_string_u_wnd, 'uwnd', *selection_args)
dataset_v_wnd, V_wnd = load_in_data(input_file_string_v_wnd, 'vwnd', *selection_args)
dataset_hgt, HGT = load_in_data(input_file_string_hgt, 'hgt', *selection_args)
dataset_omega, OMEGA = load_in_data(input_file_string_omega, 'omega', *selection_args)

dataset_lhf, LHF = load_in_data(input_file_string_latent_flux, 'mslhf', *selection_args)
dataset_shf, SHF = load_in_data(input_file_string_sensible_flux, 'msshf', *selection_args)
dataset_longwave_surf, LW_surf = load_in_data(input_file_string_longwave_flux_surface, 'msnlwrf', *selection_args)
dataset_longwave_top, LW_top = load_in_data(input_file_string_longwave_flux_top, 'mtnlwrf', *selection_args)
dataset_shortwave_surf, SW_surf = load_in_data(input_file_string_shortwave_flux_surface, 'msnswrf', *selection_args)
dataset_shortwave_top, SW_top = load_in_data(input_file_string_shortwave_flux_top, 'mtnswrf', *selection_args)

# Land
#input_file_string_land_frac = '/glade/work/bwolding/Datasets/Data_for_Glade/ERAi/land_sea_mask.erai.2p5.nc' # Land Fraction 

### Load in data and calculations

In [32]:
# Inspect the loaded temperature DataArray (shown as the cell's output)
T

In [33]:
# Moist static energy built from the loaded fields: cp*T + L*Q + g*HGT
# (assumes T in K, Q in kg/kg and HGT in m, giving J/kg — TODO confirm units)
MSE = cp*T + L*Q + g*HGT



In [34]:

# Grid coordinates converted from degrees to radians
latitudes = np.radians(MSE['lat'].values)
longitudes = np.radians(MSE['lon'].values)

print('#1',latitudes)

# 2-D (lat, lon) grids of the coordinates
lon_grid, lat_grid = np.meshgrid(longitudes, latitudes)

print('#2 lat_grid', lat_grid)

# Zonal grid spacing in metres: R_e * cos(lat) * dlon.
# np.gradient returns one array per axis; index 1 is the derivative along lon.
dlon = np.gradient(lon_grid)
dlon_grid = R_e * np.cos(lat_grid) * dlon[1]
print('#3',dlon_grid)


# Meridional grid spacing in metres: R_e * dlat (index 0 is the derivative along lat)
dlat = np.gradient(lat_grid)
dlat_grid = R_e * dlat[0]

print('#4',dlat_grid)


#1 [-0.2617994  -0.21816616 -0.17453292 -0.1308997  -0.08726646 -0.04363323
  0.          0.04363323  0.08726646  0.1308997   0.17453292  0.21816616
  0.2617994 ]
#2 lat_grid [[-0.2617994  -0.2617994  -0.2617994  ... -0.2617994  -0.2617994
  -0.2617994 ]
 [-0.21816616 -0.21816616 -0.21816616 ... -0.21816616 -0.21816616
  -0.21816616]
 [-0.17453292 -0.17453292 -0.17453292 ... -0.17453292 -0.17453292
  -0.17453292]
 ...
 [ 0.17453292  0.17453292  0.17453292 ...  0.17453292  0.17453292
   0.17453292]
 [ 0.21816616  0.21816616  0.21816616 ...  0.21816616  0.21816616
   0.21816616]
 [ 0.2617994   0.2617994   0.2617994  ...  0.2617994   0.2617994
   0.2617994 ]]
#3 [[268810.16 268810.16 268810.2  ... 268810.1  268810.1  268811.56]
 [271696.1  271696.1  271696.12 ... 271696.06 271696.06 271697.53]
 [274064.84 274064.84 274064.84 ... 274064.78 274064.78 274066.28]
 ...
 [274064.84 274064.84 274064.84 ... 274064.78 274064.78 274066.28]
 [271696.1  271696.1  271696.12 ... 271696.06 271696.06 271

In [35]:
# Horizontal gradients of MSE in index space (difference per grid step).
# The axes are looked up by name instead of being hard-coded as 2 and 3.
lat_axis = MSE.get_axis_num('lat')
lon_axis = MSE.get_axis_num('lon')

dMSE_dlat = np.gradient(MSE.values, axis = lat_axis)
dMSE_dlon = np.gradient(MSE.values, axis = lon_axis)

# Grid spacing [m] expanded to the shape of MSE. np.broadcast_to returns a
# read-only view, so no full-size copies are allocated (np.tile made two).
# This relies on lat and lon being the last two dimensions of MSE.
dlat_full = np.broadcast_to(dlat_grid, MSE.shape)
dlon_full = np.broadcast_to(dlon_grid, MSE.shape)

# Convert to gradients per metre
dMSE_dlat = dMSE_dlat/dlat_full
dMSE_dlon = dMSE_dlon/dlon_full

udMSE_dx = -U_wnd*dMSE_dlon #zonal advection
vdMSE_dy = -V_wnd*dMSE_dlat #meridional advection

HADV = udMSE_dx + vdMSE_dy

# Negative signs: the advective tendency is -(u * dMSE/dx) - (v * dMSE/dy)
# NOTE(review): np.gradient uses one-sided differences at the first and last
# longitude; if the longitude axis is global and periodic, a wrapped difference
# would be more accurate there.


In [36]:
# Derivative of MSE along the 'level' coordinate. The /100 presumably converts
# from per-hPa to per-Pa (assumes 'level' is in hPa — TODO confirm).
dMSE_dp = MSE.differentiate(coord= 'level')/100
# Vertical advection term: -OMEGA * dMSE/dp
VADV = -OMEGA*dMSE_dp

#VADV



In [37]:
# Inspect the total horizontal advection term (shown as the cell's output)
HADV

In [38]:
def column_vertical_integral(data_to_integrate):
    """Mass-weighted column integral of a field over its 'level' dimension.

    Each layer between two adjacent levels contributes the mean of the values
    at its two bounding levels multiplied by the layer's ``dp * 100 / g``
    (the factor 100 presumably converts hPa to Pa — TODO confirm the units of
    'level'). ``g`` is read from the module-level constant.

    Parameters
    ----------
    data_to_integrate : xarray.DataArray
        Field with a 'level' dimension and coordinate.

    Returns
    -------
    xarray.DataArray
        The input summed over 'level'; all other dimensions and their
        coordinates are carried over from the input.

    Notes
    -----
    The sign of the result follows the ordering of 'level' (np.diff of the
    coordinate). The integral spans the levels present in the input and does
    not account for surface pressure.
    """
    # Specify the dimension representing pressure levels
    pressure_dim = 'level'  # Replace with your dataset's dimension name

    # Layer thickness expressed as mass per unit area. The input's own level
    # coordinate is used, not the global MSE's, so the function is correct for
    # any field passed in.
    pressure_levels = data_to_integrate[pressure_dim].values
    pressure_spacing = xr.DataArray(np.diff(pressure_levels)*100/g, dims=pressure_dim)

    # Values at the two bounding levels of each layer. The level coordinate is
    # dropped so the two slices combine positionally instead of being aligned
    # by label (which would leave only the overlapping levels).
    layer_top = data_to_integrate.isel({pressure_dim: slice(0, -1)}).drop_vars(pressure_dim, errors='ignore')
    layer_bottom = data_to_integrate.isel({pressure_dim: slice(1, None)}).drop_vars(pressure_dim, errors='ignore')

    # Mean value of the data in each pressure layer
    data_to_integrate_level_mean = (layer_top + layer_bottom)/2

    # Sum the mass-weighted layer means; remaining coordinates are preserved
    # by xarray, so nothing has to be re-attached by hand.
    integrated_data = (data_to_integrate_level_mean*pressure_spacing).sum(dim=pressure_dim)

    return integrated_data


In [39]:

# Compute the mass-weighted vertical integral of MSE and of each advection term

col_MSE = column_vertical_integral(MSE)
col_HADV = column_vertical_integral(HADV)
col_VADV = column_vertical_integral(VADV)
col_HADV_zonal = column_vertical_integral(udMSE_dx)
col_HADV_meridional = column_vertical_integral(vdMSE_dy)




print(col_MSE)
# Sanity check: print the column-integrated MSE to inspect its shape and magnitude



<xarray.DataArray (time: 4017, lat: 13, lon: 144)>
array([[[3.02636798e+09, 3.04080202e+09, 3.05951375e+09, ...,
         3.02007345e+09, 3.02002447e+09, 3.01840631e+09],
        [3.03447367e+09, 3.03994125e+09, 3.05522676e+09, ...,
         3.02473299e+09, 3.02900344e+09, 3.03165549e+09],
        [3.04163749e+09, 3.04203559e+09, 3.04293610e+09, ...,
         3.03731708e+09, 3.03646983e+09, 3.03705129e+09],
        ...,
        [3.01605319e+09, 3.01879974e+09, 3.01815912e+09, ...,
         3.03984477e+09, 3.01988019e+09, 3.01584180e+09],
        [3.00761609e+09, 3.01078241e+09, 3.01448019e+09, ...,
         3.01056569e+09, 3.00798123e+09, 3.00646317e+09],
        [2.99014929e+09, 2.99131678e+09, 2.99340411e+09, ...,
         2.99915080e+09, 2.99430748e+09, 2.98905844e+09]],

       [[3.06918273e+09, 3.07619310e+09, 3.07151037e+09, ...,
         3.02872837e+09, 3.04175451e+09, 3.05645514e+09],
        [3.06986515e+09, 3.07615235e+09, 3.07533437e+09, ...,
         3.03126742e+09, 3.04186

In [40]:
#### Surface and radiative flux calculation

# Total surface turbulent flux: latent plus sensible, with the sign flipped
# (presumably because the input fluxes are defined positive downward — TODO confirm)
surface_flux = - LHF - SHF

# Net radiative flux for the column: top-of-atmosphere net fluxes minus surface net fluxes
net_rad_flux = -LW_surf - SW_surf + SW_top + LW_top

In [41]:
### MSE tendency calculation

# Time derivative of column-integrated MSE; datetime_unit="s" gives a rate per second
MSE_tend = col_MSE.differentiate(coord= 'time',datetime_unit="s")

### Save budget terms in monthly chunks

In [42]:
# Output directory for datasets
odir_datasets = '/home/vmaithel/MSE_budgets/2p5_vijit/'

# Variable / file-name stem for each budget term
name_string_MSE = 'MSE'
name_string_colMSE = 'col_MSE'
name_string_MSE_tend = 'MSE_tend'
name_string_VADV = 'VADV'
name_string_colVADV = 'col_VADV'
name_string_HADV = 'HADV'
name_string_colHADV = 'col_HADV'
name_string_HADVzonal = 'HADV_zonal'
name_string_colHADVzonal = 'col_HADV_zonal'
name_string_HADVmeridional = 'HADV_meridional'
name_string_colHADVmeridional = 'col_HADV_meridional'
name_string_surf_flux = 'SF'
name_string_rad_flux_net = 'RadF_net'

# (data, name) pairs, listed in the order they are written to disk
budget_terms = [
    (MSE, name_string_MSE),
    (col_MSE, name_string_colMSE),
    (MSE_tend, name_string_MSE_tend),
    (VADV, name_string_VADV),
    (col_VADV, name_string_colVADV),
    (HADV, name_string_HADV),
    (col_HADV, name_string_colHADV),
    (udMSE_dx, name_string_HADVzonal),
    (col_HADV_zonal, name_string_colHADVzonal),
    (vdMSE_dy, name_string_HADVmeridional),
    (col_HADV_meridional, name_string_colHADVmeridional),
    (surface_flux, name_string_surf_flux),
    (net_rad_flux, name_string_rad_flux_net),
]

# Write every term as one netCDF file per month
for budget_term, budget_name in budget_terms:
    save_output_monthly_chunks(start_year, end_year, budget_term, odir_datasets, budget_name)


In [44]:
# Mean of MSE over all years for each day of the year
test = MSE.groupby('time.dayofyear').mean(dim='time')

