# Loading modules

In [1]:
import xarray as xr
import numpy as np
import sys
from glob import glob
import gc
import pandas as pd
import matplotlib.pyplot as plt

sys.path.append('../../')
"""
local scripts, if loading from a different directory include that with a '.' between
directory name and script name
"""
from tropical_PODs.PODs.POD_utils import calculate_saturation_specific_humidity
from tropical_PODs.PODs.POD_utils import mass_weighted_vertical_integral_w_nan

# Define input directories and file names

In [2]:
# Years to analyze. Both ends are inclusive: the driver loops over
# range(start_year, end_year + 1).
start_year = 2011
end_year = 2011

# Directory that holds every input file. Change this single value to point the
# notebook at a different copy of the data.
data_dir = '../../tropical_PODs/data/'

#######################
###   Input files   ###
#######################

# Atmosphere (patterns containing * are expanded with glob in the driver cell)

ifile_specific_humidity = data_dir + 'shum.2p5.*.nc' # ERA5 Specific Humidity
ifile_temperature = data_dir + 'air.2p5.*.nc' # ERA5 Temperature
ifile_surface_pressure = data_dir + 'pres.sfc.2p5.*.nc' # ERA5 Surface Pressure
ifile_precipitation = data_dir + '3B-DAY.MS.MRG.3IMERG.V06.*' # IMERG Precipitation

# Land
ifile_land_frac = data_dir + 'land_sea_mask.erai.2p5.nc' # ERAi Land Fraction


# Define output directories and file names

In [3]:
# Directory that receives the NetCDF files written by this notebook (ERA5 2p5_1d).
# The trailing slash matters: file names are appended by plain string concatenation.
odir_datasets = '../../tropical_PODs/examples/ofiles_examples/'

# Driver for calculations

In [4]:
# Constants

g = 9.8 # [m s^-2]

###########################################
# Expand the input patterns to file lists #
###########################################

# glob turns a pattern containing * into the list of matching files, like the
# unix shell does. A pattern that matches nothing yields an empty list.

(
    paths_specific_humidity,
    paths_temperature,
    paths_surface_pressure,
    paths_precipitation,
    paths_land,
) = (
    glob(input_pattern)
    for input_pattern in (
        ifile_specific_humidity,
        ifile_temperature,
        ifile_surface_pressure,
        ifile_precipitation,
        ifile_land_frac,
    )
)
        
def _select_paths_for_year(paths, year_string, label):
    """Return the sorted subset of ``paths`` whose text contains ``year_string``.

    The match is a plain substring test on the whole path, so a year that also
    appears in a directory name will match every file below that directory.

    Parameters
    ----------
    paths : list of str
        Candidate file paths (the result of a glob expansion).
    year_string : str
        Zero-padded, four-character year to look for.
    label : str
        Human-readable variable name, used only in the error message.

    Returns
    -------
    list of str
        Matching paths in sorted order, so that the result does not depend on
        the arbitrary order in which glob returns files.

    Raises
    ------
    FileNotFoundError
        If no path contains ``year_string``.
    """
    selected_paths = sorted(path for path in paths if year_string in path)
    if not selected_paths:
        raise FileNotFoundError('No ' + label + ' files found for year ' + year_string)
    return selected_paths


# The land mask has no year in its name; fail early if the file is missing
if not paths_land:
    raise FileNotFoundError('No land fraction file matches ' + ifile_land_frac)

for year in range(start_year, end_year + 1):

    print(year)

    # Define year strings, zero-padded to four characters #

    previous_year_string = str(year - 1).zfill(4)
    current_year_string = str(year).zfill(4)
    next_year_string = str(year + 1).zfill(4)

    # Limit paths to the files of the current year #

    year_limited_paths_specific_humidity = _select_paths_for_year(paths_specific_humidity, current_year_string, 'specific humidity')
    year_limited_paths_temperature = _select_paths_for_year(paths_temperature, current_year_string, 'temperature')
    year_limited_paths_surface_pressure = _select_paths_for_year(paths_surface_pressure, current_year_string, 'surface pressure')
    year_limited_paths_precipitation = _select_paths_for_year(paths_precipitation, current_year_string, 'precipitation')

    #####################
    ####  Open Data  ####
    #####################

    # Data is "lazy loaded", nothing is actually loaded until we "look" at data in some way #

    dataset_specific_humidity = xr.open_mfdataset(year_limited_paths_specific_humidity, combine="by_coords")
    dataset_temperature = xr.open_mfdataset(year_limited_paths_temperature, combine="by_coords")
    # Only the first matching surface pressure file is opened
    dataset_surface_pressure = xr.open_dataset(year_limited_paths_surface_pressure[0])
    dataset_precipitation = xr.open_mfdataset(year_limited_paths_precipitation, combine="by_coords")
    dataset_land = xr.open_dataset(paths_land[0])

    #######################
    ####  Select Data  ####
    #######################

    # Make data arrays, loading only the year of interest #
    full_lat = dataset_surface_pressure['lat']
    full_lon = dataset_surface_pressure['lon']
    land_sea_mask = dataset_land['land_sea_mask']

    # One day of padding on each side of the year; the NetCDF output step trims it again
    time_window = slice(previous_year_string+'-12-31', next_year_string+'-01-01')

    # The latitude slice runs 10 -> -10 for the ERA5 fields and -10 -> 10 for IMERG
    PS = dataset_surface_pressure['pres'].sel(time = time_window, lat = slice(10, -10)) # [Pa]
    Q = dataset_specific_humidity['shum'].sel(time = time_window, lat = slice(10, -10), level = slice(70, 1000)) # [Kg/Kg]
    T = dataset_temperature['air'].sel(time = time_window, lat = slice(10, -10), level = slice(70, 1000)) # [K]
    precipitation_rate = dataset_precipitation['precipAvg'].sel(time = time_window, lat = slice(-10, 10)) * (24) # Currently [mm/hr]. Convert to [mm/day]

    # Actually load data #
    land_sea_mask.load()
    PS.load()
    Q.load()
    T.load()
    precipitation_rate.load()

    # Clean up environment #

    gc.collect();

    ################################
    ####  Average Data to Daily ####
    ################################

    ###   Test for Averaging Method   ###

    #PS.sel(time=slice('1998-01-01','1998-01-01'),lat=5,lon=75).plot()
    #print(PS.resample(time='1D').mean('time').sel(time=slice('1998-01-01','1998-01-01'),lat=5,lon=75))
    #print(PS.sel(time=slice('1998-01-01','1998-01-01'),lat=5,lon=75).mean('time'))
    #print(PS.resample(time='1D').mean('time').sel(time=slice('1998-01-01','1998-01-01'),lat=5,lon=75).values == PS.sel(time=slice('1998-01-01','1998-01-01'),lat=5,lon=75).mean('time').values)

    ###   Perform Averaging   ###

    PS = PS.resample(time='1D').mean('time')
    Q = Q.resample(time='1D').mean('time')
    T = T.resample(time='1D').mean('time')
    precipitation_rate = precipitation_rate.resample(time='1D').mean('time')

    # IMERG time was saved as CFtime, and we need to convert to datetime for xarray.
    # The conversion is skipped when the index has no to_datetimeindex method
    # (i.e. it is not a CFtime index), so such input does not raise here.
    precipitation_time_index = precipitation_rate.indexes['time']
    if hasattr(precipitation_time_index, 'to_datetimeindex'):
        precipitation_rate['time'] = precipitation_time_index.to_datetimeindex()

    # ### Update time to reflect center of daily average   ###

    # PS = PS.assign_coords({'time':PS['time']+pd.to_timedelta(10.5, unit='H')})
    # Q = Q.assign_coords({'time':Q['time']+pd.to_timedelta(10.5, unit='H')})
    # T = T.assign_coords({'time':T['time']+pd.to_timedelta(10.5, unit='H')})


2011


  precipitation_rate['time'] = precipitation_rate.indexes['time'].to_datetimeindex() # IMERG time was saved as CFtime, and we need to convert to datetime for xarray


In [5]:
# Print the daily-mean temperature and precipitation arrays left behind by the driver cell
for data_array in (T, precipitation_rate):
    print(data_array)

<xarray.DataArray 'air' (time: 365, level: 28, lat: 9, lon: 144)>
array([[[[194.76683, 194.78355, 195.18907, ..., 195.5117 , 195.2788 ,
          194.97557],
         [195.58446, 196.01256, 196.55687, ..., 196.5554 , 196.24864,
          195.91283],
         [197.46617, 197.82664, 197.57896, ..., 197.98375, 197.68016,
          197.22826],
         ...,
         [200.12938, 199.89183, 199.4818 , ..., 200.1964 , 199.81944,
          200.0161 ],
         [199.7307 , 199.79503, 199.67833, ..., 199.14049, 199.01964,
          199.41734],
         [198.60997, 198.56541, 199.06664, ..., 198.09691, 198.15404,
          198.49779]],

        [[195.3222 , 195.3377 , 195.00018, ..., 195.95612, 195.43024,
          195.33478],
         [195.16656, 194.76129, 194.4079 , ..., 195.35431, 195.25311,
          195.27203],
         [194.23444, 193.69586, 193.2829 , ..., 194.13788, 194.25006,
          194.26898],
...
         [294.32788, 293.9131 , 294.1748 , ..., 294.06177, 294.0005 ,
          294.29

In [None]:

    ################################################
    ####  Build "landfrac" from the raw mask    ####
    ################################################
    # NOTE(review): this cell is indented like the body of the "for year" loop;
    # presumably it has to be merged with the driver cell to run - confirm.

    print("Modifying landfrac as needed")

    # Rename the dimensions to the lat/lon names used by the other variables,
    # then name the array itself
    landfrac = land_sea_mask.rename({'Latitude':'lat','Longitude':'lon'}).rename('landfrac')
    print(landfrac)

    # The landfrac variable does not have lat/lon coordinates. Assign those held in
    # full_lat / full_lon and check the printed result to make sure they make sense #
    landfrac = landfrac.assign_coords(lat = full_lat.coords['lat'], lon = full_lon.coords['lon'])

    # transpose() without arguments reverses the order of the dimensions
    landfrac = landfrac.transpose()

    # Clean up environment #

    gc.collect();
    
    ##########################################################
    ####  Standardize names, dimension order and lat order ####
    ##########################################################

    # Every field gets its final name and a fixed dimension order, and is sorted
    # by ascending latitude to match the code for other datasets.

    PS = (
        PS.rename('PS')
        .transpose('time','lat','lon')
        .sortby('lat', ascending=True)
    )

    # Q and T additionally have their vertical dimension renamed from 'level' to 'lev'
    Q = (
        Q.rename({'level':'lev'})
        .rename('Q')
        .transpose('time','lev','lat','lon')
        .sortby('lat', ascending=True)
    )

    T = (
        T.rename({'level':'lev'})
        .rename('T')
        .transpose('time','lev','lat','lon')
        .sortby('lat', ascending=True)
    )

    precipitation_rate = (
        precipitation_rate.rename('precipitation_rate')
        .transpose('time','lat','lon')
        .sortby('lat', ascending=True)
    )

    landfrac = landfrac.sortby('lat', ascending=True)

    # Clean up environment #

    gc.collect();

    #########################################
    ####  Calculate True Model Pressure  ####
    #########################################

    print("Calculating true model pressure")

    # Set upper most interface equal to uppermost level midpoint, and lowest interface equal to surface pressure.
    # This will still permit the desired vertical integral, just choose appropriate upper and lower integration limits

    # Model level midpoint, broadcast to the full (time, lev, lat, lon) shape of Q

    true_pressure_midpoint = Q['lev'] * 100. # To convert to Pa
    true_pressure_midpoint = true_pressure_midpoint.rename('true_pressure_midpoint_Pa')
    true_pressure_midpoint = true_pressure_midpoint.expand_dims({'lat':Q['lat'], 'lon':Q['lon'], 'time':Q['time']})
    true_pressure_midpoint = true_pressure_midpoint.transpose('time','lev','lat','lon')

    # Model level interfaces, built directly in Pa
    #
    # There is one more interface than there are levels:
    #   index 0               -> uppermost level midpoint
    #   index 1 .. n_lev - 1  -> half way points between neighbouring level midpoints
    #   index n_lev           -> surface pressure

    level_midpoints_Pa = Q['lev'].values * 100. # Same factor of 100 as for the midpoints above
    number_of_levels = len(Q.lev)

    true_pressure_interface = np.empty((len(Q.time),len(Q.lat),len(Q.lon),number_of_levels + 1))

    true_pressure_interface[:,:,:,0] = level_midpoints_Pa[0]
    true_pressure_interface[:,:,:,1:number_of_levels] = (level_midpoints_Pa[:-1] + level_midpoints_Pa[1:]) / 2.

    # PS is annotated as [Pa] where it is read from the surface pressure file, so it must
    # not receive the factor of 100 that is applied to the level values. Previously the
    # whole array, surface pressure included, was multiplied by 100.
    # NOTE(review): confirm the units of 'pres' in the input file.
    # The explicit transpose guarantees the (time, lat, lon) order of the target slice.
    true_pressure_interface[:,:,:,number_of_levels] = PS.transpose('time','lat','lon').values

    coords = {'time':Q['time'], 'lat':Q['lat'], 'lon':Q['lon'], 'ilev':np.arange(1,number_of_levels + 2)}
    dims = ['time', 'lat', 'lon', 'ilev']
    true_pressure_interface = xr.DataArray(true_pressure_interface,dims=dims,coords=coords)
    true_pressure_interface.attrs['units'] = 'Pa'

    true_pressure_interface = true_pressure_interface.transpose('time','ilev','lat','lon')

    # Clean up environment #

    gc.collect();
  
    ########################
    ###  Calculate CSF  ####
    ########################

    ####  Saturation specific humidity from pressure and temperature  ####

    print("Calculating saturation specific humidity")

    saturation_specific_humidity = xr.apply_ufunc(
        calculate_saturation_specific_humidity,
        true_pressure_midpoint,
        T,
        output_dtypes=[Q.dtype],
    )

    # Clean up environment #

    gc.collect();

    ####  Column integrate Q and saturation Q between the two limits  ####

    upper_level_integration_limit_Pa = 10000 # [Pa]
    lower_level_integration_limit_Pa = 100000 # [Pa]

    print('Column Integrating')

    # Arguments shared by both integrals
    integration_arguments = (
        true_pressure_midpoint,
        true_pressure_interface,
        lower_level_integration_limit_Pa,
        upper_level_integration_limit_Pa,
    )

    # The helper returns three values; only the first one is used here.
    # For debugging, print/plot ci_q and ci_q_sat (min, max, mean, isel(time = 0).plot()).
    ci_q = mass_weighted_vertical_integral_w_nan(Q, *integration_arguments)[0]
    ci_q_sat = mass_weighted_vertical_integral_w_nan(saturation_specific_humidity, *integration_arguments)[0]

    # CSF is the ratio of the two column integrals
    csf = ci_q / ci_q_sat

    # Quick sanity check: full array, extremes, and a map of the first time step
    print(csf)
    print(csf.min())
    print(csf.max())
    plt.figure()
    csf.isel(time = 0).plot()

    # Name variables #

    csf.name = 'csf'
    csf.attrs['Units'] = '[Kg Kg^-1]'

    # Clean up environment: drop the large intermediates, only csf is needed from here on #

    del Q, T, true_pressure_midpoint, true_pressure_interface, saturation_specific_humidity, ci_q, ci_q_sat, integration_arguments

    gc.collect();
   
    #################################
    ####  Output Data as NetCDF  ####
    #################################

    # Output dataset to NetCDF #

    # Keep only the days of the current year, then write one file per year.
    # The selection is closed before .to_netcdf is called: previously the closing
    # parenthesis of .sel(...) was missing, which is a syntax error and would have
    # chained .to_netcdf onto the slice object instead of the selected array.
    csf_current_year = csf.sel(time = slice(current_year_string+'-01-01', current_year_string+'-12-31'))
    csf_current_year.to_netcdf(odir_datasets + 'initial_test_CSF_' + current_year_string + '.nc')



In [None]:
###############################################
        ####  Limit to Oceanic (<10% Land) Points  ####
        ###############################################
        
        print('Applying Land/Ocean Mask')
        
        # Create ocean mask #

        is_valid_ocean_mask = (landfrac < 0.1)
#         is_valid_ocean_mask_TRMM_2A23 = (landfrac.interp_like(TRMM_2A23_shallow) < 0.1)

        #is_valid_ocean_mask.plot()

        # Apply ocean mask to appropriate variables, setting invalid locations to nan #
        
        precipitation_rate = precipitation_rate.where(is_valid_ocean_mask, other = np.nan)
        
#         rain_MCS = rain_MCS.where(is_valid_ocean_mask, other = np.nan)
#         rain_nonMCS = rain_nonMCS.where(is_valid_ocean_mask, other = np.nan)
#         rain_nonDeep = rain_nonDeep.where(is_valid_ocean_mask, other = np.nan)
        
#         TRMM_2A23_shallow = TRMM_2A23_shallow.where(is_valid_ocean_mask_TRMM_2A23, other = np.nan)
#         TRMM_2A23_conv = TRMM_2A23_conv.where(is_valid_ocean_mask_TRMM_2A23, other = np.nan)
#         TRMM_2A23_strat = TRMM_2A23_strat.where(is_valid_ocean_mask_TRMM_2A23, other = np.nan)

        CAPE_DIB_1000_to_600 = CAPE_DIB_1000_to_600.where(is_valid_ocean_mask, other = np.nan)
        CAPE_DIBDBL_1000_to_600 = CAPE_DIBDBL_1000_to_600.where(is_valid_ocean_mask, other = np.nan)
        CAPE_NOMIX_1000_to_600 = CAPE_NOMIX_1000_to_600.where(is_valid_ocean_mask, other = np.nan)
        
        T_1000_hPa= T_1000_hPa.where(is_valid_ocean_mask, other = np.nan)
        Q_1000_hPa= Q_1000_hPa.where(is_valid_ocean_mask, other = np.nan)
        DSE_1000_hPa= DSE_1000_hPa.where(is_valid_ocean_mask, other = np.nan)
        MSE_1000_hPa= MSE_1000_hPa.where(is_valid_ocean_mask, other = np.nan)
        T_850_hPa= T_850_hPa.where(is_valid_ocean_mask, other = np.nan)
        Q_850_hPa= Q_850_hPa.where(is_valid_ocean_mask, other = np.nan)
        DSE_850_hPa= DSE_850_hPa.where(is_valid_ocean_mask, other = np.nan)
        MSE_850_hPa= MSE_850_hPa.where(is_valid_ocean_mask, other = np.nan)
        T_600_hPa= T_600_hPa.where(is_valid_ocean_mask, other = np.nan)
        Q_600_hPa= Q_600_hPa.where(is_valid_ocean_mask, other = np.nan)
        DSE_600_hPa= DSE_600_hPa.where(is_valid_ocean_mask, other = np.nan)
        MSE_600_hPa= MSE_600_hPa.where(is_valid_ocean_mask, other = np.nan)
        mwa_T_1000_850= mwa_T_1000_850.where(is_valid_ocean_mask, other = np.nan)
        mwa_Q_1000_850= mwa_Q_1000_850.where(is_valid_ocean_mask, other = np.nan)
        mwa_DSE_1000_850= mwa_DSE_1000_850.where(is_valid_ocean_mask, other = np.nan)
        mwa_MSE_1000_850= mwa_MSE_1000_850.where(is_valid_ocean_mask, other = np.nan)
        mwa_T_850_600= mwa_T_850_600.where(is_valid_ocean_mask, other = np.nan)
        mwa_Q_850_600= mwa_Q_850_600.where(is_valid_ocean_mask, other = np.nan)
        mwa_DSE_850_600= mwa_DSE_850_600.where(is_valid_ocean_mask, other = np.nan)
        mwa_MSE_850_600= mwa_MSE_850_600.where(is_valid_ocean_mask, other = np.nan)
        mwa_T_1000_950= mwa_T_1000_950.where(is_valid_ocean_mask, other = np.nan)
        mwa_Q_1000_950= mwa_Q_1000_950.where(is_valid_ocean_mask, other = np.nan)
        mwa_DSE_1000_950= mwa_DSE_1000_950.where(is_valid_ocean_mask, other = np.nan)
        mwa_MSE_1000_950= mwa_MSE_1000_950.where(is_valid_ocean_mask, other = np.nan)
        mwa_T_950_800= mwa_T_950_800.where(is_valid_ocean_mask, other = np.nan)
        mwa_Q_950_800= mwa_Q_950_800.where(is_valid_ocean_mask, other = np.nan)
        mwa_DSE_950_800= mwa_DSE_950_800.where(is_valid_ocean_mask, other = np.nan)
        mwa_MSE_950_800= mwa_MSE_950_800.where(is_valid_ocean_mask, other = np.nan)
        mwa_T_800_600= mwa_T_800_600.where(is_valid_ocean_mask, other = np.nan)
        mwa_Q_800_600= mwa_Q_800_600.where(is_valid_ocean_mask, other = np.nan)
        mwa_DSE_800_600= mwa_DSE_800_600.where(is_valid_ocean_mask, other = np.nan)
        mwa_MSE_800_600= mwa_MSE_800_600.where(is_valid_ocean_mask, other = np.nan)
        mwa_T_600_300= mwa_T_600_300.where(is_valid_ocean_mask, other = np.nan)
        mwa_Q_600_300= mwa_Q_600_300.where(is_valid_ocean_mask, other = np.nan)
        mwa_DSE_600_300= mwa_DSE_600_300.where(is_valid_ocean_mask, other = np.nan)
        mwa_MSE_600_300= mwa_MSE_600_300.where(is_valid_ocean_mask, other = np.nan)
        mwa_T_300_100= mwa_T_300_100.where(is_valid_ocean_mask, other = np.nan)
        mwa_Q_300_100= mwa_Q_300_100.where(is_valid_ocean_mask, other = np.nan)
        mwa_DSE_300_100= mwa_DSE_300_100.where(is_valid_ocean_mask, other = np.nan)
        mwa_MSE_300_100= mwa_MSE_300_100.where(is_valid_ocean_mask, other = np.nan)
                
        csf = csf.where(is_valid_ocean_mask, other = np.nan)