# Loading modules

In [38]:
import xarray as xr
import numpy as np
import sys
from glob import glob
import gc
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

import sys
sys.path.append('/home/vmaithel')

# Physical constants shared by the notebook.
g = 9.8  # [m s^-2] presumably gravitational acceleration
L = 2.26e6  # [J/kg] presumably latent heat of vaporization -- TODO confirm
cp = 1005  # [J/kg-K] presumably specific heat of dry air at constant pressure -- TODO confirm
R_e = 6.378e6  # [m] presumably Earth's radius -- TODO confirm
pi = np.pi  # full double-precision value; 22/7 is off by about 0.04%

# Local scripts follow. If a script is loaded from a different directory,
# prefix the script name with the directory name, separated by a '.'.
from tropical_PODs.PODs.POD_utils import calculate_saturation_specific_humidity
from tropical_PODs.PODs.POD_utils import mass_weighted_vertical_integral_w_nan
from tropical_PODs.PODs.POD_utils import limit_files_to_select_years
from tropical_PODs.PODs.POD_utils import calculate_one_variable_binned_ivar_composites
from tropical_PODs.PODs.POD_utils import calculate_two_variable_binned_ivar_composites
from tropical_PODs.PODs.POD_utils import calculate_two_variable_binned_coevolution_composites
from tropical_PODs.PODs.POD_utils import process_multiyear_one_variable_binned_ivar_composites
from tropical_PODs.PODs.POD_utils import process_multiyear_two_variable_binned_ivar_composites
from tropical_PODs.PODs.POD_utils import process_multiyear_two_variable_binned_coevolution_composites
from tropical_PODs.PODs.plotting_utils import plot_one_variable_binned_ivar
from tropical_PODs.PODs.plotting_utils import plot_two_variables_binned_ivar

# Define input directories and file names

In [39]:
# Inclusive range of years processed by the driver loop
start_year = 2011
end_year = 2012

# Input file patterns. The '*' wildcards are expanded with glob before loading.

# ERA5 daily fields (2.5 degree files)
era5_daily_dir = '/Projects/era5_regrid/2p5_benedict/ERA5_2.5deg_daily/'

ifile_specific_humidity = era5_daily_dir + 'shum.2p5.*.nc'  # ERA5 specific humidity
ifile_temperature = era5_daily_dir + 'air.2p5.*.nc'  # ERA5 temperature
ifile_surface_pressure = era5_daily_dir + 'pres.sfc.2p5.*.nc'  # ERA5 surface pressure

# IMERG daily precipitation
ifile_precipitation = '/Projects/era5_regrid/IMERG/3B-DAY.MS.MRG.3IMERG.V06.*'


# Define output directories

In [40]:
# Destinations for everything this notebook writes
odir_datasets = '/home/vmaithel/MSE_budgets/cape_data/'  # NetCDF datasets
odir_plots = '/home/vmaithel/plots/'  # figures


# Driver for calculations

In [41]:
# Constants used by the driver
g = 9.8  # [m s^-2]

# Expand each wildcard pattern into the list of matching files
# (glob behaves like '*' expansion in a unix shell).
paths_specific_humidity, paths_temperature, paths_surface_pressure, paths_precipitation = map(
    glob,
    (
        ifile_specific_humidity,
        ifile_temperature,
        ifile_surface_pressure,
        ifile_precipitation,
    ),
)

# For each year: load humidity, temperature, surface pressure and precipitation
# for 10S-10N (with one extra day on either side of the year), average to daily
# values, build pressure midpoints/interfaces, column-integrate specific humidity
# and saturation specific humidity, and write their ratio ('csf') to one NetCDF
# file per year.
for year in range(start_year, end_year + 1):

    print(year)

    # Zero-padded four-digit year strings, used to build date bounds and file names #

    previous_year_string = str(year - 1).zfill(4)
    current_year_string = str(year).zfill(4)
    next_year_string = str(year + 1).zfill(4)

    # Year of interest plus one day of padding on each side #

    time_window = slice(previous_year_string + '-12-31', next_year_string + '-01-01')

    # Limit paths to previous, current, and next year #

    neighboring_years = range(year - 1, year + 2)

    year_limited_paths_specific_humidity = limit_files_to_select_years(paths_specific_humidity, neighboring_years)
    year_limited_paths_temperature = limit_files_to_select_years(paths_temperature, neighboring_years)
    year_limited_paths_surface_pressure = limit_files_to_select_years(paths_surface_pressure, neighboring_years)
    year_limited_paths_precipitation = limit_files_to_select_years(paths_precipitation, neighboring_years)

    print(len(year_limited_paths_specific_humidity))

    #####################
    ####  Open Data  ####
    #####################

    # Data is "lazy loaded", nothing is actually loaded until we "look" at data in some way #

    dataset_specific_humidity = xr.open_mfdataset(year_limited_paths_specific_humidity, combine="by_coords")
    dataset_temperature = xr.open_mfdataset(year_limited_paths_temperature, combine="by_coords")
    dataset_surface_pressure = xr.open_mfdataset(year_limited_paths_surface_pressure, combine="by_coords")
    dataset_precipitation = xr.open_mfdataset(year_limited_paths_precipitation, combine="by_coords")

    #######################
    ####  Select Data  ####
    #######################

    # Make data arrays, restricted to the padded year of interest and 10S-10N #
    full_lat = dataset_surface_pressure['lat']
    full_lon = dataset_surface_pressure['lon']

    # The lat slice bounds are reversed for Q and T relative to PS and precipitation;
    # every field is re-sorted to ascending latitude further below.
    PS = dataset_surface_pressure['pres'].sel(time=time_window, lat=slice(-10, 10)) # [Pa]
    Q = dataset_specific_humidity['shum'].sel(time=time_window, lat=slice(10, -10), level=slice(70, 1000)) # [Kg/Kg]
    T = dataset_temperature['air'].sel(time=time_window, lat=slice(10, -10), level=slice(70, 1000)) # [K]
    precipitation_rate = dataset_precipitation['precipAvg'].sel(time=time_window, lat=slice(-10, 10)) * (24) # Currently [mm/hr]. Convert to [mm/day]

    # Actually load data #
    PS.load()
    Q.load()
    T.load()
    precipitation_rate.load()

    # Clean up environment #

    gc.collect()

    ################################
    ####  Average Data to Daily ####
    ################################

    PS = PS.resample(time='1D').mean('time')
    Q = Q.resample(time='1D').mean('time')
    T = T.resample(time='1D').mean('time')
    precipitation_rate = precipitation_rate.resample(time='1D').mean('time')

    precipitation_rate['time'] = precipitation_rate.indexes['time'].to_datetimeindex() # IMERG time was saved as CFtime, and we need to convert to datetime for xarray

    # NOTE: time stamps are left at the start of each daily bin; they are not
    # shifted to the center of the averaging window.

    # Clean up environment #

    gc.collect()

    #####################################
    ####  Modify variables as needed ####
    #####################################

    PS = PS.rename('PS')
    PS = PS.transpose('time','lat','lon')
    PS = PS.sortby('lat', ascending=True) # Re-order lat to match code for other datasets

    Q = Q.rename({'level':'lev'})
    Q = Q.rename('Q')
    Q = Q.transpose('time','lev','lat','lon')
    Q = Q.sortby('lat', ascending=True) # Re-order lat to match code for other datasets

    T = T.rename({'level':'lev'})
    T = T.rename('T')
    T = T.transpose('time','lev','lat','lon')
    T = T.sortby('lat', ascending=True) # Re-order lat to match code for other datasets

    precipitation_rate = precipitation_rate.rename('precipitation_rate')
    precipitation_rate = precipitation_rate.transpose('time','lat','lon')
    precipitation_rate = precipitation_rate.sortby('lat', ascending=True) # Re-order lat to match code for other datasets

    # Clean up environment #

    gc.collect()

    #########################################
    ####  Calculate True Model Pressure  ####
    #########################################

    print("Calculating true model pressure")

    # Set upper most interface equal to uppermost level midpoint, and lowest interface equal to surface pressure.
    # This will still permit the desired vertical integral, just choose appropriate upper and lower integration limits

    # Model level midpoint

    true_pressure_midpoint = Q['lev'] * 100. # To convert to Pa
    true_pressure_midpoint = true_pressure_midpoint.rename('true_pressure_midpoint_Pa')
    true_pressure_midpoint = true_pressure_midpoint.expand_dims({'lat':Q['lat'], 'lon':Q['lon'], 'time':Q['time']})
    true_pressure_midpoint = true_pressure_midpoint.transpose('time','lev','lat','lon')

    # Model level interfaces, built directly in Pa.
    # The level coordinate is converted with the same factor of 100 used for the
    # midpoints above; PS is annotated [Pa] where it is selected, so it is inserted
    # unscaled. (Previously the whole array, PS included, was multiplied by 100,
    # which made the lowest interface 100x too large.)

    level_midpoints = Q['lev'].values.astype(np.float64) # same units as the 'lev' coordinate
    number_of_levels = len(level_midpoints)

    true_pressure_interface = np.empty((len(Q.time), len(Q.lat), len(Q.lon), number_of_levels + 1))

    # Upper most interface equals the uppermost level midpoint
    true_pressure_interface[..., 0] = level_midpoints[0] * 100.

    # Middle interfaces are half way between adjacent level midpoints
    true_pressure_interface[..., 1:number_of_levels] = (level_midpoints[:-1] + level_midpoints[1:]) / 2. * 100.

    # Lowest interface equals surface pressure; PS is ordered (time, lat, lon) like the leading axes here
    true_pressure_interface[..., number_of_levels] = PS.values

    coords = {'time':Q['time'], 'lat':Q['lat'], 'lon':Q['lon'], 'ilev':np.arange(1, number_of_levels + 2)}
    dims = ['time', 'lat', 'lon', 'ilev']
    true_pressure_interface = xr.DataArray(true_pressure_interface, dims=dims, coords=coords)
    true_pressure_interface.attrs['units'] = 'Pa'

    true_pressure_interface = true_pressure_interface.transpose('time','ilev','lat','lon')

    # Clean up environment #

    gc.collect()

    ########################
    ###  Calculate CSF  ####
    ########################

    ####  Calculate Saturation Specific Humidity  ####

    print("Calculating saturation specific humidity")

    saturation_specific_humidity = xr.apply_ufunc(calculate_saturation_specific_humidity, true_pressure_midpoint, T,
                                            output_dtypes=[Q.dtype])

    # Clean up environment #

    gc.collect()

    ####  Column Integrate Variables  ####

    upper_level_integration_limit_Pa = 10000 # [Pa]

    lower_level_integration_limit_Pa = 100000 # [Pa]

    print('Column Integrating')

    # Only the first returned value of the integral helper is used here
    ci_q, _, _ = mass_weighted_vertical_integral_w_nan(Q, true_pressure_midpoint, true_pressure_interface, lower_level_integration_limit_Pa, upper_level_integration_limit_Pa)

    ci_q_sat, _, _ = mass_weighted_vertical_integral_w_nan(saturation_specific_humidity, true_pressure_midpoint, true_pressure_interface, lower_level_integration_limit_Pa, upper_level_integration_limit_Pa)

    # Ratio of column-integrated specific humidity to its saturation value
    csf = ci_q / ci_q_sat

    # Name variables #

    csf.name = 'csf'
    csf.attrs['Units'] = '[Kg Kg^-1]'

    # Clean up environment #

    del Q, T, true_pressure_midpoint, true_pressure_interface, saturation_specific_humidity, ci_q, ci_q_sat

    gc.collect()

    #################################
    ####  Output Data as NetCDF  ####
    #################################

    # Write only the year of interest (the padding days are dropped) #

    csf.sel(time = slice(current_year_string+'-01-01', current_year_string+'-12-31')).to_netcdf(odir_datasets + 'CSF_' + current_year_string + '.nc')


2011
36


  precipitation_rate['time'] = precipitation_rate.indexes['time'].to_datetimeindex() # IMERG time was saved as CFtime, and we need to convert to datetime for xarray


Calculating true model pressure
Calculating saturation specific humidity
Column Integrating
2012
36


  precipitation_rate['time'] = precipitation_rate.indexes['time'].to_datetimeindex() # IMERG time was saved as CFtime, and we need to convert to datetime for xarray


Calculating true model pressure
Calculating saturation specific humidity
Column Integrating


In [43]:
# Show the latitude coordinate of the most recently computed CSF field
print(csf['lat'])

<xarray.DataArray 'lat' (lat: 9)>
array([-10. ,  -7.5,  -5. ,  -2.5,   0. ,   2.5,   5. ,   7.5,  10. ],
      dtype=float32)
Coordinates:
  * lat      (lat) float32 -10.0 -7.5 -5.0 -2.5 0.0 2.5 5.0 7.5 10.0
Attributes:
    long_name:  latitude
    units:      degrees_north
