In [5]:
# Load packages
# import numpy as np
import xarray as xr
from pathlib import Path
import cftime
import pandas as pd
import os, re, glob, datetime
# Set global option: keep attrs (dataset/variable metadata) through xarray operations
# instead of dropping them, so results such as the groupby means retain their metadata
xr.set_options(keep_attrs=True)

<xarray.core.options.set_options at 0x7f42f5e979a0>

In [7]:
def get_nth_word_custom_delimiter(string, delimiter, n):
    """
    Function: return the n-th (1-based) piece of a delimited string

    Input:
    string: text to cut
    delimiter: separator handed to str.split
    n: 1-based position of the piece to return

    Output: the n-th piece, or the literal text "Invalid value of N."
            when n is not within 1..number of pieces
    """
    pieces = string.split(delimiter)
    # Guard clause: positions are 1-based, so 0, negatives and overshoots are rejected
    if not 1 <= n <= len(pieces):
        return "Invalid value of N."
    # Translate the 1-based position into a list index
    return pieces[n - 1]

In [84]:
def build_parameter(data):
    """
    Function: Extract parameter to build nested directory from xarray dataset

    Input: Xarray dataset; only data.attrs['experiment'] is read. Its value is
    split on '_' and is expected to look like
    1) WRFTools_<parameter>...
    or
    2) <forcing>_<scenario>_<grid>_<physics>...

    Output: (path, scen) as strings
    path: nested directory ending in '/', either 'WRFTools/<parameter>/na24/'
          or '<forcing>/<grid>/<physics without its last character>/'
    scen: scenario, empty string for WRFTools

    Raises: ValueError when the experiment name has too few '_'-separated
            fields (previously a placeholder text ended up inside the path)
    """
    experiment = data.attrs['experiment']
    # Split once; every path component below is one of these fields
    fields = experiment.split('_')

    # For WRFTools
    if 'WRFTools' in experiment:
        if len(fields) < 2:
            raise ValueError(
                "experiment '" + experiment + "' needs at least 2 '_'-separated fields"
            )
        path = 'WRFTools/' + fields[1] + '/na24/'
        scen = ''                                   # Empty for WRFTools
        return path, scen

    # Path of other simulations
    if len(fields) < 4:
        raise ValueError(
            "experiment '" + experiment + "' needs at least 4 '_'-separated fields"
        )
    force_d = fields[0]                             # Forcing dataset
    scen = fields[1]                                # Scenario
    grid = fields[2]                                # Grid
    if grid == 'NA24':                              # convert to lower case
        grid = 'na24'
    # Physical configuration. The slice drops the last character of the field
    # (e.g. 'new43-flk-1979' -> 'new43-flk-197'); kept unchanged so existing
    # output directories still match.
    # NOTE(review): confirm this truncation is intended.
    phys = fields[3][0:-1]
    path = force_d + '/' + grid + '/' + phys + '/'
    return path, scen

In [98]:
def climate_normals(dir_input, freq, sub_dir='wrfavg', dir_ouput=None):
    # Info for function
    """
    Function: Compute monthly or seasonal normals and create netcdf
              from monthly average data

    Input arguments:
    dir_input: directory of simulation folder
    freq: Frequency for normals, 'month' = Monthly, 'season' = Seasonal
          (appended to 'time.' for the groupby; its first three letters
          are used in the output file name)
    sub_dir: directory of subfolder where monthly average netcdfs
             (files matching '*monthly.nc') are stored
    dir_ouput: directory of folder for outputs, currently working directory by default

    Output: None. One netcdf of normals is written per input file and
            progress is printed.
    """
    out_dir = None      # resolved once, from the first dataset that is processed
    scen = ''

    # sorted() only makes the processing order reproducible between runs
    for file in sorted(Path(dir_input).glob(sub_dir+'/*monthly.nc')):
        key = file.stem
        print('\n',key,freq,'start')

        # Ignore temporary file in folder before it is opened, so it is never
        # parsed and never used to derive the output directory
        if 'tmp_' in key:
            print(key,'skip\n')
            print('done\n')
            continue

        # Open with dask, one file at a time; leaving the with-block closes the dataset
        with xr.open_dataset(file,chunks={'time':-1},decode_times=False) as data:
            # Grab first year
            start_year = data.attrs['begin_date'][0:4]
            # Grab wrf subcategory (first word of the description)
            wrf_cat = get_nth_word_custom_delimiter(data.attrs['description'],' ',1)
            # Convert time to datetime64: consecutive month starts from January of start_year
            # NOTE(review): assumes every input begins in January - confirm
            data['time'] = pd.date_range(start=start_year+'-01-01', periods=data.sizes['time'], freq='MS')
            # End year of this dataset, so the file name matches the data it holds
            end_year = str(data.time[-1].values)[0:4]

            if out_dir is None:
                # NOTE(review): all files of one simulation folder are presumed to
                # share the same 'experiment' attribute, so this is done only once
                path, scen = build_parameter(data)
                # Build output directory
                out_dir = os.path.join(dir_ouput,path) if dir_ouput is not None else path
                # Check if directory exists and create if false
                if os.path.exists(out_dir):
                    print('Directory exists\n')
                    print(out_dir)
                else:
                    os.makedirs(out_dir)

            # Build full path for output file; scenario is left out when missing
            name_parts = [wrf_cat] if scen == '' else [wrf_cat, scen]
            name_parts.append(freq[0:3]+'-norm-'+start_year+'-'+end_year+'.nc')
            out_file = os.path.join(out_dir, '_'.join(name_parts))

            # Check if file exists
            if os.path.isfile(out_file):
                print(out_file,'File exists\n')
            else:
                # Group dataset by months or seasons and compute mean
                data_norm = data.groupby('time.'+freq).mean('time')
                # Export normals as netcdfs (must happen while the input is still open)
                data_norm.to_netcdf(out_file)
        print('done\n')

In [96]:
# Simulation folder passed to climate_normals() as dir_input.
# The commented-out line is an alternative run folder; swap the two to switch runs.
# dirs='/scratch/a/aerler/aerler/Mani/wrf/ERA5_HS_NA24_new43-flk-1979/'
dirs='/scratch/a/aerler/aerler/Mani/ERAI_AND_ERA5_RUN_CASES/WRFTools_RC92/'

In [95]:
# Compute monthly normals for the simulation folder in `dirs` and write them under the project folder
climate_normals(dir_input=dirs,freq='month',dir_ouput='/project/p/peltier/edmundn/climate_normals_Py/')
# Seasonal variant: freq is appended to 'time.' for the groupby, so it has to be 'season' (not 'S')
# climate_normals(dir_input=dirs,dir_ouput='/project/p/peltier/edmundn/climate_normals_Py/',freq='season')


 wrfhydro_d01_monthly month start
/project/p/peltier/edmundn/climate_normals_Py/ERA5/na24/new43-flk-197/

 wrfplev3d_d01_monthly month start

 wrflsm_d01_monthly month start

 wrfsrfc_d01_monthly month start

 wrfxtrm_d01_monthly month start

 wrfrad_d01_monthly month start


In [24]:
# Open one monthly-average file of the ERA5 run (times left undecoded) to inspect its metadata
test=xr.open_dataset('/scratch/a/aerler/aerler/Mani/wrf/ERA5_HS_NA24_new43-flk-1979/wrfavg/wrfhydro_d01_monthly.nc',decode_times=False)

In [45]:
# Show the global attributes; the functions above read 'experiment', 'begin_date' and 'description'
test.attrs

{'TITLE': ' OUTPUT FROM WRF V4.3 MODEL',
 'START_DATE': '1979-01-01_00:00:00',
 'WEST-EAST_GRID_DIMENSION': 371,
 'SOUTH-NORTH_GRID_DIMENSION': 318,
 'BOTTOM-TOP_GRID_DIMENSION': 67,
 'DX': 24459.04,
 'DY': 24459.04,
 'AERCU_OPT': 0,
 'AERCU_FCT': 1.0,
 'IDEAL_CASE': 0,
 'DIFF_6TH_SLOPEOPT': 0,
 'AUTO_LEVELS_OPT': 2,
 'DIFF_6TH_THRESH': 0.1,
 'DZBOT': 50.0,
 'DZSTRETCH_S': 1.3,
 'DZSTRETCH_U': 1.1,
 'GRIDTYPE': 'C',
 'DIFF_OPT': 2,
 'KM_OPT': 4,
 'DAMP_OPT': 3,
 'DAMPCOEF': 0.2,
 'KHDIF': 0.0,
 'KVDIF': 0.0,
 'MP_PHYSICS': 8,
 'RA_LW_PHYSICS': 4,
 'RA_SW_PHYSICS': 4,
 'SF_SFCLAY_PHYSICS': 5,
 'SF_SURFACE_PHYSICS': 4,
 'BL_PBL_PHYSICS': 6,
 'CU_PHYSICS': 3,
 'SF_LAKE_PHYSICS': 0,
 'SURFACE_INPUT_SOURCE': 3,
 'SST_UPDATE': 1,
 'GRID_FDDA': 0,
 'GFDDA_INTERVAL_M': 0,
 'GFDDA_END_H': 0,
 'GRID_SFDDA': 0,
 'SGFDDA_INTERVAL_M': 0,
 'SGFDDA_END_H': 0,
 'HYPSOMETRIC_OPT': 2,
 'USE_THETA_M': 1,
 'GWD_OPT': 1,
 'SF_URBAN_PHYSICS': 1,
 'SF_SURFACE_MOSAIC': 0,
 'SF_OCEAN_PHYSICS': 0,
 'WEST-EAST_P

In [46]:
# Same inspection for the WRFTools_RC92 run, to compare its global attributes with the ERA5 run
test2=xr.open_dataset('/scratch/a/aerler/aerler/Mani/ERAI_AND_ERA5_RUN_CASES/WRFTools_RC92/wrfavg/wrfhydro_d01_monthly.nc',decode_times=False)
test2.attrs

{'TITLE': ' OUTPUT FROM WRF V4.3 MODEL',
 'START_DATE': '1979-01-01_00:00:00',
 'WEST-EAST_GRID_DIMENSION': 371,
 'SOUTH-NORTH_GRID_DIMENSION': 318,
 'BOTTOM-TOP_GRID_DIMENSION': 67,
 'DX': 24459.04,
 'DY': 24459.04,
 'AERCU_OPT': 0,
 'AERCU_FCT': 1.0,
 'IDEAL_CASE': 0,
 'DIFF_6TH_SLOPEOPT': 0,
 'AUTO_LEVELS_OPT': 2,
 'DIFF_6TH_THRESH': 0.1,
 'DZBOT': 50.0,
 'DZSTRETCH_S': 1.3,
 'DZSTRETCH_U': 1.1,
 'GRIDTYPE': 'C',
 'DIFF_OPT': 2,
 'KM_OPT': 4,
 'DAMP_OPT': 3,
 'DAMPCOEF': 0.2,
 'KHDIF': 0.0,
 'KVDIF': 0.0,
 'MP_PHYSICS': 8,
 'RA_LW_PHYSICS': 4,
 'RA_SW_PHYSICS': 4,
 'SF_SFCLAY_PHYSICS': 2,
 'SF_SURFACE_PHYSICS': 2,
 'BL_PBL_PHYSICS': 2,
 'CU_PHYSICS': 6,
 'SF_LAKE_PHYSICS': 0,
 'SURFACE_INPUT_SOURCE': 3,
 'SST_UPDATE': 1,
 'GRID_FDDA': 0,
 'GFDDA_INTERVAL_M': 0,
 'GFDDA_END_H': 0,
 'GRID_SFDDA': 0,
 'SGFDDA_INTERVAL_M': 0,
 'SGFDDA_END_H': 0,
 'HYPSOMETRIC_OPT': 2,
 'USE_THETA_M': 1,
 'GWD_OPT': 1,
 'SF_URBAN_PHYSICS': 0,
 'SF_SURFACE_MOSAIC': 0,
 'SF_OCEAN_PHYSICS': 0,
 'WEST-EAST_P

In [47]:
# Sanity check: the same substring test build_parameter() uses to pick the WRFTools branch
if 'WRFTools' in test2.attrs['experiment']:
    print('yes')

yes
