In [1]:
# Load packages
import numpy as np
import xarray as xr
from pathlib import Path
import cftime
import pandas as pd
from matplotlib import pyplot as plt
import os, re, glob,datetime
# Set global option: keep attrs on the objects returned by xarray operations
xr.set_options(keep_attrs=True)

<xarray.core.options.set_options at 0x7f3bc0bcf250>

In [2]:
def normal_for_large_data(raw_data):
    """
    Function: Compute every data variable of a lazily evaluated dataset separately
              and reassemble the results into one dataset. Used for the normals of
              wrfavg/*monthly*.nc files that are handled variable by variable.

    Input: raw_data as xarray dask dataset

    Output: xarray dataset with every data variable of raw_data except those whose
            name contains 'Times', carrying the attributes of raw_data. A dataset
            is returned even when only one (or no) variable is left.
    """
    # Compute normals by variables, skipping the ones for times
    computed = [
        raw_data[var].compute()
        for var in raw_data.data_vars
        if 'Times' not in var
    ]
    # Merge dataarrays back into 1 dataset with a single call instead of
    # re-merging the growing result once per variable. An empty dataset is
    # used when nothing is left, so `output` is always defined.
    output = xr.merge(computed) if computed else xr.Dataset()
    # Add attributes back to output
    output.attrs.update(raw_data.attrs)
    return output

In [3]:
def get_nth_word_custom_delimiter(string, delimiter, n):
    """
    Function: split a string on a delimiter and pick one piece by position

    Input:
    string: text to split (full parameter of simulations)
    delimiter: separator passed to str.split
    n: 1-based position of the wanted piece

    Output: the nth piece, or the text "Invalid value of N." when n is
            outside 1..number of pieces
    """
    pieces = string.split(delimiter)
    # Guard clause: reject positions outside the available range
    if not (1 <= n <= len(pieces)):
        return "Invalid value of N."
    # Positions are 1-based, list indices are 0-based
    return pieces[n - 1]

In [8]:
def build_parameter(dir_input):
    """
    Function: Extract parameter to build nested directory and start year from directory and stepfile
              based on types

    Input: Folder directory as strings, containing a file named 'stepfile'
    1) WRFTools*
    or
    2) <parameter>_<start year>, where <parameter> follows 'wrf/' and is read as
       <forcing dataset>_<scenario>_<grid>_<physical configuration>

    Output: tuple of strings (path, start_year, scen, grid)
    path: nested directory
    start_year: first four characters of the first line of the stepfile
    scen: scenario, empty for WRFTools
    grid: grid, empty for WRFTools
    """
    # Start year from stepfile; the context manager closes the file handle.
    # os.path.join accepts dir_input with or without a trailing slash.
    with open(os.path.join(dir_input, 'stepfile')) as stepfile:
        lines = stepfile.readlines()
    start_year = lines[0][0:4]

    # Extract info based on directory type
    if 'WRFTools' in dir_input:
        # Path for WRFTools simulations: keep what follows 'WRFTools/'
        # (9 characters) without the trailing slash
        par_start = dir_input.find('WRFTools')
        path = 'WRFTools/' + dir_input[par_start+9:].rstrip('/')    # Nested directory
        scen = ''                                                   # Empty for WRFTools
        grid = ''                                                   # Empty for WRFTools; was left undefined
    else:
        # Path of other simulations: the parameter sits between 'wrf/' and the
        # character in front of the start year
        par_start = dir_input.find('wrf/') + 4
        par_end = dir_input.find(start_year) - 1
        full_par = dir_input[par_start:par_end]
        force_d = get_nth_word_custom_delimiter(full_par, '_', 1)   # Forcing dataset
        scen = get_nth_word_custom_delimiter(full_par, '_', 2)      # Scenario
        grid = get_nth_word_custom_delimiter(full_par, '_', 3)      # Grid
        if grid == 'NA24':
            grid = 'na24'
        phys = get_nth_word_custom_delimiter(full_par, '_', 4)      # Physical configuration
        path = force_d + '/' + grid + '/' + phys + '/'
    return path, start_year, scen, grid

In [9]:
def climate_normals(dir_input,sub_dir='wrfavg',dir_ouput=None,freq='M'):
    """
    Function:Compute monthly or seasonal normals and create netcdf
             from monthly average data

    Input arguments:
    dir_input: directory of simulation folder
    sub_dir: directory of subfolder where monthly average netcdfs (*monthly.nc) are stored
    dir_ouput: directory of folder for outputs; by default the current working
               directory, taken from '/project' onwards when the path contains it
    freq: Frequency for normals, M for Monthly, S for Seasonal

    Output: None. One file per input file is written as
    <output directory>/<category>_<scenario>_<mon|sea>-norm-<start year>-<end year>.nc
    Existing output files and inputs with 'tmp_' in their name are skipped.

    Raises: ValueError when freq is neither 'M' nor 'S'
    """
    # Map the frequency code to the groupby key and the tag used in file names;
    # reject anything else up front instead of failing halfway through
    freq_options = {'M': ('time.month', 'mon'), 'S': ('time.season', 'sea')}
    if freq not in freq_options:
        raise ValueError("freq must be 'M' (monthly) or 'S' (seasonal), got %r" % (freq,))
    group_key, freq_tag = freq_options[freq]

    # Extract input info from directory (the grid is not needed here)
    path, start_year, scen, _ = build_parameter(dir_input)

    # Build output directory
    if dir_ouput is None:
        cwd = os.getcwd()
        cwd_start = cwd.find('/project')
        # find() gives -1 when '/project' is absent; slicing from -1 would keep
        # only the last character of cwd, so use the whole cwd in that case
        base_dir = cwd[cwd_start:] if cwd_start >= 0 else cwd
    else:
        base_dir = dir_ouput
    out_dir = os.path.join(base_dir, path)

    end_year = None
    # Compute Normals for each dataset
    for file in Path(dir_input).glob(sub_dir+'/*monthly.nc'):
        key = file.stem
        print('\n',key,'start')
        # Ignore temporary file in folder before it can influence the end year
        if 'tmp_' in key:
            print(key,'skip\n')
            continue
        # Grab wrf subcategory
        wrf_cat = get_nth_word_custom_delimiter(key,'_',1)
        # Open dataset with dask; the context manager closes the file once
        # the normals have been written
        with xr.open_dataset(file,chunks={'time':-1},decode_times=False) as data:
            # Convert time to datetime64: monthly steps from January of the start year
            data['time'] = pd.date_range(start=start_year+'-01-01', periods=data.sizes['time'], freq='MS')

            # Find End Year and prepare the output directory on the first dataset
            if end_year is None:
                end_year = str(data.time[-1].values)[0:4]
                # Check if directory exists and create if false
                if os.path.exists(out_dir):
                    print('Directory exists\n')
                else:
                    os.makedirs(out_dir, exist_ok=True)
                    print(out_dir)

            # Build full path for output file; os.path.join keeps directory and
            # file name apart whether or not path ends with a slash
            out_file = os.path.join(
                out_dir,
                wrf_cat+'_'+scen+'_'+freq_tag+'-norm-'+start_year+'-'+end_year+'.nc'
            )
            # Check if file exists
            if os.path.isfile(out_file):
                print(key,'File exists\n')
                continue

            # Group dataset by months or seasons and compute mean
            data_norm = data.groupby(group_key).mean('time')
            # Compute seasonal or monthly normals by dataarray for plev.nc
            if 'plev' in key:
                data_norm = normal_for_large_data(data_norm)
            # Create netcdf for normals
            data_norm.to_netcdf(out_file)
            print('done\n')