In [1]:
import numpy as np
import os, glob
from netCDF4 import Dataset

import timeit
from pandas import to_datetime, date_range
import wrf, xarray, sys
import pandas as pd

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

### input (tiled LES history files) and output (post-processed products) folders

dataPath = '/glade/scratch/doubrawa/final_data/les/'
outPath  = '/glade/scratch/doubrawa/post_processing/'

### run parameters: number of per-processor history files, WRF domain number,
### and the label used in output file names (domain 4 -> "LES_25m", anything else -> "LES_100m")

n_processors = 1800
domainId     = 4
prefix       = {4: "LES_25m"}.get(domainId, "LES_100m")

### horizontal size of the full domain; each staggered grid has one extra point
### along its own direction

n_east_west        = 1200
n_south_north      = 1200
n_east_west_stag   = n_east_west + 1
n_south_north_stag = n_south_north + 1

### output variables: name in the wrfout/wrf-python request, units and long name

xarray_mapping = dict(w="wa", u="U", v="V", theta="theta")
units = dict(w="m s-1", u="m s-1", v="m s-1", theta="K")
names = dict(
    w="vertical velocity",
    u="zonal velocity",
    v="meridional velocity",
    theta="potential temperature",
)

### every field that is read from the history files

variables_static = ["XLAT", "XLONG", "ter"]
variables_2d     = ["RMOL", "HFX", "UST", "LH", "QFX"]
variables_3d     = ["U", "V", "wa", "theta", "z"]
variables        = [*variables_2d, *variables_3d]

In [2]:
## day of March 2015 to process; it is formatted into the requested timestamps
## ('2015-03-<day> ...') and into the 03<day>15 folder names of the history files
day  = 21

In [3]:
# Post-process the tiled LES output of `day`, one hour at a time.
#
# For every requested timestamp the per-processor history files are stitched
# back into full-domain arrays; planar-mean profiles, resolved fluxes, planar
# means of surface fields and fixed-height horizontal slices are then written
# to `outPath`.

# number of vertical levels allocated for the three-dimensional fields
n_bottom_top = 72


def _first_tile_paths(day, hour):
    """Return the sorted paths of the processor-0000 history files found in
    the folder of the given day and hour."""
    return sorted(glob.glob(os.path.join(dataPath,
                                         "03{0:d}15".format(day),
                                         "03{0:d}15_{1:d}UTC".format(day, hour),
                                         'wrfout_d0{0}*_0000'.format(domainId))))


def _read_tile_variable(tile_wrf, tile_xr, var, time_idx):
    """Return `var` at time index `time_idx` for one tile.

    wrf.getvar is tried first on the netCDF4 handle `tile_wrf`; if it raises,
    the variable is read straight from the xarray dataset `tile_xr`.
    """
    try:
        return wrf.getvar(tile_wrf, var, timeidx=time_idx).data
    except Exception:
        # deliberately not a bare `except`, so that Ctrl-C still interrupts the run
        return tile_xr[var].isel(Time=time_idx).data


def _patch_slices(tile_xr, stagger_we=False, stagger_sn=False):
    """Return the (south_north, west_east) slices that place a tile inside the
    full-domain array, built from the PATCH_START/PATCH_END global attributes.

    `stagger_we` / `stagger_sn` select the *_STAG attributes instead of the
    *_UNSTAG ones for the corresponding direction.
    """
    we_grid = "STAG" if stagger_we else "UNSTAG"
    sn_grid = "STAG" if stagger_sn else "UNSTAG"
    # the start attribute is shifted by one to serve as a 0-based slice start;
    # the end attribute is used unchanged as the exclusive slice stop
    we = slice(getattr(tile_xr, 'WEST-EAST_PATCH_START_' + we_grid) - 1,
               getattr(tile_xr, 'WEST-EAST_PATCH_END_' + we_grid))
    sn = slice(getattr(tile_xr, 'SOUTH-NORTH_PATCH_START_' + sn_grid) - 1,
               getattr(tile_xr, 'SOUTH-NORTH_PATCH_END_' + sn_grid))
    return sn, we


def _copy_tile_into_domain(target, tile_values, tile_xr, var):
    """Write the values of one tile into the matching window of `target`.

    Three-dimensional "U" uses the west-east staggered bounds and "V" the
    south-north staggered bounds; everything else uses the unstaggered ones.
    """
    is_3d = tile_values.ndim == 3
    sn, we = _patch_slices(tile_xr,
                           stagger_we=is_3d and var == 'U',
                           stagger_sn=is_3d and var == 'V')
    if is_3d:
        target[:, sn, we] = tile_values
    else:
        target[sn, we] = tile_values


# static fields (XLAT, XLONG, ter) are assumed not to change with time, so they
# are stitched together only once per run of this cell
data_static = {var: np.zeros((n_south_north, n_east_west)) for var in variables_static}
first = True

# loop through all hours!
for hour in range(17, 24):

    desired_times = pd.date_range(start='2015-03-{1:d} {0:d}:00'.format(hour,day),
                                  end='2015-03-{1:d} {0:d}:05'.format(hour,day),
                                  freq='10min')

    # candidate history files of processor 0000 for this hour
    file_paths = _first_tile_paths(day, hour)

    if day==21:
        # for march 21 (simulation from first batch of domingo's runs) the
        # top-of-the-hour time may be stored in a file of the previous hour's
        # folder, so those files are searched as well (after the current ones)
        file_paths.extend(_first_tile_paths(day, hour-1))

    # for each desired timestamp, find out which ncfile to open
    # (when several files contain it, the last one in `file_paths` wins)
    map_desired_time_to_ncfile = {}
    for file_path in file_paths:
        with xarray.open_dataset(file_path) as wrfnc:
            wrf_datetimes = np.asarray([ to_datetime(ii) for ii in wrfnc.XTIME.data ])
        for desired_time in desired_times:
            if desired_time in wrf_datetimes:
                map_desired_time_to_ncfile[desired_time] = file_path

    missing_times = [t for t in desired_times if t not in map_desired_time_to_ncfile]
    if missing_times:
        warnings.warn("no history file contains the requested time(s): {0}".format(missing_times))

    for desired_time, first_tile_path in map_desired_time_to_ncfile.items():

        print('----------------------')
        # strip the trailing "0000" so that any processor number can be appended
        file_prefix = first_tile_path[0:-4]

        # for this time, allocate space for the full-domain fields
        data = {"U": np.zeros((n_bottom_top, n_south_north, n_east_west_stag)),
                "V": np.zeros((n_bottom_top, n_south_north_stag, n_east_west))}
        for var in variables_2d:
            data[var] = np.zeros((n_south_north, n_east_west))
        for var in ["wa","theta","z"]:
            data[var] = np.zeros((n_bottom_top, n_south_north, n_east_west))

        for processor in range(n_processors):

            tile_pattern = file_prefix+"{0:04d}".format(processor)
            matches = glob.glob(tile_pattern)
            if not matches:
                raise FileNotFoundError("missing history file: {0}".format(tile_pattern))
            file_name = matches[0]

            # print out which file is being read
            sys.stdout.write('\r'+file_name)

            # the same file is opened twice: with xarray (times, attributes, raw
            # variables) and with netCDF4 (handle expected by the wrf package);
            # both handles are closed as soon as the tile has been copied
            with xarray.open_dataset(file_name) as wrfnc, Dataset(file_name,'r') as wrfnc_for_wrf:

                wrf_datetimes = np.asarray([ to_datetime(ii) for ii in wrfnc.XTIME.data ])

                # find out what index corresponds to the desired time; it is
                # determined on the first tile and reused for all the others
                if processor==0:
                    # absolute offsets: Timedelta.seconds alone is misleading
                    # for times that lie before the desired one
                    offsets = [ abs(ii.total_seconds()) for ii in (wrf_datetimes - desired_time) ]
                    dt_between_desired_and_actual = np.min(offsets)
                    dt_idx = np.argmin(offsets)

                # for this time and this processor, get all the variables
                for var in variables:
                    data_tile = _read_tile_variable(wrfnc_for_wrf, wrfnc, var, dt_idx)
                    _copy_tile_into_domain(data[var], data_tile, wrfnc, var)

                # static fields only need to be stitched the first time around
                if first:
                    for var in variables_static:
                        data_tile = _read_tile_variable(wrfnc_for_wrf, wrfnc, var, dt_idx)
                        _copy_tile_into_domain(data_static[var], data_tile, wrfnc, var)

        # remove terrain from z
        data["z"] = data["z"] - data_static["ter"]

        # unstagger u and v: average each pair of neighbouring staggered points
        data["U"] = 0.5*(data["U"][:,:,:-1] + data["U"][:,:,1:])
        data["V"] = 0.5*(data["V"][:,:-1,:] + data["V"][:,1:,:])

        # get profile of planar averages
        data_mean = {}
        for var in ["U","V","wa","theta","z"]:
            data_mean[var] = np.mean(data[var],axis=(1,2))

        # get planar perturbations (departure from the mean of each level)
        data_prime  = {}
        for var in data_mean.keys():
            data_prime[var] = data[var] - data_mean[var][:,None,None]

        # compute fluxes: planar mean of the product of two perturbations
        fluxes = ["U_U","V_V","wa_wa","U_wa","V_wa","U_V","wa_theta"]
        data_fluxes = {}
        for flux in fluxes:
            var1, var2 = flux.split("_")
            data_fluxes[flux] = np.mean(data_prime[var1]*data_prime[var2],axis=(1,2))

        # organize means and fluxes into a dataframe indexed by mean height
        # and save to csv file
        df  = pd.DataFrame(data_mean).set_index("z")
        df["z_std_xy"] = np.std(data["z"],axis=(1,2))
        df2 = pd.DataFrame(data_fluxes).set_index(df.index)
        df  = pd.concat([df,df2],axis=1)
        column_mapping = {"U":"u",
                          "V":"v",
                          "wa":"w",
                          "theta":"theta",
                          "z_std_xy":"z_std_xy",
                          "U_U":"u_u",
                          "V_V":"v_v",
                          "wa_wa":"w_w",
                          "U_wa":"u_w",
                          "V_wa":"v_w",
                          "U_V":"u_v",
                          "wa_theta":"w_theta",
                          "wa_theta0":"w_theta0"}
        df.columns = [ column_mapping[col_old] for col_old in df.columns ]
        fName = os.path.join(outPath,"{0}_SPATIAL_AVERAGED_PROFILES_{1:%Y-%m-%d_%H:%M:%S}.csv".format(prefix,desired_time))
        print(fName)
        df.to_csv(fName)

        # Save planar averages of two-dimensional quantities
        means_2d = {}
        for var in ["RMOL","HFX"]:
            means_2d[var] = np.mean(data[var])
        a = pd.Series(means_2d)
        fName = os.path.join(outPath,"WRF_{0}_SPATIAL_AVERAGED_2D_{1:%Y-%m-%d_%H:%M:%S}.csv".format(prefix,desired_time))
        a.to_csv(fName)

        # Prepare planes of XLAT, XLONG (only once, right after the static
        # fields have been stitched)
        if first:
            n_sn, n_we = data_static['XLAT'].shape
            xlat = xarray.DataArray(data_static['XLAT'],
                                    coords={"south_north":range(n_sn),"west_east":range(n_we)},
                                    dims=("south_north","west_east"),
                                    name="2-d latitude",
                                    attrs={"unit":"deg","stagger":""})

            xlong = xarray.DataArray(data_static['XLONG'],
                                     coords={"south_north":range(n_sn),"west_east":range(n_we)},
                                     dims=("south_north","west_east"),
                                     name="2-d longitude",
                                     attrs={"unit":"deg","stagger":""})
        first = False

        # Prepare z field used as the vertical coordinate of the interpolation
        xarray_zref = xarray.DataArray(data["z"],
                                       coords={"bottom_top":range(n_bottom_top),"south_north":range(n_sn),"west_east":range(n_we)},
                                       dims=("bottom_top","south_north","west_east"),
                                       name="height above ground",
                                       attrs={"unit":"m","stagger":""})

        xarray_zref["lat"] = xlat
        xarray_zref["lon"] = xlong

        #
        # Interpolate 3-dimensional variables to desired heights and write one
        # netCDF file per height
        #
        heights = [100.0,500.0]
        for height in heights:
            xarray_dict = {}
            for xarray_varname in ["w","u","v","theta"]:
                print(xarray_varname)
                xarray_3d = xarray.DataArray(data[xarray_mapping[xarray_varname]][None,:,:,:],
                                             coords={"time":[wrf_datetimes[dt_idx]],"bottom_top":range(n_bottom_top),"south_north":range(n_sn),"west_east":range(n_we)},
                                             dims=("time","bottom_top","south_north","west_east"),
                                             name=names[xarray_varname],
                                             attrs={"unit":units[xarray_varname],"stagger":"","height [m]":height})

                xarray_3d["lat"] = xlat
                xarray_3d["lon"] = xlong

                xarray_dict[xarray_varname] = wrf.interplevel(xarray_3d, xarray_zref, height, meta=True)
                xarray_dict[xarray_varname]["z"] = height
            dataset = xarray.Dataset(xarray_dict)
            fName   = "WRF_{0}_{1}_m_AGL_{2:%Y-%m-%d_%H:%M}.nc".format(prefix,height,wrf_datetimes[dt_idx])
            fPath   = os.path.join(outPath,fName)
            print ("Saving : {0}".format(fName))
            dataset.to_netcdf(fPath)

----------------------
/glade/scratch/doubrawa/final_data/les/032115/032115_16UTC/wrfout_d04_2015-03-21_16:30:10_1799/glade/scratch/doubrawa/post_processing/LES_25m_SPATIAL_AVERAGED_PROFILES_2015-03-21_17:00:00.csv
w
u
v
theta
Saving : WRF_LES_25m_100.0_m_AGL_2015-03-21_17:00.nc
w
u
v
theta
Saving : WRF_LES_25m_500.0_m_AGL_2015-03-21_17:00.nc
----------------------
/glade/scratch/doubrawa/final_data/les/032115/032115_17UTC/wrfout_d04_2015-03-21_17:30:10_1799/glade/scratch/doubrawa/post_processing/LES_25m_SPATIAL_AVERAGED_PROFILES_2015-03-21_18:00:00.csv
w
u
v
theta
Saving : WRF_LES_25m_100.0_m_AGL_2015-03-21_18:00.nc
w
u
v
theta
Saving : WRF_LES_25m_500.0_m_AGL_2015-03-21_18:00.nc
----------------------
/glade/scratch/doubrawa/final_data/les/032115/032115_18UTC/wrfout_d04_2015-03-21_18:30:10_1799/glade/scratch/doubrawa/post_processing/LES_25m_SPATIAL_AVERAGED_PROFILES_2015-03-21_19:00:00.csv
w
u
v
theta
Saving : WRF_LES_25m_100.0_m_AGL_2015-03-21_19:00.nc
w
u
v
theta
Saving : WRF_LES_2