# Large domain: Calculate and save quantities binned by column RH percentile

In [1]:
import xarray as xr
import glob
import numpy as np
from datetime import datetime


In [2]:
# Date stamp written into the `history` attribute of every output file.
today = datetime.today().strftime('%Y-%m-%d')

# Simulation cases and their number of vertical levels; the two lists are
# paired by position, so they must stay the same length and order.
cases = ['nz_32','nz_64','nz_128']
nz = [32, 64, 128]

# where to save files
# The assignment previously had no value (a syntax error on a fresh kernel).
# Output file names are appended by plain string concatenation, so the value
# must end with a path separator.
# TODO: point this at the intended output directory.
pathout = './'

# manually create the time frames
# Each entry is [start, stop] in days and is used as a label-based
# `time=slice(start, stop)` selection when the model output is read.
timeframes = [[0,25], [25,50], [100,125]]

In [3]:
### Function to Bin Input Variables by Column RH
def binbycolrh(colrh,variable,percentiles):
    """Conditionally average ``variable`` inside consecutive column-RH bins.

    Parameters
    ----------
    colrh : array-like
        Column relative humidity, compared element-wise against the bin
        edges. Presumably shares the ``x``/``y``/``time`` dimensions of
        ``variable`` so the mask broadcasts over ``z`` -- TODO confirm.
    variable : xarray.DataArray
        Field to average. Must expose a ``z`` coordinate and be reducible
        over the ``x``, ``y`` and ``time`` dimensions.
    percentiles : sequence of float
        Increasing bin edges in the same units as ``colrh``. Bin ``i`` spans
        ``percentiles[i]`` to ``percentiles[i+1]``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(percentiles)-1, len(variable.z))`` holding the
        mean of ``variable`` over ``x``, ``y`` and ``time`` for each bin.
        The array is initialised to NaN.

    Notes
    -----
    Bins are half-open, ``[low, high)``, except the last one, which is closed
    on both sides so that points equal to the largest edge are kept. The
    previous code only closed the top bin when the edge was exactly ``100``,
    which never triggers when the edges are CRH values rather than
    percentile ranks.
    """
    nbins = len(percentiles) - 1
    ## Initialize Data by RH
    databyrh = np.full((nbins, len(variable.z)), np.nan)
    for i in range(nbins):
        low  = percentiles[i]
        high = percentiles[i+1]
        # Legacy behaviour kept for callers that pass ranks 0..100 as edges.
        if high==100: high+=0.1
        ## Select the points falling in this bin (top bin includes its upper edge)
        if i == nbins - 1:
            in_bin = (colrh>=low)&(colrh<=high)
        else:
            in_bin = (colrh>=low)&(colrh<high)
        ## Conditionally Average Based on Column RH
        condvar = xr.where(in_bin,variable,np.nan)
        ## Save Conditionally Averaged Variable
        databyrh[i,:] = condvar.mean(dim=('x','y','time')).values
    return databyrh

## Create datasets

In [4]:
### Column RH percentile bins
# Edges run from 0 to 100 (inclusive) in steps of `dbin`; each bin is
# labelled by the midpoint between its two edges.
dbin = 1
binedges = np.arange(0, 100 + dbin, dbin)
bincenter = dbin / 2 + binedges[:-1]
nbins = bincenter.size

# ---- Quantities that are identical for every case are built once here.

# Labels such as '0-25' used as the `timeframe` coordinate of the output.
timeframe_str = [str(start) + '-' + str(stop) for start, stop in timeframes]

# Coordinate arrays
da_percentile_edges = xr.DataArray(data=binedges,
                                   name='percentile_edge',
                                   attrs=dict(long_name='Percentile edge'))
da_percentile_centers = xr.DataArray(data=bincenter,
                                     name='percentile_center',
                                     attrs=dict(long_name='Percentile center'))
da_timeframes = xr.DataArray(data=timeframe_str,
                             name='timeframe',
                             attrs=dict(long_name='Time period',
                                        units='days'))

# 3D fields that are binned by column RH, in processing order:
# (label used in progress messages, OUT_3D sub-directory,
#  variable name inside the files, message printed once binned or None)
FIELDS_3D = (
    ('qr',   'QRAD', 'QRAD', 'qrad and z done'),
    ('qn',   'QN',   'QN',   'qn done'),
    ('fmse', 'FMSE', 'fmse', 'fmse done'),
    ('w',    'W',    'W',    None),
)


def _binned_dataarray(data, z, long_name, units,
                      description='Binned by column relative humidity percentile'):
    """Wrap a (timeframe, percentile_center, z) array with coordinates and attributes."""
    return xr.DataArray(data=data,
                        dims=('timeframe','percentile_center','z'),
                        coords=(da_timeframes,da_percentile_centers,z),
                        attrs=dict(long_name=long_name,
                                   units=units,
                                   description=description))


for case, nlev in zip(cases, nz):

    print('working on ' + case)

    path = '/ocean/projects/atm200007p/ajenney/rcemip-large/' + case

    # List files (sorted so that files are concatenated in a stable order)
    crhfiles = sorted(glob.glob(path+'/OUT_3D/PW/*.nc'))
    files3d = {label: sorted(glob.glob(path+'/OUT_3D/'+subdir+'/*.nc'))
               for label, subdir, _, _ in FIELDS_3D}

    # Density comes from the first statistics file. Sorting makes the choice
    # deterministic when more than one file matches, and an empty match now
    # gives a clear error instead of an IndexError.
    statfiles = sorted(glob.glob(path+'/OUT_STAT/*.nc'))
    if not statfiles:
        raise FileNotFoundError('no OUT_STAT netCDF file found under ' + path)
    rhofile = statfiles[0]

    size_of_arrays = (len(timeframes), nbins, nlev)

    # Results, pre-filled with NaN so missing pieces are visible in the output.
    crh_percentiles = np.full((len(timeframes), len(binedges)), np.nan)
    qr, qn, fmse, w, psi = (np.full(size_of_arrays, np.nan) for _ in range(5))
    targets = {'qr': qr, 'qn': qn, 'fmse': fmse, 'w': w}

    z = None  # vertical coordinate, taken from the first 3D field that is read

    # NOTE(review): label-based slices in xarray include both end points, so
    # consecutive frames such as [0,25] and [25,50] may share the sample at
    # time == 25 -- confirm this is intended.
    for itime, (start, stop) in enumerate(timeframes):

        print('working on days ' + str(start) + '-' + str(stop))

        # Each dataset is opened in a `with` block so its files are closed as
        # soon as the values have been reduced into the numpy result arrays.
        with xr.open_mfdataset(crhfiles) as crh_ds:
            crh = crh_ds.sel(time=slice(start,stop)).crh
            print('binning crh')
            # CRH values at each percentile edge; these are the bin edges
            # passed to binbycolrh below.
            crh_percentiles[itime,:] = np.percentile(crh,binedges)
            print('crh done')

            for label, _, varname, done_msg in FIELDS_3D:
                with xr.open_mfdataset(files3d[label]) as ds3d:
                    data = ds3d.sel(time=slice(start,stop))[varname]
                    print('binning ' + label)
                    targets[label][itime,:,:] = binbycolrh(crh,data,crh_percentiles[itime])
                    if z is None:
                        # save z on first pass; load it before the files close
                        z = data.z.load()
                if done_msg is not None:
                    print(done_msg)

        print('calculating psi')
        with xr.open_dataset(rhofile) as stat_ds:
            # Time-mean density profile for this frame, as a plain numpy array.
            rho = stat_ds.sel(time=slice(start,stop)).RHO.mean('time').values
        # Accumulate rho*w across bins, each bin weighted by 1/nbins.
        # The first bin is fixed at zero and contributes no increment.
        psi[itime,0,:] = 0
        for j in range(1, nbins):
            psi[itime,j,:] = psi[itime,j-1,:]+(rho*w[itime,j,:]*(1/nbins))
        print('psi done')

    # ---- Save output to netcdf

    # Data arrays
    da_crh_percentiles = xr.DataArray(data=crh_percentiles,
                                      dims=('timeframe','percentile_edge'),
                                      coords=(da_timeframes,da_percentile_edges),
                                      attrs=dict(long_name='Column relative humidity percentile'))

    da_w = _binned_dataarray(w, z, 'Vertical velocity', 'm/s')
    da_qr = _binned_dataarray(qr, z, 'Radiative heating rate', 'K/day')
    da_qn = _binned_dataarray(qn, z, 'Non-precipitating condensate mixing ratio', 'g/kg')
    da_fmse = _binned_dataarray(fmse, z, 'Frozen moist static energy', 'J/kg')
    da_psi = _binned_dataarray(psi, z, 'Mass streamfunction', 'kg/m2/s',
                               description='Calculated from vertical velocity binned by column relative humidity percentile')

    # Merge dataArrays into a single dataset then save as netcdf
    ds_out = xr.Dataset(data_vars=dict(w=da_w,
                                       crh_percentile=da_crh_percentiles,
                                       Qr=da_qr,
                                       qn=da_qn,
                                       fmse=da_fmse,
                                       psi=da_psi),
                        attrs=dict(history='calculated on ' + today +
                                   ' by Andrea Jenney: ajenney@uci.edu',
                                   case=case))

    # One file per case; `pathout` must already end with a path separator.
    filename_out = pathout + 'crh_binned_vars_' + case + '_figure_timeframes.nc'
    ds_out.to_netcdf(filename_out)

working on nz_128
working on days 100-125
binning crh
crh done
binning qr
qrad and z done
binning qn
qn done
binning fmse
fmse done
binning w
calculating psi
psi done
