In [None]:
import pandas as pd
import xarray as xr
from collections import Counter

In [None]:
# Destination of the generated metadata table
metadata_output_file = '<some_path>/wrfout_metadata.csv'

# A single wrfout_* model output file; it supplies the initial variable attributes
wrfout_file = '<some_path>/kyoko/OUTPUT/WY1996/wrfout_d01_1996-10-01_00:00:00'

base_dir = '<some_path>/02_conus404_metadata/wrfout_overrides'

# The override files live directly in base_dir
overrides_dir = base_dir

# One override file per metadata field
wrf_long_name_override = overrides_dir + '/wrfout_long_name_overrides.txt'
wrf_units_override = overrides_dir + '/wrfout_units_overrides.txt'
wrf_valid_range_overrides = overrides_dir + '/wrfout_valid_range_overrides.txt'
wrf_flag_values_overrides = overrides_dir + '/wrfout_flag_values_overrides.txt'
wrf_flag_meanings_overrides = overrides_dir + '/wrfout_flag_meanings_overrides.txt'
wrf_notes_overrides = overrides_dir + '/wrfout_notes_overrides.txt'
wrf_scale_factor_overrides = overrides_dir + '/wrfout_scale_factor_overrides.txt'

# Maps words found in the wrfout_* attributes to the text that should replace them
wrf_wordmap_file = overrides_dir + '/wrfout_wordmap.csv'


# Variables integrated over the 60 minutes of each hourly timestep
vars_60min_accum = """
    ACDEWC ACDRIPR ACDRIPS ACECAN ACEDIR ACETLSM ACETRAN
    ACEVAC ACEVB ACEVC ACEVG ACFROC ACFRZC ACGHB ACGHFLSM
    ACGHV ACINTR ACINTS ACIRB ACIRC ACIRG ACLHFLSM ACLWDNLSM
    ACLWUPLSM ACMELTC ACPAHB ACPAHG ACPAHLSM ACPAHV ACPONDING
    ACQLAT ACQRF ACRAINLSM ACRAINSNOW ACRUNSB ACRUNSF ACSAGB
    ACSAGV ACSAV ACSHB ACSHC ACSHFLSM ACSHG ACSNBOT ACSNFRO
    ACSNOWLSM ACSNSUB ACSUBC ACSWDNLSM ACSWUPLSM ACTHROR ACTHROS
    ACTR GRAUPEL_ACC_NC PREC_ACC_C PREC_ACC_NC SNOW_ACC_NC
""".split()

# Variables accumulated from model start
vars_model_accum = """
    ACGRDFLX ACHFX ACLHF
    ACSNOM
    GRAUPELNC HAILNC
    I_ACLWDNB I_ACLWDNBC I_ACLWDNT I_ACLWDNTC I_ACLWUPB
    I_ACLWUPBC I_ACLWUPT I_ACLWUPTC I_ACSWDNB I_ACSWDNBC
    I_ACSWDNT I_ACSWDNTC I_ACSWUPB I_ACSWUPBC I_ACSWUPT
    I_ACSWUPTC I_RAINC I_RAINNC
    QRFS QSLAT QSPRINGS
    RAINSH RECH SNOWNC
""".split()

# Variables tagged further down as accumulated since the last bucket_J reset
vars_bucket_J_accum = """
    ACLWDNB ACLWDNBC ACLWDNT ACLWDNTC ACLWUPB ACLWUPBC ACLWUPT
    ACLWUPTC ACSWDNB ACSWDNBC ACSWDNT ACSWDNTC ACSWUPB ACSWUPBC
    ACSWUPT ACSWUPTC
""".split()

# Variables tagged further down as accumulated since the last bucket_mm reset
vars_bucket_mm_accum = 'RAINC RAINNC'.split()

# Quick sanity check on the size of each group
print(f'{len(vars_60min_accum)=}')
print(f'{len(vars_model_accum)=}')
print(f'{len(vars_bucket_mm_accum)=}')
print(f'{len(vars_bucket_J_accum)=}')

In [None]:
def read_override_file(filename):
    """Read a tab-separated override file into a dictionary.

    The first line of the file is treated as a header and skipped. On every
    following line the first field becomes the key and the second field the
    value; any further fields on the line are ignored. Blank lines (such as
    a trailing empty line) are skipped.

    Parameters
    ----------
    filename : str or path-like
        Path of the ASCII-encoded override file.

    Returns
    -------
    dict
        Mapping of first-field text to second-field text. Empty when the
        file contains only a header or nothing at all.

    Raises
    ------
    ValueError
        If a non-blank data line has fewer than two tab-separated fields.
    """
    # The context manager closes the file even if reading or decoding fails
    with open(filename, 'r', encoding='ascii') as fhdl:
        rawdata = fhdl.read().splitlines()

    override_map = {}
    # Slicing (rather than next() on an iterator) tolerates an empty file
    for lineno, row in enumerate(rawdata[1:], start=2):
        if not row.strip():
            continue

        flds = row.split('\t')
        if len(flds) < 2:
            raise ValueError(
                f'{filename}: line {lineno} has no tab-separated value: {row!r}'
            )
        override_map[flds[0]] = flds[1]
    return override_map

In [None]:
# Read the word map used when turning description strings into long names.
# The file is split on tabs: field 1 is the word as it appears in the wrfout
# attributes and field 3 is its replacement.
with open(wrf_wordmap_file, 'r', encoding='ascii') as fhdl:
    # The context manager closes the file even if reading or decoding fails
    rawdata = fhdl.read().splitlines()

word_map = {}
for row in rawdata[1:]:   # The first line is a header
    flds = row.split('\t')

    # Rows with no third field (blank lines included) or with an empty third
    # field define no replacement, so the word is left unmapped
    if len(flds) < 3 or len(flds[2]) == 0:
        continue

    # Double quotes are stripped from both the word and its replacement
    word_map[flds[0].replace('"', '')] = flds[2].replace('"', '')

In [None]:
# Load every override file into its own {variable name: value} dictionary.
# The files are read in the order listed; each target name on the left is
# paired with the path at the same position on the right.
(
    long_name_map,
    valid_range_map,
    flag_values_map,
    flag_meanings_map,
    notes_map,
    scale_factor_map,
    units_map,
) = map(read_override_file, (
    wrf_long_name_override,
    wrf_valid_range_overrides,
    wrf_flag_values_overrides,
    wrf_flag_meanings_overrides,
    wrf_notes_overrides,
    wrf_scale_factor_overrides,
    wrf_units_override,
))

### Read dimensions, variables, and attributes from a single wrfout file

In [None]:
# Open the sample wrfout file whose variables and attributes seed the metadata.
# NOTE(review): decode_coords=False presumably keeps the 'coordinates' attribute
# on each variable so it can be copied later, and chunks={} presumably keeps
# the arrays lazy -- confirm against the xarray open_dataset documentation.
df = xr.open_dataset(
    wrfout_file,
    decode_coords=False,
    chunks={},
)

In [None]:
# attr_cnt: number of variables carrying each copied attribute.
# word_cnt: occurrences of every word seen while building a long_name from a
# description through the word map.
attr_cnt = Counter()
word_cnt = Counter()

# Metadata fields copied verbatim from an override file whenever the variable
# is listed in it
extra_overrides = (
    ('valid_range', valid_range_map),
    ('flag_values', flag_values_map),
    ('flag_meanings', flag_meanings_map),
    ('notes', notes_map),
    ('scale_factor', scale_factor_map),
)

# Variable name -> {metadata field: value}
wrfout_vars = {}

for vv in list(df.keys()):
    cvar = df[vv]
    var_meta = {}
    wrfout_vars[vv] = var_meta

    for cattr, val in cvar.attrs.items():
        # Only these three attributes are carried over from the file
        if cattr not in ('description', 'units', 'coordinates'):
            continue
        attr_cnt[cattr] += 1

        if cattr == 'units':
            # Units are overridden when the variable is in the units map
            var_meta['units'] = units_map.get(vv, val)
        elif cattr == 'description':
            # Keep the original description alongside the derived long_name
            var_meta['description'] = val

            if vv in long_name_map:
                # long_name is overridden
                var_meta['long_name'] = long_name_map[vv]
            else:
                # Construct long_name by mapping each space-separated word
                words = val.split(' ')
                word_cnt.update(words)
                outstr = ' '.join(word_map.get(ww, ww) for ww in words)

                # Upper-case the first character only; the rest is left as-is
                var_meta['long_name'] = outstr[:1].upper() + outstr[1:]
        else:
            # Just copy other attributes
            var_meta[cattr] = val

    var_meta['datatype'] = cvar.encoding['dtype'].name
    var_meta['dimensions'] = ' '.join(cvar.dims)

    # Overrides also apply to variables whose file attributes are missing
    # (e.g. XTIME has no units and Times has no description), so an override
    # entry alone is enough to provide units or long_name.
    if 'units' not in var_meta and vv in units_map:
        var_meta['units'] = units_map[vv]
    if 'long_name' not in var_meta and vv in long_name_map:
        var_meta['long_name'] = long_name_map[vv]

    for field, override_map in extra_overrides:
        if vv in override_map:
            var_meta[field] = override_map[vv]

    # Flag accumulated variables and record what they are accumulated over
    if vv in vars_60min_accum:
        var_meta['accumulated'] = True
        var_meta['integration_length'] = 'accumulated over prior 60 minutes'

        # Add a cell_methods field
        # var_meta['cell_methods'] = 'XTIME: sum (interval: 1 minute)'
    elif vv in vars_model_accum:
        var_meta['accumulated'] = True
        var_meta['integration_length'] = 'accumulated since 1979-10-01 00:00:00'
    elif vv in vars_bucket_J_accum:
        var_meta['accumulated'] = True
        var_meta['integration_length'] = 'accumulated since last bucket_J (1.0e9 J m-2) reset'
    elif vv in vars_bucket_mm_accum:
        var_meta['accumulated'] = True
        var_meta['integration_length'] = 'accumulated since last bucket_mm (100 mm) reset'
    else:
        var_meta['accumulated'] = False

In [None]:
# Show how many variables carried each of the copied attributes
attr_cnt

## Create a dataframe of the new metadata

In [None]:
# The dict of dicts yields one column per variable, so flip it to get one
# row per variable and one column per metadata field
out_df = pd.DataFrame(wrfout_vars).T

# Preview the first few variables
out_df.head()

In [None]:
# Write the new metadata to a tab-separated file, one row per variable,
# sorted by variable name.
output_columns = ['long_name', 'accumulated', 'integration_length',
                  'description', 'notes', 'units', 'scale_factor', 'valid_range',
                  'flag_values', 'flag_meanings', 'dimensions', 'coordinates', 'datatype']

# reindex() fixes the column order and adds any column that no variable
# populated (e.g. an override file holding only a header) as an empty column;
# passing the list through to_csv(columns=...) would raise a KeyError instead.
(out_df
 .sort_index()
 .reindex(columns=output_columns)
 .to_csv(metadata_output_file, sep='\t', index_label='varname'))