Skip to content

Commit

Permalink
Add init_chem job to coupled workflow
Browse files Browse the repository at this point in the history
A new init_chem job is added to the coupled workflow to cycle tracer
concentrations from the previous cycle's restart files. The new job
doesn't run for the first cycle (since there are no previous cycle
restart files to use). If aerosol initialization is desired for the
first cycle, they need to be added manually, though the new scripts
may be reutilized to do so.

Tracer concentrations are merely copied from the restart files to the
cold-start initial conditions. There is no adjustment to ensure mass
conservation or any other property. The list of tracers copied is
determined by the new parm/chem/gocart_tracer.list file, which is a
plain-text list of NetCDF variables to copy. If any are missing in the
restart files, the job will fail.

The NetCDF4 module for python3 is required for the init job to work.
This module is not part of the default installation of python on Hera
(though it is on WCOSS), so in addition to now loading python, the
base modulefile also adds the location of a personal copy of the
needed python module to the PYTHONPATH.

Most of the new scripts are recycled from the GEFS-aerosol development,
though this method was abandoned there in favor of true warm start for
the aerosol member.

Refs: NOAA-EMC#314
  • Loading branch information
WalterKolczynski-NOAA committed Jun 7, 2021
1 parent 45def8c commit 84101b0
Show file tree
Hide file tree
Showing 8 changed files with 378 additions and 2 deletions.
56 changes: 56 additions & 0 deletions jobs/rocoto/init_chem.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/bin/bash

set -x

###############################################################
## Abstract:
## Create FV3 initial conditions from GFS intitial conditions
## RUN_ENVIR : runtime environment (emc | nco)
## HOMEgfs : /full/path/to/workflow
## EXPDIR : /full/path/to/config/files
## CDATE : current date (YYYYMMDDHH)
## CDUMP : cycle name (gdas / gfs)
## PDY : current date (YYYYMMDD)
## cyc : current cycle (HH)
###############################################################

###############################################################
# Source FV3GFS workflow modules
. $USHgfs/load_fv3gfs_modules.sh
status=$?
[[ $status -ne 0 ]] && exit $status

###############################################################
# Source relevant configs
configs="base"
for config in $configs; do
. $EXPDIR/config.${config}
status=$?
[[ $status -ne 0 ]] && exit $status
done

###############################################################
# Source machine runtime environment
. $BASE_ENV/${machine}.env init_aerosol
status=$?
[[ $status -ne 0 ]] && exit $status

# export DATAROOT="$RUNDIR/$CDATE/$CDUMP"
# [[ ! -d $DATAROOT ]] && mkdir -p $DATAROOT
# export DATA=${DATA:-${DATAROOT}/${jobid:?}}
# mkdir -p $DATA
# cd $DATA

$SCRgfs/exgfs_chem_init_aerosol.py

status=$?
if [[ $status -ne 0 ]]; then
echo "FATAL ERROR: exgfs_chem_init_aerosol.py failed with error code $status"
exit $status
fi

##############################################################
# Exit cleanly

set +x
exit 0
9 changes: 7 additions & 2 deletions modulefiles/module_base.hera
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
##

# Modules availble through hpc-stack
module use /scratch2/NCEPDEV/nwprod/hpc-stack/libs/hpc-stack/modulefiles/stack
# module use /scratch2/NCEPDEV/nwprod/hpc-stack/libs/hpc-stack/modulefiles/stack
module use /scratch1/NCEPDEV/nems/Raffaele.Montuoro/dev/nasa/libs/hpc-stack/1.1.0-nasa/modulefiles/stack

module load hpc/1.1.0
module load hpc-intel/18.0.5.274
Expand All @@ -18,6 +19,7 @@ module load crtm 2.3.0
module load prod_util/1.2.2
module load grib_util/1.2.2
module load ip/3.3.3

module load sp/2.3.3
module load w3nco/2.4.1
module load bacio/2.4.1
Expand All @@ -32,11 +34,14 @@ module load hdf5/1.10.6
module load esmf/8_1_0_beta_snapshot_27
module load w3emc/2.7.3
module load wgrib2/2.0.8
setenv WGRIB2 wgrib2
setenv "WGRIB2" "wgrib2"

# Modules not under hpc-stack
module load hpss/hpss
module load nco/4.9.1
module load gempak/7.4.2
module load ncl/6.5.0
module load cdo/1.9.5
module load intelpython/3.6.8
# netCDF4 is not in the python installation on Hera
append-path "PYTHONPATH" "/scratch2/NCEPDEV/ensemble/save/Walter.Kolczynski/python_packages"
20 changes: 20 additions & 0 deletions parm/chem/gocart_tracer.list
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
so2
sulf
dms
msa
pp25
pp10
bc1
bc2
oc1
oc2
dust1
dust2
dust3
dust4
dust5
seas1
seas2
seas3
seas4
seas5
137 changes: 137 additions & 0 deletions scripts/exgfs_chem_init_aerosol.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
#! /usr/bin/env python3

"""
"""

import os
import subprocess
import typing
from datetime import datetime, timedelta
from functools import partial

# Constants
atm_base_pattern = "{ics_dir}/%Y%m%d%H/atmos/{case}/INPUT" # Location of atmosphere ICs
atm_file_pattern = "{path}/gfs_data.{tile}.nc" # Atm IC file names
tracer_base_pattern = "{rot_dir}/{cdump}.%Y%m%d/%H/atmos/RERUN_RESTART" # Time of previous run
tracer_file_pattern = "{tracer_base}/%Y%m%d.%H0000.fv_tracer.res.{tile}.nc" # Time when restart is valid (current run)
tracer_list_file_pattern = "{parm_gfs}/chem/gocart_tracer.list" # Text list of tracer names to copy
merge_script_pattern = "{ush_gfs}/merge_fv3_chem_tile.py"
n_tiles = 6
max_lookback = 4 # Maximum number of past cycles to look for for tracer data
debug = True

# End configurable settings

# Make sure print statements are flushed immediately, otherwise
# print statments may be out-of-order with subprocess output
print = partial(print, flush=True)

tiles = list(map(lambda t: "tile{t}".format(t=t), range(1, n_tiles + 1)))


def main() -> None:
# Read in environment variables and make sure they exist
cdate = get_env_var("CDATE")
incr = int(get_env_var('FHCYC'))
cdump = get_env_var("CDUMP")
rot_dir = get_env_var("ROTDIR")
ics_dir = get_env_var("ICSDIR")
case = get_env_var("CASE")
ush_gfs = get_env_var("USHgfs")
parm_gfs = get_env_var("PARMgfs")
# data = get_env_var('DATA')

# os.chdir(data)

merge_script = merge_script_pattern.format(ush_gfs=ush_gfs)
tracer_list_file = tracer_list_file_pattern.format(parm_gfs=parm_gfs)

time = datetime.strptime(cdate, "%Y%m%d%H")
atm_source_path = time.strftime(atm_base_pattern.format(**locals()))

if(debug):
for var in ['merge_script', 'tracer_list_file', 'atm_source_path']:
print(f'{var} = {f"{var}"}')

atm_files = get_atm_files(atm_source_path)
tracer_files = get_tracer_files(time, incr, max_lookback, rot_dir, cdump)

if (tracer_files is not None):
merge_tracers(merge_script, atm_files, tracer_files, tracer_list_file)

return


# Retrieve environment variable and exit or print warning if not defined
def get_env_var(varname: str, fail_on_missing: bool = True) -> str:
if(debug):
print(f'Trying to read envvar {varname}')

var = os.environ.get(varname)
if(var is None):
if(fail_on_missing is True):
print("FATAL: Environment variable {varname} not set".format(varname=varname))
exit(100)
else:
print("WARNING: Environment variable {varname} not set, continuing using None".format(varname=varname))
if(debug):
print(f'\tValue: {var}')
return(var)


# Check if atm files exist
def get_atm_files(path: str) -> typing.List[str]:
print(f'Checking for atm files in {path}')

files = list(map(lambda tile: atm_file_pattern.format(tile=tile, path=path), tiles))
for file_name in files:
if(debug):
print(f"\tChecking for {file_name}")
if(not os.path.isfile(file_name)):
print("FATAL: Atmosphere file {file_name} not found".format(file_name=file_name))
exit(101)
elif(debug):
print(f"\t\tFound {file_name}")
return files


# Find last cycle with tracer data available via restart files
def get_tracer_files(time: datetime, incr: int, max_lookback: int, rot_dir: str, cdump: str) -> typing.List[str]:
print(f"Looking for tracer files in {rot_dir}")
for lookback in map(lambda i: incr * (i + 1), range(max_lookback)):
last_time = time - timedelta(hours=lookback)
if(debug):
print(f"\tChecking {last_time}")
tracer_base = last_time.strftime(tracer_base_pattern.format(**locals()))
files = list(map(lambda tile: time.strftime(tracer_file_pattern.format(tracer_base=tracer_base, tile=tile)), tiles))
if(debug):
print("\t\tLooking for files {files} in directory {tracer_base}")
found = [file for file in files if os.path.isfile(file)]
if(found):
if(debug):
print(f"\t\tAll files found!")
return files
else:
print(last_time.strftime("Tracer files not found for %Y%m%d_%H"))

if(not found):
print("WARNING: Unable to find tracer files, will use zero fields")
return None


# Merge tracer data into atmospheric data
def merge_tracers(merge_script: str, atm_files: typing.List[str], tracer_files: typing.List[str], tracer_list_file: str) -> None:
print(f"Merging tracers")
if(len(atm_files) != len(tracer_files)):
print("FATAL: atmosphere file list and tracer file list are not the same length")
exit(102)

for atm_file, tracer_file in zip(atm_files, tracer_files):
if(debug):
print(f"\tMerging tracers from {tracer_file} into {atm_file}")
subprocess.run([merge_script, atm_file, tracer_file, tracer_list_file], check=True)


if __name__ == "__main__":
main()
exit(0)
116 changes: 116 additions & 0 deletions ush/merge_fv3_chem_tile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
#!/usr/bin/env python3
"""
Appends tracer data from one NetCDF file to another and updates the tracer
count.
usage: merge_fv3_chem_tile.py [-h] atm_file chem_file variable_file [out_file]
Appends tracer data from one NetCDF file to another and updates the tracer
count.
positional arguments:
atm_file File containing the atmospheric data
chem_file File containing the chemistry tracer data to be added
variable_file File containing list of tracer variable_names in the
chem_file to add to the atm_file, one tracer per line
out_file Name of file to create. If none is specified, the atm_file
will be edited in place. New file will be a copy of atm_file
with the specificed tracers listed in variable_file appended
from chem_file and ntracers updated.
optional arguments:
-h, --help show this help message and exit
"""

import os, sys, subprocess
from typing import List
from functools import partial
from shutil import copyfile
import argparse

try:
import netCDF4
except ImportError:
print("FATAL ERROR: Failed to import netCDF4 in " + __file__ + "!")
print("Make sure you are using a version of python 3 with netCDF4 installed")
sys.exit(100)

# Make sure print statements are flushed immediately, otherwise
# print statments may be out-of-order with subprocess output
print = partial(print, flush=True)

def merge_tile(base_file_name: str, append_file_name: str, tracers_to_append: List[str]) -> None:
if not os.path.isfile(base_file_name):
print("FATAL ERROR: Atmosphere file " + base_file_name + " does not exist!")
sys.exit(102)

if not os.path.isfile(append_file_name):
print("FATAL ERROR: Chemistry file " + append_file_name + " does not exist!")
sys.exit(103)

append_file = netCDF4.Dataset(append_file_name, "r")
base_file = netCDF4.Dataset(base_file_name, "r+")

# print(base_file)
# print(base_file.dimensions["ntracer"])

old_ntracer = base_file.dimensions["ntracer"].size
new_ntracer = old_ntracer + len(tracers_to_append)

# Copy over chemistry dimensions
for dim_name in append_file.dimensions:
base_file.createDimension(dim_name, append_file.dimensions[dim_name].size)

for variable_name in tracers_to_append:
print("Adding variable " + variable_name + " to file")
variable = append_file[variable_name]
base_file.createVariable(variable_name, variable.datatype, variable.dimensions)
base_file[variable_name][:] = variable[:]
base_file[variable_name].setncatts(variable.__dict__)
# print("Done adding " + variable_name)

print("Updating ntracer")

# Use ncks to rewrite file without ntracer so we can define it anew
subprocess.run(["ncks", "-x", "-v", "ntracer", "-O", base_file_name, base_file_name], check=True)

base_file = netCDF4.Dataset(base_file_name, "r+")
base_file.createDimension("ntracer", new_ntracer)
# print(base_file.dimensions["ntracer"])


def main() -> None:
parser = argparse.ArgumentParser(
description="Appends tracer data from one NetCDF file to another and updates the tracer count.")
parser.add_argument('atm_file', type=str, help="File containing the atmospheric data")
parser.add_argument('chem_file', type=str, help="File containing the chemistry tracer data to be added")
parser.add_argument('variable_file', type=str, help="File containing list of tracer variable_names in the chem_file to add to the atm_file, one tracer per line")
parser.add_argument('out_file', type=str, nargs="?", help="Name of file to create. If none is specified, the atm_file will be edited in place. New file will be a copy of atm_file with the specificed tracers listed in variable_file appended from chem_file and ntracers updated.")

args = parser.parse_args()

atm_file_name = args.atm_file
chem_file_name = args.chem_file
variable_file = args.variable_file
out_file_name = args.out_file

if out_file_name is None:
print("INFO: No out_file specified, will edit atm_file in-place")
out_file_name = atm_file_name
else:
if os.path.isfile(out_file_name):
print("WARNING: Specified out file " + out_file_name + " exists and will be overwritten")
copyfile(atm_file_name, out_file_name)

variable_file = open(variable_file)
variable_names = variable_file.read().splitlines()
variable_file.close()

merge_tile(out_file_name, chem_file_name, variable_names)

# print(variable_names)


if __name__ == "__main__":
main()
22 changes: 22 additions & 0 deletions workflow/config/init_chem.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# This file is used to generate config.init_chem

config_init_chem:
filename: config.init_chem
content: !expand |
#!/bin/ksh -x
# This file is automatically generated from the YAML-based system
# in ecf/ecfutils/. Any changes will be overwritten if
# setup_case.sh is rerun.
########## config.init_chem ##########
# Append chem tracers from previous cycle to atmosphere initial conditions
echo "BEGIN: config.init_chem"
# Task and thread configuration
export npe_init_chem={doc.partition_common.resources.run_init_chem[0].mpi_ranks}
export npe_node_init_chem={doc.partition_common.resources.run_init_chem[0].mpi_ranks}
echo "END: config.init_chem"
Loading

0 comments on commit 84101b0

Please sign in to comment.