Skip to content

Commit

Permalink
Merge pull request #1 from natalie-perlin/gaea_intel2022
Browse files Browse the repository at this point in the history
Updated modulefile for Gaea and regression tests configuration
  • Loading branch information
natalie-perlin committed Jun 23, 2023
2 parents aace46e + 1fe104e commit ff26a62
Show file tree
Hide file tree
Showing 7 changed files with 229 additions and 6 deletions.
2 changes: 1 addition & 1 deletion modulefiles/gsi_common.lua
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ local nemsio_ver=os.getenv("nemsio_ver") or "2.5.4"
local wrf_io_ver=os.getenv("wrf_io_ver") or "1.2.0"
local ncio_ver=os.getenv("ncio_ver") or "1.1.2"
local crtm_ver=os.getenv("crtm_ver") or "2.4.0"
local ncdiag_ver=os.getenv("ncdiag_ver") or "1.1.0"
local ncdiag_ver=os.getenv("ncdiag_ver") or "1.1.1"

load(pathJoin("netcdf", netcdf_ver))

Expand Down
6 changes: 5 additions & 1 deletion modulefiles/gsi_gaea.lua
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,15 @@ load("gsi_common")
local prod_util_ver=os.getenv("prod_util_ver") or "1.2.2"
load(pathJoin("prod_util", prod_util_ver))

-- Needed at runtime:
load("alps")

local MKLROOT="/opt/intel/oneapi/mkl/2022.0.2/"
prepend_path("LD_LIBRARY_PATH",pathJoin(MKLROOT,"lib/intel64"))
pushenv("MKLROOT", MKLROOT)

pushenv("GSI_BINARY_SOURCE_DIR", "/lustre/f2/dev/role.epic/contrib/GSI_data/fix")
pushenv("GSI_BINARY_SOURCE_DIR", "/lustre/f2/dev/role.epic/contrib/GSI_data/fix/20230601")

setenv("CC","cc")
setenv("FC","ftn")
setenv("CXX","CC")
Expand Down
3 changes: 2 additions & 1 deletion regression/regression_driver.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ if [ -d "$config_path" ]; then
source $config_path/local_vars.sh
fi


# source the necessary files to setup
if [ "$#" -eq 2 ]; then
export regdir=$2
Expand Down Expand Up @@ -41,7 +42,7 @@ for jn in `seq ${RSTART} ${REND}`; do
fi
rm -f ${job[$jn]}.out

/bin/sh $ush/$sub_cmd -q $queue -j ${job[$jn]} -t ${topts[$jn]} -p ${popts[$jn]} -r ${ropts[$jn]} $scripts/${regtest}.sh
/bin/sh $ush/$sub_cmd -q $queue -j ${job[$jn]} -t ${topts[$jn]} -p ${popts[$jn]} -r ${ropts[$jn]} $scripts/${regtest}.sh >& $ush/sub_cmd.${job[$jn]}.out

if [ $debug == ".true." ]; then break; fi
$scripts/regression_wait.sh ${job[$jn]} ${rcname} $check_resource
Expand Down
37 changes: 37 additions & 0 deletions regression/regression_param.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ case $machine in
sub_cmd="sub_jet"
memnode=96
numcore=40
;;
Gaea)
sub_cmd="sub_gaea"
memnode=64
numcore=36
;;
wcoss2)
sub_cmd="sub_wcoss2"
Expand Down Expand Up @@ -58,6 +63,9 @@ case $regtest in
elif [[ "$machine" = "Cheyenne" ]]; then
topts[1]="0:30:00" ; popts[1]="16/2/" ; ropts[1]="/1"
topts[2]="0:30:00" ; popts[2]="16/4/" ; ropts[2]="/2"
elif [[ "$machine" = "Gaea" ]]; then
topts[1]="0:30:00" ; popts[1]="18/2/" ; ropts[1]="/1"
topts[2]="0:30:00" ; popts[2]="18/4/" ; ropts[2]="/2"
elif [[ "$machine" = "wcoss2" ]]; then
topts[1]="0:15:00" ; popts[1]="12/5/" ; ropts[1]="/1"
topts[2]="0:15:00" ; popts[2]="12/9/" ; ropts[2]="/2"
Expand Down Expand Up @@ -88,6 +96,9 @@ case $regtest in
elif [[ "$machine" = "Cheyenne" ]]; then
topts[1]="0:35:00" ; popts[1]="16/2/" ; ropts[1]="/1"
topts[2]="0:25:00" ; popts[2]="16/4/" ; ropts[2]="/2"
elif [[ "$machine" = "Gaea" ]]; then
topts[1]="0:35:00" ; popts[1]="18/2/" ; ropts[1]="/1"
topts[2]="0:25:00" ; popts[2]="18/4/" ; ropts[2]="/2"
elif [[ "$machine" = "wcoss2" ]]; then
topts[1]="0:15:00" ; popts[1]="28/2/" ; ropts[1]="/1"
topts[2]="0:15:00" ; popts[2]="28/4/" ; ropts[2]="/2"
Expand All @@ -104,6 +115,8 @@ case $regtest in
popts[1]="12/5/"
elif [[ "$machine" = "Jet" ]]; then
popts[1]="12/5/"
elif [[ "$machine" = "Gaea" ]]; then
popts[1]="18/5/"
elif [[ "$machine" = "wcoss2" ]]; then
popts[1]="28/4/"
topts[1]="3:00:00"
Expand Down Expand Up @@ -131,6 +144,9 @@ case $regtest in
elif [[ "$machine" = "Cheyenne" ]]; then
topts[1]="1:59:00" ; popts[1]="6/8/" ; ropts[1]="/1"
topts[2]="0:35:00" ; popts[2]="6/10/" ; ropts[2]="/2"
elif [[ "$machine" = "Gaea" ]]; then
topts[1]="0:10:00" ; popts[1]="18/8/" ; ropts[1]="/1"
topts[2]="0:10:00" ; popts[2]="18/10/" ; ropts[2]="/2"
elif [[ "$machine" = "wcoss2" ]]; then
topts[1]="0:10:00" ; popts[1]="12/8/" ; ropts[1]="/1"
topts[2]="0:10:00" ; popts[2]="12/10/" ; ropts[2]="/2"
Expand All @@ -155,6 +171,9 @@ case $regtest in
elif [[ "$machine" = "Jet" ]]; then
topts[1]="0:15:00" ; popts[1]="20/1/" ; ropts[1]="/1"
topts[2]="0:15:00" ; popts[2]="20/2/" ; ropts[2]="/1"
elif [[ "$machine" = "Gaea" ]]; then
topts[1]="0:15:00" ; popts[1]="18/1/" ; ropts[1]="/1"
topts[2]="0:15:00" ; popts[2]="18/2/" ; ropts[2]="/1"
elif [[ "$machine" = "wcoss2" ]]; then
topts[1]="0:15:00" ; popts[1]="64/1/" ; ropts[1]="/1"
topts[2]="0:15:00" ; popts[2]="128/2/" ; ropts[2]="/1"
Expand All @@ -179,6 +198,9 @@ case $regtest in
elif [[ "$machine" = "Jet" ]]; then
topts[1]="0:15:00" ; popts[1]="4/4/" ; ropts[1]="/1"
topts[2]="0:15:00" ; popts[2]="6/6/" ; ropts[2]="/1"
elif [[ "$machine" = "Gaea" ]]; then
topts[1]="0:15:00" ; popts[1]="4/4/" ; ropts[1]="/1"
topts[2]="0:15:00" ; popts[2]="6/6/" ; ropts[2]="/1"
elif [[ "$machine" = "wcoss2" ]]; then
topts[1]="0:15:00" ; popts[1]="28/1/" ; ropts[1]="/1"
topts[2]="0:15:00" ; popts[2]="28/2/" ; ropts[2]="/1"
Expand Down Expand Up @@ -206,6 +228,9 @@ case $regtest in
elif [[ "$machine" = "Cheyenne" ]]; then
topts[1]="0:15:00" ; popts[1]="8/6/" ; ropts[1]="/1"
topts[2]="0:15:00" ; popts[2]="8/8/" ; ropts[2]="/1"
elif [[ "$machine" = "Gaea" ]]; then
topts[1]="0:30:00" ; popts[1]="8/6/" ; ropts[1]="/1"
topts[2]="0:30:00" ; popts[2]="8/8/" ; ropts[2]="/1"
elif [[ "$machine" = "wcoss2" ]]; then
topts[1]="0:30:00" ; popts[1]="14/8/" ; ropts[1]="/1"
topts[2]="0:30:00" ; popts[2]="14/14/" ; ropts[2]="/2"
Expand Down Expand Up @@ -233,6 +258,9 @@ case $regtest in
elif [[ "$machine" = "Cheyenne" ]]; then
topts[1]="0:20:00" ; popts[1]="6/6/" ; ropts[1]="/1"
topts[2]="0:20:00" ; popts[2]="8/8/" ; ropts[2]="/1"
elif [[ "$machine" = "Gaea" ]]; then
topts[1]="0:20:00" ; popts[1]="6/6/" ; ropts[1]="/1"
topts[2]="0:20:00" ; popts[2]="8/8/" ; ropts[2]="/1"
elif [[ "$machine" = "wcoss2" ]]; then
topts[1]="0:15:00" ; popts[1]="10/10/" ; ropts[1]="/1"
topts[2]="0:15:00" ; popts[2]="14/14/" ; ropts[2]="/2"
Expand Down Expand Up @@ -260,6 +288,9 @@ case $regtest in
elif [[ "$machine" = "Cheyenne" ]]; then
topts[1]="0:15:00" ; popts[1]="16/2/" ; ropts[1]="/1"
topts[2]="0:15:00" ; popts[2]="16/4/" ; ropts[2]="/2"
elif [[ "$machine" = "Gaea" ]]; then
topts[1]="0:10:00" ; popts[1]="12/3/" ; ropts[1]="/1"
topts[2]="0:10:00" ; popts[2]="12/5/" ; ropts[2]="/2"
elif [[ "$machine" = "wcoss2" ]]; then
topts[1]="0:10:00" ; popts[1]="16/2/" ; ropts[1]="/1"
topts[2]="0:10:00" ; popts[2]="16/4/" ; ropts[2]="/2"
Expand Down Expand Up @@ -317,6 +348,12 @@ elif [[ "$machine" = "Jet" ]]; then
export MPI_BUFS_PER_HOST=256
export MPI_GROUP_MAX=256
export APRUN="srun"
elif [[ "$machine" = "Gaea" ]]; then
export OMP_STACKSIZE=1024M
export MPI_BUFS_PER_PROC=256
export MPI_BUFS_PER_HOST=256
export MPI_GROUP_MAX=256
export APRUN="srun --export=ALL --mpi=pmi2 -n \$size"
elif [[ "$machine" = "Cheyenne" ]]; then
export OMP_STACKSIZE=1024M
export MPI_BUFS_PER_PROC=256
Expand Down
14 changes: 14 additions & 0 deletions regression/regression_var.sh
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,20 @@ fi
echo "Running Regression Tests on '$machine'";

case $machine in
Gaea)
export queue="batch"
export noscrub="/lustre/f2/scratch/$LOGNAME/gsi_tmp/noscrub"
export ptmp="/lustre/f2/scratch/$LOGNAME/gsi_tmp/ptmp"
export casesdir="/lustre/f2/dev/role.epic/contrib/GSI_data/CASES/regtest"

export group="global"
if [[ "$cmaketest" = "false" ]]; then
export basedir="/lustre/f2/dev/$LOGNAME/sandbox/GSI"
fi

export check_resource="no"
export accnt="nggps_emc"
;;
Cheyenne)
export queue="economy"
export noscrub="/glade/scratch/$LOGNAME"
Expand Down
7 changes: 4 additions & 3 deletions ush/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,13 @@ set -x

# Set CONTROLPATH variable to user develop installation
CONTROLPATH="$DIR_ROOT/../develop/install/bin"

CMAKELIBS=".so .a"
CMAKE_OPTS+=" -DCMAKE_FIND_LIBRARY_SUFFIXES=${CMAKELIBS}"
# Collect BUILD Options
CMAKE_OPTS+=" -DCMAKE_BUILD_TYPE=$BUILD_TYPE"

# Install destination for built executables, libraries, CMake Package config
CMAKE_OPTS+=" -DCMAKE_INSTALL_PREFIX=$INSTALL_PREFIX"
CMAKE_OPTS+=" -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX}"

# Configure for GSI and EnKF
CMAKE_OPTS+=" -DGSI_MODE=$GSI_MODE -DENKF_MODE=${ENKF_MODE}"
Expand All @@ -44,7 +45,7 @@ CMAKE_OPTS+=" -DGSI_MODE=$GSI_MODE -DENKF_MODE=${ENKF_MODE}"
[[ ${REGRESSION_TESTS} =~ [yYtT] ]] && CMAKE_OPTS+=" -DBUILD_REG_TESTING=ON -DCONTROLPATH=${CONTROLPATH:-}"

# Re-use or create a new BUILD_DIR (Default: create new BUILD_DIR)
[[ ${BUILD_CLEAN:-"YES"} =~ [yYtT] ]] && rm -rf $BUILD_DIR
[[ ${BUILD_CLEAN:-"YES"} =~ [yYtT] ]] && rm -rf $BUILD_DIR && echo "Removing $BUILD_DIR"
mkdir -p $BUILD_DIR && cd $BUILD_DIR

# Configure, build, install
Expand Down
166 changes: 166 additions & 0 deletions ush/sub_gaea
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
#!/bin/sh --login
# sub_gaea: wraps the given executable script in a Slurm command file and
# submits it with sbatch. The accepted options are listed in the usage text.
# Trace every command as it is executed.
set -x
# Help text; printed to stderr when an option is invalid or the executable
# argument is missing.
usage="\
Usage: $0 [options] executable [args]
where the options are:
-a account account (default: none)
-b binding run smt binding or not (default:NO)
-d dirin initial directory (default: cwd)
-e envars copy comma-separated environment variables
-g group group name
-i append standard input to command file
-j jobname specify jobname (default: executable basename)
-m machine machine on which to run (default: current)
-n write command file to stdout rather than submitting it
-o output specify output file (default: jobname.out)
-p procs[/nodes[/ppreq]
number of MPI tasks and optional nodes or Bblocking and
ppreq option (N or S) (defaults: serial, Bunlimited, S)
-q queue[/qpreq] queue name and optional requirement, e.g. dev/P
(defaults: 1 if serial or dev if parallel and none)
(queue 3 or 4 is dev or prod with twice tasks over ip)
(options: P=parallel, B=bigmem, b=batch)
-r rmem[/rcpu] resources memory and cpus/task (default: '1024 mb', 1)
-t timew wall time limit in [[hh:]mm:]ss format (default: 900)
-u userid userid to run under (default: self)
-v verbose mode
-w when when to run, in yyyymmddhh[mm], +hh[mm], thh[mm], or
Thh[mm] (full, incremental, today or tomorrow) format
(default: now)
Function: This command submits a job to the batch queue."
# Full original argument list; written to $SUBLOG after submission when that
# file is writable.
subcmd="$*"
# Defaults for every option; the getopts loop that follows overrides the ones
# supplied on the command line.
stdin=NO
nosub=NO
account=""
binding="NO"
dirin=""
envars=""
group=""
jobname=""
machine=""
output=""
# procs/nodes/ppreq are the three "/"-separated fields of -p.
procs=0
nodes=""
ppreq=""
# queue/qpreq are the two "/"-separated fields of -q.
queue=""
qpreq=""
# rmem/rcpu are the two "/"-separated fields of -r.
rmem="1024"
rcpu="1"
timew="900"
userid=""
verbose=NO
when=""
# Parse the command-line options. Composite arguments (-p, -q, -r) are split
# on "/"; a trailing "/" is appended first so that missing fields come back
# as empty strings.
while getopts a:b:d:e:g:ij:m:no:p:q:r:t:u:vw: opt
do
  case $opt in
    a) account="$OPTARG" ;;
    b) binding="$OPTARG" ;;
    d) dirin="$OPTARG" ;;
    e) envars="$OPTARG" ;;
    g) group="$OPTARG" ;;
    i) stdin=YES ;;
    j) jobname=$OPTARG ;;
    m) machine="$OPTARG" ;;
    n) nosub=YES ;;
    o) output=$OPTARG ;;
    p)
      # procs[/nodes[/ppreq]]
      procs=$(echo $OPTARG/ | cut -d/ -f1)
      nodes=$(echo $OPTARG/ | cut -d/ -f2)
      ppreq=$(echo $OPTARG/ | cut -d/ -f3)
      ;;
    q)
      # queue[/qpreq]
      queue=$(echo $OPTARG/ | cut -d/ -f1)
      qpreq=$(echo $OPTARG/ | cut -d/ -f2)
      ;;
    r)
      # rmem[/rcpu]
      rmem=$(echo $OPTARG/ | cut -d/ -f1)
      rcpu=$(echo $OPTARG/ | cut -d/ -f2)
      ;;
    t) timew=$OPTARG ;;
    u) userid=$OPTARG ;;
    v) verbose=YES ;;
    w) when=$OPTARG ;;
    \?)
      # Unknown option: report it, show the help text and give up.
      echo $0: invalid option >&2
      echo "$usage" >&2
      exit 1
      ;;
  esac
done
# Discard the options that were consumed so that $1 is the executable.
shift $((OPTIND - 1))
# The executable (the script to be submitted) is mandatory.
if [[ $# -lt 1 ]]; then
  echo $0: missing executable name >&2
  echo "$usage" >&2
  exit 1
fi

# When the argument is not a non-empty file, try to locate it on PATH.
exec=$1
if [[ ! -s $exec ]]; then
  if which $exec >/dev/null 2>&1; then
    exec=$(which $exec)
  fi
fi

# Everything after the executable is kept as its argument string.
shift
args="$*"

# Job name defaults to the executable's basename; output to <jobname>.out.
bn=$(basename $exec)
jobname=${jobname:-$bn}
export jobname
output=${output:-$jobname.out}

myuser=$LOGNAME
myhost=$(hostname)

# Working directory for the generated command file and the captured sbatch
# output. The per-user Lustre scratch area, when it exists, overrides any
# inherited $DATA; otherwise $DATA is kept, falling back to $ptmp/tmp.
if [ -d /lustre/f2/scratch/$LOGNAME ]; then
  DATA=/lustre/f2/scratch/$LOGNAME/tmp
fi
DATA=${DATA:-$ptmp/tmp}

mkdir -p "$DATA"

# Fill in anything the caller passed as an empty string.
queue=${queue:-batch}
timew=${timew:-01:20:00}

# "-p procs" without a "/nodes" part leaves nodes empty, which would yield an
# invalid "--nodes=" directive and a task count of 0. A single node is the
# natural default in that case.
nodes=${nodes:-1}

# Tasks per node default to the procs field of -p; size is the total MPI task
# count and is exported (regression_param.sh's APRUN refers to \$size).
task_node=${task_node:-$procs}
export size=$((nodes*task_node))
echo "In sub_gaea: task_node, nodes, size=",$task_node,$nodes,$size

# cpus per task (second field of -r), also used for OMP_NUM_THREADS.
threads=${rcpu:-1}

# Run the rest of this script, and anything it spawns, with TZ=GMT.
export TZ=GMT

# Build the Slurm command file: #SBATCH header, environment setup, module
# loads, and finally the body of the executable script itself.
cfile=$DATA/sub$$
{
  echo "#!/bin/bash -l"
  echo ""
  echo "#SBATCH --output=$output"
  echo "#SBATCH --job-name=$jobname"
  echo "#SBATCH --qos=normal"
  echo "#SBATCH --clusters=c4"
  echo "#SBATCH --time=$timew"
  echo "#SBATCH --nodes=$nodes --ntasks-per-node=$procs --cpus-per-task=$threads"
  # Account precedence: -a on the command line, then the accnt variable that
  # regression_var.sh exports, then the previously hard-coded project.
  echo "#SBATCH --account=${account:-${accnt:-nggps_emc}}"
  echo "#SBATCH --mem=0"

  echo ""
  echo "export OMP_NUM_THREADS=$threads"
  echo ""
  # Emit a ". <fields>" line built from the first nine whitespace-separated
  # fields of regression_var.out. The substitution is left unquoted on purpose
  # so that multi-line awk output is joined onto a single line.
  echo ". "$(awk '{ print $1, $2, $3, $4, $5, $6, $7, $8, $9 }' $regdir/regression_var.out)
  echo ""

  # Module environment needed by the job at run time.
  echo "source /lustre/f2/dev/role.epic/contrib/Lmod_init.sh"
  echo "module use $gsisrc/modulefiles"
  echo "module load gsi_gaea"
  echo "module list"
  echo ""

  # The submitted script's own contents follow the generated preamble.
  cat $exec
} >"$cfile"

# -i: append whatever arrives on standard input to the command file. This has
# to happen before the -n / -v handling so that the printed file is complete
# (previously a duplicated -n check ran first and exited before stdin was
# appended, and -v printed the file twice).
if [[ $stdin = YES ]]; then
  cat >>"$cfile"
fi

# -n: print the command file instead of submitting it.
# -v: print it, enable tracing, and carry on to submission.
if [[ $nosub = YES ]]; then
  cat "$cfile"
  exit
elif [[ $verbose = YES ]]; then
  set -x
  cat "$cfile"
fi
# Submission command; callers may override it through the sbatch variable.
# Left unquoted on use so that an override may carry extra options.
sbatch=${sbatch:-sbatch}

# Capture sbatch's stdout in a file so it can be both shown and parsed.
ofile=$DATA/subout$$
>"$ofile"
# NOTE(review): world-writable permissions on this capture file look like a
# leftover; nothing visible here needs them. Confirm and tighten.
chmod 777 "$ofile"
$sbatch "$cfile" >"$ofile"
rc=$?
cat "$ofile"

# Optional submission log: timestamp, original command line, job id.
if [[ -w $SUBLOG ]]; then
  # Slurm reports "Submitted batch job <id> [on cluster <name>]"; the id is
  # the fourth field. (The former cut on a double quote never matched this
  # format and logged the whole line.)
  jobn=$(awk 'tolower($0) ~ /submitted/ { print $4; exit }' "$ofile")
  date -u +"%Y%m%d%H%M%S : $subcmd : $jobn" >>"$SUBLOG"
fi
#rm $cfile $ofile
#[[ $MKDATA = YES ]] && rmdir $DATA
# Propagate sbatch's exit status to the caller.
exit $rc

0 comments on commit ff26a62

Please sign in to comment.