From fc5913d176b3b7215c67486e614b7d1f5ab1056b Mon Sep 17 00:00:00 2001
From: John Halley Gotway <johnhg@ucar.edu>
Date: Tue, 30 Jan 2024 16:14:36 -0700
Subject: [PATCH 1/6] Per #2805, update STATAnalysisJob::dump_stat_line() to
 take an optional argument to indicate whether the set_hdr options should be
 applied. Update Stat-Analysis to only call dump_stat_line() with set_hdr for
 filter jobs.

---
 src/libcode/vx_analysis_util/stat_job.cc      | 51 +++++++++++++++----
 src/libcode/vx_analysis_util/stat_job.h       |  7 ++-
 .../core/stat_analysis/stat_analysis_job.cc   |  2 +-
 3 files changed, 47 insertions(+), 13 deletions(-)

diff --git a/src/libcode/vx_analysis_util/stat_job.cc b/src/libcode/vx_analysis_util/stat_job.cc
index 515200d984..ea781a5fc0 100644
--- a/src/libcode/vx_analysis_util/stat_job.cc
+++ b/src/libcode/vx_analysis_util/stat_job.cc
@@ -1450,14 +1450,6 @@ void STATAnalysisJob::parse_job_command(const char *jobstring) {
          i+=2;
       }
       else if(jc_array[i] == "-set_hdr") {
-         n = METHdrTable.header(met_version, "STAT", na_str)->col_offset(to_upper(jc_array[i+1]).c_str());
-         if(is_bad_data(n)) {
-            mlog << Error << "\nSTATAnalysisJob::parse_job_command() -> "
-                 << "no match found for header column named: \""
-                 << to_upper((string)jc_array[i+1]) << "\"\n\n";
-            if(line) { delete [] line; line = (char *) 0; }
-            throw(1);
-         }
          hdr_name.add_css(to_upper(jc_array[i+1]));
          hdr_value.add_css(jc_array[i+2]);
          i+=2;
@@ -1650,6 +1642,28 @@ void STATAnalysisJob::parse_job_command(const char *jobstring) {
 
    } // end for
 
+   // Validate set_hdr column names
+   if(hdr_name.n() > 0) {
+
+      string lt_str = (line_type.n() == 1 ?
+                       line_type[0] : na_str);
+
+      for(i=0; i<hdr_name.n(); i++) {
+
+         n = METHdrTable.header(met_version, "STAT", lt_str.c_str())->
+                col_offset(hdr_name[i].c_str());
+
+         if(is_bad_data(n)) {
+            mlog << Error << "\nSTATAnalysisJob::parse_job_command() -> "
+                 << "no match found for "
+                 << (line_type.n() == 1 ? line_type[0] : "header")
+                 << " column named \"" << hdr_name[i] << "\"\n\n";
+            if(line) { delete [] line; line = (char *) 0; }
+            throw(1);
+         }
+      } // end for
+   }
+
    // Expand out_eclv_points
    if(out_eclv_points.n() == 1) {
       for(i=2; i*out_eclv_points[0] < 1.0; i++) out_eclv_points.add(i*out_eclv_points[0]);
@@ -2130,7 +2144,8 @@ void STATAnalysisJob::close_stat_file() {
 
 ////////////////////////////////////////////////////////////////////////
 
-void STATAnalysisJob::dump_stat_line(const STATLine &line) {
+void STATAnalysisJob::dump_stat_line(const STATLine &line,
+                                     bool do_set_hdr) {
    int i;
 
    //
@@ -2268,8 +2283,24 @@ void STATAnalysisJob::dump_stat_line(const STATLine &line) {
    // Store the data line
    //
    for(i=0; i<line.n_items(); i++) {
-     dump_at.set_entry(n_dump%dump_at.nrows(), i, (string)line.get_item(i));
+     dump_at.set_entry(n_dump%dump_at.nrows(), i,
+                       (string) line.get_item(i));
+   }
+
+   //
+   // Apply -set_hdr options, if requested
+   //
+   if(do_set_hdr) {
+      for(i=0; i<hdr_name.n(); i++) {
+         dump_at.set_entry(n_dump%dump_at.nrows(),
+                           line.get_offset(hdr_name[i].c_str()),
+                           (string) hdr_value[i]);
+      }
    }
+
+   //
+   // Increment the counter
+   //
    n_dump++;
 
    //
diff --git a/src/libcode/vx_analysis_util/stat_job.h b/src/libcode/vx_analysis_util/stat_job.h
index d00d969b30..c91a1f47f4 100644
--- a/src/libcode/vx_analysis_util/stat_job.h
+++ b/src/libcode/vx_analysis_util/stat_job.h
@@ -143,11 +143,14 @@ class STATAnalysisJob {
       void set_boot_rng (const char *);
       void set_boot_seed(const char *);
 
-      void set_perc_thresh(const NumArray &, const NumArray &, const NumArray &);
+      void set_perc_thresh(const NumArray &,
+                           const NumArray &,
+                           const NumArray &);
 
       void open_dump_row_file ();
       void close_dump_row_file();
-      void dump_stat_line     (const STATLine &);
+      void dump_stat_line(const STATLine &,
+                          bool do_set_hdr = false);
 
       void open_stat_file ();
       void setup_stat_file(int n_row, int n);
diff --git a/src/tools/core/stat_analysis/stat_analysis_job.cc b/src/tools/core/stat_analysis/stat_analysis_job.cc
index ea6322a2a5..476b5a2739 100644
--- a/src/tools/core/stat_analysis/stat_analysis_job.cc
+++ b/src/tools/core/stat_analysis/stat_analysis_job.cc
@@ -321,7 +321,7 @@ void do_job_filter(const ConcatString &jobstring, LineDataFile &f,
 
       if(job.is_keeper(line)) {
 
-         job.dump_stat_line(line);
+         job.dump_stat_line(line, true);
 
          n_out++;
       }

From f3301c528706ea8e9c2c9a03d694294ced8a709a Mon Sep 17 00:00:00 2001
From: John Halley Gotway <johnhg@ucar.edu>
Date: Tue, 30 Jan 2024 16:43:49 -0700
Subject: [PATCH 2/6] Per #2805, update User's Guide with -set_hdr info.

---
 docs/Users_Guide/config_options.rst | 63 ++++++++++++++++++-----------
 docs/Users_Guide/stat-analysis.rst  | 14 +++----
 2 files changed, 46 insertions(+), 31 deletions(-)

diff --git a/docs/Users_Guide/config_options.rst b/docs/Users_Guide/config_options.rst
index 7a11bcfe3a..067144775b 100644
--- a/docs/Users_Guide/config_options.rst
+++ b/docs/Users_Guide/config_options.rst
@@ -3774,9 +3774,18 @@ Where "job_name" is set to one of the following:
   criteria specified below and using the optional arguments below.
   The output STAT lines are written to the file specified using the
   "-dump_row" argument.
+
   Required Args: -dump_row
 
-|    
+  Optional Args:
+
+  .. code-block:: none
+
+    -set_hdr column_name value
+       May be used multiple times to override data written to the
+       output dump_row file.
+
+|
 
 * "summary"
   
@@ -3805,8 +3814,8 @@ Where "job_name" is set to one of the following:
 	   
   * Format the -column option as LINE_TYPE:COLUMN.
 
-|     
-    
+|
+
   Use the -derive job command option to automatically derive
   statistics on the fly from input contingency tables and partial
   sums.
@@ -3832,10 +3841,14 @@ Where "job_name" is set to one of the following:
 
   .. code-block:: none
 
-    -by column_name to specify case information
-    -out_alpha to override default alpha value of 0.05
-    -derive to derive statistics on the fly
-    -column_union to summarize multiple columns
+    -by column_name
+       To specify case information.
+    -out_alpha
+       To override the default alpha value.
+    -derive
+       To derive statistics on the fly.
+    -column_union
+       To summarize multiple columns.
 
 * "aggregate"
   
@@ -3852,8 +3865,8 @@ Where "job_name" is set to one of the following:
                ISC, ECNT, RPS, RHIST, PHIST, RELP, SSVAR
 	       
   Required Args: -line_type
-  
-| 
+
+|
 
 * "aggregate_stat"
   
@@ -3887,8 +3900,8 @@ Where "job_name" is set to one of the following:
   .. code-block:: none
 
     -out_thresh or -out_fcst_thresh and -out_obs_thresh
-     When -out_line_type FHO, CTC, CTS, MCTC, MCTS,
-                         PCT, PSTD, PJC, PRC
+       When -out_line_type FHO, CTC, CTS, MCTC, MCTS,
+                           PCT, PSTD, PJC, PRC
 
   Additional Optional Args for -line_type MPR:
 
@@ -3901,14 +3914,14 @@ Where "job_name" is set to one of the following:
     -out_obs_wind_thresh
     -out_wind_logic
     When -out_line_type WDIR
-	    
+
   Additional Optional Arg for:
 
   .. code-block:: none
 
     -line_type ORANK -out_line_type PHIST, SSVAR ...
     -out_bin_size
-	    
+
   Additional Optional Args for:
 
   .. code-block:: none
@@ -3917,14 +3930,14 @@ Where "job_name" is set to one of the following:
     -out_eclv_points
 
 * "ss_index"
-  
+
   The skill score index job can be configured to compute a weighted
   average of skill scores derived from a configurable set of
   variables, levels, lead times, and statistics. The skill score
   index is computed using two models, a forecast model and a
   reference model. For each statistic in the index, a skill score
   is computed as:
-  
+
   SS = 1 - (S[model]*S[model])/(S[reference]*S[reference])
 
   Where S is the statistic.
@@ -4135,17 +4148,19 @@ Where "job_name" is set to one of the following:
     "-rank_corr_flag  value"
     "-vif_flag        value"
 
-  For aggregate and aggregate_stat job types:
-
   .. code-block:: none
 
-    "-out_stat        path"   to write a .stat output file for the job
-                              including the .stat header columns. Multiple
-                              values for each header column are written as
-                              a comma-separated list.
-    "-set_hdr col_name value" may be used multiple times to explicity
-                              specify what should be written to the header
-                              columns of the output .stat file.
+    -out_stat path
+       To write a .stat output file for aggregate and aggregate_stat jobs
+       including the .stat header columns. Multiple input values for each
+       header column are written to the output as a comma-separated list
+       of unique values.
+
+    -set_hdr col_name value
+       May be used multiple times to explicitly specify what should be
+       written to the header columns of the output .stat file for
+       aggregate and aggregate_stat jobs or output dump_row file
+       for filter jobs.
 
   When using the "-by" job command option, you may reference those columns
   in the "-set_hdr" job command options. For example, when computing statistics
diff --git a/docs/Users_Guide/stat-analysis.rst b/docs/Users_Guide/stat-analysis.rst
index 0a5c7ec842..92672edc26 100644
--- a/docs/Users_Guide/stat-analysis.rst
+++ b/docs/Users_Guide/stat-analysis.rst
@@ -604,7 +604,7 @@ The Stat-Analysis tool supports several additional job command options which may
 This job command option is extremely useful. It can be used multiple times to specify a list of STAT header column names. When reading each input line, the Stat-Analysis tool concatenates together the entries in the specified columns and keeps track of the unique cases. It applies the logic defined for that job to each unique subset of data. For example, if your output was run over many different model names and masking regions, specify **-by MODEL,VX_MASK** to get output for each unique combination rather than having to run many very similar jobs.
 
 .. code-block:: none
-		
+
   -column_min     col_name value
   -column_max     col_name value
   -column_eq      col_name value
@@ -615,30 +615,30 @@ This job command option is extremely useful. It can be used multiple times to sp
 The column filtering options may be used when the **-line_type** has been set to a single value. These options take two arguments, the name of the data column to be used followed by a value, string, or threshold to be applied. If multiple column_min/max/eq/thresh/str options are listed, the job will be performed on their intersection. Each input line is only retained if its value meets the numeric filtering criteria defined, matches one of the strings defined by the **-column_str** option, or does not match any of the string defined by the **-column_str_exc** option. Multiple filtering strings may be listed using commas. Defining thresholds in MET is described in :numref:`config_options`.
 
 .. code-block:: none
-		
+
   -dump_row file
 
 Each analysis job is performed over a subset of the input data. Filtering the input data down to a desired subset is often an iterative process. The **-dump_row** option may be used for each job to specify the name of an output file to which the exact subset of data used for that job will be written. When initially constructing Stat-Analysis jobs, users are strongly encouraged to use the option and check its contents to ensure that the analysis was actually done over the intended subset.
 
 .. code-block:: none
-		
+
   -out_line_type name
 
 This option specifies the desired output line type(s) for the **aggregate_stat** job type.
 
 .. code-block:: none
-		
+
   -out_stat file
   -set_hdr  col_name string
 
 The Stat-Analysis tool writes its output to either the log file or the file specified using the **-out** command line option. However the **aggregate** and **aggregate_stat** jobs create STAT output lines and the standard output written lacks the full set of STAT header columns. The **-out_stat** job command option may be used for these jobs to specify the name of an output file to which full STAT output lines should be written. When the **-out_stat** job command option is used for **aggregate** and **aggregate_stat** jobs the output is sent to the **-out_stat** file instead of the log or **-out** file.
 
-Jobs will often combine output with multiple entries in the header columns. For example, a job may aggregate output with three different values in the **VX_MASK** column, such as "mask1", "mask2", and "mask3". The output **VX_MASK** column will contain the unique values encountered concatenated together with commas: "mask1,mask2,mask3". Alternatively, the **-set_hdr** option may be used to specify what should be written to the output header columns, such as "-set_hdr VX_MASK all_three_masks".
+Jobs will often combine output with multiple entries in the header columns. For example, a job may aggregate output with three different values in the **VX_MASK** column, such as "mask1", "mask2", and "mask3". The output **VX_MASK** column will contain the unique values encountered concatenated together with commas: "mask1,mask2,mask3". Alternatively, the **-set_hdr** option may be used to specify what should be written to the output header columns, such as "-set_hdr VX_MASK all_three_masks". When **-set_hdr** is specified for **filter** jobs, it controls what is written to the **-dump_row** output file.
 
 When using the "-out_stat" option to create a .stat output file and stratifying results using one or more "-by" job command options, those columns may be referenced in the "-set_hdr" option. When using multiple "-by" options, use "CASE" to reference the full case information string:
 
 .. code-block:: none
-		
+
   -job aggregate_stat -line_type MPR -out_line_type CNT -by FCST_VAR,OBS_SID \
   -set_hdr VX_MASK OBS_SID -set_hdr DESC CASE
 
@@ -662,7 +662,7 @@ When processing input MPR lines, these options may be used to define a masking g
 When processing input MPR lines, these options are used to define the forecast, observation, or both thresholds to be applied when computing statistics. For categorical output line types (FHO, CTC, CTS, MCTC, MCTS) these define the categorical thresholds. For continuous output line types (SL1L2, SAL1L2, CNT), these define the continuous filtering thresholds and **-out_cnt_logic** defines how the forecast and observed logic should be combined.
 
 .. code-block:: none
-		
+
   -out_fcst_wind_thresh thresh
   -out_obs_wind_thresh  thresh
   -out_wind_thresh      thresh

From e8bb8cc43762f5020448dae8bda40752d5c6c094 Mon Sep 17 00:00:00 2001
From: John Halley Gotway <johnhg@ucar.edu>
Date: Tue, 30 Jan 2024 16:47:37 -0700
Subject: [PATCH 3/6] Per #2805, the -set_hdr option doesn't apply to tc_stat

---
 docs/Users_Guide/config_options.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/Users_Guide/config_options.rst b/docs/Users_Guide/config_options.rst
index 067144775b..d4bdc04a16 100644
--- a/docs/Users_Guide/config_options.rst
+++ b/docs/Users_Guide/config_options.rst
@@ -3770,8 +3770,8 @@ Where "job_name" is set to one of the following:
 
 * "filter"
   
-  To filter out the STAT or TCMPR lines matching the job filtering
-  criteria specified below and using the optional arguments below.
+  To filter the STAT lines, keeping those that match the job filtering
+  criteria specified below and using the optional arguments below.
   The output STAT lines are written to the file specified using the
   "-dump_row" argument.
 

From aa3e14ec8677c983f57337f3cd63863857ff69a9 Mon Sep 17 00:00:00 2001
From: John Halley Gotway <johnhg@ucar.edu>
Date: Thu, 1 Feb 2024 08:27:04 -0700
Subject: [PATCH 4/6] Per #2805, modify one Stat-Analysis filter job to
 demonstrate that the -set_hdr job command option is now applied to the
 -dump_row output.

---
 internal/test_unit/xml/unit_stat_analysis_ps.xml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/internal/test_unit/xml/unit_stat_analysis_ps.xml b/internal/test_unit/xml/unit_stat_analysis_ps.xml
index 87884061ea..9fd50dcb2a 100644
--- a/internal/test_unit/xml/unit_stat_analysis_ps.xml
+++ b/internal/test_unit/xml/unit_stat_analysis_ps.xml
@@ -76,6 +76,7 @@
       -job filter -line_type MPR -fcst_var TMP -fcst_lev Z2 -vx_mask DTC165 \
       -column_str     OBS_SID KDLN,KDHT,KDEN,KDLS,KDMA,KDMN,KDVT,KDEW \
       -column_str_exc OBS_SID KDLN,KDHT \
+      -set_hdr DESC FILTER_OBS_SID \
       -dump_row &OUTPUT_DIR;/stat_analysis_ps/POINT_STAT_FILTER_OBS_SID.stat \
       -v 1
     </param>

From a9f61cf833eb730fb1b0d50072b753304c080028 Mon Sep 17 00:00:00 2001
From: MET Tools Test Account <met_test@seneca.rap.ucar.edu>
Date: Fri, 2 Feb 2024 18:02:11 +0000
Subject: [PATCH 5/6] Per #2805, enhance the R differencing logic to handle NA
 strings in the header columns. Previously, differences that include the NA
 string were ignored. This enhances the compareStatLty() function to handle NA
 strings properly.

---
 internal/test_unit/R_test/test_util.R | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/internal/test_unit/R_test/test_util.R b/internal/test_unit/R_test/test_util.R
index 3d4a2d3f9b..72f9ee37aa 100644
--- a/internal/test_unit/R_test/test_util.R
+++ b/internal/test_unit/R_test/test_util.R
@@ -383,7 +383,9 @@ compareStatLty = function(stat1, stat2, lty, verb=0, strict=0){
 	# compare the information in the header columns
 	for(intCol in 2:21){
 		listMatch = apply(data.frame(dfV1[,intCol], dfV2[,intCol]), 1,
-				function(a){ a[1] == a[2] });
+	  			  function(a){ same = (a[1] == a[2]) | (is.na(a[1]) & is.na(a[2]));
+				               same[is.na(same)] = FALSE;
+				               return(same); });
 		intNumDiff = sum( !listMatch[ !is.na(listMatch) ] );
 		if( 0 < intNumDiff ){
 			if( 1 <= verb ){

From 6c53a133e02f9eea7e2cb709e75dec8089ffd3b8 Mon Sep 17 00:00:00 2001
From: MET Tools Test Account <met_test@seneca.rap.ucar.edu>
Date: Fri, 2 Feb 2024 18:10:09 +0000
Subject: [PATCH 6/6] Per #2805, no real code change. Just whitespace.

---
 internal/test_unit/R_test/test_util.R | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/internal/test_unit/R_test/test_util.R b/internal/test_unit/R_test/test_util.R
index 72f9ee37aa..dfc869cb60 100644
--- a/internal/test_unit/R_test/test_util.R
+++ b/internal/test_unit/R_test/test_util.R
@@ -383,9 +383,10 @@ compareStatLty = function(stat1, stat2, lty, verb=0, strict=0){
 	# compare the information in the header columns
 	for(intCol in 2:21){
 		listMatch = apply(data.frame(dfV1[,intCol], dfV2[,intCol]), 1,
-	  			  function(a){ same = (a[1] == a[2]) | (is.na(a[1]) & is.na(a[2]));
-				               same[is.na(same)] = FALSE;
-				               return(same); });
+				function(a){
+					same = (a[1] == a[2]) | (is.na(a[1]) & is.na(a[2]));
+					same[is.na(same)] = FALSE;
+					return(same); });
 		intNumDiff = sum( !listMatch[ !is.na(listMatch) ] );
 		if( 0 < intNumDiff ){
 			if( 1 <= verb ){