From c03ac54fbf82cc7458a0a6594b655c1dd86d4e37 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Thu, 16 Aug 2018 16:34:05 +0100 Subject: [PATCH 1/2] [ML] Remove old per-partition normalization code Per-partition normalization is an old, undocumented feature that was never used by clients. It has been superseded by per-partition maximum scoring (see #32748). This PR removes the now redundant code. Relates https://github.com/elastic/elasticsearch/pull/32816 --- bin/autodetect/CCmdLineParser.cc | 6 -- bin/autodetect/CCmdLineParser.h | 1 - bin/autodetect/Main.cc | 9 +-- bin/normalize/CCmdLineParser.cc | 8 +- bin/normalize/CCmdLineParser.h | 3 +- bin/normalize/Main.cc | 9 +-- include/api/CHierarchicalResultsWriter.h | 10 +-- include/api/CJsonOutputWriter.h | 7 -- include/api/CResultNormalizer.h | 9 --- include/model/CAnomalyDetectorModelConfig.h | 9 --- lib/api/CHierarchicalResultsWriter.cc | 29 -------- lib/api/CJsonOutputWriter.cc | 47 ------------ lib/api/CResultNormalizer.cc | 35 +-------- lib/api/unittest/CJsonOutputWriterTest.cc | 74 ------------------- lib/api/unittest/CJsonOutputWriterTest.h | 1 - lib/model/CAnomalyDetectorModelConfig.cc | 14 +--- lib/model/CHierarchicalResultsNormalizer.cc | 3 +- .../CAnomalyDetectorModelConfigTest.cc | 2 - 18 files changed, 15 insertions(+), 261 deletions(-) diff --git a/bin/autodetect/CCmdLineParser.cc b/bin/autodetect/CCmdLineParser.cc index 2f48cbbeb7..5fa131cffe 100644 --- a/bin/autodetect/CCmdLineParser.cc +++ b/bin/autodetect/CCmdLineParser.cc @@ -53,7 +53,6 @@ bool CCmdLineParser::parse(int argc, std::size_t& bucketResultsDelay, bool& multivariateByFields, std::string& multipleBucketspans, - bool& perPartitionNormalization, TStrVec& clauseTokens) { try { boost::program_options::options_description desc(DESCRIPTION); @@ -119,8 +118,6 @@ bool CCmdLineParser::parse(int argc, "Optional flag to enable multi-variate analysis of correlated by fields") ("multipleBucketspans", boost::program_options::value(), "Optional comma-separated list of additional bucketspans - must be direct multiples of the main bucketspan") - ("perPartitionNormalization", - "Optional flag to enable per partition normalization") ; // clang-format on @@ -237,9 +234,6 @@ bool CCmdLineParser::parse(int argc, if (vm.count("multipleBucketspans") > 0) { multipleBucketspans = vm["multipleBucketspans"].as(); } - if (vm.count("perPartitionNormalization") > 0) { - perPartitionNormalization = true; - } boost::program_options::collect_unrecognized( parsed.options, boost::program_options::include_positional) diff --git a/bin/autodetect/CCmdLineParser.h b/bin/autodetect/CCmdLineParser.h index cb80c82075..ed8ec94dfd 100644 --- a/bin/autodetect/CCmdLineParser.h +++ b/bin/autodetect/CCmdLineParser.h @@ -65,7 +65,6 @@ class CCmdLineParser { std::size_t& bucketResultsDelay, bool& multivariateByFields, std::string& multipleBucketspans, - bool& perPartitionNormalization, TStrVec& clauseTokens); private: diff --git a/bin/autodetect/Main.cc b/bin/autodetect/Main.cc index dcf905e0a6..930399fcdf 100644 --- a/bin/autodetect/Main.cc +++ b/bin/autodetect/Main.cc @@ -89,7 +89,6 @@ int main(int argc, char** argv) { std::size_t bucketResultsDelay(0); bool multivariateByFields(false); std::string multipleBucketspans; - bool perPartitionNormalization(false); TStrVec clauseTokens; if (ml::autodetect::CCmdLineParser::parse( argc, argv, limitConfigFile, modelConfigFile, fieldConfigFile, @@ -97,10 +96,9 @@ int main(int argc, char** argv) { summaryCountFieldName, delimiter, lengthEncodedInput, timeField, timeFormat, 
quantilesStateFile, deleteStateFiles, persistInterval, maxQuantileInterval, inputFileName, isInputFileNamedPipe, outputFileName, - isOutputFileNamedPipe, restoreFileName, isRestoreFileNamedPipe, - persistFileName, isPersistFileNamedPipe, maxAnomalyRecords, memoryUsage, - bucketResultsDelay, multivariateByFields, multipleBucketspans, - perPartitionNormalization, clauseTokens) == false) { + isOutputFileNamedPipe, restoreFileName, isRestoreFileNamedPipe, persistFileName, + isPersistFileNamedPipe, maxAnomalyRecords, memoryUsage, bucketResultsDelay, + multivariateByFields, multipleBucketspans, clauseTokens) == false) { return EXIT_FAILURE; } @@ -148,7 +146,6 @@ int main(int argc, char** argv) { ml::model::CAnomalyDetectorModelConfig::defaultConfig( bucketSpan, summaryMode, summaryCountFieldName, latency, bucketResultsDelay, multivariateByFields, multipleBucketspans); - modelConfig.perPartitionNormalization(perPartitionNormalization); modelConfig.detectionRules(ml::model::CAnomalyDetectorModelConfig::TIntDetectionRuleVecUMapCRef( fieldConfig.detectionRules())); modelConfig.scheduledEvents(ml::model::CAnomalyDetectorModelConfig::TStrDetectionRulePrVecCRef( diff --git a/bin/normalize/CCmdLineParser.cc b/bin/normalize/CCmdLineParser.cc index 4bb00e6ea7..30162d6bdb 100644 --- a/bin/normalize/CCmdLineParser.cc +++ b/bin/normalize/CCmdLineParser.cc @@ -30,8 +30,7 @@ bool CCmdLineParser::parse(int argc, bool& isOutputFileNamedPipe, std::string& quantilesState, bool& deleteStateFiles, - bool& writeCsv, - bool& perPartitionNormalization) { + bool& writeCsv) { try { boost::program_options::options_description desc(DESCRIPTION); // clang-format off @@ -60,8 +59,6 @@ bool CCmdLineParser::parse(int argc, "If this flag is set then delete the normalizer state files once they have been read") ("writeCsv", "Write the results in CSV format (default is lineified JSON)") - ("perPartitionNormalization", - "Optional flag to enable per partition normalization") ; // clang-format on @@ -114,9 +111,6 @@ bool CCmdLineParser::parse(int argc, if (vm.count("writeCsv") > 0) { writeCsv = true; } - if (vm.count("perPartitionNormalization") > 0) { - perPartitionNormalization = true; - } } catch (std::exception& e) { std::cerr << "Error processing command line: " << e.what() << std::endl; return false; diff --git a/bin/normalize/CCmdLineParser.h b/bin/normalize/CCmdLineParser.h index 4268165918..b67c98fc2a 100644 --- a/bin/normalize/CCmdLineParser.h +++ b/bin/normalize/CCmdLineParser.h @@ -43,8 +43,7 @@ class CCmdLineParser { bool& isOutputFileNamedPipe, std::string& quantilesState, bool& deleteStateFiles, - bool& writeCsv, - bool& perPartitionNormalization); + bool& writeCsv); private: static const std::string DESCRIPTION; diff --git a/bin/normalize/Main.cc b/bin/normalize/Main.cc index b2a9ed359e..3f6ee0cb71 100644 --- a/bin/normalize/Main.cc +++ b/bin/normalize/Main.cc @@ -54,12 +54,10 @@ int main(int argc, char** argv) { std::string quantilesStateFile; bool deleteStateFiles(false); bool writeCsv(false); - bool perPartitionNormalization(false); if (ml::normalize::CCmdLineParser::parse( - argc, argv, modelConfigFile, logProperties, logPipe, bucketSpan, - lengthEncodedInput, inputFileName, isInputFileNamedPipe, - outputFileName, isOutputFileNamedPipe, quantilesStateFile, - deleteStateFiles, writeCsv, perPartitionNormalization) == false) { + argc, argv, modelConfigFile, logProperties, logPipe, bucketSpan, lengthEncodedInput, + inputFileName, isInputFileNamedPipe, outputFileName, isOutputFileNamedPipe, + quantilesStateFile, 
deleteStateFiles, writeCsv) == false) { return EXIT_FAILURE; } @@ -93,7 +91,6 @@ int main(int argc, char** argv) { LOG_FATAL(<< "Ml model config file '" << modelConfigFile << "' could not be loaded"); return EXIT_FAILURE; } - modelConfig.perPartitionNormalization(perPartitionNormalization); // There's a choice of input and output formats for the numbers to be normalised using TInputParserUPtr = std::unique_ptr; diff --git a/include/api/CHierarchicalResultsWriter.h b/include/api/CHierarchicalResultsWriter.h index 18872150df..8e88aeaa87 100644 --- a/include/api/CHierarchicalResultsWriter.h +++ b/include/api/CHierarchicalResultsWriter.h @@ -51,12 +51,7 @@ class API_EXPORT CHierarchicalResultsWriter : public model::CHierarchicalResults using TStr1Vec = core::CSmallVector; public: - enum EResultType { - E_SimpleCountResult, - E_PopulationResult, - E_PartitionResult, - E_Result - }; + enum EResultType { E_SimpleCountResult, E_PopulationResult, E_Result }; //! Type which wraps up the results of anomaly detection. struct API_EXPORT SResults { //! Construct for population results @@ -168,9 +163,6 @@ class API_EXPORT CHierarchicalResultsWriter : public model::CHierarchicalResults //! pivot. void writePivotResult(const model::CHierarchicalResults& results, const TNode& node); - //! Write partition result if \p node is a partition level result - void writePartitionResult(const model::CHierarchicalResults& results, const TNode& node); - //! Write out a simple count result if \p node is simple //! count. void writeSimpleCountResult(const TNode& node); diff --git a/include/api/CJsonOutputWriter.h b/include/api/CJsonOutputWriter.h index f83e24ca72..5a4db20daa 100644 --- a/include/api/CJsonOutputWriter.h +++ b/include/api/CJsonOutputWriter.h @@ -162,9 +162,6 @@ class API_EXPORT CJsonOutputWriter : public COutputHandler { // when the number to write is limited double s_LowestBucketInfluencerScore; - //! Partition scores - TDocumentWeakPtrVec s_PartitionScoreDocuments; - //! scheduled event descriptions TStr1Vec s_ScheduledEventDescriptions; }; @@ -304,10 +301,6 @@ class API_EXPORT CJsonOutputWriter : public COutputHandler { void addInfluences(const CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPrDoublePrVec& influenceResults, TDocumentWeakPtr weakDoc); - //! Write partition score & probability - void addPartitionScores(const CHierarchicalResultsWriter::TResults& results, - TDocumentWeakPtr weakDoc); - private: //! The job ID std::string m_JobId; diff --git a/include/api/CResultNormalizer.h b/include/api/CResultNormalizer.h index 91296d3a07..d439d7b3d2 100644 --- a/include/api/CResultNormalizer.h +++ b/include/api/CResultNormalizer.h @@ -93,15 +93,6 @@ class API_EXPORT CResultNormalizer { std::string& valueFieldName, double& probability); - bool parseDataFields(const TStrStrUMap& dataRowFields, - std::string& level, - std::string& partition, - std::string& partitionValue, - std::string& person, - std::string& function, - std::string& valueFieldName, - double& probability); - template bool parseDataField(const TStrStrUMap& dataRowFields, const std::string& fieldName, diff --git a/include/model/CAnomalyDetectorModelConfig.h b/include/model/CAnomalyDetectorModelConfig.h index d6764b5561..035ba5baf2 100644 --- a/include/model/CAnomalyDetectorModelConfig.h +++ b/include/model/CAnomalyDetectorModelConfig.h @@ -424,12 +424,6 @@ class MODEL_EXPORT CAnomalyDetectorModelConfig { const TDoubleDoublePrVec& normalizedScoreKnotPoints() const; //@} - //! 
Check if we should create one normalizer per partition field value. - bool perPartitionNormalization() const; - - //! Set whether we should create one normalizer per partition field value. - void perPartitionNormalization(bool value); - //! Sets the reference to the detection rules map void detectionRules(TIntDetectionRuleVecUMapCRef detectionRules); @@ -500,9 +494,6 @@ class MODEL_EXPORT CAnomalyDetectorModelConfig { //! and the normalized anomaly score with these knot points. //! \see DEFAULT_NORMALIZED_SCORE_KNOT_POINTS for details. TDoubleDoublePrVec m_NormalizedScoreKnotPoints; - - //! If true then create one normalizer per partition field value. - bool m_PerPartitionNormalisation; //@} //! A reference to the map containing detection rules per diff --git a/lib/api/CHierarchicalResultsWriter.cc b/lib/api/CHierarchicalResultsWriter.cc index 3daafb4cd0..5a844e9aea 100644 --- a/lib/api/CHierarchicalResultsWriter.cc +++ b/lib/api/CHierarchicalResultsWriter.cc @@ -126,7 +126,6 @@ void CHierarchicalResultsWriter::visit(const model::CHierarchicalResults& result } else { this->writePopulationResult(results, node); this->writeIndividualResult(results, node); - this->writePartitionResult(results, node); this->writeSimpleCountResult(node); } } @@ -258,34 +257,6 @@ void CHierarchicalResultsWriter::writeIndividualResult(const model::CHierarchica node.s_Spec.s_Detector, node.s_BucketLength, EMPTY_STRING_LIST)); } -void CHierarchicalResultsWriter::writePartitionResult(const model::CHierarchicalResults& results, - const TNode& node) { - if (!m_ModelConfig.perPartitionNormalization() || this->isSimpleCount(node) || - this->isPopulation(node) || !this->isPartition(node) || - !this->shouldWriteResult(m_Limits, results, node, false)) { - return; - } - - model_t::EFeature feature = - node.s_AnnotatedProbability.s_AttributeProbabilities.empty() - ? 
model_t::E_IndividualCountByBucketAndPerson - : node.s_AnnotatedProbability.s_AttributeProbabilities[0].s_Feature; - - TDouble1Vec emptyDoubleVec; - - m_ResultWriterFunc(TResults( - E_PartitionResult, *node.s_Spec.s_PartitionFieldName, - *node.s_Spec.s_PartitionFieldValue, *node.s_Spec.s_ByFieldName, - *node.s_Spec.s_PersonFieldValue, EMPTY_STRING, node.s_BucketStartTime, - *node.s_Spec.s_FunctionName, model_t::outputFunctionName(feature), - node.s_AnnotatedProbability.s_BaselineBucketCount, - node.s_AnnotatedProbability.s_CurrentBucketCount, emptyDoubleVec, emptyDoubleVec, - node.s_RawAnomalyScore, node.s_NormalizedAnomalyScore, node.probability(), - *node.s_Spec.s_ValueFieldName, node.s_AnnotatedProbability.s_Influences, - node.s_Spec.s_UseNull, model::function_t::isMetric(node.s_Spec.s_Function), - node.s_Spec.s_Detector, node.s_BucketLength, EMPTY_STRING_LIST)); -} - void CHierarchicalResultsWriter::writePivotResult(const model::CHierarchicalResults& results, const TNode& node) { if (this->isSimpleCount(node) || diff --git a/lib/api/CJsonOutputWriter.cc b/lib/api/CJsonOutputWriter.cc index 0acdd8e92c..a075f2639e 100644 --- a/lib/api/CJsonOutputWriter.cc +++ b/lib/api/CJsonOutputWriter.cc @@ -70,7 +70,6 @@ const std::string EXAMPLES("examples"); const std::string BUCKET_SPAN("bucket_span"); const std::string PROCESSING_TIME("processing_time_ms"); const std::string TIME_INFLUENCER("bucket_time"); -const std::string PARTITION_SCORES("partition_scores"); const std::string SCHEDULED_EVENTS("scheduled_events"); const std::string QUANTILES("quantiles"); @@ -191,14 +190,6 @@ bool CJsonOutputWriter::acceptResult(const CHierarchicalResultsWriter::TResults& return true; } - if (results.s_ResultType == CHierarchicalResultsWriter::E_PartitionResult) { - TDocumentWeakPtr partitionDoc = m_Writer.makeStorableDoc(); - this->addPartitionScores(results, partitionDoc); - bucketData.s_PartitionScoreDocuments.push_back(partitionDoc); - - return true; - } - ++bucketData.s_RecordCount; TDocumentWeakPtrIntPrVec& detectorDocumentsToWrite = bucketData.s_DocumentsToWrite; @@ -513,26 +504,6 @@ void CJsonOutputWriter::writeBucket(bool isInterim, m_Writer.EndArray(); } - if (!bucketData.s_PartitionScoreDocuments.empty()) { - // Write the array of partition-anonaly score pairs - m_Writer.String(PARTITION_SCORES); - m_Writer.StartArray(); - for (TDocumentWeakPtrVecItr partitionScoresIter = - bucketData.s_PartitionScoreDocuments.begin(); - partitionScoresIter != bucketData.s_PartitionScoreDocuments.end(); - ++partitionScoresIter) { - TDocumentWeakPtr weakDoc = *partitionScoresIter; - TDocumentPtr docPtr = weakDoc.lock(); - if (!docPtr) { - LOG_ERROR(<< "Inconsistent program state. JSON document unavailable."); - continue; - } - - m_Writer.write(*docPtr); - } - m_Writer.EndArray(); - } - m_Writer.String(PROCESSING_TIME); m_Writer.Uint64(bucketProcessingTime); @@ -816,24 +787,6 @@ void CJsonOutputWriter::addInfluencerFields(bool isBucketInfluencer, } } -void CJsonOutputWriter::addPartitionScores(const CHierarchicalResultsWriter::TResults& results, - TDocumentWeakPtr weakDoc) { - TDocumentPtr docPtr = weakDoc.lock(); - if (!docPtr) { - LOG_ERROR(<< "Inconsistent program state. 
JSON document unavailable."); - return; - } - - m_Writer.addDoubleFieldToObj(PROBABILITY, results.s_Probability, *docPtr); - m_Writer.addStringFieldCopyToObj(PARTITION_FIELD_NAME, - results.s_PartitionFieldName, *docPtr); - m_Writer.addStringFieldCopyToObj(PARTITION_FIELD_VALUE, - results.s_PartitionFieldValue, *docPtr, true); - m_Writer.addDoubleFieldToObj(INITIAL_RECORD_SCORE, - results.s_NormalizedAnomalyScore, *docPtr); - m_Writer.addDoubleFieldToObj(RECORD_SCORE, results.s_NormalizedAnomalyScore, *docPtr); -} - void CJsonOutputWriter::limitNumberRecords(size_t count) { m_RecordOutputLimit = count; } diff --git a/lib/api/CResultNormalizer.cc b/lib/api/CResultNormalizer.cc index a20f183c6d..1545e86e86 100644 --- a/lib/api/CResultNormalizer.cc +++ b/lib/api/CResultNormalizer.cc @@ -75,18 +75,8 @@ bool CResultNormalizer::handleRecord(const TStrStrUMap& dataRowFields) { std::string valueFieldName; double probability(0.0); - bool isValidRecord(false); - if (m_ModelConfig.perPartitionNormalization()) { - isValidRecord = parseDataFields(dataRowFields, level, partition, partitionValue, - person, function, valueFieldName, probability); - } else { - isValidRecord = parseDataFields(dataRowFields, level, partition, person, - function, valueFieldName, probability); - } - - std::string partitionKey = m_ModelConfig.perPartitionNormalization() - ? partition + partitionValue - : partition; + bool isValidRecord = parseDataFields(dataRowFields, level, partition, person, + function, valueFieldName, probability); if (isValidRecord) { const model::CAnomalyScore::CNormalizer* levelNormalizer = nullptr; @@ -96,10 +86,10 @@ bool CResultNormalizer::handleRecord(const TStrStrUMap& dataRowFields) { if (level == ROOT_LEVEL) { levelNormalizer = &m_Normalizer.bucketNormalizer(); } else if (level == LEAF_LEVEL) { - levelNormalizer = m_Normalizer.leafNormalizer(partitionKey, person, + levelNormalizer = m_Normalizer.leafNormalizer(partition, person, function, valueFieldName); } else if (level == PARTITION_LEVEL) { - levelNormalizer = m_Normalizer.partitionNormalizer(partitionKey); + levelNormalizer = m_Normalizer.partitionNormalizer(partition); } else if (level == BUCKET_INFLUENCER_LEVEL) { levelNormalizer = m_Normalizer.influencerBucketNormalizer(person); } else if (level == INFLUENCER_LEVEL) { @@ -148,22 +138,5 @@ bool CResultNormalizer::parseDataFields(const TStrStrUMap& dataRowFields, this->parseDataField(dataRowFields, VALUE_FIELD_NAME, valueFieldName) && this->parseDataField(dataRowFields, PROBABILITY_NAME, probability); } - -bool CResultNormalizer::parseDataFields(const TStrStrUMap& dataRowFields, - std::string& level, - std::string& partition, - std::string& partitionValue, - std::string& person, - std::string& function, - std::string& valueFieldName, - double& probability) { - return this->parseDataField(dataRowFields, LEVEL, level) && - this->parseDataField(dataRowFields, PARTITION_FIELD_NAME, partition) && - this->parseDataField(dataRowFields, PARTITION_FIELD_VALUE, partitionValue) && - this->parseDataField(dataRowFields, PERSON_FIELD_NAME, person) && - this->parseDataField(dataRowFields, FUNCTION_NAME, function) && - this->parseDataField(dataRowFields, VALUE_FIELD_NAME, valueFieldName) && - this->parseDataField(dataRowFields, PROBABILITY_NAME, probability); -} } } diff --git a/lib/api/unittest/CJsonOutputWriterTest.cc b/lib/api/unittest/CJsonOutputWriterTest.cc index 2dc3d67664..6dc129fb20 100644 --- a/lib/api/unittest/CJsonOutputWriterTest.cc +++ b/lib/api/unittest/CJsonOutputWriterTest.cc @@ -71,8 +71,6 
@@ CppUnit::Test* CJsonOutputWriterTest::suite() { suiteOfTests->addTest(new CppUnit::TestCaller( "CJsonOutputWriterTest::testPersistNormalizer", &CJsonOutputWriterTest::testPersistNormalizer)); - suiteOfTests->addTest(new CppUnit::TestCaller( - "CJsonOutputWriterTest::testPartitionScores", &CJsonOutputWriterTest::testPartitionScores)); suiteOfTests->addTest(new CppUnit::TestCaller( "CJsonOutputWriterTest::testReportMemoryUsage", &CJsonOutputWriterTest::testReportMemoryUsage)); @@ -1548,78 +1546,6 @@ void CJsonOutputWriterTest::testPersistNormalizer() { CPPUNIT_ASSERT(quantileState.HasMember("timestamp")); } -void CJsonOutputWriterTest::testPartitionScores() { - ml::model::CAnomalyDetectorModelConfig modelConfig = - ml::model::CAnomalyDetectorModelConfig::defaultConfig(); - - std::ostringstream sstream; - { - ml::core::CJsonOutputStreamWrapper outputStream(sstream); - ml::api::CJsonOutputWriter writer("job", outputStream); - - std::string emptyString; - ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPrDoublePrVec influences; - - std::string partitionFieldName("part1"); - - for (int i = 0; i < 4; ++i) { - // For the first iteration use an empty string for the value - std::string partitionFieldValue; - if (i > 0) { - partitionFieldValue = 'p' + ml::core::CStringUtils::typeToString(i); - } - ml::api::CHierarchicalResultsWriter::SResults result( - ml::api::CHierarchicalResultsWriter::E_PartitionResult, - partitionFieldName, partitionFieldValue, emptyString, - emptyString, emptyString, 1, emptyString, emptyString, 42.0, 79, - TDouble1Vec(1, 6953.0), TDouble1Vec(1, 10090.0), 0.0, - double(i), // normalised anomaly score - 0.1, emptyString, influences, false, true, 1, 100, EMPTY_STRING_LIST); - - writer.acceptResult(result); - } - - writer.endOutputBatch(false, 1ul); - } - - rapidjson::Document doc; - doc.Parse(sstream.str().c_str()); - - LOG_DEBUG(<< sstream.str()); - - const rapidjson::Value& bucketWrapper = doc[rapidjson::SizeType(0)]; - CPPUNIT_ASSERT(bucketWrapper.HasMember("bucket")); - const rapidjson::Value& bucket = bucketWrapper["bucket"]; - CPPUNIT_ASSERT(bucket.HasMember("partition_scores")); - const rapidjson::Value& partitionScores = bucket["partition_scores"]; - - CPPUNIT_ASSERT(partitionScores.IsArray()); - CPPUNIT_ASSERT_EQUAL(rapidjson::SizeType(4), partitionScores.Size()); - - for (rapidjson::SizeType i = 0; i < partitionScores.Size(); ++i) { - const rapidjson::Value& pDoc = partitionScores[i]; - CPPUNIT_ASSERT(pDoc.IsObject()); - CPPUNIT_ASSERT(pDoc.HasMember("probability")); - CPPUNIT_ASSERT_DOUBLES_EQUAL(0.1, pDoc["probability"].GetDouble(), 0.01); - CPPUNIT_ASSERT(pDoc.HasMember("record_score")); - CPPUNIT_ASSERT_DOUBLES_EQUAL(double(i), pDoc["record_score"].GetDouble(), 0.01); - CPPUNIT_ASSERT(pDoc.HasMember("initial_record_score")); - CPPUNIT_ASSERT_DOUBLES_EQUAL(double(i), - pDoc["initial_record_score"].GetDouble(), 0.01); - - CPPUNIT_ASSERT(pDoc.HasMember("partition_field_name")); - CPPUNIT_ASSERT_EQUAL(std::string("part1"), - std::string(pDoc["partition_field_name"].GetString())); - std::string fieldValue; - if (i > 0) { - fieldValue = 'p' + ml::core::CStringUtils::typeToString(i); - } - CPPUNIT_ASSERT(pDoc.HasMember("partition_field_value")); - CPPUNIT_ASSERT_EQUAL( - fieldValue, std::string(pDoc["partition_field_value"].GetString())); - } -} - void CJsonOutputWriterTest::testReportMemoryUsage() { std::ostringstream sstream; { diff --git a/lib/api/unittest/CJsonOutputWriterTest.h b/lib/api/unittest/CJsonOutputWriterTest.h index 
2d61a299ad..e4a513944e 100644 --- a/lib/api/unittest/CJsonOutputWriterTest.h +++ b/lib/api/unittest/CJsonOutputWriterTest.h @@ -22,7 +22,6 @@ class CJsonOutputWriterTest : public CppUnit::TestFixture { void testWriteInfluencers(); void testWriteInfluencersWithLimit(); void testPersistNormalizer(); - void testPartitionScores(); void testReportMemoryUsage(); void testWriteScheduledEvent(); void testThroughputWithScopedAllocator(); diff --git a/lib/model/CAnomalyDetectorModelConfig.cc b/lib/model/CAnomalyDetectorModelConfig.cc index c93fdf6c8c..d7b8b09499 100644 --- a/lib/model/CAnomalyDetectorModelConfig.cc +++ b/lib/model/CAnomalyDetectorModelConfig.cc @@ -173,8 +173,7 @@ CAnomalyDetectorModelConfig::CAnomalyDetectorModelConfig() m_NoiseMultiplier(DEFAULT_NOISE_MULTIPLIER), m_NormalizedScoreKnotPoints(boost::begin(DEFAULT_NORMALIZED_SCORE_KNOT_POINTS), boost::end(DEFAULT_NORMALIZED_SCORE_KNOT_POINTS)), - m_PerPartitionNormalisation(false), m_DetectionRules(EMPTY_RULES_MAP), - m_ScheduledEvents(EMPTY_EVENTS) { + m_DetectionRules(EMPTY_RULES_MAP), m_ScheduledEvents(EMPTY_EVENTS) { for (std::size_t i = 0u; i < model_t::NUMBER_AGGREGATION_STYLES; ++i) { for (std::size_t j = 0u; j < model_t::NUMBER_AGGREGATION_PARAMS; ++j) { m_AggregationStyleParams[i][j] = DEFAULT_AGGREGATION_STYLE_PARAMS[i][j]; @@ -732,14 +731,6 @@ CAnomalyDetectorModelConfig::normalizedScoreKnotPoints() const { return m_NormalizedScoreKnotPoints; } -bool CAnomalyDetectorModelConfig::perPartitionNormalization() const { - return m_PerPartitionNormalisation; -} - -void CAnomalyDetectorModelConfig::perPartitionNormalization(bool value) { - m_PerPartitionNormalisation = value; -} - void CAnomalyDetectorModelConfig::detectionRules(TIntDetectionRuleVecUMapCRef detectionRules) { m_DetectionRules = detectionRules; } @@ -769,7 +760,6 @@ const std::string MAXIMUM_ANOMALOUS_PROBABILITY_PROPERTY("maximumanomalousprobab const std::string NOISE_PERCENTILE_PROPERTY("noisepercentile"); const std::string NOISE_MULTIPLIER_PROPERTY("noisemultiplier"); const std::string NORMALIZED_SCORE_KNOT_POINTS("normalizedscoreknotpoints"); -const std::string PER_PARTITION_NORMALIZATION_PROPERTY("perPartitionNormalization"); } bool CAnomalyDetectorModelConfig::processStanza(const boost::property_tree::ptree& propertyTree) { @@ -1007,8 +997,6 @@ bool CAnomalyDetectorModelConfig::processStanza(const boost::property_tree::ptre } points.emplace_back(100.0, 100.0); this->normalizedScoreKnotPoints(points); - } else if (propName == PER_PARTITION_NORMALIZATION_PROPERTY) { - } else { LOG_WARN(<< "Ignoring unknown property " << propName); } diff --git a/lib/model/CHierarchicalResultsNormalizer.cc b/lib/model/CHierarchicalResultsNormalizer.cc index 13cffb0e25..4cf620baa6 100644 --- a/lib/model/CHierarchicalResultsNormalizer.cc +++ b/lib/model/CHierarchicalResultsNormalizer.cc @@ -110,8 +110,7 @@ void CHierarchicalResultsNormalizer::visit(const CHierarchicalResults& /*results bool pivot) { CNormalizerFactory factory(m_ModelConfig); TNormalizerPtrVec normalizers; - this->elements(node, pivot, factory, normalizers, - m_ModelConfig.perPartitionNormalization()); + this->elements(node, pivot, factory, normalizers); if (normalizers.empty()) { return; diff --git a/lib/model/unittest/CAnomalyDetectorModelConfigTest.cc b/lib/model/unittest/CAnomalyDetectorModelConfigTest.cc index 1534258147..e1de2a01a9 100644 --- a/lib/model/unittest/CAnomalyDetectorModelConfigTest.cc +++ b/lib/model/unittest/CAnomalyDetectorModelConfigTest.cc @@ -116,7 +116,6 @@ void 
CAnomalyDetectorModelConfigTest::testNormal() { CPPUNIT_ASSERT_EQUAL( std::string("[(0, 0), (70, 1.5), (85, 1.6), (90, 1.7), (95, 2), (97, 10), (98, 20), (99.5, 50), (100, 100)]"), core::CContainerPrinter::print(config.normalizedScoreKnotPoints())); - CPPUNIT_ASSERT_EQUAL(false, config.perPartitionNormalization()); } { CAnomalyDetectorModelConfig config = CAnomalyDetectorModelConfig::defaultConfig(); @@ -140,7 +139,6 @@ void CAnomalyDetectorModelConfigTest::testNormal() { config.factory(1, function_t::E_PopulationRare).get())); CPPUNIT_ASSERT(dynamic_cast( config.factory(CSearchKey::simpleCountKey()).get())); - CPPUNIT_ASSERT_EQUAL(false, config.perPartitionNormalization()); } } From 2bc03bdb4c3a102ef449587b88e8e74a6823c62e Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 17 Aug 2018 10:21:50 +0100 Subject: [PATCH 2/2] Further cleanup of perPartitionNormalization --- include/model/CHierarchicalResultsLevelSet.h | 31 ++++++-------- .../CHierarchicalResultsLevelSetTest.cc | 42 ++++--------------- .../CHierarchicalResultsLevelSetTest.h | 2 +- 3 files changed, 22 insertions(+), 53 deletions(-) diff --git a/include/model/CHierarchicalResultsLevelSet.h b/include/model/CHierarchicalResultsLevelSet.h index ea7b99f168..5b9ff72f50 100644 --- a/include/model/CHierarchicalResultsLevelSet.h +++ b/include/model/CHierarchicalResultsLevelSet.h @@ -162,11 +162,7 @@ class CHierarchicalResultsLevelSet : public CHierarchicalResultsVisitor { //! Get and possibly add a normalizer for \p node. template - void elements(const TNode& node, - bool pivot, - const FACTORY& factory, - TTypePtrVec& result, - bool distinctLeavesPerPartition = false) { + void elements(const TNode& node, bool pivot, const FACTORY& factory, TTypePtrVec& result) { result.clear(); if (this->isSimpleCount(node)) { return; } @@ -193,39 +189,38 @@ class CHierarchicalResultsLevelSet : public CHierarchicalResultsVisitor { return; } - std::string partitionKey = distinctLeavesPerPartition - ? 
*node.s_Spec.s_PartitionFieldName + - *node.s_Spec.s_PartitionFieldValue - : *node.s_Spec.s_PartitionFieldName; - if (this->isLeaf(node)) { - TWord word = ms_Dictionary.word(partitionKey, *node.s_Spec.s_PersonFieldName, - *node.s_Spec.s_FunctionName, - *node.s_Spec.s_ValueFieldName); + TWord word = ms_Dictionary.word( + *node.s_Spec.s_PartitionFieldName, *node.s_Spec.s_PersonFieldName, + *node.s_Spec.s_FunctionName, *node.s_Spec.s_ValueFieldName); TWordTypePrVecItr i = element(m_LeafSet, word); if (i == m_LeafSet.end() || i->first != word) { i = m_LeafSet.insert( - i, TWordTypePr(word, factory.make(partitionKey, *node.s_Spec.s_PersonFieldName, + i, TWordTypePr(word, factory.make(*node.s_Spec.s_PartitionFieldName, + *node.s_Spec.s_PersonFieldName, *node.s_Spec.s_FunctionName, *node.s_Spec.s_ValueFieldName))); } result.push_back(&i->second); } if (this->isPerson(node)) { - TWord word = ms_Dictionary.word(partitionKey, *node.s_Spec.s_PersonFieldName); + TWord word = ms_Dictionary.word(*node.s_Spec.s_PartitionFieldName, + *node.s_Spec.s_PersonFieldName); TWordTypePrVecItr i = element(m_PersonSet, word); if (i == m_PersonSet.end() || i->first != word) { i = m_PersonSet.insert( - i, TWordTypePr(word, factory.make(partitionKey, *node.s_Spec.s_PersonFieldName))); + i, TWordTypePr(word, factory.make(*node.s_Spec.s_PartitionFieldName, + *node.s_Spec.s_PersonFieldName))); } result.push_back(&i->second); } if (this->isPartition(node)) { - TWord word = ms_Dictionary.word(partitionKey); + TWord word = ms_Dictionary.word(*node.s_Spec.s_PartitionFieldName); TWordTypePrVecItr i = element(m_PartitionSet, word); if (i == m_PartitionSet.end() || i->first != word) { - i = m_PartitionSet.insert(i, TWordTypePr(word, factory.make(partitionKey))); + i = m_PartitionSet.insert( + i, TWordTypePr(word, factory.make(*node.s_Spec.s_PartitionFieldName))); } result.push_back(&i->second); } diff --git a/lib/model/unittest/CHierarchicalResultsLevelSetTest.cc b/lib/model/unittest/CHierarchicalResultsLevelSetTest.cc index 766089ff48..0478612433 100644 --- a/lib/model/unittest/CHierarchicalResultsLevelSetTest.cc +++ b/lib/model/unittest/CHierarchicalResultsLevelSetTest.cc @@ -17,8 +17,8 @@ CppUnit::Test* CHierarchicalResultsLevelSetTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CHierarchicalResultsLevelSetTest"); suiteOfTests->addTest(new CppUnit::TestCaller( - "CHierarchicalResultsLevelSetTest::testElementsWithPerPartitionNormalisation", - &CHierarchicalResultsLevelSetTest::testElementsWithPerPartitionNormalisation)); + "CHierarchicalResultsLevelSetTest::testElements", + &CHierarchicalResultsLevelSetTest::testElements)); return suiteOfTests; } @@ -66,7 +66,7 @@ void print(const TestNode* node) { std::cout << "'" << node->s_Name << "'" << std::endl; } -void CHierarchicalResultsLevelSetTest::testElementsWithPerPartitionNormalisation() { +void CHierarchicalResultsLevelSetTest::testElements() { // This is intentionally NOT an empty string from the string store, but // instead a completely separate empty string, such that its pointer will be // different to other empty string pointers. 
(In general, if you need @@ -102,35 +102,9 @@ void CHierarchicalResultsLevelSetTest::testElementsWithPerPartitionNormalisation std::vector result; - // without per partition normalization - { - CConcreteHierarchicalResultsLevelSet levelSet(root); - levelSet.elements(node, false, CTestNodeFactory(), result, false); - std::for_each(result.begin(), result.end(), print); - CPPUNIT_ASSERT_EQUAL(size_t(1), result.size()); - CPPUNIT_ASSERT_EQUAL(std::string("pA"), result[0]->s_Name); - } - - // with per partition normalization - { - CConcreteHierarchicalResultsLevelSet levelSet(root); - levelSet.elements(node, false, CTestNodeFactory(), result, true); - - CPPUNIT_ASSERT_EQUAL(size_t(1), result.size()); - CPPUNIT_ASSERT_EQUAL(std::string("pAv1"), result[0]->s_Name); - - ml::model::hierarchical_results_detail::SResultSpec specB; - specB.s_PartitionFieldName = PARTITION_B; - specB.s_PartitionFieldValue = PARTITION_VALUE_1; - - CConcreteHierarchicalResultsLevelSet::TNode nodeB(specB, emptyAnnotatedProb); - nodeB.s_Parent = &parent; - nodeB.s_Children.push_back(&child); - - levelSet.elements(nodeB, false, CTestNodeFactory(), result, true); - - std::for_each(result.begin(), result.end(), print); - CPPUNIT_ASSERT_EQUAL(size_t(1), result.size()); - CPPUNIT_ASSERT_EQUAL(std::string("pBv1"), result[0]->s_Name); - } + CConcreteHierarchicalResultsLevelSet levelSet(root); + levelSet.elements(node, false, CTestNodeFactory(), result); + std::for_each(result.begin(), result.end(), print); + CPPUNIT_ASSERT_EQUAL(size_t(1), result.size()); + CPPUNIT_ASSERT_EQUAL(std::string("pA"), result[0]->s_Name); } diff --git a/lib/model/unittest/CHierarchicalResultsLevelSetTest.h b/lib/model/unittest/CHierarchicalResultsLevelSetTest.h index e710fc4828..ab446be374 100644 --- a/lib/model/unittest/CHierarchicalResultsLevelSetTest.h +++ b/lib/model/unittest/CHierarchicalResultsLevelSetTest.h @@ -10,7 +10,7 @@ class CHierarchicalResultsLevelSetTest : public CppUnit::TestFixture { public: - void testElementsWithPerPartitionNormalisation(); + void testElements(); static CppUnit::Test* suite(); };