Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/CHANGELOG.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
* Reduce the peak memory used by boosted tree training and fix an overcounting bug
estimating maximum memory usage. (See {ml-pull}781[#781].)
* Stratified fractional cross validation for regression. (See {ml-pull}784[#784].)
* Added `geo_point` supported output for `lat_long` function records. (See {ml-pull}809[#809], {pull}47050[#47050].)

== {es} version 7.5.0

Expand Down
40 changes: 40 additions & 0 deletions lib/api/CJsonOutputWriter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <model/CHierarchicalResultsNormalizer.h>
#include <model/ModelTypes.h>

#include <api/CFieldConfig.h>
#include <api/CModelSizeStatsJsonWriter.h>
#include <api/CModelSnapshotJsonWriter.h>

Expand Down Expand Up @@ -73,6 +74,9 @@ const std::string PROCESSING_TIME("processing_time_ms");
const std::string TIME_INFLUENCER("bucket_time");
const std::string SCHEDULED_EVENTS("scheduled_events");
const std::string QUANTILES("quantiles");
const std::string GEO_RESULTS("geo_results");
const std::string ACTUAL_POINT("actual_point");
const std::string TYPICAL_POINT("typical_point");

//! Get a numeric field from a JSON document.
//! Assumes the document contains the field.
Expand Down Expand Up @@ -553,6 +557,24 @@ void CJsonOutputWriter::addMetricFields(const CHierarchicalResultsWriter::TResul
results.s_FunctionDescription, *docPtr);
m_Writer.addDoubleArrayFieldToObj(TYPICAL, results.s_BaselineMean, *docPtr);
m_Writer.addDoubleArrayFieldToObj(ACTUAL, results.s_CurrentMean, *docPtr);
if (results.s_FunctionName == CFieldConfig::FUNCTION_LAT_LONG) {
rapidjson::Value geoResults = m_Writer.makeObject();
auto geoPointToString = [](const auto& point) -> std::string {
std::ostringstream result;
// We don't want scientific notation and geo points only have precision up to 12 digits
result << std::fixed << std::setprecision(12) << point[0] << "," << point[1];
return result.str();
};
if (results.s_BaselineMean.size() == 2) {
m_Writer.addStringFieldCopyToObj(
TYPICAL_POINT, geoPointToString(results.s_BaselineMean), geoResults);
}
if (results.s_CurrentMean.size() == 2) {
m_Writer.addStringFieldCopyToObj(
ACTUAL_POINT, geoPointToString(results.s_CurrentMean), geoResults);
}
m_Writer.addMember(GEO_RESULTS, geoResults, *docPtr);
}
}

void CJsonOutputWriter::addPopulationFields(const CHierarchicalResultsWriter::TResults& results,
Expand Down Expand Up @@ -653,6 +675,24 @@ void CJsonOutputWriter::addPopulationCauseFields(const CHierarchicalResultsWrite
results.s_FunctionDescription, *docPtr);
m_Writer.addDoubleArrayFieldToObj(TYPICAL, results.s_PopulationAverage, *docPtr);
m_Writer.addDoubleArrayFieldToObj(ACTUAL, results.s_FunctionValue, *docPtr);
if (results.s_FunctionName == CFieldConfig::FUNCTION_LAT_LONG) {
rapidjson::Value geoResults = m_Writer.makeObject();
auto geoPointToString = [](const auto& point) -> std::string {
std::ostringstream result;
// We don't want scientific notation and geo points only have precision up to 12 digits
result << std::fixed << std::setprecision(12) << point[0] << "," << point[1];
return result.str();
};
if (results.s_BaselineMean.size() == 2) {
m_Writer.addStringFieldCopyToObj(
TYPICAL_POINT, geoPointToString(results.s_PopulationAverage), geoResults);
}
if (results.s_FunctionValue.size() == 2) {
m_Writer.addStringFieldCopyToObj(
ACTUAL_POINT, geoPointToString(results.s_FunctionValue), geoResults);
}
m_Writer.addMember(GEO_RESULTS, geoResults, *docPtr);
}
}

void CJsonOutputWriter::addInfluences(const CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPrDoublePrVec& influenceResults,
Expand Down
140 changes: 140 additions & 0 deletions lib/api/unittest/CJsonOutputWriterTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1056,6 +1056,146 @@ BOOST_AUTO_TEST_CASE(testSimpleWrite) {
std::string(object2["field_name"].GetString()));
}

BOOST_AUTO_TEST_CASE(testGeoResultsWrite) {
ml::api::CJsonOutputWriter::TStrStrUMap emptyFields;

std::string partitionFieldName("tfn");
std::string partitionFieldValue("");
std::string overFieldName("ofn");
std::string overFieldValue("ofv");
std::string byFieldName("airline");
std::string byFieldValue("GAL");
std::string correlatedByFieldValue("BAW");
std::string fieldName("location");
std::string function("lat_long");
std::string functionDescription("lat_long(location)");
ml::api::CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPrDoublePrVec influences;
std::string emptyString;
// The output writer won't close the JSON structures until is is destroyed
{
std::ostringstream sstream;
{
ml::core::CJsonOutputStreamWrapper outputStream(sstream);
ml::api::CJsonOutputWriter writer("job", outputStream);
TDouble1Vec actual(2, 0.0);
actual[0] = 40.0;
actual[1] = -40.0;
TDouble1Vec typical(2, 0.0);
typical[0] = 90.0;
typical[1] = -90.0;
ml::api::CHierarchicalResultsWriter::SResults result(
ml::api::CHierarchicalResultsWriter::E_Result,
partitionFieldName, partitionFieldValue, byFieldName,
byFieldValue, correlatedByFieldValue, 1, function,
functionDescription, 2.24, 79, typical, actual, 10.0, 10.0, 0.5,
0.0, fieldName, influences, false, true, 1, 1, EMPTY_STRING_LIST);
BOOST_TEST_REQUIRE(writer.acceptResult(result));
BOOST_TEST_REQUIRE(writer.endOutputBatch(false, 1U));
}
rapidjson::Document arrayDoc;
arrayDoc.Parse<rapidjson::kParseDefaultFlags>(sstream.str().c_str());
// Debug print record
{
rapidjson::StringBuffer strbuf;
using TStringBufferPrettyWriter = rapidjson::PrettyWriter<rapidjson::StringBuffer>;
TStringBufferPrettyWriter writer(strbuf);
arrayDoc.Accept(writer);
LOG_DEBUG(<< "Results:\n" << strbuf.GetString());
}
BOOST_TEST_REQUIRE(arrayDoc.IsArray());
BOOST_REQUIRE_EQUAL(rapidjson::SizeType(2), arrayDoc.Size());
BOOST_TEST_REQUIRE(arrayDoc[rapidjson::SizeType(0)].HasMember("records"));
const rapidjson::Value& record =
arrayDoc[rapidjson::SizeType(0)]["records"][rapidjson::SizeType(0)];

BOOST_TEST_REQUIRE(record.HasMember("typical"));
BOOST_TEST_REQUIRE(record.HasMember("actual"));
BOOST_TEST_REQUIRE(record.HasMember("geo_results"));
auto geoResultsObject = record["geo_results"].GetObject();
BOOST_TEST_REQUIRE(geoResultsObject.HasMember("actual_point"));
BOOST_REQUIRE_EQUAL(std::string("40.000000000000,-40.000000000000"),
(geoResultsObject["actual_point"].GetString()));
BOOST_TEST_REQUIRE(geoResultsObject.HasMember("typical_point"));
BOOST_REQUIRE_EQUAL(std::string("90.000000000000,-90.000000000000"),
(geoResultsObject["typical_point"].GetString()));
}

{
std::ostringstream sstream;
{
ml::core::CJsonOutputStreamWrapper outputStream(sstream);
ml::api::CJsonOutputWriter writer("job", outputStream);
TDouble1Vec actual(1, 500);
TDouble1Vec typical(1, 64);
ml::api::CHierarchicalResultsWriter::SResults result(
ml::api::CHierarchicalResultsWriter::E_Result,
partitionFieldName, partitionFieldValue, byFieldName,
byFieldValue, correlatedByFieldValue, 1, function,
functionDescription, 2.24, 79, typical, actual, 10.0, 10.0, 0.5,
0.0, fieldName, influences, false, true, 1, 1, EMPTY_STRING_LIST);
BOOST_TEST_REQUIRE(writer.acceptResult(result));
BOOST_TEST_REQUIRE(writer.endOutputBatch(false, 1U));
}
rapidjson::Document arrayDoc;
arrayDoc.Parse<rapidjson::kParseDefaultFlags>(sstream.str().c_str());
// Debug print record
{
rapidjson::StringBuffer strbuf;
using TStringBufferPrettyWriter = rapidjson::PrettyWriter<rapidjson::StringBuffer>;
TStringBufferPrettyWriter writer(strbuf);
arrayDoc.Accept(writer);
LOG_DEBUG(<< "Results:\n" << strbuf.GetString());
}
BOOST_TEST_REQUIRE(arrayDoc.IsArray());
BOOST_REQUIRE_EQUAL(rapidjson::SizeType(2), arrayDoc.Size());
BOOST_TEST_REQUIRE(arrayDoc[rapidjson::SizeType(0)].HasMember("records"));
const rapidjson::Value& record =
arrayDoc[rapidjson::SizeType(0)]["records"][rapidjson::SizeType(0)];

BOOST_TEST_REQUIRE(record.IsObject());
BOOST_TEST_REQUIRE(record.HasMember("geo_results"));
auto geoResultsObject = record["geo_results"].GetObject();
BOOST_TEST_REQUIRE(!geoResultsObject.HasMember("actual_point"));
BOOST_TEST_REQUIRE(!geoResultsObject.HasMember("typical_point"));
}

{
std::ostringstream sstream;
{
ml::core::CJsonOutputStreamWrapper outputStream(sstream);
ml::api::CJsonOutputWriter writer("job", outputStream);
TDouble1Vec actual(1, 500);
TDouble1Vec typical(1, 64);
ml::api::CHierarchicalResultsWriter::SResults result(
ml::api::CHierarchicalResultsWriter::E_Result,
partitionFieldName, partitionFieldValue, byFieldName,
byFieldValue, correlatedByFieldValue, 1, "mean",
functionDescription, 2.24, 79, typical, actual, 10.0, 10.0, 0.5,
0.0, fieldName, influences, false, true, 1, 1, EMPTY_STRING_LIST);
BOOST_TEST_REQUIRE(writer.acceptResult(result));
BOOST_TEST_REQUIRE(writer.endOutputBatch(false, 1U));
}
rapidjson::Document arrayDoc;
arrayDoc.Parse<rapidjson::kParseDefaultFlags>(sstream.str().c_str());
// Debug print record
{
rapidjson::StringBuffer strbuf;
using TStringBufferPrettyWriter = rapidjson::PrettyWriter<rapidjson::StringBuffer>;
TStringBufferPrettyWriter writer(strbuf);
arrayDoc.Accept(writer);
LOG_DEBUG(<< "Results:\n" << strbuf.GetString());
}
BOOST_TEST_REQUIRE(arrayDoc.IsArray());
BOOST_REQUIRE_EQUAL(rapidjson::SizeType(2), arrayDoc.Size());
BOOST_TEST_REQUIRE(arrayDoc[rapidjson::SizeType(0)].HasMember("records"));
const rapidjson::Value& record =
arrayDoc[rapidjson::SizeType(0)]["records"][rapidjson::SizeType(0)];

BOOST_TEST_REQUIRE(record.IsObject());
BOOST_REQUIRE_EQUAL(false, record.HasMember("geo_results"));
}
}

BOOST_AUTO_TEST_CASE(testWriteNonAnomalousBucket) {
std::ostringstream sstream;

Expand Down