[DOCS] Fixes code snippet testing for machine learning (#31189)

elastic · Jun 19, 2018 · 5971eb8 · 5971eb8
1 parent b44e1c1
commit 5971eb8
Show file tree

Hide file tree

Showing 17 changed files with 179 additions and 74 deletions.
diff --git a/x-pack/docs/build.gradle b/x-pack/docs/build.gradle
@@ -9,13 +9,6 @@ apply plugin: 'elasticsearch.docs-test'
  * only remove entries from this list. When it is empty we'll remove it
  * entirely and have a party! There will be cake and everything.... */
 buildRestTests.expectedUnconvertedCandidates = [
-        'en/ml/functions/count.asciidoc',
-        'en/ml/functions/geo.asciidoc',
-        'en/ml/functions/info.asciidoc',
-        'en/ml/functions/metric.asciidoc',
-        'en/ml/functions/rare.asciidoc',
-        'en/ml/functions/sum.asciidoc',
-        'en/ml/functions/time.asciidoc',
         'en/rest-api/watcher/put-watch.asciidoc',
         'en/security/authentication/user-cache.asciidoc',
         'en/security/authorization/field-and-document-access-control.asciidoc',
@@ -56,7 +49,6 @@ buildRestTests.expectedUnconvertedCandidates = [
         'en/watcher/troubleshooting.asciidoc',
         'en/rest-api/license/delete-license.asciidoc',
         'en/rest-api/license/update-license.asciidoc',
-        'en/ml/api-quickref.asciidoc',
         'en/rest-api/ml/delete-snapshot.asciidoc',
         'en/rest-api/ml/forecast.asciidoc',
         'en/rest-api/ml/get-bucket.asciidoc',

diff --git a/x-pack/docs/en/ml/aggregations.asciidoc b/x-pack/docs/en/ml/aggregations.asciidoc
@@ -1,5 +1,6 @@
+[role="xpack"]
 [[ml-configuring-aggregation]]
-=== Aggregating Data For Faster Performance
+=== Aggregating data for faster performance
 
 By default, {dfeeds} fetch data from {es} using search and scroll requests.
 It can be significantly more efficient, however, to aggregate data in {es}

diff --git a/x-pack/docs/en/ml/api-quickref.asciidoc b/x-pack/docs/en/ml/api-quickref.asciidoc
@@ -1,12 +1,14 @@
+[role="xpack"]
 [[ml-api-quickref]]
-== API Quick Reference
+== API quick reference
 
 All {ml} endpoints have the following base:
 
 [source,js]
 ----
 /_xpack/ml/
 ----
+// NOTCONSOLE
 
 The main {ml} resources can be accessed with a variety of endpoints:
 

diff --git a/x-pack/docs/en/ml/categories.asciidoc b/x-pack/docs/en/ml/categories.asciidoc
@@ -1,3 +1,4 @@
+[role="xpack"]
 [[ml-configuring-categories]]
 === Categorizing log messages
 
@@ -77,7 +78,7 @@ NOTE: To add the `categorization_examples_limit` property, you must use the
 
 [float]
 [[ml-configuring-analyzer]]
-==== Customizing the Categorization Analyzer
+==== Customizing the categorization analyzer
 
 Categorization uses English dictionary words to identify log message categories.
 By default, it also uses English tokenization rules. For this reason, if you use
@@ -213,7 +214,7 @@ API examples above.
 
 [float]
 [[ml-viewing-categories]]
-==== Viewing Categorization Results
+==== Viewing categorization results
 
 After you open the job and start the {dfeed} or supply data to the job, you can
 view the categorization results in {kib}. For example:

diff --git a/x-pack/docs/en/ml/configuring.asciidoc b/x-pack/docs/en/ml/configuring.asciidoc
@@ -1,5 +1,6 @@
+[role="xpack"]
 [[ml-configuring]]
-== Configuring Machine Learning
+== Configuring machine learning
 
 If you want to use {xpackml} features, there must be at least one {ml} node in
 your cluster and all master-eligible nodes must have {ml} enabled. By default,

diff --git a/x-pack/docs/en/ml/customurl.asciidoc b/x-pack/docs/en/ml/customurl.asciidoc
@@ -48,7 +48,7 @@ using the {ml} APIs.
 
 [float]
 [[ml-configuring-url-strings]]
-==== String Substitution in Custom URLs
+==== String substitution in custom URLs
 
 You can use dollar sign ($) delimited tokens in a custom URL. These tokens are
 substituted for the values of the corresponding fields in the anomaly records.

diff --git a/x-pack/docs/en/ml/functions.asciidoc b/x-pack/docs/en/ml/functions.asciidoc
@@ -1,5 +1,6 @@
+[role="xpack"]
 [[ml-functions]]
-== Function Reference
+== Function reference
 
 The {xpackml} features include analysis functions that provide a wide variety of
 flexible ways to analyze data for anomalies.

diff --git a/x-pack/docs/en/ml/functions/count.asciidoc b/x-pack/docs/en/ml/functions/count.asciidoc
@@ -1,5 +1,6 @@
+[role="xpack"]
 [[ml-count-functions]]
-=== Count Functions
+=== Count functions
 
 Count functions detect anomalies when the number of events in a bucket is
 anomalous.
@@ -21,7 +22,7 @@ The {xpackml} features include the following count functions:
 
 [float]
 [[ml-count]]
-===== Count, High_count, Low_count
+===== Count, high_count, low_count
 
 The `count` function detects anomalies when the number of events in a bucket is
 anomalous.
@@ -44,8 +45,20 @@ see {ref}/ml-job-resource.html#ml-detectorconfig[Detector Configuration Objects]
 .Example 1: Analyzing events with the count function
 [source,js]
 --------------------------------------------------
-{ "function" : "count" }
+PUT _xpack/ml/anomaly_detectors/example1
+{
+  "analysis_config": {
+    "detectors": [{
+      "function" : "count"
+    }]
+  },
+  "data_description": {
+    "time_field":"timestamp",
+    "time_format": "epoch_ms"
+  }
+}
 --------------------------------------------------
+// CONSOLE
 
 This example is probably the simplest possible analysis. It identifies
 time buckets during which the overall count of events is higher or lower than
@@ -57,12 +70,22 @@ and detects when the event rate is unusual compared to its past behavior.
 .Example 2: Analyzing errors with the high_count function
 [source,js]
 --------------------------------------------------
+PUT _xpack/ml/anomaly_detectors/example2
 {
-  "function" : "high_count",
-  "by_field_name" : "error_code",
-  "over_field_name": "user"
+  "analysis_config": {
+    "detectors": [{
+      "function" : "high_count",
+      "by_field_name" : "error_code",
+      "over_field_name": "user"
+    }]
+  },
+  "data_description": {
+    "time_field":"timestamp",
+    "time_format": "epoch_ms"
+  }
 }
 --------------------------------------------------
+// CONSOLE
 
 If you use this `high_count` function in a detector in your job, it
 models the event rate for each error code. It detects users that generate an
@@ -72,11 +95,21 @@ unusually high count of error codes compared to other users.
 .Example 3: Analyzing status codes with the low_count function
 [source,js]
 --------------------------------------------------
+PUT _xpack/ml/anomaly_detectors/example3
 {
-  "function" : "low_count",
-  "by_field_name" : "status_code"
+  "analysis_config": {
+    "detectors": [{
+      "function" : "low_count",
+      "by_field_name" : "status_code"
+    }]
+  },
+  "data_description": {
+    "time_field":"timestamp",
+    "time_format": "epoch_ms"
+  }
 }
 --------------------------------------------------
+// CONSOLE
 
 In this example, the function detects when the count of events for a
 status code is lower than usual.
@@ -88,22 +121,30 @@ compared to its past behavior.
 .Example 4: Analyzing aggregated data with the count function
 [source,js]
 --------------------------------------------------
+PUT _xpack/ml/anomaly_detectors/example4
 {
-  "summary_count_field_name" : "events_per_min",
-  "detectors" [
-      { "function" : "count" }
-   ]
-}
+  "analysis_config": {
+    "summary_count_field_name" : "events_per_min",
+    "detectors": [{
+      "function" : "count"
+    }]
+  },
+  "data_description": {
+    "time_field":"timestamp",
+    "time_format": "epoch_ms"
+  }
+}
 --------------------------------------------------
+// CONSOLE
 
 If you are analyzing an aggregated `events_per_min` field, do not use a sum
 function (for example, `sum(events_per_min)`). Instead, use the count function
-and the `summary_count_field_name` property.
-//TO-DO: For more information, see <<aggreggations.asciidoc>>.
+and the `summary_count_field_name` property. For more information, see
+<<ml-configuring-aggregation>>.
 
 [float]
 [[ml-nonzero-count]]
-===== Non_zero_count, High_non_zero_count, Low_non_zero_count
+===== Non_zero_count, high_non_zero_count, low_non_zero_count
 
 The `non_zero_count` function detects anomalies when the number of events in a
 bucket is anomalous, but it ignores cases where the bucket count is zero. Use
@@ -144,11 +185,21 @@ The `non_zero_count` function models only the following data:
 .Example 5: Analyzing signatures with the high_non_zero_count function
 [source,js]
 --------------------------------------------------
+PUT _xpack/ml/anomaly_detectors/example5
 {
-  "function" : "high_non_zero_count",
-  "by_field_name" : "signaturename"
+  "analysis_config": {
+    "detectors": [{
+      "function" : "high_non_zero_count",
+      "by_field_name" : "signaturename"
+    }]
+  },
+  "data_description": {
+    "time_field":"timestamp",
+    "time_format": "epoch_ms"
+  }
 }
 --------------------------------------------------
+// CONSOLE
 
 If you use this `high_non_zero_count` function in a detector in your job, it
 models the count of events for the `signaturename` field. It ignores any buckets
@@ -163,7 +214,7 @@ data is sparse, use the `count` functions, which are optimized for that scenario
 
 [float]
 [[ml-distinct-count]]
-===== Distinct_count, High_distinct_count, Low_distinct_count
+===== Distinct_count, high_distinct_count, low_distinct_count
 
 The `distinct_count` function detects anomalies where the number of distinct
 values in one field is unusual.
@@ -187,11 +238,21 @@ see {ref}/ml-job-resource.html#ml-detectorconfig[Detector Configuration Objects]
 .Example 6: Analyzing users with the distinct_count function
 [source,js]
 --------------------------------------------------
+PUT _xpack/ml/anomaly_detectors/example6
 {
-  "function" : "distinct_count",
-  "field_name" : "user"
+  "analysis_config": {
+    "detectors": [{
+      "function" : "distinct_count",
+      "field_name" : "user"
+    }]
+  },
+  "data_description": {
+    "time_field":"timestamp",
+    "time_format": "epoch_ms"
+  }
 }
 --------------------------------------------------
+// CONSOLE
 
 This `distinct_count` function detects when a system has an unusual number
 of logged in users. When you use this function in a detector in your job, it
@@ -201,12 +262,22 @@ users is unusual compared to the past.
 .Example 7: Analyzing ports with the high_distinct_count function
 [source,js]
 --------------------------------------------------
+PUT _xpack/ml/anomaly_detectors/example7
 {
-  "function" : "high_distinct_count",
-  "field_name" : "dst_port",
-  "over_field_name": "src_ip"
+  "analysis_config": {
+    "detectors": [{
+      "function" : "high_distinct_count",
+      "field_name" : "dst_port",
+      "over_field_name": "src_ip"
+    }]
+  },
+  "data_description": {
+    "time_field":"timestamp",
+    "time_format": "epoch_ms"
+  }
 }
 --------------------------------------------------
+// CONSOLE
 
 This example detects instances of port scanning. When you use this function in a
 detector in your job, it models the distinct count of ports. It also detects the

diff --git a/x-pack/docs/en/ml/functions/geo.asciidoc b/x-pack/docs/en/ml/functions/geo.asciidoc
@@ -1,5 +1,6 @@
+[role="xpack"]
 [[ml-geo-functions]]
-=== Geographic Functions
+=== Geographic functions
 
 The geographic functions detect anomalies in the geographic location of the
 input data.
@@ -28,12 +29,22 @@ see {ref}/ml-job-resource.html#ml-detectorconfig[Detector Configuration Objects]
 .Example 1: Analyzing transactions with the lat_long function
 [source,js]
 --------------------------------------------------
+PUT _xpack/ml/anomaly_detectors/example1
 {
-  "function" : "lat_long",
-  "field_name" : "transactionCoordinates",
-  "by_field_name" : "creditCardNumber"
+  "analysis_config": {
+    "detectors": [{
+      "function" : "lat_long",
+      "field_name" : "transactionCoordinates",
+      "by_field_name" : "creditCardNumber"
+    }]
+  },
+  "data_description": {
+    "time_field":"timestamp",
+    "time_format": "epoch_ms"
+  }
 }
 --------------------------------------------------
+// CONSOLE
 
 If you use this `lat_long` function in a detector in your job, it
 detects anomalies where the geographic location of a credit card transaction is
@@ -54,6 +65,7 @@ For example, JSON data might contain the following transaction coordinates:
   "creditCardNumber": "1234123412341234"
 }
 --------------------------------------------------
+// NOTCONSOLE
 
 In {es}, location data is likely to be stored in `geo_point` fields. For more
 information, see {ref}/geo-point.html[Geo-point datatype]. This data type is not
@@ -64,7 +76,15 @@ format. For example, the following Painless script transforms
 
 [source,js]
 --------------------------------------------------
+PUT _xpack/ml/datafeeds/datafeed-test2
 {
+  "job_id": "farequote",
+  "indices": ["farequote"],
+  "query": {
+    "match_all": {
+      "boost": 1
+    }
+  },
   "script_fields": {
     "lat-lon": {
       "script": {
@@ -75,5 +95,7 @@ format. For example, the following Painless script transforms
   }
 }
 --------------------------------------------------
+// CONSOLE
+// TEST[setup:farequote_job]
 
 For more information, see <<ml-configuring-transform>>.
diff --git a/x-pack/docs/en/ml/functions/info.asciidoc b/x-pack/docs/en/ml/functions/info.asciidoc
@@ -40,6 +40,7 @@ For more information about those properties, see
   "over_field_name" : "highest_registered_domain"
 }
 --------------------------------------------------
+// NOTCONSOLE
 
 If you use this `info_content` function in a detector in your job, it models
 information that is present in the `subdomain` string. It detects anomalies
@@ -60,6 +61,7 @@ choice.
   "over_field_name" : "src_ip"
 }
 --------------------------------------------------
+// NOTCONSOLE
 
 If you use this `high_info_content` function in a detector in your job, it
 models information content that is held in the DNS query string. It detects
@@ -77,6 +79,7 @@ information content is higher than expected.
   "by_field_name" : "logfilename"
 }
 --------------------------------------------------
+// NOTCONSOLE
 
 If you use this `low_info_content` function in a detector in your job, it models
 information content that is present in the message string for each