From f37bce12d23c2ad0068df7dd709119cd6c789fe9 Mon Sep 17 00:00:00 2001 From: Adam Locke Date: Thu, 27 May 2021 17:06:53 -0400 Subject: [PATCH] [DOCS] [7.x] Create a new page for grok content in scripting docs (#73118) (#73495) (#73504) * [DOCS] [7.x] Create a new page for grok content in scripting docs (#73118) * [DOCS] Moving grok to its own scripting page * Adding examples * Updating cross link for grok page * Adds same runtime field in a search request for #73262 * Clarify titles and shift navigation * Incorporating review feedback * Updating cross-link to Painless * Adding doc type to response --- .../reference/ingest/processors/grok.asciidoc | 39 --- docs/reference/mapping/runtime.asciidoc | 145 ++++++----- docs/reference/redirects.asciidoc | 5 + .../scripting/common-script-uses.asciidoc | 2 +- docs/reference/scripting/grok-syntax.asciidoc | 236 ++++++++++++++++++ docs/reference/scripting/using.asciidoc | 1 + 6 files changed, 331 insertions(+), 97 deletions(-) create mode 100644 docs/reference/scripting/grok-syntax.asciidoc diff --git a/docs/reference/ingest/processors/grok.asciidoc b/docs/reference/ingest/processors/grok.asciidoc index 9b757fc1c3dbc..4936d8ae19efd 100644 --- a/docs/reference/ingest/processors/grok.asciidoc +++ b/docs/reference/ingest/processors/grok.asciidoc @@ -8,8 +8,6 @@ Extracts structured fields out of a single text field within a document. You cho extract matched fields from, as well as the grok pattern you expect will match. A grok pattern is like a regular expression that supports aliased expressions that can be reused. -This tool is perfect for syslog logs, apache and other webserver logs, mysql logs, and in general, any log format -that is generally written for humans and not computer consumption. This processor comes packaged with many https://github.com/elastic/elasticsearch/blob/{branch}/libs/grok/src/main/resources/patterns[reusable patterns]. 
@@ -17,43 +15,6 @@ If you need help building patterns to match your logs, you will find the {kibana-ref}/xpack-grokdebugger.html[Grok Debugger] tool quite useful! The https://grokconstructor.appspot.com[Grok Constructor] is also a useful tool. -[[grok-basics]] -==== Grok Basics - -Grok sits on top of regular expressions, so any regular expressions are valid in grok as well. -The regular expression library is Oniguruma, and you can see the full supported regexp syntax -https://github.com/kkos/oniguruma/blob/master/doc/RE[on the Oniguruma site]. - -Grok works by leveraging this regular expression language to allow naming existing patterns and combining them into more -complex patterns that match your fields. - -The syntax for reusing a grok pattern comes in three forms: `%{SYNTAX:SEMANTIC}`, `%{SYNTAX}`, `%{SYNTAX:SEMANTIC:TYPE}`. - -The `SYNTAX` is the name of the pattern that will match your text. For example, `3.44` will be matched by the `NUMBER` -pattern and `55.3.244.1` will be matched by the `IP` pattern. The syntax is how you match. `NUMBER` and `IP` are both -patterns that are provided within the default patterns set. - -The `SEMANTIC` is the identifier you give to the piece of text being matched. For example, `3.44` could be the -duration of an event, so you could call it simply `duration`. Further, a string `55.3.244.1` might identify -the `client` making a request. - -The `TYPE` is the type you wish to cast your named field. `int`, `long`, `double`, `float` and `boolean` are supported types for coercion. - -For example, you might want to match the following text: - -[source,txt] --------------------------------------------------- -3.44 55.3.244.1 --------------------------------------------------- - -You may know that the message in the example is a number followed by an IP address. You can match this text by using the following -Grok expression. 
- -[source,txt] --------------------------------------------------- -%{NUMBER:duration} %{IP:client} --------------------------------------------------- - [[using-grok]] ==== Using the Grok Processor in a Pipeline diff --git a/docs/reference/mapping/runtime.asciidoc b/docs/reference/mapping/runtime.asciidoc index 8e3f6d7588d04..049dcf3aee23c 100644 --- a/docs/reference/mapping/runtime.asciidoc +++ b/docs/reference/mapping/runtime.asciidoc @@ -91,7 +91,7 @@ calculates the day of the week based on the value of `timestamp`, and uses [source,console] ---- -PUT my-index/ +PUT my-index-000001/ { "mappings": { "runtime": { @@ -130,7 +130,7 @@ the index mapping as runtime fields: [source,console] ---- -PUT my-index +PUT my-index-000001 { "mappings": { "dynamic": "runtime", @@ -152,7 +152,7 @@ a runtime field without a script, such as `day_of_week`: [source,console] ---- -PUT my-index/ +PUT my-index-000001/ { "mappings": { "runtime": { @@ -194,7 +194,7 @@ remove a runtime field from the mappings, set the value of the runtime field to [source,console] ---- -PUT my-index/_mapping +PUT my-index-000001/_mapping { "runtime": { "day_of_week": null @@ -233,7 +233,7 @@ and only within the context of this search request: [source,console] ---- -GET my-index/_search +GET my-index-000001/_search { "runtime_mappings": { "day_of_week": { @@ -262,7 +262,7 @@ other runtime fields. 
For example, let's say you bulk index some sensor data: [source,console] ---- -POST my-index/_bulk?refresh=true +POST my-index-000001/_bulk?refresh=true {"index":{}} {"@timestamp":1516729294000,"model_number":"QVKC92Q","measures":{"voltage":"5.2","start": "300","end":"8675309"}} {"index":{}} @@ -285,7 +285,7 @@ your indexed fields and modify the data type: [source,console] ---- -PUT my-index/_mapping +PUT my-index-000001/_mapping { "runtime": { "measures.start": { @@ -312,7 +312,7 @@ Now, you can easily run an [source,console] ---- -GET my-index/_search +GET my-index-000001/_search { "aggs": { "avg_start": { @@ -360,7 +360,7 @@ compute statistics over numeric values extracted from the aggregated documents. [source,console] ---- -GET my-index/_search +GET my-index-000001/_search { "runtime_mappings": { "duration": { @@ -413,11 +413,11 @@ script, and returns the value as part of the query. Because the runtime field shadows the mapped field, you can override the value returned in search without modifying the mapped field. 
-For example, let's say you indexed the following documents into `my-index`: +For example, let's say you indexed the following documents into `my-index-000001`: [source,console] ---- -POST my-index/_bulk?refresh=true +POST my-index-000001/_bulk?refresh=true {"index":{}} {"@timestamp":1516729294000,"model_number":"QVKC92Q","measures":{"voltage":5.2}} {"index":{}} @@ -442,7 +442,7 @@ If you search for documents where the model number matches `HG537PU`: [source,console] ---- -GET my-index/_search +GET my-index-000001/_search { "query": { "match": { @@ -468,7 +468,7 @@ The response includes indexed values for documents matching model number "max_score" : 1.0296195, "hits" : [ { - "_index" : "my-index", + "_index" : "my-index-000001", "_type" : "_doc", "_id" : "F1BeSXYBg_szTodcYCmk", "_score" : 1.0296195, @@ -481,7 +481,7 @@ The response includes indexed values for documents matching model number } }, { - "_index" : "my-index", + "_index" : "my-index-000001", "_type" : "_doc", "_id" : "l02aSXYBkpNf6QRDO62Q", "_score" : 1.0296195, @@ -513,7 +513,7 @@ for documents matching the search request: [source,console] ---- -POST my-index/_search +POST my-index-000001/_search { "runtime_mappings": { "measures.voltage": { @@ -553,7 +553,7 @@ which still returns in the response: "max_score" : 1.0296195, "hits" : [ { - "_index" : "my-index", + "_index" : "my-index-000001", "_type" : "_doc", "_id" : "F1BeSXYBg_szTodcYCmk", "_score" : 1.0296195, @@ -571,7 +571,7 @@ which still returns in the response: } }, { - "_index" : "my-index", + "_index" : "my-index-000001", "_type" : "_doc", "_id" : "l02aSXYBkpNf6QRDO62Q", "_score" : 1.0296195, @@ -615,7 +615,7 @@ the request so that new fields are added to the mapping as runtime fields. 
[source,console] ---- -PUT my-index/ +PUT my-index-000001/ { "mappings": { "dynamic": "runtime", @@ -642,7 +642,7 @@ Let's ingest some sample data, which will result in two indexed fields: [source,console] ---- -POST /my-index/_bulk?refresh +POST /my-index-000001/_bulk?refresh { "index": {}} { "@timestamp": "2020-06-21T15:00:01-05:00", "message" : "211.11.9.0 - - [2020-06-21T15:00:01-05:00] \"GET /english/index.html HTTP/1.0\" 304 0"} { "index": {}} @@ -679,7 +679,7 @@ modify the mapping without changing any field values. [source,console] ---- -GET my-index/_search +GET my-index-000001/_search { "fields": [ "@timestamp", @@ -696,7 +696,7 @@ the `message` field and will further refine the query: [source,console] ---- -PUT /my-index/_mapping +PUT /my-index-000001/_mapping { "runtime": { "client_ip": { @@ -715,7 +715,7 @@ runtime field: [source,console] ---- -GET my-index/_search +GET my-index-000001/_search { "size": 1, "query": { @@ -745,7 +745,7 @@ address. "max_score" : 1.0, "hits" : [ { - "_index" : "my-index", + "_index" : "my-index-000001", "_type" : "_doc", "_id" : "oWs5KXYB-XyJbifr9mrz", "_score" : 1.0, @@ -797,11 +797,11 @@ valves. The connected sensors are only capable of reporting a fraction of the true readings. Rather than outfit the pressure valves with new sensors, you decide to calculate the values based on reported readings. Based on the reported data, you define the following fields in your mapping for -`my-index`: +`my-index-000001`: [source,console] ---- -PUT my-index/ +PUT my-index-000001/ { "mappings": { "properties": { @@ -827,7 +827,7 @@ You then bulk index some sample data from your sensors. 
This data includes [source,console] ---- -POST my-index/_bulk?refresh=true +POST my-index-000001/_bulk?refresh=true {"index":{}} {"timestamp": 1516729294000, "temperature": 200, "voltage": 5.2, "node": "a"} {"index":{}} @@ -850,7 +850,7 @@ voltage and multiplies it by `2`: [source,console] ---- -PUT my-index/_mapping +PUT my-index-000001/_mapping { "runtime": { "voltage_corrected": { @@ -874,7 +874,7 @@ parameter on the `_search` API: [source,console] ---- -GET my-index/_search +GET my-index-000001/_search { "fields": [ "voltage_corrected", @@ -899,7 +899,7 @@ GET my-index/_search "max_score" : 1.0, "hits" : [ { - "_index" : "my-index", + "_index" : "my-index-000001", "_type" : "_doc", "_id" : "z4TCrHgBdg9xpPrU6z9k", "_score" : 1.0, @@ -919,7 +919,7 @@ GET my-index/_search } }, { - "_index" : "my-index", + "_index" : "my-index-000001", "_type" : "_doc", "_id" : "0ITCrHgBdg9xpPrU6z9k", "_score" : 1.0, @@ -952,7 +952,7 @@ multiplier for reported sensor data should be `4`. To gain greater performance, you decide to index the `voltage_corrected` runtime field with the new `multiplier` parameter. -In a new index named `my-index-00001`, copy the `voltage_corrected` runtime +In a new index named `my-index-000001`, copy the `voltage_corrected` runtime field definition into the mappings of the new index. It's that simple! You can add an optional parameter named `on_script_error` that determines whether to reject the entire document if the script throws an error at index time @@ -960,7 +960,7 @@ reject the entire document if the script throws an error at index time [source,console] ---- -PUT my-index-00001/ +PUT my-index-000001/ { "mappings": { "properties": { @@ -996,11 +996,11 @@ PUT my-index-00001/ index time. Setting the value to `ignore` will register the field in the document’s `_ignored` metadata field and continue indexing. 
-Bulk index some sample data from your sensors into the `my-index-00001` index: +Bulk index some sample data from your sensors into the `my-index-000001` index: [source,console] ---- -POST my-index-00001/_bulk?refresh=true +POST my-index-000001/_bulk?refresh=true { "index": {}} { "timestamp": 1516729294000, "temperature": 200, "voltage": 5.2, "node": "a"} { "index": {}} @@ -1024,7 +1024,7 @@ the `_search` API to retrieve the fields you want: [source,console] ---- -POST my-index-00001/_search +POST my-index-000001/_search { "query": { "range": { @@ -1056,7 +1056,7 @@ match the range query, based on the calculated value of the included script: "max_score" : 1.0, "hits" : [ { - "_index" : "my-index-00001", + "_index" : "my-index-000001", "_type" : "_doc", "_id" : "yoSLrHgBdg9xpPrUZz_P", "_score" : 1.0, @@ -1076,7 +1076,7 @@ match the range query, based on the calculated value of the included script: } }, { - "_index" : "my-index-00001", + "_index" : "my-index-000001", "_type" : "_doc", "_id" : "y4SLrHgBdg9xpPrUZz_P", "_score" : 1.0, @@ -1117,12 +1117,12 @@ time for these fields. ==== Define indexed fields as a starting point You can start with a simple example by adding the `@timestamp` and `message` -fields to the `my-index` mapping as indexed fields. To remain flexible, use +fields to the `my-index-000001` mapping as indexed fields. To remain flexible, use `wildcard` as the field type for `message`: [source,console] ---- -PUT /my-index/ +PUT /my-index-000001/ { "mappings": { "properties": { @@ -1142,7 +1142,7 @@ PUT /my-index/ ==== Ingest some data After mapping the fields you want to retrieve, index a few records from your log data into {es}. The following request uses the <> -to index raw log data into `my-index`. Instead of indexing all of your log +to index raw log data into `my-index-000001`. Instead of indexing all of your log data, you can use a small sample to experiment with runtime fields. 
The final document is not a valid Apache log format, but we can account for @@ -1150,7 +1150,7 @@ that scenario in our script. [source,console] ---- -POST /my-index/_bulk?refresh +POST /my-index-000001/_bulk?refresh {"index":{}} {"timestamp":"2020-04-30T14:30:17-05:00","message":"40.135.0.0 - - [30/Apr/2020:14:30:17 -0500] \"GET /images/hm_bg.jpg HTTP/1.0\" 200 24736"} {"index":{}} @@ -1172,7 +1172,7 @@ At this point, you can view how {es} stores your raw data. [source,console] ---- -GET /my-index +GET /my-index-000001 ---- // TEST[continued] @@ -1181,7 +1181,7 @@ The mapping contains two fields: `@timestamp` and `message`. [source,console-result] ---- { - "my-index" : { + "my-index-000001" : { "aliases" : { }, "mappings" : { "properties" : { @@ -1201,24 +1201,24 @@ The mapping contains two fields: `@timestamp` and `message`. } } ---- -// TESTRESPONSE[s/\.\.\./"settings": $body.my-index.settings/] +// TESTRESPONSE[s/\.\.\./"settings": $body.my-index-000001.settings/] [[runtime-examples-grok]] ==== Define a runtime field with a grok pattern If you want to retrieve results that include `clientip`, you can add that field as a runtime field in the mapping. The following runtime script defines a -grok pattern that extracts structured fields out of a single text +<> that extracts structured fields out of a single text field within a document. A grok pattern is like a regular expression that -supports aliased expressions that you can reuse. See <> to learn more about grok syntax. +supports aliased expressions that you can reuse. The script matches on the `%{COMMONAPACHELOG}` log pattern, which understands the structure of Apache logs. If the pattern matches, the script emits the -value matching IP address. If the pattern doesn't match +value of the matching IP address. If the pattern doesn't match (`clientip != null`), the script just returns the field value without crashing. 
[source,console] ---- -PUT my-index/_mappings +PUT my-index-000001/_mappings { "runtime": { "http.clientip": { @@ -1235,6 +1235,37 @@ PUT my-index/_mappings <1> This condition ensures that the script doesn't crash even if the pattern of the message doesn't match. +Alternatively, you can define the same runtime field but in the context of a +search request. The runtime definition and the script are exactly the same as +the one defined previously in the index mapping. Just copy that definition into +the search request under the `runtime_mappings` section and include a query +that matches on the runtime field. This query returns the same results as if +you defined a search query for the `http.clientip` runtime field in your index +mappings, but only in the context of this specific search: + +[source,console] +---- +GET my-index-000001/_search +{ + "runtime_mappings": { + "http.clientip": { + "type": "ip", + "script": """ + String clientip=grok('%{COMMONAPACHELOG}').extract(doc["message"].value)?.clientip; + if (clientip != null) emit(clientip); + """ + } + }, + "query": { + "match": { + "http.clientip": "40.135.0.0" + } + }, + "fields" : ["http.clientip"] +} +---- +// TEST[continued] + [[runtime-examples-grok-ip]] ===== Search for a specific IP address Using the `http.clientip` runtime field, you can define a simple query to run a @@ -1242,7 +1273,7 @@ search for a specific IP address and return all related fields. [source,console] ---- -GET my-index/_search +GET my-index-000001/_search { "query": { "match": { @@ -1281,7 +1312,7 @@ data that doesn't match the grok pattern. "max_score" : 1.0, "hits" : [ { - "_index" : "my-index", + "_index" : "my-index-000001", "_type" : "_doc", "_id" : "FdLqu3cBhqheMnFKd0gK", "_score" : 1.0, @@ -1316,7 +1347,7 @@ You can also run a <> that operates on the [source,console] ---- -GET my-index/_search +GET my-index-000001/_search { "query": { "range": { @@ -1344,7 +1375,7 @@ timestamp falls within the defined range. 
"max_score" : 1.0, "hits" : [ { - "_index" : "my-index", + "_index" : "my-index-000001", "_type" : "_doc", "_id" : "hdEhyncBRSB6iD-PoBqe", "_score" : 1.0, @@ -1354,7 +1385,7 @@ timestamp falls within the defined range. } }, { - "_index" : "my-index", + "_index" : "my-index-000001", "_type" : "_doc", "_id" : "htEhyncBRSB6iD-PoBqe", "_score" : 1.0, @@ -1385,7 +1416,7 @@ successful dissect patterns. [source,console] ---- -PUT my-index/_mappings +PUT my-index-000001/_mappings { "runtime": { "http.client.ip": { @@ -1404,7 +1435,7 @@ Similarly, you can define a dissect pattern to extract the https://developer.moz [source,console] ---- -PUT my-index/_mappings +PUT my-index-000001/_mappings { "runtime": { "http.response": { @@ -1424,7 +1455,7 @@ You can then run a query to retrieve a specific HTTP response using the [source,console] ---- -GET my-index/_search +GET my-index-000001/_search { "query": { "match": { @@ -1450,7 +1481,7 @@ The response includes a single document where the HTTP response is `304`: "max_score" : 1.0, "hits" : [ { - "_index" : "my-index", + "_index" : "my-index-000001", "_type" : "_doc", "_id" : "A2qDy3cBWRMvVAuI7F8M", "_score" : 1.0, diff --git a/docs/reference/redirects.asciidoc b/docs/reference/redirects.asciidoc index 656af56df436f..ec1f53b70819b 100644 --- a/docs/reference/redirects.asciidoc +++ b/docs/reference/redirects.asciidoc @@ -3,6 +3,11 @@ The following pages have moved or been deleted. +[role="exclude",id="grok-basics"] +=== Grok basics + +See <>. + // [START] Security redirects [role="exclude",id="get-started-users"] diff --git a/docs/reference/scripting/common-script-uses.asciidoc b/docs/reference/scripting/common-script-uses.asciidoc index 0b78115cc2637..3cf30b328d723 100644 --- a/docs/reference/scripting/common-script-uses.asciidoc +++ b/docs/reference/scripting/common-script-uses.asciidoc @@ -14,7 +14,7 @@ information, but you only want to extract pieces and parts. 
There are two options at your disposal: -* <<grok-basics,Grok>> is a regular expression dialect that supports aliased +* <<grok,Grok>> is a regular expression dialect that supports aliased expressions that you can reuse. Because Grok sits on top of regular expressions (regex), any regular expressions are valid in grok as well. * <<dissect,Dissect>> extracts structured fields out of text, using diff --git a/docs/reference/scripting/grok-syntax.asciidoc b/docs/reference/scripting/grok-syntax.asciidoc new file mode 100644 index 0000000000000..4474cfe233a4d --- /dev/null +++ b/docs/reference/scripting/grok-syntax.asciidoc @@ -0,0 +1,236 @@ +[[grok]] +=== Grokking grok +Grok is a regular expression dialect that supports reusable aliased expressions. Grok works really well with syslog logs, Apache and other webserver +logs, MySQL logs, and generally any log format that is written for humans and +not computer consumption. + +Grok sits on top of the https://github.com/kkos/oniguruma/blob/master/doc/RE[Oniguruma] regular expression library, so any regular expressions are +valid in grok. Grok uses this regular expression language to allow naming +existing patterns and combining them into more complex patterns that match your +fields. + +[[grok-syntax]] +==== Grok patterns +The {stack} ships with numerous https://github.com/elastic/elasticsearch/blob/master/libs/grok/src/main/resources/patterns/grok-patterns[predefined grok patterns] that simplify working with grok. The syntax for reusing grok patterns +takes one of the following forms: + +[%autowidth] +|=== +|`%{SYNTAX}` | `%{SYNTAX:ID}` |`%{SYNTAX:ID:TYPE}` +|=== + +`SYNTAX`:: +The name of the pattern that will match your text. For example, `NUMBER` and +`IP` are both patterns that are provided within the default patterns set. The +`NUMBER` pattern matches data like `3.44`, and the `IP` pattern matches data +like `55.3.244.1`. + +`ID`:: +The identifier you give to the piece of text being matched. 
For example, `3.44` +could be the duration of an event, so you might call it `duration`. The string +`55.3.244.1` might identify the `client` making a request. + +`TYPE`:: +The data type you want to cast your named field to. `int`, `long`, `double`, +`float` and `boolean` are supported types. + +For example, let's say you have message data that looks like this: + +[source,txt] +---- +3.44 55.3.244.1 +---- + +The first value is a number, followed by what appears to be an IP address. You +can match this text by using the following grok expression: + +[source,txt] +---- +%{NUMBER:duration} %{IP:client} +---- + +[[grok-patterns]] +==== Use grok patterns in Painless scripts +You can incorporate predefined grok patterns into Painless scripts to extract +data. To test your script, use either the {painless}/painless-execute-api.html#painless-execute-runtime-field-context[field contexts] of the Painless +execute API or create a runtime field that includes the script. Runtime fields +offer greater flexibility and accept multiple documents, but the Painless +execute API is a great option if you don't have write access on a cluster +where you're testing a script. + +TIP: If you need help building grok patterns to match your data, use the +{kibana-ref}/xpack-grokdebugger.html[Grok Debugger] tool in {kib}. + +For example, if you're working with Apache log data, you can use the +`%{COMMONAPACHELOG}` syntax, which understands the structure of Apache logs. A +sample document might look like this: + +// Note to contributors that the line break in the following example is +// intentional to promote better readability in the output +[source,js] +---- +"timestamp":"2020-04-30T14:30:17-05:00","message":"40.135.0.0 - - +[30/Apr/2020:14:30:17 -0500] \"GET /images/hm_bg.jpg HTTP/1.0\" 200 24736" +---- +// NOTCONSOLE + +To extract the IP address from the `message` field, you can write a Painless +script that incorporates the `%{COMMONAPACHELOG}` syntax. 
You can test this +script using the {painless}/painless-execute-api.html#painless-runtime-ip[`ip` field context] of the Painless execute API, but let's use a runtime field +instead. + +Based on the sample document, index the `@timestamp` and `message` fields. To +remain flexible, use `wildcard` as the field type for `message`: + +[source,console] +---- +PUT /my-index/ +{ + "mappings": { + "properties": { + "@timestamp": { + "format": "strict_date_optional_time||epoch_second", + "type": "date" + }, + "message": { + "type": "wildcard" + } + } + } +} +---- + +Next, use the <<docs-bulk,bulk API>> to index some log data into +`my-index`. + +[source,console] +---- +POST /my-index/_bulk?refresh +{"index":{}} +{"timestamp":"2020-04-30T14:30:17-05:00","message":"40.135.0.0 - - [30/Apr/2020:14:30:17 -0500] \"GET /images/hm_bg.jpg HTTP/1.0\" 200 24736"} +{"index":{}} +{"timestamp":"2020-04-30T14:30:53-05:00","message":"232.0.0.0 - - [30/Apr/2020:14:30:53 -0500] \"GET /images/hm_bg.jpg HTTP/1.0\" 200 24736"} +{"index":{}} +{"timestamp":"2020-04-30T14:31:12-05:00","message":"26.1.0.0 - - [30/Apr/2020:14:31:12 -0500] \"GET /images/hm_bg.jpg HTTP/1.0\" 200 24736"} +{"index":{}} +{"timestamp":"2020-04-30T14:31:19-05:00","message":"247.37.0.0 - - [30/Apr/2020:14:31:19 -0500] \"GET /french/splash_inet.html HTTP/1.0\" 200 3781"} +{"index":{}} +{"timestamp":"2020-04-30T14:31:22-05:00","message":"247.37.0.0 - - [30/Apr/2020:14:31:22 -0500] \"GET /images/hm_nbg.jpg HTTP/1.0\" 304 0"} +{"index":{}} +{"timestamp":"2020-04-30T14:31:27-05:00","message":"252.0.0.0 - - [30/Apr/2020:14:31:27 -0500] \"GET /images/hm_bg.jpg HTTP/1.0\" 200 24736"} +{"index":{}} +{"timestamp":"2020-04-30T14:31:28-05:00","message":"not a valid apache log"} +---- +// TEST[continued] + +[[grok-patterns-runtime]] +==== Incorporate grok patterns and scripts in runtime fields +Now you can define a runtime field in the mappings that includes your Painless +script and grok pattern. 
If the pattern matches, the script emits the value of +the matching IP address. If the pattern doesn't match (`clientip == null`), the +script emits no value for that document instead of crashing. + +[source,console] +---- +PUT my-index/_mappings +{ + "runtime": { + "http.clientip": { + "type": "ip", + "script": """ + String clientip=grok('%{COMMONAPACHELOG}').extract(doc["message"].value)?.clientip; + if (clientip != null) emit(clientip); + """ + } + } +} +---- +// TEST[continued] + +Alternatively, you can define the same runtime field but in the context of a +search request. The runtime definition and the script are exactly the same as +the one defined previously in the index mapping. Just copy that definition into +the search request under the `runtime_mappings` section and include a query +that matches on the runtime field. This query returns the same results as if +you <<grok-pattern-results,defined a search query>> for the `http.clientip` +runtime field in your index mappings, but only in the context of this specific +search: + +[source,console] +---- +GET my-index/_search +{ + "runtime_mappings": { + "http.clientip": { + "type": "ip", + "script": """ + String clientip=grok('%{COMMONAPACHELOG}').extract(doc["message"].value)?.clientip; + if (clientip != null) emit(clientip); + """ + } + }, + "query": { + "match": { + "http.clientip": "40.135.0.0" + } + }, + "fields" : ["http.clientip"] +} +---- +// TEST[continued] + +[[grok-pattern-results]] +==== Return calculated results +Using the `http.clientip` runtime field, you can define a simple query to run a +search for a specific IP address and return all related fields. 
The <<search-fields,`fields`>> parameter on the `_search` API works for all fields, +even those that weren't sent as part of the original `_source`: + +[source,console] +---- +GET my-index/_search +{ + "query": { + "match": { + "http.clientip": "40.135.0.0" + } + }, + "fields" : ["http.clientip"] +} +---- +// TEST[continued] +// TEST[s/_search/_search\?filter_path=hits/] + +The response includes the specific IP address indicated in your search query. +The grok pattern within the Painless script extracted this value from the +`message` field at runtime. + +[source,console-result] +---- +{ + "hits" : { + "total" : { + "value" : 1, + "relation" : "eq" + }, + "max_score" : 1.0, + "hits" : [ + { + "_index" : "my-index", + "_type" : "_doc", + "_id" : "1iN2a3kBw4xTzEDqyYE0", + "_score" : 1.0, + "_source" : { + "timestamp" : "2020-04-30T14:30:17-05:00", + "message" : "40.135.0.0 - - [30/Apr/2020:14:30:17 -0500] \"GET /images/hm_bg.jpg HTTP/1.0\" 200 24736" + }, + "fields" : { + "http.clientip" : [ + "40.135.0.0" + ] + } + } + ] + } +} +---- +// TESTRESPONSE[s/"_id" : "1iN2a3kBw4xTzEDqyYE0"/"_id": $body.hits.hits.0._id/] diff --git a/docs/reference/scripting/using.asciidoc b/docs/reference/scripting/using.asciidoc index 8c3c372032d80..4f28bf0b6a074 100644 --- a/docs/reference/scripting/using.asciidoc +++ b/docs/reference/scripting/using.asciidoc @@ -566,3 +566,4 @@ DELETE /_ingest/pipeline/my_test_scores_pipeline //// +include::grok-syntax.asciidoc[]