-
Notifications
You must be signed in to change notification settings - Fork 24.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Extract capture config from grok patterns up front (backport of #62706)…
… (#62785) This extracts the configuration for extracting values from a groked string when building the grok expression, in order to do two things: 1. Create a method on `Grok` itself that exposes that configuration, which will be used by `grok` flavored runtime fields. 2. Marginally speed up extracting grok values by skipping a little string manipulation.
- Loading branch information
Showing
5 changed files
with
289 additions
and
82 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
67 changes: 67 additions & 0 deletions
67
libs/grok/src/main/java/org/elasticsearch/grok/GrokCaptureConfig.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
/* | ||
* Licensed to Elasticsearch under one or more contributor | ||
* license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright | ||
* ownership. Elasticsearch licenses this file to you under | ||
* the Apache License, Version 2.0 (the "License"); you may | ||
* not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package org.elasticsearch.grok; | ||
|
||
import org.joni.NameEntry; | ||
import org.joni.Region; | ||
|
||
import java.nio.charset.StandardCharsets; | ||
|
||
/** | ||
* Configuration for a value that {@link Grok} can capture. | ||
*/ | ||
public final class GrokCaptureConfig { | ||
private final String name; | ||
private final GrokCaptureType type; | ||
private final int[] backRefs; | ||
|
||
GrokCaptureConfig(NameEntry nameEntry) { | ||
String groupName = new String(nameEntry.name, nameEntry.nameP, nameEntry.nameEnd - nameEntry.nameP, StandardCharsets.UTF_8); | ||
String[] parts = groupName.split(":"); | ||
name = parts.length >= 2 ? parts[1] : parts[0]; | ||
type = parts.length == 3 ? GrokCaptureType.fromString(parts[2]) : GrokCaptureType.STRING; | ||
this.backRefs = nameEntry.getBackRefs(); | ||
} | ||
|
||
/** | ||
* The name defined for the field in the pattern. | ||
*/ | ||
public String name() { | ||
return name; | ||
} | ||
|
||
/** | ||
* The type defined for the field in the pattern. | ||
*/ | ||
public GrokCaptureType type() { | ||
return type; | ||
} | ||
|
||
Object extract(byte[] textAsBytes, Region region) { | ||
for (int number : backRefs) { | ||
if (region.beg[number] >= 0) { | ||
String matchValue = new String(textAsBytes, region.beg[number], region.end[number] - region.beg[number], | ||
StandardCharsets.UTF_8); | ||
return type.parse(matchValue); | ||
} | ||
} | ||
return null; | ||
} | ||
} |
90 changes: 90 additions & 0 deletions
90
libs/grok/src/main/java/org/elasticsearch/grok/GrokCaptureType.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
/* | ||
* Licensed to Elasticsearch under one or more contributor | ||
* license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright | ||
* ownership. Elasticsearch licenses this file to you under | ||
* the Apache License, Version 2.0 (the "License"); you may | ||
* not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package org.elasticsearch.grok; | ||
|
||
/**
 * The type defined for a field in a grok pattern. Each constant knows how to
 * convert the captured text into a value of that type.
 */
public enum GrokCaptureType {
    /** Keeps the captured text unchanged. */
    STRING {
        @Override
        protected Object parseValue(String text) {
            return text;
        }
    },
    /** Converts the captured text to an {@link Integer}. */
    INTEGER {
        @Override
        protected Object parseValue(String text) {
            return Integer.valueOf(text);
        }
    },
    /** Converts the captured text to a {@link Long}. */
    LONG {
        @Override
        protected Object parseValue(String text) {
            return Long.valueOf(text);
        }
    },
    /** Converts the captured text to a {@link Double}. */
    DOUBLE {
        @Override
        protected Object parseValue(String text) {
            return Double.valueOf(text);
        }
    },
    /** Converts the captured text to a {@link Float}. */
    FLOAT {
        @Override
        protected Object parseValue(String text) {
            return Float.valueOf(text);
        }
    },
    /** Converts the captured text to a {@link Boolean}. */
    BOOLEAN {
        @Override
        protected Object parseValue(String text) {
            return Boolean.valueOf(text);
        }
    };

    /**
     * Converts captured text to this type, passing {@code null} straight through.
     *
     * @param str the captured text, possibly {@code null}
     * @return the converted value, or {@code null} when {@code str} is {@code null}
     */
    final Object parse(String str) {
        return str == null ? null : parseValue(str);
    }

    /**
     * Type-specific conversion of non-null captured text.
     *
     * @param str the captured text; {@link #parse(String)} never passes {@code null}
     * @return the converted value
     */
    protected abstract Object parseValue(String str);

    /**
     * Resolves the type name used in a pattern to a constant.
     * <p>
     * Recognized names are {@code string}, {@code int}, {@code long},
     * {@code double}, {@code float} and {@code boolean}; any other name
     * resolves to {@link #STRING}.
     *
     * @param str the type name as written in the pattern
     * @return the matching type, or {@link #STRING} for an unrecognized name
     */
    static GrokCaptureType fromString(String str) {
        if (str.equals("int")) {
            return INTEGER;
        }
        if (str.equals("long")) {
            return LONG;
        }
        if (str.equals("double")) {
            return DOUBLE;
        }
        if (str.equals("float")) {
            return FLOAT;
        }
        if (str.equals("boolean")) {
            return BOOLEAN;
        }
        // "string" and every unrecognized name both resolve to STRING.
        return STRING;
    }
}
68 changes: 0 additions & 68 deletions
68
libs/grok/src/main/java/org/elasticsearch/grok/GrokMatchGroup.java
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.