Skip to content

Commit

Permalink
Add ES|QL Locate function (#106899)
Browse files Browse the repository at this point in the history
* Add ES|QL Locate function
  • Loading branch information
tteofili committed Apr 5, 2024
1 parent ee667c4 commit 54eeb62
Show file tree
Hide file tree
Showing 13 changed files with 666 additions and 2 deletions.
6 changes: 6 additions & 0 deletions docs/changelog/106899.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 106899
summary: Add ES|QL Locate function
area: ES|QL
type: enhancement
issues:
- 106818
5 changes: 5 additions & 0 deletions docs/reference/esql/functions/description/locate.asciidoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.

*Description*

Returns an integer that indicates the position of a keyword substring within another string
14 changes: 14 additions & 0 deletions docs/reference/esql/functions/layout/locate.asciidoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.

[discrete]
[[esql-locate]]
=== `LOCATE`

*Syntax*

[.text-center]
image::esql/functions/signature/locate.svg[Embedded,opts=inline]

include::../parameters/locate.asciidoc[]
include::../description/locate.asciidoc[]
include::../types/locate.asciidoc[]
12 changes: 12 additions & 0 deletions docs/reference/esql/functions/parameters/locate.asciidoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.

*Parameters*

`string`::
An input string

`substring`::
A substring to locate in the input string

`start`::
The start index
1 change: 1 addition & 0 deletions docs/reference/esql/functions/signature/locate.svg
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
12 changes: 12 additions & 0 deletions docs/reference/esql/functions/types/locate.asciidoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.

*Supported types*

[%header.monospaced.styled,format=dsv,separator=|]
|===
string | substring | start | result
keyword | keyword | integer | integer
keyword | text | integer | integer
text | keyword | integer | integer
text | text | integer | integer
|===
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ double e()
"integer|long|double|boolean|keyword|text|ip|version least(first:integer|long|double|boolean|keyword|text|ip|version, ?rest...:integer|long|double|boolean|keyword|text|ip|version)"
"keyword left(string:keyword|text, length:integer)"
"integer length(string:keyword|text)"
"integer locate(string:keyword|text, substring:keyword|text, ?start:integer)"
"double log(?base:integer|unsigned_long|long|double, number:integer|unsigned_long|long|double)"
"double log10(number:double|integer|long|unsigned_long)"
"keyword|text ltrim(string:keyword|text)"
Expand Down Expand Up @@ -138,6 +139,7 @@ greatest |first |"integer|long|double|boolean
least |first |"integer|long|double|boolean|keyword|text|ip|version" |[""]
left |[string, length] |["keyword|text", integer] |[The string from which to return a substring., The number of characters to return.]
length |string |"keyword|text" |[""]
locate |[string, substring, start] |["keyword|text", "keyword|text", "integer"] |[An input string, A substring to locate in the input string, The start index]
log |[base, number] |["integer|unsigned_long|long|double", "integer|unsigned_long|long|double"] |["Base of logarithm. If `null`\, the function returns `null`. If not provided\, this function returns the natural logarithm (base e) of a value.", "Numeric expression. If `null`\, the function returns `null`."]
log10 |number |"double|integer|long|unsigned_long" |Numeric expression. If `null`, the function returns `null`.
ltrim |string |"keyword|text" |[""]
Expand Down Expand Up @@ -247,6 +249,7 @@ greatest |Returns the maximum value from many columns.
least |Returns the minimum value from many columns.
left |Returns the substring that extracts 'length' chars from 'string' starting from the left.
length |Returns the character length of a string.
locate |Returns an integer that indicates the position of a keyword substring within another string
log |Returns the logarithm of a value to a base. The input can be any numeric value, the return value is always a double. Logs of zero, negative numbers, and base of one return `null` as well as a warning.
log10 |Returns the logarithm of a value to base 10. The input can be any numeric value, the return value is always a double. Logs of 0 and negative numbers return `null` as well as a warning.
ltrim |Removes leading whitespaces from a string.
Expand Down Expand Up @@ -357,6 +360,7 @@ greatest |"integer|long|double|boolean|keyword|text|ip|version"
least |"integer|long|double|boolean|keyword|text|ip|version" |false |true |false
left |keyword |[false, false] |false |false
length |integer |false |false |false
locate |integer |[false, false, true] |false |false
log |double |[true, false] |false |false
log10 |double |false |false |false
ltrim |"keyword|text" |false |false |false
Expand Down Expand Up @@ -447,5 +451,5 @@ countFunctions#[skip:-8.13.99]
meta functions | stats a = count(*), b = count(*), c = count(*) | mv_expand c;

a:long | b:long | c:long
100 | 100 | 100
101 | 101 | 101
;
112 changes: 112 additions & 0 deletions x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec
Original file line number Diff line number Diff line change
Expand Up @@ -1168,3 +1168,115 @@ from employees | where emp_no == 10001 | eval split = split("fooMbar", gender) |
gender:keyword | split:keyword
M | [foo, bar]
;

locate#[skip:-8.13.99,reason:new string function added in 8.14]
row a = "hello" | eval a_ll = locate(a, "ll");

a:keyword | a_ll:integer
hello | 3
;

locateFail#[skip:-8.13.99,reason:new string function added in 8.14]
row a = "hello" | eval a_ll = locate(a, "int");

a:keyword | a_ll:integer
hello | 0
;

locateZeroStart#[skip:-8.13.99,reason:new string function added in 8.14]
row a = "hello" | eval a_ll = locate(a, "ll", 0);

a:keyword | a_ll:integer
hello | 3
;

locateExactStart#[skip:-8.13.99,reason:new string function added in 8.14]
row a = "hello" | eval a_ll = locate(a, "ll", 3);

a:keyword | a_ll:integer
hello | 3
;

locateLongerStart#[skip:-8.13.99,reason:new string function added in 8.14]
row a = "hello" | eval a_ll = locate(a, "ll", 10);

a:keyword | a_ll:integer
hello | 0
;

locateLongerSubstr#[skip:-8.13.99,reason:new string function added in 8.14]
row a = "hello" | eval a_ll = locate(a, "farewell");

a:keyword | a_ll:integer
hello | 0
;

locateSame#[skip:-8.13.99,reason:new string function added in 8.14]
row a = "hello" | eval a_ll = locate(a, "hello");

a:keyword | a_ll:integer
hello | 1
;

locateWithSubstring#[skip:-8.13.99,reason:new string function added in 8.14]
from employees | where emp_no <= 10010 | eval f_s = substring(last_name, 2) | eval f_l = locate(last_name, f_s) | keep emp_no, last_name, f_s, f_l;
ignoreOrder:true

emp_no:integer | last_name:keyword | f_s:keyword | f_l:integer
10001 | Facello | acello | 2
10002 | Simmel | immel | 2
10003 | Bamford | amford | 2
10004 | Koblick | oblick | 2
10005 | Maliniak | aliniak | 2
10006 | Preusig | reusig | 2
10007 | Zielinski | ielinski | 2
10008 | Kalloufi | alloufi | 2
10009 | Peac | eac | 2
10010 | Piveteau | iveteau | 2
;

locateUtf16Emoji#[skip:-8.13.99,reason:new string function added in 8.14]
row a = "🐱Meow!🐶Woof!" | eval f_s = substring(a, 3) | eval f_l = locate(a, f_s);

a:keyword | f_s:keyword | f_l:integer
🐱Meow!🐶Woof! | Meow!🐶Woof! | 3
;

locateNestedSubstring#[skip:-8.13.99,reason:new string function added in 8.14]
row a = "hello" | eval a_ll = substring(a, locate(a, "ll"));

a:keyword | a_ll:keyword
hello | llo
;

locateNestSubstring#[skip:-8.13.99,reason:new string function added in 8.14]
row a = "hello" | eval a_ll = locate(substring(a, 2), "ll");

a:keyword | a_ll:integer
hello | 2
;

locateStats#[skip:-8.13.99,reason:new string function added in 8.14]
from employees | where emp_no <= 10010 | eval f_l = locate(last_name, "ll") | stats min(f_l), max(f_l) by job_positions | sort job_positions | limit 5;

min(f_l):integer | max(f_l):integer | job_positions:keyword
5 | 5 | Accountant
0 | 0 | Architect
0 | 0 | Head Human Resources
0 | 3 | Internship
3 | 3 | Junior Developer
;

locateWarnings#[skip:-8.13.99,reason:new string function added in 8.14]
required_feature: esql.mv_warn

from hosts | where host=="epsilon" | eval l1 = locate(host_group, "ate"), l2 = locate(description, "ate") | keep l1, l2;
ignoreOrder:true
warning:Line 1:80: evaluation of [locate(description, \"ate\")] failed, treating result as null. Only first 20 failures recorded.
warning:Line 1:80: java.lang.IllegalArgumentException: single-value function encountered multi-value

l1:integer | l2:integer
2 | null
2 | null
null | 0
;
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License
// 2.0; you may not use this file except in compliance with the Elastic License
// 2.0.
package org.elasticsearch.xpack.esql.expression.function.scalar.string;

import java.lang.IllegalArgumentException;
import java.lang.Override;
import java.lang.String;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.compute.data.Block;
import org.elasticsearch.compute.data.BytesRefBlock;
import org.elasticsearch.compute.data.BytesRefVector;
import org.elasticsearch.compute.data.IntBlock;
import org.elasticsearch.compute.data.IntVector;
import org.elasticsearch.compute.data.Page;
import org.elasticsearch.compute.operator.DriverContext;
import org.elasticsearch.compute.operator.EvalOperator;
import org.elasticsearch.core.Releasables;
import org.elasticsearch.xpack.esql.expression.function.Warnings;
import org.elasticsearch.xpack.ql.tree.Source;

/**
 * {@link EvalOperator.ExpressionEvaluator} implementation for {@link Locate}.
 * <p>
 * Evaluates the {@code str}, {@code substr} and {@code start} child expressions against a
 * {@link Page} and hands the values found at each position to {@code Locate.process}.
 * <p>
 * This class is generated. Do not edit it.
 */
public final class LocateEvaluator implements EvalOperator.ExpressionEvaluator {
  private final Warnings warnings;

  private final EvalOperator.ExpressionEvaluator str;

  private final EvalOperator.ExpressionEvaluator substr;

  private final EvalOperator.ExpressionEvaluator start;

  private final DriverContext driverContext;

  public LocateEvaluator(Source source, EvalOperator.ExpressionEvaluator str,
      EvalOperator.ExpressionEvaluator substr, EvalOperator.ExpressionEvaluator start,
      DriverContext driverContext) {
    this.warnings = new Warnings(source);
    this.str = str;
    this.substr = substr;
    this.start = start;
    this.driverContext = driverContext;
  }

  /**
   * Evaluates the three child expressions on {@code page} and computes one result per position.
   * The vector overload is used only when all three inputs expose a vector view; in every other
   * case the block overload is used. The intermediate input blocks are closed before returning.
   */
  @Override
  public Block eval(Page page) {
    // Resources are acquired in declaration order and released in reverse order.
    try (
        BytesRefBlock strBlock = (BytesRefBlock) str.eval(page);
        BytesRefBlock substrBlock = (BytesRefBlock) substr.eval(page);
        IntBlock startBlock = (IntBlock) start.eval(page)) {
      BytesRefVector strVector = strBlock.asVector();
      if (strVector != null) {
        BytesRefVector substrVector = substrBlock.asVector();
        if (substrVector != null) {
          IntVector startVector = startBlock.asVector();
          if (startVector != null) {
            return eval(page.getPositionCount(), strVector, substrVector, startVector).asBlock();
          }
        }
      }
      // At least one input has no vector view: take the block-based path.
      return eval(page.getPositionCount(), strBlock, substrBlock, startBlock);
    }
  }

  /**
   * Block-based evaluation. A position produces {@code null} as soon as one of the inputs is
   * null or does not hold exactly one value at that position; inputs are inspected in the order
   * {@code str}, {@code substr}, {@code start}, and inspection stops at the first one that fails.
   */
  public IntBlock eval(int positionCount, BytesRefBlock strBlock, BytesRefBlock substrBlock,
      IntBlock startBlock) {
    try (IntBlock.Builder result = driverContext.blockFactory().newIntBlockBuilder(positionCount)) {
      BytesRef strScratch = new BytesRef();
      BytesRef substrScratch = new BytesRef();
      for (int p = 0; p < positionCount; p++) {
        boolean usable = isSingleValued(strBlock, p)
            && isSingleValued(substrBlock, p)
            && isSingleValued(startBlock, p);
        if (usable == false) {
          result.appendNull();
          continue;
        }
        BytesRef strValue = strBlock.getBytesRef(strBlock.getFirstValueIndex(p), strScratch);
        BytesRef substrValue = substrBlock.getBytesRef(substrBlock.getFirstValueIndex(p), substrScratch);
        int startValue = startBlock.getInt(startBlock.getFirstValueIndex(p));
        result.appendInt(Locate.process(strValue, substrValue, startValue));
      }
      return result.build();
    }
  }

  /**
   * Vector-based evaluation: every position is passed straight to {@code Locate.process},
   * with no null or value-count checks.
   */
  public IntVector eval(int positionCount, BytesRefVector strVector, BytesRefVector substrVector,
      IntVector startVector) {
    try (IntVector.Builder result = driverContext.blockFactory().newIntVectorBuilder(positionCount)) {
      BytesRef strScratch = new BytesRef();
      BytesRef substrScratch = new BytesRef();
      for (int p = 0; p < positionCount; p++) {
        BytesRef strValue = strVector.getBytesRef(p, strScratch);
        BytesRef substrValue = substrVector.getBytesRef(p, substrScratch);
        result.appendInt(Locate.process(strValue, substrValue, startVector.getInt(p)));
      }
      return result.build();
    }
  }

  /**
   * Returns {@code true} when {@code block} is non-null and holds exactly one value at
   * {@code position}. When it holds more than one value a warning is registered before
   * returning {@code false}; a null position or a value count below one returns {@code false}
   * without a warning.
   */
  private boolean isSingleValued(Block block, int position) {
    if (block.isNull(position)) {
      return false;
    }
    int valueCount = block.getValueCount(position);
    if (valueCount == 1) {
      return true;
    }
    if (valueCount > 1) {
      warnings.registerException(new IllegalArgumentException("single-value function encountered multi-value"));
    }
    return false;
  }

  @Override
  public String toString() {
    return "LocateEvaluator[str=" + str + ", substr=" + substr + ", start=" + start + "]";
  }

  /** Closes the three child evaluators. */
  @Override
  public void close() {
    Releasables.closeExpectNoException(str, substr, start);
  }

  /**
   * Builds {@link LocateEvaluator} instances from factories for the three child evaluators.
   */
  static class Factory implements EvalOperator.ExpressionEvaluator.Factory {
    private final Source source;

    private final EvalOperator.ExpressionEvaluator.Factory str;

    private final EvalOperator.ExpressionEvaluator.Factory substr;

    private final EvalOperator.ExpressionEvaluator.Factory start;

    public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory str,
        EvalOperator.ExpressionEvaluator.Factory substr,
        EvalOperator.ExpressionEvaluator.Factory start) {
      this.source = source;
      this.str = str;
      this.substr = substr;
      this.start = start;
    }

    /** Instantiates the child evaluators for {@code context} and wraps them in a new evaluator. */
    @Override
    public LocateEvaluator get(DriverContext context) {
      EvalOperator.ExpressionEvaluator strEvaluator = str.get(context);
      EvalOperator.ExpressionEvaluator substrEvaluator = substr.get(context);
      EvalOperator.ExpressionEvaluator startEvaluator = start.get(context);
      return new LocateEvaluator(source, strEvaluator, substrEvaluator, startEvaluator, context);
    }

    @Override
    public String toString() {
      return "LocateEvaluator[str=" + str + ", substr=" + substr + ", start=" + start + "]";
    }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@
import org.elasticsearch.xpack.esql.expression.function.scalar.string.LTrim;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.Left;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.Length;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.Locate;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.RTrim;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.Replace;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.Right;
Expand Down Expand Up @@ -174,7 +175,8 @@ private FunctionDefinition[][] functions() {
def(StartsWith.class, StartsWith::new, "starts_with"),
def(EndsWith.class, EndsWith::new, "ends_with"),
def(ToLower.class, ToLower::new, "to_lower"),
def(ToUpper.class, ToUpper::new, "to_upper") },
def(ToUpper.class, ToUpper::new, "to_upper"),
def(Locate.class, Locate::new, "locate") },
// date
new FunctionDefinition[] {
def(DateDiff.class, DateDiff::new, "date_diff"),
Expand Down

0 comments on commit 54eeb62

Please sign in to comment.