forked from apache/cassandra
-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
CNDB-9422: Add CQL function to get the tokens produced by a Lucene analyzer
- Loading branch information
Showing
6 changed files
with
149 additions
and
162 deletions.
There are no files selected for viewing
98 changes: 98 additions & 0 deletions
98
src/java/org/apache/cassandra/cql3/functions/IndexFcts.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.apache.cassandra.cql3.functions; | ||
|
||
import java.nio.ByteBuffer; | ||
import java.util.ArrayList; | ||
import java.util.HashMap; | ||
import java.util.List; | ||
|
||
import com.google.common.base.Charsets; | ||
|
||
import org.apache.cassandra.db.marshal.ListType; | ||
import org.apache.cassandra.db.marshal.UTF8Type; | ||
import org.apache.cassandra.exceptions.InvalidRequestException; | ||
import org.apache.cassandra.index.sai.analyzer.JSONAnalyzerParser; | ||
import org.apache.cassandra.index.sai.analyzer.LuceneAnalyzer; | ||
import org.apache.cassandra.transport.ProtocolVersion; | ||
import org.apache.cassandra.utils.ByteBufferUtil; | ||
import org.apache.lucene.analysis.Analyzer; | ||
|
||
public abstract class IndexFcts | ||
{ | ||
public static void addFunctionsTo(NativeFunctions functions) | ||
{ | ||
functions.add(new AnalyzeFunction()); | ||
} | ||
|
||
/** | ||
* CQL native function to get the tokens produced for given text value and the analyzer defined by the given JSON options. | ||
*/ | ||
private static class AnalyzeFunction extends NativeScalarFunction | ||
{ | ||
private static final String NAME = "analyze"; | ||
private static final ListType<String> returnType = ListType.getInstance(UTF8Type.instance, false); | ||
|
||
private AnalyzeFunction() | ||
{ | ||
super(NAME, returnType, UTF8Type.instance, UTF8Type.instance); | ||
} | ||
|
||
@Override | ||
public ByteBuffer execute(ProtocolVersion protocolVersion, List<ByteBuffer> parameters) throws InvalidRequestException | ||
{ | ||
if (parameters.get(0) == null) | ||
return null; | ||
String text = UTF8Type.instance.compose(parameters.get(0)); | ||
|
||
if (parameters.get(1) == null) | ||
throw new InvalidRequestException("Function " + name + " requires a non-null json_analyzer parameter (2nd argument)"); | ||
String json = UTF8Type.instance.compose(parameters.get(1)); | ||
|
||
LuceneAnalyzer luceneAnalyzer = null; | ||
List<String> tokens = new ArrayList<>(); | ||
try (Analyzer analyzer = JSONAnalyzerParser.parse(json)) | ||
{ | ||
luceneAnalyzer = new LuceneAnalyzer(UTF8Type.instance, analyzer, new HashMap<>()); | ||
|
||
ByteBuffer toAnalyze = ByteBuffer.wrap(text.getBytes(Charsets.UTF_8)); | ||
luceneAnalyzer.reset(toAnalyze); | ||
ByteBuffer analyzed; | ||
|
||
while (luceneAnalyzer.hasNext()) | ||
{ | ||
analyzed = luceneAnalyzer.next(); | ||
tokens.add(ByteBufferUtil.string(analyzed, Charsets.UTF_8)); | ||
} | ||
} | ||
catch (Exception ex) | ||
{ | ||
throw new InvalidRequestException("Function " + name + " unable to analyze text=" + text + " json_analyzer=" + json, ex); | ||
} | ||
finally | ||
{ | ||
if (luceneAnalyzer != null) | ||
{ | ||
luceneAnalyzer.end(); | ||
} | ||
} | ||
|
||
return returnType.decompose(tokens); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
104 changes: 0 additions & 104 deletions
104
src/java/org/apache/cassandra/index/sai/virtual/AnalyzerView.java
This file was deleted.
Oops, something went wrong.
50 changes: 50 additions & 0 deletions
50
test/unit/org/apache/cassandra/cql3/functions/IndexFctsTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.apache.cassandra.cql3.functions; | ||
|
||
import org.junit.Test; | ||
|
||
import org.apache.cassandra.exceptions.InvalidRequestException; | ||
import org.apache.cassandra.index.sai.SAITester; | ||
|
||
public class IndexFctsTest extends SAITester | ||
{ | ||
@Test | ||
public void testAnalyzeFunction() throws Throwable | ||
{ | ||
createTable("CREATE TABLE %s (k int PRIMARY KEY, v text)"); | ||
execute("INSERT INTO %s (k, v) VALUES (1, 'johnny apples seedlings')"); | ||
execute("INSERT INTO %s (k, v) VALUES (2, null)"); | ||
|
||
assertRows(execute("SELECT k, analyze(v, ?) FROM %s", | ||
"{\n" + | ||
"\t\"tokenizer\":{\"name\":\"whitespace\"},\n" + | ||
"\t\"filters\":[{\"name\":\"porterstem\"}]\n" + | ||
'}'), | ||
row(1, list("johnni", "appl", "seedl")), | ||
row(2, null)); | ||
|
||
assertInvalidThrowMessage("Function system.analyze requires a non-null json_analyzer parameter (2nd argument)", | ||
InvalidRequestException.class, | ||
"SELECT analyze(v, null) FROM %s"); | ||
|
||
assertInvalidThrowMessage("Function system.analyze unable to analyze text=abc json_analyzer=def", | ||
InvalidRequestException.class, | ||
"SELECT analyze('abc', 'def') FROM %s"); | ||
} | ||
} |
56 changes: 0 additions & 56 deletions
56
test/unit/org/apache/cassandra/index/sai/virtual/AnalyzerViewTest.java
This file was deleted.
Oops, something went wrong.
d027028
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Approved by butler; Build 1: ran 14933 tests with 0 failures and 111 skipped.; All tests passed in all runs