Skip to content

Commit

Permalink
CNDB-9422: Add CQL function to get the tokens produced by a Lucene analyzer
Browse files Browse the repository at this point in the history
  • Loading branch information
adelapena committed May 14, 2024
1 parent 3aeeac9 commit d027028
Show file tree
Hide file tree
Showing 6 changed files with 149 additions and 162 deletions.
98 changes: 98 additions & 0 deletions src/java/org/apache/cassandra/cql3/functions/IndexFcts.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.cassandra.cql3.functions;

import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

import com.google.common.base.Charsets;

import org.apache.cassandra.db.marshal.ListType;
import org.apache.cassandra.db.marshal.UTF8Type;
import org.apache.cassandra.exceptions.InvalidRequestException;
import org.apache.cassandra.index.sai.analyzer.JSONAnalyzerParser;
import org.apache.cassandra.index.sai.analyzer.LuceneAnalyzer;
import org.apache.cassandra.transport.ProtocolVersion;
import org.apache.cassandra.utils.ByteBufferUtil;
import org.apache.lucene.analysis.Analyzer;

/**
 * Holder for index-related CQL native functions. At present it registers a single function,
 * {@code analyze}.
 */
public abstract class IndexFcts
{
    /**
     * Registers the functions declared in this class.
     *
     * @param functions the collection of native functions to add to
     */
    public static void addFunctionsTo(NativeFunctions functions)
    {
        functions.add(new AnalyzeFunction());
    }

    /**
     * CQL native function to get the tokens produced for a given text value by the analyzer defined by the given
     * JSON options.
     * <p>
     * It takes two text arguments, the value to analyze and the JSON analyzer definition, and returns the produced
     * tokens as a list of text.
     */
    private static class AnalyzeFunction extends NativeScalarFunction
    {
        private static final String NAME = "analyze";
        private static final ListType<String> returnType = ListType.getInstance(UTF8Type.instance, false);

        private AnalyzeFunction()
        {
            super(NAME, returnType, UTF8Type.instance, UTF8Type.instance);
        }

        /**
         * Tokenizes the first argument with the analyzer described by the second argument.
         *
         * @param protocolVersion not used by this function
         * @param parameters the serialized text to analyze (1st) and the serialized JSON analyzer definition (2nd)
         * @return the serialized list of tokens, or {@code null} if the text to analyze is {@code null}
         * @throws InvalidRequestException if the JSON analyzer argument is {@code null}, or if building the analyzer
         * or running the analysis fails for any reason
         */
        @Override
        public ByteBuffer execute(ProtocolVersion protocolVersion, List<ByteBuffer> parameters) throws InvalidRequestException
        {
            // A null text produces a null result, regardless of the analyzer argument.
            if (parameters.get(0) == null)
                return null;
            String text = UTF8Type.instance.compose(parameters.get(0));

            if (parameters.get(1) == null)
                throw new InvalidRequestException("Function " + name + " requires a non-null json_analyzer parameter (2nd argument)");
            String json = UTF8Type.instance.compose(parameters.get(1));

            List<String> tokens = new ArrayList<>();
            try (Analyzer analyzer = JSONAnalyzerParser.parse(json))
            {
                LuceneAnalyzer luceneAnalyzer = new LuceneAnalyzer(UTF8Type.instance, analyzer, new HashMap<>());
                try
                {
                    luceneAnalyzer.reset(ByteBuffer.wrap(text.getBytes(Charsets.UTF_8)));

                    while (luceneAnalyzer.hasNext())
                        tokens.add(ByteBufferUtil.string(luceneAnalyzer.next(), Charsets.UTF_8));
                }
                finally
                {
                    // The resources of a try-with-resources statement are closed before its own catch and finally
                    // blocks run. Ending the analysis in this nested block makes it happen while the analyzer is
                    // still open, and lets the catch below translate any failure of end() as well, instead of
                    // letting it escape as a raw exception.
                    luceneAnalyzer.end();
                }
            }
            catch (Exception ex)
            {
                throw new InvalidRequestException("Function " + name + " unable to analyze text=" + text + " json_analyzer=" + json, ex);
            }

            return returnType.decompose(tokens);
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ public class NativeFunctions
AggregateFcts.addFunctionsTo(this);
BytesConversionFcts.addFunctionsTo(this);
VectorFcts.addFunctionsTo(this);
IndexFcts.addFunctionsTo(this);
}
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import com.google.common.collect.ImmutableList;

import org.apache.cassandra.config.CassandraRelevantProperties;
import org.apache.cassandra.index.sai.virtual.AnalyzerView;
import org.apache.cassandra.index.sai.virtual.IndexesSystemView;
import org.apache.cassandra.index.sai.virtual.SSTablesSystemView;
import org.apache.cassandra.index.sai.virtual.SegmentsSystemView;
Expand Down Expand Up @@ -55,7 +54,6 @@ private static Collection<VirtualTable> buildTables()
.add(new InternodeInboundTable(VIRTUAL_VIEWS))
.add(new SSTablesSystemView(VIRTUAL_VIEWS))
.add(new SegmentsSystemView(VIRTUAL_VIEWS))
.add(new AnalyzerView(VIRTUAL_VIEWS))
.addAll(TableMetricTables.getAll(VIRTUAL_VIEWS));
if (CassandraRelevantProperties.SYSTEM_VIEWS_INCLUDE_ALL.getBoolean()
|| CassandraRelevantProperties.SYSTEM_VIEWS_INCLUDE_LOCAL_AND_PEERS.getBoolean())
Expand Down
104 changes: 0 additions & 104 deletions src/java/org/apache/cassandra/index/sai/virtual/AnalyzerView.java

This file was deleted.

50 changes: 50 additions & 0 deletions test/unit/org/apache/cassandra/cql3/functions/IndexFctsTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.cassandra.cql3.functions;

import org.junit.Test;

import org.apache.cassandra.exceptions.InvalidRequestException;
import org.apache.cassandra.index.sai.SAITester;

/**
 * Tests for the CQL functions registered by {@link IndexFcts}.
 */
public class IndexFctsTest extends SAITester
{
    /**
     * Checks the {@code analyze} function against a regular value, a null value, a null analyzer definition and an
     * analyzer definition that cannot be parsed.
     */
    @Test
    public void testAnalyzeFunction() throws Throwable
    {
        // whitespace tokenizer followed by a porterstem filter
        String analyzerJson = "{\n" +
                              "\t\"tokenizer\":{\"name\":\"whitespace\"},\n" +
                              "\t\"filters\":[{\"name\":\"porterstem\"}]\n" +
                              '}';
        String nullAnalyzerError = "Function system.analyze requires a non-null json_analyzer parameter (2nd argument)";
        String analysisFailedError = "Function system.analyze unable to analyze text=abc json_analyzer=def";

        createTable("CREATE TABLE %s (k int PRIMARY KEY, v text)");
        execute("INSERT INTO %s (k, v) VALUES (1, 'johnny apples seedlings')");
        execute("INSERT INTO %s (k, v) VALUES (2, null)");

        // a regular value is split into stemmed tokens, whereas a null value yields a null result
        assertRows(execute("SELECT k, analyze(v, ?) FROM %s", analyzerJson),
                   row(1, list("johnni", "appl", "seedl")),
                   row(2, null));

        // the analyzer definition is mandatory
        assertInvalidThrowMessage(nullAnalyzerError,
                                  InvalidRequestException.class,
                                  "SELECT analyze(v, null) FROM %s");

        // an analyzer definition that is not valid is reported together with the offending arguments
        assertInvalidThrowMessage(analysisFailedError,
                                  InvalidRequestException.class,
                                  "SELECT analyze('abc', 'def') FROM %s");
    }
}

This file was deleted.

1 comment on commit d027028

@cassci-bot
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Approved by butler; Build 1: ran 14933 tests with 0 failures and 111 skipped.; All tests passed in all runs

Please sign in to comment.