Skip to content

Commit

Permalink
Expose Druid functions in INFORMATION_SCHEMA.ROUTINES table. (#14378)
Browse files Browse the repository at this point in the history
* Add INFORMATION_SCHEMA.ROUTINES to expose Druid operators and functions.

* checkstyle

* remove IS_DETERMISITIC.

* test

* cleanup test

* remove logs and simplify

* fixup unit test

* Add docs for INFORMATION_SCHEMA.ROUTINES table.

* Update test and add another SQL query.

* add stuff to .spelling and checkstyle fix.

* Add more tests for custom operators.

* checkstyle and comment.

* Some naming cleanup.

* Add FUNCTION_ID

* The different Calcite function syntax enums get translated to FUNCTION

* Update docs.

* Cleanup markdown table.

* fixup test.

* fixup intellij inspection

* Review comment: nullable column; add a function to determine function syntax.

* More tests; add non-function syntax operators.

* More unit tests. Also add a separate test for DruidOperatorTable.

* actually just validate non-zero count.

* switch up the order

* checkstyle fixes.
  • Loading branch information
abhishekrb19 committed Jun 13, 2023
1 parent 61120dc commit b8495d4
Show file tree
Hide file tree
Showing 9 changed files with 523 additions and 5 deletions.
20 changes: 20 additions & 0 deletions docs/querying/sql-metadata-tables.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,26 @@ SELECT "ORDINAL_POSITION", "COLUMN_NAME", "IS_NULLABLE", "DATA_TYPE", "JDBC_TYPE
FROM INFORMATION_SCHEMA.COLUMNS
WHERE "TABLE_NAME" = 'foo'
```
### ROUTINES table
`INFORMATION_SCHEMA.ROUTINES` provides a list of all known functions.

|Column|Type| Notes|
|------|----|------|
|ROUTINE_CATALOG|VARCHAR| The catalog that contains the routine. Always set to `druid`|
|ROUTINE_SCHEMA|VARCHAR| The schema that contains the routine. Always set to `INFORMATION_SCHEMA`|
|ROUTINE_NAME|VARCHAR| The routine name|
|ROUTINE_TYPE|VARCHAR| The routine type. Always set to `FUNCTION`|
|IS_AGGREGATOR|VARCHAR| If the routine is an aggregator function, the value is `YES`, else `NO`|
|SIGNATURES|VARCHAR| One or more routine signatures|

For example, this query returns information about all the aggregator functions:

```sql
SELECT "ROUTINE_CATALOG", "ROUTINE_SCHEMA", "ROUTINE_NAME", "ROUTINE_TYPE", "IS_AGGREGATOR", "SIGNATURES"
FROM "INFORMATION_SCHEMA"."ROUTINES"
WHERE "IS_AGGREGATOR" = 'YES'
```


## SYSTEM SCHEMA

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -528,10 +528,21 @@ public List<SqlOperator> getOperatorList()
return retVal;
}

/**
 * Determines whether the given {@link SqlSyntax} denotes a function.
 * Prefix, postfix, and binary operator syntaxes are not functions; every other
 * syntax (FUNCTION, FUNCTION_STAR, FUNCTION_ID, SPECIAL, INTERNAL, ...) is
 * treated as function syntax.
 *
 * @param syntax the syntax to classify
 * @return {@code true} if {@code syntax} denotes a function, {@code false} otherwise
 */
public static boolean isFunctionSyntax(final SqlSyntax syntax)
{
  return !(syntax == SqlSyntax.PREFIX
           || syntax == SqlSyntax.POSTFIX
           || syntax == SqlSyntax.BINARY);
}

private static SqlSyntax normalizeSyntax(final SqlSyntax syntax)
{
// Treat anything other than prefix/suffix/binary syntax as function syntax.
if (syntax == SqlSyntax.PREFIX || syntax == SqlSyntax.BINARY || syntax == SqlSyntax.POSTFIX) {
if (!DruidOperatorTable.isFunctionSyntax(syntax)) {
return syntax;
} else {
return SqlSyntax.FUNCTION;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
import org.apache.calcite.schema.TableMacro;
import org.apache.calcite.schema.impl.AbstractSchema;
import org.apache.calcite.schema.impl.AbstractTable;
import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.druid.java.util.emitter.EmittingLogger;
import org.apache.druid.server.security.Action;
Expand All @@ -52,14 +53,17 @@
import org.apache.druid.server.security.Resource;
import org.apache.druid.server.security.ResourceAction;
import org.apache.druid.sql.calcite.planner.Calcites;
import org.apache.druid.sql.calcite.planner.DruidOperatorTable;
import org.apache.druid.sql.calcite.planner.DruidTypeSystem;
import org.apache.druid.sql.calcite.planner.PlannerContext;
import org.apache.druid.sql.calcite.table.DruidTable;
import org.apache.druid.sql.calcite.table.RowSignatures;

import javax.annotation.Nullable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;

Expand All @@ -68,9 +72,12 @@ public class InformationSchema extends AbstractSchema
private static final EmittingLogger log = new EmittingLogger(InformationSchema.class);

private static final String CATALOG_NAME = "druid";
private static final String INFORMATION_SCHEMA_NAME = "INFORMATION_SCHEMA";

private static final String SCHEMATA_TABLE = "SCHEMATA";
private static final String TABLES_TABLE = "TABLES";
private static final String COLUMNS_TABLE = "COLUMNS";
private static final String ROUTINES_TABLE = "ROUTINES";

private static class RowTypeBuilder
{
Expand Down Expand Up @@ -131,6 +138,14 @@ public RelDataType build()
.add("COLLATION_NAME", SqlTypeName.VARCHAR, true)
.add("JDBC_TYPE", SqlTypeName.BIGINT)
.build();
private static final RelDataType ROUTINES_SIGNATURE = new RowTypeBuilder()
.add("ROUTINE_CATALOG", SqlTypeName.VARCHAR)
.add("ROUTINE_SCHEMA", SqlTypeName.VARCHAR)
.add("ROUTINE_NAME", SqlTypeName.VARCHAR)
.add("ROUTINE_TYPE", SqlTypeName.VARCHAR)
.add("IS_AGGREGATOR", SqlTypeName.VARCHAR)
.add("SIGNATURES", SqlTypeName.VARCHAR, true)
.build();
private static final RelDataTypeSystem TYPE_SYSTEM = RelDataTypeSystem.DEFAULT;

private static final String INFO_TRUE = "YES";
Expand All @@ -143,14 +158,16 @@ public RelDataType build()
@Inject
public InformationSchema(
@Named(DruidCalciteSchemaModule.INCOMPLETE_SCHEMA) final DruidSchemaCatalog rootSchema,
final AuthorizerMapper authorizerMapper
final AuthorizerMapper authorizerMapper,
final DruidOperatorTable operatorTable
)
{
this.rootSchema = Preconditions.checkNotNull(rootSchema, "rootSchema");
this.tableMap = ImmutableMap.of(
SCHEMATA_TABLE, new SchemataTable(),
TABLES_TABLE, new TablesTable(),
COLUMNS_TABLE, new ColumnsTable()
COLUMNS_TABLE, new ColumnsTable(),
ROUTINES_TABLE, new RoutinesTable(operatorTable)
);
this.authorizerMapper = authorizerMapper;
}
Expand Down Expand Up @@ -469,6 +486,63 @@ public Object[] apply(final RelDataTypeField field)
}
}

/**
 * INFORMATION_SCHEMA.ROUTINES: one row per function-syntax operator known to the
 * {@link DruidOperatorTable}. Non-function operators (prefix/postfix/binary) are
 * excluded.
 */
static class RoutinesTable extends AbstractTable implements ScannableTable
{
  // All rows report this as ROUTINE_TYPE.
  private static final String FUNCTION = "FUNCTION";

  private final DruidOperatorTable operatorTable;

  public RoutinesTable(final DruidOperatorTable operatorTable)
  {
    this.operatorTable = operatorTable;
  }

  @Override
  public RelDataType getRowType(RelDataTypeFactory typeFactory)
  {
    return ROUTINES_SIGNATURE;
  }

  @Override
  public Statistic getStatistic()
  {
    return Statistics.UNKNOWN;
  }

  @Override
  public TableType getJdbcTableType()
  {
    return TableType.SYSTEM_TABLE;
  }

  @Override
  public Enumerable<Object[]> scan(DataContext root)
  {
    final List<Object[]> rows = new ArrayList<>();

    for (final SqlOperator operator : operatorTable.getOperatorList()) {
      // Only function-syntax operators belong in the ROUTINES table.
      if (DruidOperatorTable.isFunctionSyntax(operator.getSyntax())) {
        // SIGNATURES is nullable: operators without an operand type checker have none.
        final String signatures =
            operator.getOperandTypeChecker() == null ? null : operator.getAllowedSignatures();
        rows.add(
            new Object[]{
                CATALOG_NAME,                                          // ROUTINE_CATALOG
                INFORMATION_SCHEMA_NAME,                               // ROUTINE_SCHEMA
                operator.getName(),                                    // ROUTINE_NAME
                FUNCTION,                                              // ROUTINE_TYPE
                operator.isAggregator() ? INFO_TRUE : INFO_FALSE,      // IS_AGGREGATOR
                signatures                                             // SIGNATURES
            }
        );
      }
    }

    return Linq4j.asEnumerable(rows);
  }
}

/**
* Return a view macro that may or may not be defined in a certain schema. If it's not defined, returns null.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ public void testInformationSchemaTables()
.add(new Object[]{"druid", CalciteTests.USERVISITDATASOURCE, "TABLE", "NO", "NO"})
.add(new Object[]{"druid", "wikipedia", "TABLE", "NO", "NO"})
.add(new Object[]{"INFORMATION_SCHEMA", "COLUMNS", "SYSTEM_TABLE", "NO", "NO"})
.add(new Object[]{"INFORMATION_SCHEMA", "ROUTINES", "SYSTEM_TABLE", "NO", "NO"})
.add(new Object[]{"INFORMATION_SCHEMA", "SCHEMATA", "SYSTEM_TABLE", "NO", "NO"})
.add(new Object[]{"INFORMATION_SCHEMA", "TABLES", "SYSTEM_TABLE", "NO", "NO"})
.add(new Object[]{"lookup", "lookyloo", "TABLE", "YES", "YES"})
Expand Down Expand Up @@ -262,6 +263,7 @@ public void testInformationSchemaTables()
.add(new Object[]{"druid", CalciteTests.USERVISITDATASOURCE, "TABLE", "NO", "NO"})
.add(new Object[]{"druid", "wikipedia", "TABLE", "NO", "NO"})
.add(new Object[]{"INFORMATION_SCHEMA", "COLUMNS", "SYSTEM_TABLE", "NO", "NO"})
.add(new Object[]{"INFORMATION_SCHEMA", "ROUTINES", "SYSTEM_TABLE", "NO", "NO"})
.add(new Object[]{"INFORMATION_SCHEMA", "SCHEMATA", "SYSTEM_TABLE", "NO", "NO"})
.add(new Object[]{"INFORMATION_SCHEMA", "TABLES", "SYSTEM_TABLE", "NO", "NO"})
.add(new Object[]{"lookup", "lookyloo", "TABLE", "YES", "YES"})
Expand Down Expand Up @@ -429,6 +431,51 @@ public void testAggregatorsOnInformationSchemaColumns()
);
}

@Test
public void testFilterAggregatorFunctionsOnInformationSchemaRoutines()
{
// Counts the routines flagged as aggregators in INFORMATION_SCHEMA.ROUTINES.
// The empty native-query list indicates the metadata schema answers this directly,
// without planning a native Druid query.
notMsqCompatible();
testQuery(
"SELECT\n"
+ "  COUNT(*)\n"
+ "FROM INFORMATION_SCHEMA.ROUTINES\n"
+ "WHERE IS_AGGREGATOR = 'YES'",
ImmutableList.of(),
ImmutableList.of(
// NOTE(review): 30 is the current count of aggregator functions in the test
// operator table; this expected value must change whenever aggregators are
// added or removed.
new Object[]{30L}
)
);
}

@Test
public void testFilterScalarFunctionsOnInformationSchemaRoutines()
{
// Counts the non-aggregator routines in INFORMATION_SCHEMA.ROUTINES. The empty
// native-query list indicates the metadata schema answers this directly.
notMsqCompatible();
testQuery(
"SELECT\n"
+ "  COUNT(*)\n"
+ "FROM INFORMATION_SCHEMA.ROUTINES\n"
+ "WHERE IS_AGGREGATOR = 'NO'",
ImmutableList.of(),
ImmutableList.of(
// NOTE(review): 152 is the current count of non-aggregator functions in the test
// operator table; this expected value must change as operators are added/removed.
new Object[]{152L}
)
);
}

@Test
public void testNonExistentSchemaOnInformationSchemaRoutines()
{
// Filtering on a schema name that does not exist should yield an empty result set,
// not an error.
notMsqCompatible();
testQuery(
"SELECT *\n"
+ "FROM INFORMATION_SCHEMA.ROUTINES\n"
+ "WHERE ROUTINE_SCHEMA = 'boo'",
ImmutableList.of(),
ImmutableList.of()
);
}

@Test
public void testTopNLimitWrapping()
{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.druid.sql.calcite.planner;

import com.google.common.collect.ImmutableSet;
import org.apache.calcite.sql.SqlFunctionCategory;
import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.SqlSyntax;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.sql.type.SqlTypeFamily;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.sql.calcite.expression.DirectOperatorConversion;
import org.apache.druid.sql.calcite.expression.OperatorConversions;
import org.apache.druid.sql.calcite.expression.SqlOperatorConversion;
import org.apache.druid.sql.calcite.table.RowSignatures;
import org.junit.Assert;
import org.junit.Test;

import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * Unit tests for {@link DruidOperatorTable}: default (built-in) operator loading,
 * function-syntax classification, and custom operator registration.
 */
public class DruidOperatorTableTest
{
  @Test
  public void testBuiltInOperatorTable()
  {
    // A table built with no custom aggregators or operator conversions should still
    // expose all of the built-in operators.
    final DruidOperatorTable operatorTable = new DruidOperatorTable(ImmutableSet.of(), ImmutableSet.of());
    final List<SqlOperator> operatorList = operatorTable.getOperatorList();
    Assert.assertNotNull(operatorList);
    Assert.assertFalse("Built-in operators should be loaded by default", operatorList.isEmpty());
  }

  @Test
  public void testIsFunctionSyntax()
  {
    // Every function-like syntax is classified as a function.
    Assert.assertTrue(DruidOperatorTable.isFunctionSyntax(SqlSyntax.FUNCTION));
    Assert.assertTrue(DruidOperatorTable.isFunctionSyntax(SqlSyntax.FUNCTION_STAR));
    Assert.assertTrue(DruidOperatorTable.isFunctionSyntax(SqlSyntax.FUNCTION_ID));
    Assert.assertTrue(DruidOperatorTable.isFunctionSyntax(SqlSyntax.SPECIAL));
    Assert.assertTrue(DruidOperatorTable.isFunctionSyntax(SqlSyntax.INTERNAL));

    // Prefix, postfix, and binary operator syntaxes are not functions.
    Assert.assertFalse(DruidOperatorTable.isFunctionSyntax(SqlSyntax.BINARY));
    Assert.assertFalse(DruidOperatorTable.isFunctionSyntax(SqlSyntax.PREFIX));
    Assert.assertFalse(DruidOperatorTable.isFunctionSyntax(SqlSyntax.POSTFIX));
  }

  @Test
  public void testCustomOperatorTable()
  {
    // A custom function-syntax operator returning a complex type.
    final SqlOperator operator1 = OperatorConversions
        .operatorBuilder("FOO")
        .operandTypes(SqlTypeFamily.ANY)
        .requiredOperands(0)
        .returnTypeInference(
            opBinding -> RowSignatures.makeComplexType(
                opBinding.getTypeFactory(),
                ColumnType.ofComplex("fooComplex"),
                true
            )
        )
        .functionCategory(SqlFunctionCategory.USER_DEFINED_FUNCTION)
        .build();

    // A binary operator, which must not be classified as function syntax.
    final SqlOperator operator2 = SqlStdOperatorTable.PLUS;

    final Set<SqlOperatorConversion> extractionOperators = new HashSet<>();
    extractionOperators.add(new DirectOperatorConversion(operator1, "foo_fn"));
    extractionOperators.add(new DirectOperatorConversion(operator2, "plus_is_not_a_fn"));

    final DruidOperatorTable operatorTable = new DruidOperatorTable(ImmutableSet.of(), extractionOperators);
    final List<SqlOperator> operatorList = operatorTable.getOperatorList();
    Assert.assertNotNull(operatorList);
    Assert.assertTrue("We should have at least two operators -- the ones we loaded above plus the built-in"
        + " operators that gets loaded by default", operatorList.size() > 2);

    // Both custom operators are registered and classified by their syntax.
    Assert.assertTrue(operatorList.contains(operator1));
    Assert.assertTrue(operatorList.contains(operator2));

    Assert.assertTrue(DruidOperatorTable.isFunctionSyntax(operator1.getSyntax()));
    Assert.assertFalse(DruidOperatorTable.isFunctionSyntax(operator2.getSyntax()));
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
import org.apache.druid.server.security.AuthorizerMapper;
import org.apache.druid.server.security.Escalator;
import org.apache.druid.sql.calcite.planner.CatalogResolver;
import org.apache.druid.sql.calcite.planner.DruidOperatorTable;
import org.apache.druid.sql.calcite.planner.PlannerConfig;
import org.apache.druid.sql.calcite.util.CalciteTestBase;
import org.apache.druid.sql.calcite.view.ViewManager;
Expand Down Expand Up @@ -92,6 +93,8 @@ public class DruidCalciteSchemaModuleTest extends CalciteTestBase
private LookupReferencesManager lookupReferencesManager;
@Mock
private SegmentManager segmentManager;
@Mock
private DruidOperatorTable druidOperatorTable;

private DruidCalciteSchemaModule target;
private Injector injector;
Expand All @@ -112,6 +115,7 @@ public void setUp()
binder.bind(AuthorizerMapper.class).toInstance(authorizerMapper);
binder.bind(FilteredServerInventoryView.class).toInstance(serverInventoryView);
binder.bind(SegmentManager.class).toInstance(segmentManager);
binder.bind(DruidOperatorTable.class).toInstance(druidOperatorTable);
binder.bind(DruidLeaderClient.class)
.annotatedWith(Coordinator.class)
.toInstance(coordinatorDruidLeaderClient);
Expand Down
Loading

0 comments on commit b8495d4

Please sign in to comment.