Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SQL: Improve correctness of SYS COLUMNS & TYPES #30418

Merged
merged 10 commits into from May 11, 2018
6 changes: 6 additions & 0 deletions docs/CHANGELOG.asciidoc
Expand Up @@ -111,6 +111,9 @@ Rollup::
* Validate timezone in range queries to ensure they match the selected job when
searching ({pull}30338[#30338])

SQL::
* Improve correctness of SYS COLUMNS & SYS TYPES commands ({pull}30418[#30418])

[float]
=== Regressions
Fail snapshot operations early when creating or deleting a snapshot on a repository that has been
Expand Down Expand Up @@ -181,6 +184,9 @@ Rollup::
* Validate timezone in range queries to ensure they match the selected job when
searching ({pull}30338[#30338])

SQL::
* Improve correctness of SYS COLUMNS & SYS TYPES commands ({pull}30418[#30418])

//[float]
//=== Regressions

Expand Down
Expand Up @@ -25,8 +25,8 @@ public enum DataType {
SHORT( JDBCType.SMALLINT, Short.class, Short.BYTES, 5, 6, true, false, true),
INTEGER( JDBCType.INTEGER, Integer.class, Integer.BYTES, 10, 11, true, false, true),
LONG( JDBCType.BIGINT, Long.class, Long.BYTES, 19, 20, true, false, true),
// 53 bits defaultPrecision ~ 16(15.95) decimal digits (53log10(2)),
DOUBLE( JDBCType.DOUBLE, Double.class, Double.BYTES, 16, 25, false, true, true),
// 53 bits defaultPrecision ~ 15(15.95) decimal digits (53log10(2)),
DOUBLE( JDBCType.DOUBLE, Double.class, Double.BYTES, 15, 25, false, true, true),
// 24 bits defaultPrecision - 24*log10(2) =~ 7 (7.22)
FLOAT( JDBCType.REAL, Float.class, Float.BYTES, 7, 15, false, true, true),
HALF_FLOAT( JDBCType.FLOAT, Double.class, Double.BYTES, 16, 25, false, true, true),
Expand All @@ -37,7 +37,10 @@ public enum DataType {
OBJECT( JDBCType.STRUCT, null, -1, 0, 0),
NESTED( JDBCType.STRUCT, null, -1, 0, 0),
BINARY( JDBCType.VARBINARY, byte[].class, -1, Integer.MAX_VALUE, 0),
DATE( JDBCType.TIMESTAMP, Timestamp.class, Long.BYTES, 19, 20);
// since ODBC and JDBC interpret precision for Date as display size,
// the precision is 23 (number of chars in ISO8601 with millis) + Z (the UTC timezone)
// see https://github.com/elastic/elasticsearch/issues/30386#issuecomment-386807288
DATE( JDBCType.TIMESTAMP, Timestamp.class, Long.BYTES, 24, 24);
// @formatter:on

private static final Map<JDBCType, DataType> jdbcToEs;
Expand Down Expand Up @@ -75,7 +78,7 @@ public enum DataType {
* <p>
* Specified column size. For numeric data, this is the maximum precision. For character
* data, this is the length in characters. For datetime datatypes, this is the length in characters of the
* String representation (assuming the maximum allowed defaultPrecision of the fractional seconds component).
* String representation (assuming the maximum allowed defaultPrecision of the fractional milliseconds component).
*/
public final int defaultPrecision;

Expand Down
Expand Up @@ -17,6 +17,7 @@
import org.elasticsearch.xpack.sql.tree.Location;
import org.elasticsearch.xpack.sql.tree.NodeInfo;
import org.elasticsearch.xpack.sql.type.DataType;
import org.elasticsearch.xpack.sql.type.DataTypes;
import org.elasticsearch.xpack.sql.type.EsField;

import java.sql.DatabaseMetaData;
Expand All @@ -29,7 +30,6 @@

import static java.util.Arrays.asList;
import static org.elasticsearch.xpack.sql.type.DataType.INTEGER;
import static org.elasticsearch.xpack.sql.type.DataType.NULL;
import static org.elasticsearch.xpack.sql.type.DataType.SHORT;

/**
Expand Down Expand Up @@ -133,21 +133,17 @@ static void fillInRows(String clusterName, String indexName, Map<String, EsField
type.size,
// no DECIMAL support
null,
// RADIX - Determines how numbers returned by COLUMN_SIZE and DECIMAL_DIGITS should be interpreted.
// 10 means they represent the number of decimal digits allowed for the column.
// 2 means they represent the number of bits allowed for the column.
// null means radix is not applicable for the given type.
type.isInteger ? Integer.valueOf(10) : type.isRational ? Integer.valueOf(2) : null,
DataTypes.metaSqlRadix(type),
// everything is nullable
DatabaseMetaData.columnNullable,
// no remarks
null,
// no column def
null,
// SQL_DATA_TYPE apparently needs to be same as DATA_TYPE except for datetime and interval data types
type.jdbcType.getVendorTypeNumber(),
DataTypes.metaSqlDataType(type),
// SQL_DATETIME_SUB ?
null,
DataTypes.metaSqlDateTimeSub(type),
// char octet length
type.isString() || type == DataType.BINARY ? type.size : null,
// position
Expand Down
Expand Up @@ -14,6 +14,7 @@
import org.elasticsearch.xpack.sql.tree.Location;
import org.elasticsearch.xpack.sql.tree.NodeInfo;
import org.elasticsearch.xpack.sql.type.DataType;
import org.elasticsearch.xpack.sql.type.DataTypes;

import java.sql.DatabaseMetaData;
import java.util.Comparator;
Expand Down Expand Up @@ -70,6 +71,7 @@ public final void execute(SqlSession session, ActionListener<SchemaRowSet> liste
.sorted(Comparator.comparing(t -> t.jdbcType))
.map(t -> asList(t.esType.toUpperCase(Locale.ROOT),
t.jdbcType.getVendorTypeNumber(),
//https://docs.microsoft.com/en-us/sql/odbc/reference/appendixes/column-size?view=sql-server-2017
t.defaultPrecision,
"'",
"'",
Expand All @@ -83,16 +85,17 @@ public final void execute(SqlSession session, ActionListener<SchemaRowSet> liste
// only numerics are signed
!t.isSigned(),
//no fixed precision scale SQL_FALSE
false,
null,
null,
null,
Boolean.FALSE,
// not auto-incremented
Boolean.FALSE,
null,
DataTypes.metaSqlMinimumScale(t),
DataTypes.metaSqlMaximumScale(t),
// SQL_DATA_TYPE - ODBC wants this to be not null
0,
null,
DataTypes.metaSqlDataType(t),
DataTypes.metaSqlDateTimeSub(t),
// Radix
t.isInteger ? Integer.valueOf(10) : (t.isRational ? Integer.valueOf(2) : null),
DataTypes.metaSqlRadix(t),
null
))
.collect(toList());
Expand Down
Expand Up @@ -51,4 +51,71 @@ public static DataType fromJava(Object value) {
}
throw new SqlIllegalArgumentException("No idea what's the DataType for {}", value.getClass());
}
}

//
// Metadata methods, mainly for ODBC.
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bpintea Can you please double check whether the metadata methods are correct?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, with the date fix, it's all correct.

// As these are fairly obscure and limited in use, there is no point to promote them as a full type methods
// hence why they appear here as utility methods.
//

// https://docs.microsoft.com/en-us/sql/relational-databases/native-client-odbc-date-time/metadata-catalog
// https://github.com/elastic/elasticsearch/issues/30386
public static Integer metaSqlDataType(DataType t) {
if (t == DataType.DATE) {
// ODBC SQL_DATETME
return Integer.valueOf(9);
}
// this is safe since the vendor SQL types are short despite the return value
return t.jdbcType.getVendorTypeNumber();
}

// https://github.com/elastic/elasticsearch/issues/30386
// https://docs.microsoft.com/en-us/sql/odbc/reference/syntax/sqlgettypeinfo-function?view=sql-server-2017
public static Integer metaSqlDateTimeSub(DataType t) {
if (t == DataType.DATE) {
// ODBC SQL_CODE_TIMESTAMP
return Integer.valueOf(3);
}
// ODBC null
return 0;
}

// https://docs.microsoft.com/en-us/sql/odbc/reference/appendixes/decimal-digits?view=sql-server-2017
public static Short metaSqlMinimumScale(DataType t) {
// TODO: return info for HALF/SCALED_FLOATS (should be based on field not type)
if (t == DataType.DATE) {
return Short.valueOf((short) 3);
}
if (t.isInteger) {
return Short.valueOf((short) 0);
}
// minimum scale?
if (t.isRational) {
return Short.valueOf((short) 0);
}
return null;
}

public static Short metaSqlMaximumScale(DataType t) {
// TODO: return info for HALF/SCALED_FLOATS (should be based on field not type)
if (t == DataType.DATE) {
return Short.valueOf((short) 3);
}
if (t.isInteger) {
return Short.valueOf((short) 0);
}
if (t.isRational) {
return Short.valueOf((short) t.defaultPrecision);
}
return null;
}

// https://docs.microsoft.com/en-us/sql/odbc/reference/syntax/sqlgettypeinfo-function?view=sql-server-2017
public static Integer metaSqlRadix(DataType t) {
// RADIX - Determines how numbers returned by COLUMN_SIZE and DECIMAL_DIGITS should be interpreted.
// 10 means they represent the number of decimal digits allowed for the column.
// 2 means they represent the number of bits allowed for the column.
// null means radix is not applicable for the given type.
return t.isInteger ? Integer.valueOf(10) : (t.isRational ? Integer.valueOf(2) : null);
}
}
Expand Up @@ -25,13 +25,6 @@ public DateEsField(String name, Map<String, EsField> properties, boolean hasDocV
this.formats = CollectionUtils.isEmpty(formats) ? DEFAULT_FORMAT : Arrays.asList(formats);
}

@Override
public int getPrecision() {
// same as Long
// TODO: based this on format string
return 19;
}

public List<String> getFormats() {
return formats;
}
Expand Down
Expand Up @@ -38,6 +38,13 @@ public void testSysColumns() {
assertEquals(null, radix(row));
assertEquals(Integer.MAX_VALUE, bufferLength(row));

row = rows.get(4);
assertEquals("date", name(row));
assertEquals(Types.TIMESTAMP, sqlType(row));
assertEquals(null, radix(row));
assertEquals(24, precision(row));
assertEquals(8, bufferLength(row));

row = rows.get(7);
assertEquals("some.dotted", name(row));
assertEquals(Types.STRUCT, sqlType(row));
Expand All @@ -59,6 +66,10 @@ private static Object sqlType(List<?> list) {
return list.get(4);
}

private static Object precision(List<?> list) {
return list.get(6);
}

private static Object bufferLength(List<?> list) {
return list.get(7);
}
Expand Down
Expand Up @@ -68,6 +68,8 @@ public void testSysTypes() throws Exception {
assertFalse(r.column(9, Boolean.class));
// make sure precision is returned as boolean (not int)
assertFalse(r.column(10, Boolean.class));
// no auto-increment
assertFalse(r.column(11, Boolean.class));

for (int i = 0; i < r.size(); i++) {
assertEquals(names.get(i), r.column(0));
Expand Down
@@ -0,0 +1,58 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.sql.type;

import org.elasticsearch.test.ESTestCase;

import static org.elasticsearch.xpack.sql.type.DataType.DATE;
import static org.elasticsearch.xpack.sql.type.DataType.FLOAT;
import static org.elasticsearch.xpack.sql.type.DataType.KEYWORD;
import static org.elasticsearch.xpack.sql.type.DataType.LONG;
import static org.elasticsearch.xpack.sql.type.DataTypes.metaSqlDataType;
import static org.elasticsearch.xpack.sql.type.DataTypes.metaSqlDateTimeSub;
import static org.elasticsearch.xpack.sql.type.DataTypes.metaSqlMaximumScale;
import static org.elasticsearch.xpack.sql.type.DataTypes.metaSqlMinimumScale;
import static org.elasticsearch.xpack.sql.type.DataTypes.metaSqlRadix;

public class DataTypesTests extends ESTestCase {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bpintea This test might be easier to read with regards to the expected behavior.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The tests are inline with my understanding as well.


public void testMetaDataType() {
assertEquals(Integer.valueOf(9), metaSqlDataType(DATE));
DataType t = randomDataTypeNoDate();
assertEquals(t.jdbcType.getVendorTypeNumber(), metaSqlDataType(t));
}

public void testMetaDateTypeSub() {
assertEquals(Integer.valueOf(3), metaSqlDateTimeSub(DATE));
assertEquals(Integer.valueOf(0), metaSqlDateTimeSub(randomDataTypeNoDate()));
}

public void testMetaMinimumScale() {
assertEquals(Short.valueOf((short) 3), metaSqlMinimumScale(DATE));
assertEquals(Short.valueOf((short) 0), metaSqlMinimumScale(LONG));
assertEquals(Short.valueOf((short) 0), metaSqlMinimumScale(FLOAT));
assertNull(metaSqlMinimumScale(KEYWORD));
}

public void testMetaMaximumScale() {
assertEquals(Short.valueOf((short) 3), metaSqlMaximumScale(DATE));
assertEquals(Short.valueOf((short) 0), metaSqlMaximumScale(LONG));
assertEquals(Short.valueOf((short) FLOAT.defaultPrecision), metaSqlMaximumScale(FLOAT));
assertNull(metaSqlMaximumScale(KEYWORD));
}

public void testMetaRadix() {
assertNull(metaSqlRadix(DATE));
assertNull(metaSqlRadix(KEYWORD));
assertEquals(Integer.valueOf(10), metaSqlRadix(LONG));
assertEquals(Integer.valueOf(2), metaSqlRadix(FLOAT));
}

private DataType randomDataTypeNoDate() {
return randomValueOtherThan(DataType.DATE, () -> randomFrom(DataType.values()));
}
}

Expand Up @@ -82,7 +82,7 @@ public void testDateField() {
EsField field = mapping.get("date");
assertThat(field.getDataType(), is(DATE));
assertThat(field.hasDocValues(), is(true));
assertThat(field.getPrecision(), is(19));
assertThat(field.getPrecision(), is(24));

DateEsField dfield = (DateEsField) field;
List<String> formats = dfield.getFormats();
Expand Down