Skip to content

Commit

Permalink
DRILL-7271: Refactor Metadata interfaces and classes to contain all needed information for the File based Metastore
Browse files Browse the repository at this point in the history
  • Loading branch information
vvysotskyi committed Jun 25, 2019
1 parent 205e028 commit dc865e8
Show file tree
Hide file tree
Showing 102 changed files with 2,949 additions and 1,885 deletions.
Expand Up @@ -23,7 +23,7 @@
import com.fasterxml.jackson.annotation.JsonTypeName;
import org.apache.drill.exec.record.metadata.TupleSchema;
import org.apache.drill.exec.store.parquet.ParquetReaderConfig;
import org.apache.drill.metastore.LocationProvider;
import org.apache.drill.metastore.metadata.LocationProvider;
import org.apache.drill.shaded.guava.com.google.common.base.Preconditions;
import org.apache.drill.common.exceptions.ExecutionSetupException;
import org.apache.drill.common.expression.LogicalExpression;
Expand Down Expand Up @@ -211,14 +211,14 @@ protected boolean supportsFileImplicitColumns() {

@Override
protected List<String> getPartitionValues(LocationProvider locationProvider) {
return hivePartitionHolder.get(locationProvider.getLocation());
return hivePartitionHolder.get(locationProvider.getPath());
}

/**
* Implementation of RowGroupScanFilterer which uses {@link HiveDrillNativeParquetScan} as source and
* builds {@link HiveDrillNativeParquetScan} instance with filtered metadata.
*/
private class HiveDrillNativeParquetScanFilterer extends RowGroupScanFilterer {
private class HiveDrillNativeParquetScanFilterer extends RowGroupScanFilterer<HiveDrillNativeParquetScanFilterer> {

public HiveDrillNativeParquetScanFilterer(HiveDrillNativeParquetScan source) {
super(source);
Expand All @@ -228,5 +228,10 @@ public HiveDrillNativeParquetScanFilterer(HiveDrillNativeParquetScan source) {
protected AbstractParquetGroupScan getNewScan() {
return new HiveDrillNativeParquetScan((HiveDrillNativeParquetScan) source);
}

/**
 * Returns this filterer itself, typed as {@link HiveDrillNativeParquetScanFilterer}.
 *
 * @return this instance
 */
@Override
protected HiveDrillNativeParquetScanFilterer self() {
return this;
}
}
}
5 changes: 0 additions & 5 deletions exec/java-exec/pom.xml
Expand Up @@ -291,11 +291,6 @@
<artifactId>drill-metastore-api</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.drill.metastore</groupId>
<artifactId>drill-file-metastore-plugin</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.beust</groupId>
<artifactId>jcommander</artifactId>
Expand Down
Expand Up @@ -23,8 +23,8 @@
import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.exec.expr.fn.FunctionGenerationHelper;
import org.apache.drill.exec.expr.stat.RowsMatch;
import org.apache.drill.metastore.ColumnStatistics;
import org.apache.drill.metastore.ColumnStatisticsKind;
import org.apache.drill.metastore.statistics.ColumnStatistics;
import org.apache.drill.metastore.statistics.ColumnStatisticsKind;

import java.math.BigDecimal;
import java.math.BigInteger;
Expand Down Expand Up @@ -86,12 +86,13 @@ public <T, V, E extends Exception> T accept(ExprVisitor<T, V, E> visitor, V valu
* where Column1 and Column2 are from the same parquet table.
*/
@Override
@SuppressWarnings("unchecked")
public RowsMatch matches(StatisticsProvider<C> evaluator) {
ColumnStatistics<C> leftStat = left.accept(evaluator, null);
ColumnStatistics leftStat = left.accept(evaluator, null);
if (IsPredicate.isNullOrEmpty(leftStat)) {
return RowsMatch.SOME;
}
ColumnStatistics<C> rightStat = right.accept(evaluator, null);
ColumnStatistics rightStat = right.accept(evaluator, null);
if (IsPredicate.isNullOrEmpty(rightStat)) {
return RowsMatch.SOME;
}
Expand Down Expand Up @@ -126,14 +127,13 @@ public RowsMatch matches(StatisticsProvider<C> evaluator) {
* @param scale adjustment scale
* @return adjusted statistics
*/
@SuppressWarnings("unchecked")
private ColumnStatistics<C> adjustDecimalStatistics(ColumnStatistics<C> statistics, int scale) {
BigInteger min = new BigDecimal((BigInteger) statistics.getStatistic(ColumnStatisticsKind.MIN_VALUE))
private ColumnStatistics adjustDecimalStatistics(ColumnStatistics<BigInteger> statistics, int scale) {
BigInteger min = new BigDecimal(ColumnStatisticsKind.MIN_VALUE.getValueStatistic(statistics))
.setScale(scale, RoundingMode.HALF_UP).unscaledValue();
BigInteger max = new BigDecimal((BigInteger) statistics.getStatistic(ColumnStatisticsKind.MAX_VALUE))
BigInteger max = new BigDecimal(ColumnStatisticsKind.MAX_VALUE.getValueStatistic(statistics))
.setScale(scale, RoundingMode.HALF_UP).unscaledValue();

return new StatisticsProvider.MinMaxStatistics(min, max, Comparator.nullsFirst(Comparator.naturalOrder()));
return StatisticsProvider.getColumnStatistics(min, max, ColumnStatisticsKind.NULLS_COUNT.getFrom(statistics), TypeProtos.MinorType.VARDECIMAL);
}

/**
Expand Down Expand Up @@ -188,11 +188,11 @@ private static <C extends Comparable<C>> LogicalExpression createGTPredicate(
}

static <C> C getMaxValue(ColumnStatistics<C> leftStat) {
return leftStat.getValueStatistic(ColumnStatisticsKind.MAX_VALUE);
return ColumnStatisticsKind.MAX_VALUE.getValueStatistic(leftStat);
}

static <C> C getMinValue(ColumnStatistics<C> leftStat) {
return leftStat.getValueStatistic(ColumnStatisticsKind.MIN_VALUE);
return ColumnStatisticsKind.MIN_VALUE.getValueStatistic(leftStat);
}

/**
Expand Down
Expand Up @@ -27,7 +27,7 @@
import java.util.Queue;

import org.apache.drill.exec.record.metadata.ColumnMetadata;
import org.apache.drill.exec.record.metadata.SchemaPathUtils;
import org.apache.drill.metastore.util.SchemaPathUtils;
import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.shaded.guava.com.google.common.base.Preconditions;
import org.apache.drill.shaded.guava.com.google.common.collect.Maps;
Expand Down
Expand Up @@ -18,9 +18,9 @@
package org.apache.drill.exec.expr;

import org.apache.drill.exec.expr.stat.RowsMatch;
import org.apache.drill.exec.physical.impl.statistics.Statistic;
import org.apache.drill.metastore.ColumnStatistics;
import org.apache.drill.metastore.ColumnStatisticsKind;
import org.apache.drill.metastore.statistics.Statistic;
import org.apache.drill.metastore.statistics.ColumnStatistics;
import org.apache.drill.metastore.statistics.ColumnStatisticsKind;
import org.apache.drill.shaded.guava.com.google.common.base.Preconditions;
import org.apache.drill.common.expression.LogicalExpression;
import org.apache.drill.common.expression.LogicalExpressionBase;
Expand Down Expand Up @@ -73,10 +73,10 @@ public RowsMatch matches(StatisticsProvider<C> evaluator) {
*/
static boolean isNullOrEmpty(ColumnStatistics stat) {
return stat == null
|| !stat.containsStatistic(ColumnStatisticsKind.MIN_VALUE)
|| !stat.containsStatistic(ColumnStatisticsKind.MAX_VALUE)
|| !stat.containsStatistic(ColumnStatisticsKind.NULLS_COUNT)
|| (long) stat.getStatistic(ColumnStatisticsKind.NULLS_COUNT) == Statistic.NO_COLUMN_STATS;
|| !stat.contains(ColumnStatisticsKind.MIN_VALUE)
|| !stat.contains(ColumnStatisticsKind.MAX_VALUE)
|| !stat.contains(ColumnStatisticsKind.NULLS_COUNT)
|| ColumnStatisticsKind.NULLS_COUNT.getFrom(stat) == Statistic.NO_COLUMN_STATS;
}

/**
Expand All @@ -96,7 +96,7 @@ private static RowsMatch checkNull(ColumnStatistics exprStat) {
* @return <tt>true</tt> if the statistics do not contain nulls and <tt>false</tt> otherwise
*/
static boolean hasNoNulls(ColumnStatistics stat) {
return (long) stat.getStatistic(ColumnStatisticsKind.NULLS_COUNT) == 0;
return ColumnStatisticsKind.NULLS_COUNT.getFrom(stat) == 0;
}

/**
Expand Down Expand Up @@ -129,13 +129,13 @@ private static <C extends Comparable<C>> LogicalExpression createIsNullPredicate
*/
static boolean isAllNulls(ColumnStatistics stat, long rowCount) {
Preconditions.checkArgument(rowCount >= 0, String.format("negative rowCount %d is not valid", rowCount));
return (long) stat.getStatistic(ColumnStatisticsKind.NULLS_COUNT) == rowCount;
return ColumnStatisticsKind.NULLS_COUNT.getFrom(stat) == rowCount;
}

static boolean hasNonNullValues(ColumnStatistics stat, long rowCount) {
return rowCount > (long) stat.getStatistic(ColumnStatisticsKind.NULLS_COUNT)
&& stat.getValueStatistic(ColumnStatisticsKind.MIN_VALUE) != null
&& stat.getValueStatistic(ColumnStatisticsKind.MAX_VALUE) != null;
static <T> boolean hasNonNullValues(ColumnStatistics<T> stat, long rowCount) {
return rowCount > ColumnStatisticsKind.NULLS_COUNT.getFrom(stat)
&& ColumnStatisticsKind.MIN_VALUE.getValueStatistic(stat) != null
&& ColumnStatisticsKind.MAX_VALUE.getValueStatistic(stat) != null;
}

/**
Expand All @@ -158,10 +158,10 @@ private static LogicalExpression createIsTruePredicate(LogicalExpression expr) {
if (!hasNonNullValues(exprStat, evaluator.getRowCount())) {
return RowsMatch.SOME;
}
if (!exprStat.getValueStatistic(ColumnStatisticsKind.MAX_VALUE)) {
if (!ColumnStatisticsKind.MAX_VALUE.getValueStatistic(exprStat)) {
return RowsMatch.NONE;
}
return exprStat.getValueStatistic(ColumnStatisticsKind.MIN_VALUE) ? checkNull(exprStat) : RowsMatch.SOME;
return ColumnStatisticsKind.MIN_VALUE.getValueStatistic(exprStat) ? checkNull(exprStat) : RowsMatch.SOME;
});
}

Expand All @@ -176,10 +176,10 @@ private static LogicalExpression createIsFalsePredicate(LogicalExpression expr)
if (!hasNonNullValues(exprStat, evaluator.getRowCount())) {
return RowsMatch.SOME;
}
if (exprStat.getValueStatistic(ColumnStatisticsKind.MIN_VALUE)) {
if (ColumnStatisticsKind.MIN_VALUE.getValueStatistic(exprStat)) {
return RowsMatch.NONE;
}
return exprStat.getValueStatistic(ColumnStatisticsKind.MAX_VALUE) ? RowsMatch.SOME : checkNull(exprStat);
return ColumnStatisticsKind.MAX_VALUE.getValueStatistic(exprStat) ? RowsMatch.SOME : checkNull(exprStat);
});
}

Expand All @@ -194,10 +194,10 @@ private static LogicalExpression createIsNotTruePredicate(LogicalExpression expr
if (!hasNonNullValues(exprStat, evaluator.getRowCount())) {
return RowsMatch.SOME;
}
if (exprStat.getValueStatistic(ColumnStatisticsKind.MIN_VALUE)) {
if (ColumnStatisticsKind.MIN_VALUE.getValueStatistic(exprStat)) {
return hasNoNulls(exprStat) ? RowsMatch.NONE : RowsMatch.SOME;
}
return exprStat.getValueStatistic(ColumnStatisticsKind.MAX_VALUE) ? RowsMatch.SOME : RowsMatch.ALL;
return ColumnStatisticsKind.MAX_VALUE.getValueStatistic(exprStat) ? RowsMatch.SOME : RowsMatch.ALL;
});
}

Expand All @@ -212,10 +212,10 @@ private static LogicalExpression createIsNotFalsePredicate(LogicalExpression exp
if (!hasNonNullValues(exprStat, evaluator.getRowCount())) {
return RowsMatch.SOME;
}
if (!exprStat.getValueStatistic(ColumnStatisticsKind.MAX_VALUE)) {
if (!ColumnStatisticsKind.MAX_VALUE.getValueStatistic(exprStat)) {
return hasNoNulls(exprStat) ? RowsMatch.NONE : RowsMatch.SOME;
}
return exprStat.getValueStatistic(ColumnStatisticsKind.MIN_VALUE) ? RowsMatch.ALL : RowsMatch.SOME;
return ColumnStatisticsKind.MIN_VALUE.getValueStatistic(exprStat) ? RowsMatch.ALL : RowsMatch.SOME;
});
}

Expand Down

0 comments on commit dc865e8

Please sign in to comment.