Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -610,21 +610,13 @@ private Optional<ColumnStatistic> getHiveColumnStats(String colName) {
continue;
}
ColumnStatisticsData data = tableStat.getStatsData();
try {
setStatData(column, data, columnStatisticBuilder, count);
} catch (AnalysisException e) {
if (LOG.isDebugEnabled()) {
LOG.debug(e);
}
return Optional.empty();
}
setStatData(column, data, columnStatisticBuilder, count);
}

return Optional.of(columnStatisticBuilder.build());
}

private void setStatData(Column col, ColumnStatisticsData data, ColumnStatisticBuilder builder, long count)
throws AnalysisException {
private void setStatData(Column col, ColumnStatisticsData data, ColumnStatisticBuilder builder, long count) {
long ndv = 0;
long nulls = 0;
double colSize = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2695,7 +2695,14 @@ private void getStatsForAllColumns(List<Pair<Pair<String, String>, ColumnStatist
if (indexName == null) {
continue;
}
columnStatistics.add(Pair.of(Pair.of(indexName, row.get(5)), ColumnStatistic.fromResultRow(row)));
try {
columnStatistics.add(Pair.of(Pair.of(indexName, row.get(5)), ColumnStatistic.fromResultRow(row)));
} catch (Exception e) {
LOG.warn("Failed to deserialize column statistics. reason: [{}]. Row [{}]", e.getMessage(), row);
if (LOG.isDebugEnabled()) {
LOG.debug(e);
}
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -834,8 +834,12 @@ public void updateLocalPartitionStatsCache(long catalogId, long dbId, long table
// count, ndv, null_count, min, max, data_size, update_time]
StatisticsCache cache = Env.getCurrentEnv().getStatisticsCache();
for (ResultRow row : resultRows) {
cache.updatePartitionColStatsCache(catalogId, dbId, tableId, indexId, row.get(4), colName,
PartitionColumnStatistic.fromResultRow(row));
try {
cache.updatePartitionColStatsCache(catalogId, dbId, tableId, indexId, row.get(4), colName,
PartitionColumnStatistic.fromResultRow(row));
} catch (Exception e) {
cache.invalidatePartitionColumnStatsCache(catalogId, dbId, tableId, indexId, row.get(4), colName);
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,100 +108,82 @@ public ColumnStatistic(double count, double ndv, ColumnStatistic original, doubl
}

public static ColumnStatistic fromResultRow(List<ResultRow> resultRows) {
ColumnStatistic columnStatistic = null;
try {
for (ResultRow resultRow : resultRows) {
String partId = resultRow.get(6);
if (partId == null) {
columnStatistic = fromResultRow(resultRow);
} else {
LOG.warn("Column statistics table shouldn't contain partition stats. [{}]", resultRow);
}
}
} catch (Throwable t) {
if (LOG.isDebugEnabled()) {
LOG.debug("Failed to deserialize column stats", t);
ColumnStatistic columnStatistic = ColumnStatistic.UNKNOWN;
for (ResultRow resultRow : resultRows) {
String partId = resultRow.get(6);
if (partId == null) {
columnStatistic = fromResultRow(resultRow);
} else {
LOG.warn("Column statistics table shouldn't contain partition stats. [{}]", resultRow);
}
return ColumnStatistic.UNKNOWN;
}
if (columnStatistic == null) {
return ColumnStatistic.UNKNOWN;
}
return columnStatistic;
}

// TODO: use thrift
public static ColumnStatistic fromResultRow(ResultRow row) {
try {
ColumnStatisticBuilder columnStatisticBuilder = new ColumnStatisticBuilder();
double count = Double.parseDouble(row.get(7));
columnStatisticBuilder.setCount(count);
double ndv = Double.parseDouble(row.getWithDefault(8, "0"));
columnStatisticBuilder.setNdv(ndv);
String nullCount = row.getWithDefault(9, "0");
columnStatisticBuilder.setNumNulls(Double.parseDouble(nullCount));
columnStatisticBuilder.setDataSize(Double
.parseDouble(row.getWithDefault(12, "0")));
columnStatisticBuilder.setAvgSizeByte(columnStatisticBuilder.getCount() == 0
? 0 : columnStatisticBuilder.getDataSize()
/ columnStatisticBuilder.getCount());
long catalogId = Long.parseLong(row.get(1));
long idxId = Long.parseLong(row.get(4));
long dbID = Long.parseLong(row.get(2));
long tblId = Long.parseLong(row.get(3));
String colName = row.get(5);
Column col = StatisticsUtil.findColumn(catalogId, dbID, tblId, idxId, colName);
if (col == null) {
if (LOG.isDebugEnabled()) {
LOG.debug("Failed to deserialize column statistics, ctlId: {} dbId: {}"
+ "tblId: {} column: {} not exists",
catalogId, dbID, tblId, colName);
}
return ColumnStatistic.UNKNOWN;
ColumnStatisticBuilder columnStatisticBuilder = new ColumnStatisticBuilder();
double count = Double.parseDouble(row.get(7));
columnStatisticBuilder.setCount(count);
double ndv = Double.parseDouble(row.getWithDefault(8, "0"));
columnStatisticBuilder.setNdv(ndv);
String nullCount = row.getWithDefault(9, "0");
columnStatisticBuilder.setNumNulls(Double.parseDouble(nullCount));
columnStatisticBuilder.setDataSize(Double
.parseDouble(row.getWithDefault(12, "0")));
columnStatisticBuilder.setAvgSizeByte(columnStatisticBuilder.getCount() == 0
? 0 : columnStatisticBuilder.getDataSize()
/ columnStatisticBuilder.getCount());
long catalogId = Long.parseLong(row.get(1));
long idxId = Long.parseLong(row.get(4));
long dbID = Long.parseLong(row.get(2));
long tblId = Long.parseLong(row.get(3));
String colName = row.get(5);
Column col = StatisticsUtil.findColumn(catalogId, dbID, tblId, idxId, colName);
if (col == null) {
if (LOG.isDebugEnabled()) {
LOG.debug("Failed to deserialize column statistics, ctlId: {} dbId: {}"
+ "tblId: {} column: {} not exists",
catalogId, dbID, tblId, colName);
}
String min = row.get(10);
String max = row.get(11);
if (min != null && !min.equalsIgnoreCase("NULL")) {
// Internal catalog get the min/max value using a separate SQL,
// and the value is already encoded by base64. Need to handle internal and external catalog separately.
if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID && min.equalsIgnoreCase("NULL")) {
return ColumnStatistic.UNKNOWN;
}
String min = row.get(10);
String max = row.get(11);
if (min != null && !min.equalsIgnoreCase("NULL")) {
// Internal catalog get the min/max value using a separate SQL,
// and the value is already encoded by base64. Need to handle internal and external catalog separately.
if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID && min.equalsIgnoreCase("NULL")) {
columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
} else {
try {
columnStatisticBuilder.setMinValue(StatisticsUtil.convertToDouble(col.getType(), min));
columnStatisticBuilder.setMinExpr(StatisticsUtil.readableValue(col.getType(), min));
} catch (AnalysisException e) {
LOG.warn("Failed to deserialize column {} min value {}.", col, min, e);
columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
} else {
try {
columnStatisticBuilder.setMinValue(StatisticsUtil.convertToDouble(col.getType(), min));
columnStatisticBuilder.setMinExpr(StatisticsUtil.readableValue(col.getType(), min));
} catch (AnalysisException e) {
LOG.warn("Failed to deserialize column {} min value {}.", col, min, e);
columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
}
}
} else {
columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
}
if (max != null && !max.equalsIgnoreCase("NULL")) {
if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID && max.equalsIgnoreCase("NULL")) {
} else {
columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
}
if (max != null && !max.equalsIgnoreCase("NULL")) {
if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID && max.equalsIgnoreCase("NULL")) {
columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
} else {
try {
columnStatisticBuilder.setMaxValue(StatisticsUtil.convertToDouble(col.getType(), max));
columnStatisticBuilder.setMaxExpr(StatisticsUtil.readableValue(col.getType(), max));
} catch (AnalysisException e) {
LOG.warn("Failed to deserialize column {} max value {}.", col, max, e);
columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
} else {
try {
columnStatisticBuilder.setMaxValue(StatisticsUtil.convertToDouble(col.getType(), max));
columnStatisticBuilder.setMaxExpr(StatisticsUtil.readableValue(col.getType(), max));
} catch (AnalysisException e) {
LOG.warn("Failed to deserialize column {} max value {}.", col, max, e);
columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
}
}
} else {
columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
}
columnStatisticBuilder.setUpdatedTime(row.get(13));
return columnStatisticBuilder.build();
} catch (Exception e) {
LOG.warn("Failed to deserialize column statistics. reason: [{}]. Row [{}]", e.getMessage(), row);
if (LOG.isDebugEnabled()) {
LOG.debug(e);
}
return ColumnStatistic.UNKNOWN;
} else {
columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
}
columnStatisticBuilder.setUpdatedTime(row.get(13));
return columnStatisticBuilder.build();
}

public static boolean isAlmostUnique(double ndv, double rowCount) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
package org.apache.doris.statistics;

import org.apache.doris.catalog.TableIf;
import org.apache.doris.qe.InternalQueryExecutionException;
import org.apache.doris.statistics.util.StatisticsUtil;

import org.apache.logging.log4j.LogManager;
Expand All @@ -33,28 +32,22 @@ public class ColumnStatisticsCacheLoader extends BasicAsyncCacheLoader<Statistic

@Override
protected Optional<ColumnStatistic> doLoad(StatisticsCacheKey key) {
Optional<ColumnStatistic> columnStatistic = Optional.empty();
Optional<ColumnStatistic> columnStatistic;
try {
// Load from statistics table.
columnStatistic = loadFromStatsTable(key);
if (!columnStatistic.isPresent()) {
// Load from data source metadata
try {
TableIf table = StatisticsUtil.findTable(key.catalogId, key.dbId, key.tableId);
columnStatistic = table.getColumnStatistic(key.colName);
} catch (Exception e) {
if (LOG.isDebugEnabled()) {
LOG.debug("Exception to get column statistics by metadata. [Catalog:{}, DB:{}, Table:{}]",
key.catalogId, key.dbId, key.tableId, e);
}
}
TableIf table = StatisticsUtil.findTable(key.catalogId, key.dbId, key.tableId);
columnStatistic = table.getColumnStatistic(key.colName);
}
} catch (Throwable t) {
LOG.warn("Failed to load stats for column [Catalog:{}, DB:{}, Table:{}, Column:{}], Reason: {}",
key.catalogId, key.dbId, key.tableId, key.colName, t.getMessage());
if (LOG.isDebugEnabled()) {
LOG.debug(t);
}
return null;
}
if (columnStatistic.isPresent()) {
// For non-empty table, return UNKNOWN if we can't collect ndv value.
Expand All @@ -68,22 +61,9 @@ protected Optional<ColumnStatistic> doLoad(StatisticsCacheKey key) {
}

private Optional<ColumnStatistic> loadFromStatsTable(StatisticsCacheKey key) {
List<ResultRow> columnResults;
try {
columnResults = StatisticsRepository.loadColStats(
key.catalogId, key.dbId, key.tableId, key.idxId, key.colName);
} catch (InternalQueryExecutionException e) {
LOG.info("Failed to load stats for table {} column {}. Reason:{}",
key.tableId, key.colName, e.getMessage());
return Optional.empty();
}
ColumnStatistic columnStatistics;
try {
columnStatistics = StatisticsUtil.deserializeToColumnStatistics(columnResults);
} catch (Exception e) {
LOG.warn("Exception to deserialize column statistics", e);
return Optional.empty();
}
List<ResultRow> columnResults
= StatisticsRepository.loadColStats(key.catalogId, key.dbId, key.tableId, key.idxId, key.colName);
ColumnStatistic columnStatistics = StatisticsUtil.deserializeToColumnStatistics(columnResults);
if (columnStatistics == null) {
return Optional.empty();
} else {
Expand Down
Loading