From ee3266666f8590eb27656aab3b524b76298cb546 Mon Sep 17 00:00:00 2001 From: qiaojialin <646274302@qq.com> Date: Fri, 3 Apr 2020 16:40:40 +0800 Subject: [PATCH] add docs for deletion handle in query --- docs/SystemDesign/5-DataQuery/1-DataQuery.md | 3 + .../5-DataQuery/8-ModificationHandle.md | 63 +++++++++++++++++++ .../SystemDesign/5-DataQuery/1-DataQuery.md | 3 + .../5-DataQuery/8-ModificationHandle.md | 63 +++++++++++++++++++ .../db/query/reader/series/SeriesReader.java | 4 +- .../iotdb/db/utils/FileLoaderUtils.java | 21 +++---- site/src/main/.vuepress/config.js | 2 + 7 files changed, 146 insertions(+), 13 deletions(-) create mode 100644 docs/SystemDesign/5-DataQuery/8-ModificationHandle.md create mode 100644 docs/zh/SystemDesign/5-DataQuery/8-ModificationHandle.md diff --git a/docs/SystemDesign/5-DataQuery/1-DataQuery.md b/docs/SystemDesign/5-DataQuery/1-DataQuery.md index 37222ff73d7ba..9325e4549ce1c 100644 --- a/docs/SystemDesign/5-DataQuery/1-DataQuery.md +++ b/docs/SystemDesign/5-DataQuery/1-DataQuery.md @@ -28,6 +28,7 @@ There are several types of data queries * Downsampling query * Single point supplementary null query * Latest data query +* Align by device query In order to achieve the above kinds of queries, a basic query component for a single time series is designed in the IoTDB query engine, and on this basis, various query functions are implemented. 
@@ -38,3 +39,5 @@ In order to achieve the above kinds of queries, a basic query component for a si * [Aggregate query](/SystemDesign/5-DataQuery/4-AggregationQuery.html) * [Downsampling query](/SystemDesign/5-DataQuery/5-GroupByQuery.html) * [Recent timestamp query](/SystemDesign/5-DataQuery/6-LastQuery.html) +* [Align by device query](/SystemDesign/5-DataQuery/7-AlignByDeviceQuery.html) +* [Modification handle](/SystemDesign/5-DataQuery/8-ModificationHandle.html) \ No newline at end of file diff --git a/docs/SystemDesign/5-DataQuery/8-ModificationHandle.md b/docs/SystemDesign/5-DataQuery/8-ModificationHandle.md new file mode 100644 index 0000000000000..116625009bd87 --- /dev/null +++ b/docs/SystemDesign/5-DataQuery/8-ModificationHandle.md @@ -0,0 +1,63 @@ + + +# Modification handling in query + +Data deletion only records the operation in a mods file for disk data; the data itself is not physically deleted. Therefore, we need to take the modifications into account in queries. + +Each timeseries is treated independently during query processing. For each timeseries, there are 5 levels: TsFileResource -> TimeseriesMetadata -> ChunkMetadata -> IPageReader -> BatchData + +Query resource: a TsFileResource and its mods file, if one exists. If a TsFile is affected by a deletion, a modification log is recorded in its mods file. The log contains 3 parts: path, deleted time, version + +![](https://user-images.githubusercontent.com/7240743/78339324-deca5d80-75c6-11ea-8fa8-dbd94232b756.png) + +* TsFileResource -> TimeseriesMetadata + +``` +// Mark the statistics in TimeseriesMetadata as unusable if the timeseries contains modifications +FileLoaderUtils.loadTimeSeriesMetadata() +``` + +* TimeseriesMetadata -> List\<ChunkMetadata\> + +``` +// For each ChunkMetadata, find the largest timestamp among all modifications whose version is larger than that of the ChunkMetadata, and set it as the deleted time of the ChunkMetadata. 
+// Mark the statistics in ChunkMetadata as unusable if it is affected by deletion +FileLoaderUtils.loadChunkMetadataList() +``` + +E.g., the resulting ChunkMetadatas are: + +![](https://user-images.githubusercontent.com/7240743/78339335-e427a800-75c6-11ea-815f-16dc5b6ebfa3.png) + +* ChunkMetadata -> List\<IPageReader\> + +``` +// Skip fully deleted pages, set deleteAt into PageReader, and mark the page statistics as unusable if the page is affected by deletion +FileLoaderUtils.loadPageReaderList() +``` + +* IPageReader -> BatchData + +``` +// For disk pages, skip the data points that are deleted or filtered out. For memory data, skip the data points that are filtered out. +IPageReader.getAllSatisfiedPageData() +``` \ No newline at end of file diff --git a/docs/zh/SystemDesign/5-DataQuery/1-DataQuery.md b/docs/zh/SystemDesign/5-DataQuery/1-DataQuery.md index 0ace265fa8d37..47cbc19f17d1b 100644 --- a/docs/zh/SystemDesign/5-DataQuery/1-DataQuery.md +++ b/docs/zh/SystemDesign/5-DataQuery/1-DataQuery.md @@ -28,6 +28,7 @@ * 降采样查询 * 单点补空值查询 * 最新数据查询 +* 按设备对齐查询 为了实现以上几种查询,IoTDB 查询引擎中设计了针对单个时间序列的基础查询组件,在此基础上,实现了多种查询功能。 @@ -38,3 +39,5 @@ * [聚合查询](/zh/SystemDesign/5-DataQuery/4-AggregationQuery.html) * [降采样查询](/zh/SystemDesign/5-DataQuery/5-GroupByQuery.html) * [最近时间戳查询](/zh/SystemDesign/5-DataQuery/6-LastQuery.html) +* [按设备对齐查询](/zh/SystemDesign/5-DataQuery/7-AlignByDeviceQuery.html) +* [查询中的数据删改处理](/zh/SystemDesign/5-DataQuery/8-ModificationHandle.html) diff --git a/docs/zh/SystemDesign/5-DataQuery/8-ModificationHandle.md b/docs/zh/SystemDesign/5-DataQuery/8-ModificationHandle.md new file mode 100644 index 0000000000000..ce4ff7badce0a --- /dev/null +++ b/docs/zh/SystemDesign/5-DataQuery/8-ModificationHandle.md @@ -0,0 +1,63 @@ + + +# 查询中的数据修改处理 + +数据删除操作对磁盘数据只记录了 mods 文件,并未真正执行删除逻辑,因此查询时需要考虑数据删除的逻辑。 + +查询时每个时间序列会单独处理。针对一个时间序列,由大到小有 5 个层次:TsFileResource -> TimeseriesMetadata -> ChunkMetadata -> IPageReader -> BatchData + +查询资源:TsFileResource 以及可能存在的 mods 文件,如果一个文件中有数据被删除了,将删除操作记录到 mods 
文件中。记录三列:删除的时间序列,删除范围的最大时间点,删除操作对应的版本。 + +![](https://user-images.githubusercontent.com/7240743/78339324-deca5d80-75c6-11ea-8fa8-dbd94232b756.png) + +* TsFileResource -> TimeseriesMetadata + +``` +// 只要这个时间序列有对应的 modification,就标记 TimeseriesMetadata 中的统计信息不可用 +FileLoaderUtils.loadTimeSeriesMetadata() +``` + +* TimeseriesMetadata -> List\<ChunkMetadata\> + +``` +// 对于每个 ChunkMetadata,找到比其 version 大的所有 modification 中最大时间戳,设置到 ChunkMetadata 的 deleteAt 中,并标记统计信息不可用 +FileLoaderUtils.loadChunkMetadataList() +``` + +对于以上示例,读取到的 ChunkMetadataList 为 + +![](https://user-images.githubusercontent.com/7240743/78339335-e427a800-75c6-11ea-815f-16dc5b6ebfa3.png) + +* ChunkMetadata -> List\<IPageReader\> + +``` +// 跳过被完全删除的 Page,将 deleteAt 设置到 PageReader 里,将数据被部分删除的 page 标记统计信息不可用 +FileLoaderUtils.loadPageReaderList() +``` + +* IPageReader -> BatchData + +``` +// 对于磁盘数据,跳过被删除的和过滤掉的,对于内存数据,跳过被过滤掉的 +IPageReader.getAllSatisfiedPageData() +``` + diff --git a/server/src/main/java/org/apache/iotdb/db/query/reader/series/SeriesReader.java b/server/src/main/java/org/apache/iotdb/db/query/reader/series/SeriesReader.java index 1586d6fcbdf87..55906153e9be9 100644 --- a/server/src/main/java/org/apache/iotdb/db/query/reader/series/SeriesReader.java +++ b/server/src/main/java/org/apache/iotdb/db/query/reader/series/SeriesReader.java @@ -241,7 +241,7 @@ private void unpackAllOverlappedTimeSeriesMetadataToCachedChunkMetadata(long end } private void unpackOneTimeSeriesMetadata(TimeseriesMetadata timeSeriesMetadata) throws IOException { - cachedChunkMetadata.addAll(FileLoaderUtils.loadChunkMetadata(timeSeriesMetadata)); + cachedChunkMetadata.addAll(FileLoaderUtils.loadChunkMetadataList(timeSeriesMetadata)); } boolean isChunkOverlapped() throws IOException { @@ -338,7 +338,7 @@ private void unpackAllOverlappedChunkMetadataToCachedPageReaders(long endTime, b } private void unpackOneChunkMetaData(ChunkMetadata chunkMetaData) throws IOException { - FileLoaderUtils.loadPageReader(chunkMetaData, timeFilter) + 
FileLoaderUtils.loadPageReaderList(chunkMetaData, timeFilter) .forEach(pageReader -> cachedPageReaders.add(new VersionPageReader(chunkMetaData.getVersion(), pageReader))); } diff --git a/server/src/main/java/org/apache/iotdb/db/utils/FileLoaderUtils.java b/server/src/main/java/org/apache/iotdb/db/utils/FileLoaderUtils.java index 9f9cb133de748..8884a7de4d145 100644 --- a/server/src/main/java/org/apache/iotdb/db/utils/FileLoaderUtils.java +++ b/server/src/main/java/org/apache/iotdb/db/utils/FileLoaderUtils.java @@ -90,7 +90,7 @@ public static void updateTsFileResource(TsFileMetadata metaData, TsFileSequenceR * @param allSensors measurements queried at the same time of this device */ public static TimeseriesMetadata loadTimeSeriesMetadata(TsFileResource resource, Path seriesPath, - QueryContext context, Filter timeFilter, Set allSensors) throws IOException { + QueryContext context, Filter timeFilter, Set allSensors) throws IOException { TimeseriesMetadata timeSeriesMetadata; if (resource.isClosed()) { timeSeriesMetadata = TimeSeriesMetadataCache.getInstance() @@ -129,7 +129,8 @@ public static TimeseriesMetadata loadTimeSeriesMetadata(TsFileResource resource, * load all chunk metadata of one time series in one file. * @param timeSeriesMetadata the corresponding TimeSeriesMetadata in that file. 
*/ - public static List loadChunkMetadata(TimeseriesMetadata timeSeriesMetadata) throws IOException { + public static List loadChunkMetadataList(TimeseriesMetadata timeSeriesMetadata) + throws IOException { return timeSeriesMetadata.loadChunkMetadataList(); } @@ -139,27 +140,25 @@ public static List loadChunkMetadata(TimeseriesMetadata timeSerie * @param chunkMetaData the corresponding chunk metadata * @param timeFilter it should be a TimeFilter instead of a ValueFilter */ - public static List loadPageReader(ChunkMetadata chunkMetaData, Filter timeFilter) throws IOException { - return initChunkReader(chunkMetaData, timeFilter).loadPageReaderList(); - } - - private static IChunkReader initChunkReader(ChunkMetadata metaData, Filter timeFilter) throws IOException { - if (metaData == null) { + public static List loadPageReaderList(ChunkMetadata chunkMetaData, Filter timeFilter) + throws IOException { + if (chunkMetaData == null) { throw new IOException("Can't init null chunkMeta"); } IChunkReader chunkReader; - IChunkLoader chunkLoader = metaData.getChunkLoader(); + IChunkLoader chunkLoader = chunkMetaData.getChunkLoader(); if (chunkLoader instanceof MemChunkLoader) { MemChunkLoader memChunkLoader = (MemChunkLoader) chunkLoader; chunkReader = new MemChunkReader(memChunkLoader.getChunk(), timeFilter); } else { - Chunk chunk = chunkLoader.loadChunk(metaData); + Chunk chunk = chunkLoader.loadChunk(chunkMetaData); chunkReader = new ChunkReader(chunk, timeFilter); chunkReader.hasNextSatisfiedPage(); } - return chunkReader; + return chunkReader.loadPageReaderList(); } + /** * load all ChunkMetadatas belong to the seriesPath */ diff --git a/site/src/main/.vuepress/config.js b/site/src/main/.vuepress/config.js index d0658ef886bfd..869536fb0ecdb 100644 --- a/site/src/main/.vuepress/config.js +++ b/site/src/main/.vuepress/config.js @@ -446,6 +446,7 @@ var config = { '5-DataQuery/5-GroupByQuery', '5-DataQuery/6-LastQuery', '5-DataQuery/7-AlignByDeviceQuery', + 
'5-DataQuery/8-ModificationHandle', ] }, { @@ -860,6 +861,7 @@ var config = { '5-DataQuery/5-GroupByQuery', '5-DataQuery/6-LastQuery', '5-DataQuery/7-AlignByDeviceQuery', + '5-DataQuery/8-ModificationHandle', ] }, {