-
Notifications
You must be signed in to change notification settings - Fork 3.7k
[enhancement](cloud) add CloudTabletStatMgr to capture stats in cloud mode #31818
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
273 changes: 273 additions & 0 deletions
273
fe/fe-core/src/main/java/org/apache/doris/catalog/CloudTabletStatMgr.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,273 @@ | ||
| // Licensed to the Apache Software Foundation (ASF) under one | ||
| // or more contributor license agreements. See the NOTICE file | ||
| // distributed with this work for additional information | ||
| // regarding copyright ownership. The ASF licenses this file | ||
| // to you under the Apache License, Version 2.0 (the | ||
| // "License"); you may not use this file except in compliance | ||
| // with the License. You may obtain a copy of the License at | ||
| // | ||
| // http://www.apache.org/licenses/LICENSE-2.0 | ||
| // | ||
| // Unless required by applicable law or agreed to in writing, | ||
| // software distributed under the License is distributed on an | ||
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| // KIND, either express or implied. See the License for the | ||
| // specific language governing permissions and limitations | ||
| // under the License. | ||
|
|
||
| package org.apache.doris.catalog; | ||
|
|
||
| import org.apache.doris.catalog.MaterializedIndex.IndexExtState; | ||
| import org.apache.doris.catalog.TableIf.TableType; | ||
| import org.apache.doris.cloud.proto.Cloud.GetTabletStatsRequest; | ||
| import org.apache.doris.cloud.proto.Cloud.GetTabletStatsResponse; | ||
| import org.apache.doris.cloud.proto.Cloud.MetaServiceCode; | ||
| import org.apache.doris.cloud.proto.Cloud.TabletIndexPB; | ||
| import org.apache.doris.cloud.proto.Cloud.TabletStatsPB; | ||
| import org.apache.doris.cloud.rpc.MetaServiceProxy; | ||
| import org.apache.doris.common.Config; | ||
| import org.apache.doris.common.Pair; | ||
| import org.apache.doris.common.util.MasterDaemon; | ||
| import org.apache.doris.rpc.RpcException; | ||
|
|
||
| import lombok.Getter; | ||
| import org.apache.logging.log4j.LogManager; | ||
| import org.apache.logging.log4j.Logger; | ||
|
|
||
| import java.util.ArrayList; | ||
| import java.util.List; | ||
| import java.util.concurrent.ConcurrentHashMap; | ||
| import java.util.concurrent.ForkJoinPool; | ||
|
|
||
| /* | ||
| * CloudTabletStatMgr is for collecting tablet(replica) statistics from backends. | ||
| * Each FE will collect by itself. | ||
| */ | ||
| public class CloudTabletStatMgr extends MasterDaemon { | ||
| private static final Logger LOG = LogManager.getLogger(CloudTabletStatMgr.class); | ||
|
|
||
| private ForkJoinPool taskPool = new ForkJoinPool(Runtime.getRuntime().availableProcessors()); | ||
|
|
||
| // <(dbId, tableId) -> CloudTableStats> | ||
| private ConcurrentHashMap<Pair<Long, Long>, CloudTableStats> cloudTableStatsMap = new ConcurrentHashMap<>(); | ||
|
|
||
| public CloudTabletStatMgr() { | ||
| super("cloud tablet stat mgr", Config.tablet_stat_update_interval_second * 1000); | ||
| } | ||
|
|
||
| @Override | ||
| protected void runAfterCatalogReady() { | ||
| LOG.info("cloud tablet stat begin"); | ||
| long start = System.currentTimeMillis(); | ||
|
|
||
| List<GetTabletStatsRequest> reqList = new ArrayList<GetTabletStatsRequest>(); | ||
| GetTabletStatsRequest.Builder builder = GetTabletStatsRequest.newBuilder(); | ||
| List<Long> dbIds = Env.getCurrentInternalCatalog().getDbIds(); | ||
| for (Long dbId : dbIds) { | ||
| Database db = Env.getCurrentInternalCatalog().getDbNullable(dbId); | ||
| if (db == null) { | ||
| continue; | ||
| } | ||
|
|
||
| List<Table> tableList = db.getTables(); | ||
| for (Table table : tableList) { | ||
| if (table.getType() != TableType.OLAP) { | ||
| continue; | ||
| } | ||
|
|
||
| table.readLock(); | ||
| try { | ||
| OlapTable tbl = (OlapTable) table; | ||
| for (Partition partition : tbl.getAllPartitions()) { | ||
| for (MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.VISIBLE)) { | ||
| for (Long tabletId : index.getTabletIdsInOrder()) { | ||
| Tablet tablet = index.getTablet(tabletId); | ||
| TabletIndexPB.Builder tabletBuilder = TabletIndexPB.newBuilder(); | ||
| tabletBuilder.setDbId(dbId); | ||
| tabletBuilder.setTableId(table.getId()); | ||
| tabletBuilder.setIndexId(index.getId()); | ||
| tabletBuilder.setPartitionId(partition.getId()); | ||
| tabletBuilder.setTabletId(tablet.getId()); | ||
| builder.addTabletIdx(tabletBuilder); | ||
|
|
||
| if (builder.getTabletIdxCount() >= Config.get_tablet_stat_batch_size) { | ||
| reqList.add(builder.build()); | ||
| builder = GetTabletStatsRequest.newBuilder(); | ||
| } | ||
| } | ||
| } | ||
| } // partitions | ||
| } finally { | ||
| table.readUnlock(); | ||
| } | ||
| } // tables | ||
| } // end for dbs | ||
|
|
||
| if (builder.getTabletIdxCount() > 0) { | ||
| reqList.add(builder.build()); | ||
| } | ||
|
|
||
| for (GetTabletStatsRequest req : reqList) { | ||
| GetTabletStatsResponse resp; | ||
| try { | ||
| resp = getTabletStats(req); | ||
| } catch (RpcException e) { | ||
| LOG.info("get tablet stats exception:", e); | ||
| continue; | ||
| } | ||
|
|
||
| if (resp.getStatus().getCode() != MetaServiceCode.OK) { | ||
| continue; | ||
| } | ||
|
|
||
| if (LOG.isDebugEnabled()) { | ||
| int i = 0; | ||
| for (TabletIndexPB idx : req.getTabletIdxList()) { | ||
| LOG.debug("db_id: {} table_id: {} index_id: {} tablet_id: {} size: {}", | ||
| idx.getDbId(), idx.getTableId(), idx.getIndexId(), idx.getTabletId(), | ||
| resp.getTabletStats(i++).getDataSize()); | ||
| } | ||
| } | ||
| updateTabletStat(resp); | ||
| } | ||
|
|
||
| LOG.info("finished to get tablet stat of all backends. cost: {} ms", | ||
| (System.currentTimeMillis() - start)); | ||
|
|
||
| // after update replica in all backends, update index row num | ||
| start = System.currentTimeMillis(); | ||
| ConcurrentHashMap<Pair<Long, Long>, CloudTableStats> newCloudTableStatsMap = new ConcurrentHashMap<>(); | ||
| for (Long dbId : dbIds) { | ||
| Database db = Env.getCurrentInternalCatalog().getDbNullable(dbId); | ||
| if (db == null) { | ||
| continue; | ||
| } | ||
|
|
||
| List<Table> tableList = db.getTables(); | ||
| for (Table table : tableList) { | ||
| if (table.getType() != TableType.OLAP) { | ||
| continue; | ||
| } | ||
| OlapTable olapTable = (OlapTable) table; | ||
|
|
||
| String dbName = db.getName(); | ||
| Long tableId = table.getId(); | ||
| String tableName = table.getName(); | ||
|
|
||
| Long tableDataSize = 0L; | ||
| Long tableRowsetCount = 0L; | ||
| Long tableSegmentCount = 0L; | ||
| Long tableRowCount = 0L; | ||
|
|
||
| if (!table.writeLockIfExist()) { | ||
| continue; | ||
| } | ||
|
|
||
| try { | ||
| for (Partition partition : olapTable.getAllPartitions()) { | ||
| for (MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.VISIBLE)) { | ||
| long indexRowCount = 0L; | ||
| for (Tablet tablet : index.getTablets()) { | ||
| long tabletDataSize = 0L; | ||
| long tabletRowsetCount = 0L; | ||
| long tabletSegmentCount = 0L; | ||
| long tabletRowCount = 0L; | ||
|
|
||
| for (Replica replica : tablet.getReplicas()) { | ||
| if (replica.getDataSize() > tabletDataSize) { | ||
| tabletDataSize = replica.getDataSize(); | ||
| } | ||
|
|
||
| if (replica.getRowsetCount() > tabletRowsetCount) { | ||
| tabletRowsetCount = replica.getRowsetCount(); | ||
| } | ||
|
|
||
| if (replica.getSegmentCount() > tabletSegmentCount) { | ||
| tabletSegmentCount = replica.getSegmentCount(); | ||
| } | ||
|
|
||
| if (replica.getRowCount() > tabletRowCount) { | ||
| tabletRowCount = replica.getRowCount(); | ||
| } | ||
| } | ||
|
|
||
| tableDataSize += tabletDataSize; | ||
| tableRowsetCount += tabletRowsetCount; | ||
| tableSegmentCount += tabletSegmentCount; | ||
| tableRowCount += tabletRowCount; | ||
|
|
||
| indexRowCount += tabletRowCount; | ||
| } // end for tablets | ||
| index.setRowCount(indexRowCount); | ||
| } // end for indices | ||
| } // end for partitions | ||
| LOG.debug("finished to set row num for table: {} in database: {}", | ||
| table.getName(), db.getFullName()); | ||
| } finally { | ||
| table.writeUnlock(); | ||
| } | ||
|
|
||
| newCloudTableStatsMap.put(Pair.of(dbId, tableId), new CloudTableStats(dbName, tableName, | ||
| tableDataSize, tableRowsetCount, tableSegmentCount, tableRowCount)); | ||
| } | ||
| } | ||
| this.cloudTableStatsMap = newCloudTableStatsMap; | ||
| LOG.info("finished to update index row num of all databases. cost: {} ms", | ||
| (System.currentTimeMillis() - start)); | ||
| } | ||
|
|
||
| private void updateTabletStat(GetTabletStatsResponse response) { | ||
| TabletInvertedIndex invertedIndex = Env.getCurrentInvertedIndex(); | ||
| for (TabletStatsPB stat : response.getTabletStatsList()) { | ||
| if (invertedIndex.getTabletMeta(stat.getIdx().getTabletId()) != null) { | ||
| List<Replica> replicas = invertedIndex.getReplicasByTabletId(stat.getIdx().getTabletId()); | ||
| if (replicas != null && !replicas.isEmpty() && replicas.get(0) != null) { | ||
| replicas.get(0).updateCloudStat(stat.getDataSize(), stat.getNumRowsets(), | ||
| stat.getNumSegments(), stat.getNumRows()); | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| private GetTabletStatsResponse getTabletStats(GetTabletStatsRequest request) | ||
| throws RpcException { | ||
| GetTabletStatsResponse response; | ||
| try { | ||
| response = MetaServiceProxy.getInstance().getTabletStats(request); | ||
| } catch (RpcException e) { | ||
| LOG.info("get tablet stat get exception:", e); | ||
| throw e; | ||
| } | ||
| return response; | ||
| } | ||
|
|
||
| public ConcurrentHashMap<Pair<Long, Long>, CloudTableStats> getCloudTableStatsMap() { | ||
| return this.cloudTableStatsMap; | ||
| } | ||
|
|
||
| public static class CloudTableStats { | ||
| @Getter | ||
| private String dbName; | ||
| @Getter | ||
| private String tableName; | ||
|
|
||
| @Getter | ||
| private Long tableDataSize; | ||
| @Getter | ||
| private Long tableRowsetCount; | ||
| @Getter | ||
| private Long tableSegmentCount; | ||
| @Getter | ||
| private Long tableRowCount; | ||
|
|
||
| public CloudTableStats(String dbName, String tableName, Long tableDataSize, Long tableRowsetCount, | ||
| Long tableSegmentCount, Long tableRowCount) { | ||
| this.dbName = dbName; | ||
| this.tableName = tableName; | ||
| this.tableDataSize = tableDataSize; | ||
| this.tableRowsetCount = tableRowsetCount; | ||
| this.tableSegmentCount = tableSegmentCount; | ||
| this.tableRowCount = tableRowCount; | ||
| } | ||
| } | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `taskPool` seems redundant.