Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ public class MetricNames {
public static final String LAKE_TIERING_TABLE_FAILURES_TOTAL = "failuresTotal";
public static final String LAKE_TIERING_TABLE_FILE_SIZE = "fileSize";
public static final String LAKE_TIERING_TABLE_RECORD_COUNT = "recordCount";
public static final String LAKE_TIERING_TABLE_PENDING_TIME = "pendingTime";
public static final String LAKE_TIERING_TABLE_FRESHNESS = "freshness";

// --------------------------------------------------------------------------------------------
// metrics for tablet server
Expand Down Expand Up @@ -222,13 +224,11 @@ public class MetricNames {
// metrics for table bucket
// --------------------------------------------------------------------------------------------

// for tablet
public static final String LAKE_PENDING_RECORDS = "pendingRecords";

// for log tablet
public static final String LOG_NUM_SEGMENTS = "numSegments";
public static final String LOG_END_OFFSET = "endOffset";
public static final String REMOTE_LOG_SIZE = "size";
public static final String LOG_LAKE_PENDING_RECORDS = "pendingRecords";
public static final String LOG_LAKE_TIMESTAMP_LAG = "timestampLag";

// for logical storage
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,9 @@ public class LakeTableTieringManager implements AutoCloseable {
// table_id -> start time (ms) of the currently in-progress tiering round
private final Map<Long, Long> currentTieringStartTime;

// table_id -> time (ms) when the table entered pending queue
private final Map<Long, Long> pendingEnterTime;

// the tables that are currently being tiered:
// table_id -> time (ms) of the last heartbeat from the tiering service
private final Map<Long, Long> liveTieringTableIds;
Expand Down Expand Up @@ -184,6 +187,7 @@ protected LakeTableTieringManager(
this.delayedTieringByTableId = new HashMap<>();
this.tableFailureCounters = new HashMap<>();
this.currentTieringStartTime = new HashMap<>();
this.pendingEnterTime = new HashMap<>();
this.tieringMetricGroup = lakeTieringMetricGroup;
registerMetrics();
}
Expand Down Expand Up @@ -279,6 +283,17 @@ private void registerTableMetrics(long tableId, TablePath tablePath) {
MetricNames.LAKE_TIERING_TABLE_TIER_DURATION,
() -> inReadLock(lock, () -> getLastResultField(tableId, r -> r.tierDuration)));

// pendingTime: how long the table has been waiting in the pending queue
tableMetricGroup.gauge(
MetricNames.LAKE_TIERING_TABLE_PENDING_TIME,
() ->
inReadLock(
lock,
() -> {
long enterTime = pendingEnterTime.getOrDefault(tableId, 0L);
return enterTime > 0 ? clock.milliseconds() - enterTime : 0L;
}));

// failuresTotal: total failure count for this table
Counter failuresCounter =
tableMetricGroup.counter(MetricNames.LAKE_TIERING_TABLE_FAILURES_TOTAL);
Expand All @@ -293,6 +308,11 @@ private void registerTableMetrics(long tableId, TablePath tablePath) {
tableMetricGroup.gauge(
MetricNames.LAKE_TIERING_TABLE_RECORD_COUNT,
() -> inReadLock(lock, () -> getLastResultField(tableId, r -> r.recordCount)));

// freshness: the user-configured table data freshness interval in milliseconds
tableMetricGroup.gauge(
MetricNames.LAKE_TIERING_TABLE_FRESHNESS,
() -> inReadLock(lock, () -> tableLakeFreshness.getOrDefault(tableId, -1L)));
}

/**
Expand All @@ -316,6 +336,7 @@ public void removeLakeTable(long tableId) {
tableLakeFreshness.remove(tableId);
lastTieringResult.remove(tableId);
currentTieringStartTime.remove(tableId);
pendingEnterTime.remove(tableId);
tableFailureCounters.remove(tableId);
// close and remove the metric group to unregister metrics
tieringMetricGroup.removeTableLakeTieringMetricGroup(tableId);
Expand Down Expand Up @@ -579,10 +600,12 @@ private void doHandleStateChange(long tableId, TieringState targetState) {
// increase tiering epoch and initialize the heartbeat of the tiering table
tableTierEpoch.computeIfPresent(tableId, (t, v) -> v + 1);
pendingTieringTables.add(tableId);
pendingEnterTime.put(tableId, clock.milliseconds());
break;
case Tiering:
liveTieringTableIds.put(tableId, clock.milliseconds());
currentTieringStartTime.put(tableId, clock.milliseconds());
pendingEnterTime.put(tableId, 0L);
break;
case Tiered:
liveTieringTableIds.remove(tableId);
Expand Down
14 changes: 12 additions & 2 deletions website/docs/maintenance/observability/monitor-metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ Some metrics might not be exposed when using other JVM implementations (e.g. IBM
</thead>
<tbody>
<tr>
<th rowspan="25"><strong>coordinator</strong></th>
<th rowspan="27"><strong>coordinator</strong></th>
<td style={{textAlign: 'center', verticalAlign: 'middle' }} rowspan="10">-</td>
<td>activeCoordinatorCount</td>
<td>The number of active CoordinatorServer (only leader) in this cluster.</td>
Expand Down Expand Up @@ -401,7 +401,7 @@ Some metrics might not be exposed when using other JVM implementations (e.g. IBM
<td>Gauge</td>
</tr>
<tr>
<td rowspan="5">lakeTiering_table</td>
<td rowspan="7">lakeTiering_table</td>
<td>tierLag</td>
<td>Time in milliseconds since the last successful tiering operation for this table. For newly registered tables that have never completed a tiering round, the lag is measured from the time the table was registered.</td>
<td>Gauge</td>
Expand All @@ -426,6 +426,16 @@ Some metrics might not be exposed when using other JVM implementations (e.g. IBM
<td>Cumulative total record count of the lake table after the last tiering round. Returns -1 if no tiering has completed yet.</td>
<td>Gauge</td>
</tr>
<tr>
<td>pendingTime</td>
<td>How long (in milliseconds) the table has been waiting in the pending queue for tiering. Returns 0 when the table is not currently pending.</td>
<td>Gauge</td>
</tr>
<tr>
<td>freshness</td>
<td>The user-configured data freshness interval (in milliseconds) for this table. Returns -1 if no freshness value is recorded for the table.</td>
<td>Gauge</td>
</tr>
</tbody>
</table>

Expand Down
Loading