From a9163fdfdd9e3ed54565ea28915b8f8d0ce11e4f Mon Sep 17 00:00:00 2001 From: Chi-Hsuan Huang Date: Mon, 11 May 2026 22:09:18 +0800 Subject: [PATCH 01/18] HDDS-14643. [docs] Fix pre-existing structural bugs in Recon OpenAPI yaml --- .../static/swagger-resources/recon-api.yaml | 72 ++++++++++--------- 1 file changed, 37 insertions(+), 35 deletions(-) diff --git a/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml b/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml index ebaf5e508204..bda0340551f3 100644 --- a/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml +++ b/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml @@ -17,6 +17,7 @@ openapi: 3.0.0 info: title: Ozone Recon REST API + version: v1 license: url: http://www.apache.org/licenses/LICENSE-2.0.html name: Apache 2.0 License @@ -328,9 +329,10 @@ paths: default: 1000 - name: startPrefix in: query - description: Will return keys matching this prefix + description: Will return keys matching this prefix. Must be at bucket level or deeper (e.g. /vol1/bucket1[/...]). 
+ required: false schema: - type: integer + type: string - name: includeFso in: query description: Boolean value to determine whether to include FSO keys or not @@ -1718,17 +1720,17 @@ components: path: /vol1/bucket1/dir1-2 size: 30000 sizeWithReplica: 90000 - isKey": false + isKey: false - key: false path: /vol1/bucket1/dir1-3 size: 30000 sizeWithReplica: 90000 - isKey": false + isKey: false - key: true path: /vol1/bucket1/key1-1 size: 30000 sizeWithReplica: 90000 - isKey": true + isKey: true sizeDirectKey: type: number example: 10000 @@ -1800,36 +1802,36 @@ components: filesystemAvailable: type: number example: 270071111680 - ClusterStorageReport: - type: object - properties: - capacity: - type: number - example: 270429917184 - used: - type: number - example: 358805504 - remaining: - type: number - example: 270071111680 - committed: - type: number - example: 27007111 - minimumFreeSpace: - type: number - example: 20480 - reserved: - type: number - example: 31457280 - filesystemCapacity: - type: number - example: 270461374464 - filesystemUsed: - type: number - example: 390262784 - filesystemAvailable: - type: number - example: 270071111680 + ClusterStorageReport: + type: object + properties: + capacity: + type: number + example: 270429917184 + used: + type: number + example: 358805504 + remaining: + type: number + example: 270071111680 + committed: + type: number + example: 27007111 + minimumFreeSpace: + type: number + example: 20480 + reserved: + type: number + example: 31457280 + filesystemCapacity: + type: number + example: 270461374464 + filesystemUsed: + type: number + example: 390262784 + filesystemAvailable: + type: number + example: 270071111680 ClusterState: type: object properties: From 4d67934b82e12e2787ab82354d4eb79ba2e4f8fc Mon Sep 17 00:00:00 2001 From: Chi-Hsuan Huang Date: Mon, 11 May 2026 22:09:39 +0800 Subject: [PATCH 02/18] HDDS-14643. 
[docs] Add OpenAPI tags for new Recon API groups --- .../ozonedoc/static/swagger-resources/recon-api.yaml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml b/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml index bda0340551f3..64159bf3e008 100644 --- a/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml +++ b/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml @@ -53,6 +53,18 @@ tags: externalDocs: description: Prometheus API docs url: https://prometheus.io/docs/prometheus/latest/querying/api/ + - name: Container Export + description: Async export job lifecycle for unhealthy container metadata. **Admin Only** + - name: Storage Distribution + description: APIs to fetch data about storage distribution across datanodes. **Admin Only** + - name: Pending Deletion + description: APIs to fetch data about pending deletions by component (SCM, OM, or Datanodes). **Admin Only** + - name: Heat Map + description: APIs to fetch read-access heatmap data. **Admin Only**, feature-gated by HeatMapProvider service. + - name: Features + description: APIs to introspect Recon feature state. **Admin Only** + - name: Admin Utilities + description: Administrative actions such as triggering OM DB sync. **Admin Only** paths: /containers: get: From 0a5e5bf423eb2ad2600f6cfaff49012ada3b1c0c Mon Sep 17 00:00:00 2001 From: Chi-Hsuan Huang Date: Mon, 11 May 2026 23:38:32 +0800 Subject: [PATCH 03/18] HDDS-14643. 
[docs] Refresh /containers endpoint docs (params, examples, schema) --- .../docs/content/interface/ReconApi.md | 74 +++++++++-------- .../static/swagger-resources/recon-api.yaml | 79 ++++++++++++++++--- 2 files changed, 111 insertions(+), 42 deletions(-) diff --git a/hadoop-hdds/docs/content/interface/ReconApi.md b/hadoop-hdds/docs/content/interface/ReconApi.md index e2df65d168b6..aa367a588a4e 100644 --- a/hadoop-hdds/docs/content/interface/ReconApi.md +++ b/hadoop-hdds/docs/content/interface/ReconApi.md @@ -96,30 +96,32 @@ Returns all the ContainerMetadata objects. **Returns** -Returns all the KeyMetadata objects for the given ContainerID. - +Returns all the KeyMetadata objects for the given ContainerID. `lastKey` is the final key seen in +this page: pass it back as `prevKey` to continue paginating. + ```json { - "totalCount":7, + "totalCount": 7, + "lastKey": "/vol-1-73141/bucket-3-35816/key-0-43637", "keys": [ { - "Volume":"vol-1-73141", - "Bucket":"bucket-3-35816", - "Key":"key-0-43637", - "DataSize":1000, - "Versions":[0], + "Volume": "vol-1-73141", + "Bucket": "bucket-3-35816", + "Key": "key-0-43637", + "CompletePath": "/vol-1-73141/bucket-3-35816/dir1/dir2/key-0-43637", + "DataSize": 1000, + "Versions": [0], "Blocks": { "0": [ { - "containerID":1, - "localID":105232659753992201 + "containerID": 1, + "localID": 105232659753992201 } ] }, - "CreationTime":"2020-11-18T18:09:17.722Z", - "ModificationTime":"2020-11-18T18:09:30.405Z" - }, - ... + "CreationTime": "2020-11-18T18:09:17.722Z", + "ModificationTime": "2020-11-18T18:09:30.405Z" + } ] } ``` @@ -183,22 +185,26 @@ Returns all the ContainerHistory objects for the given ContainerID. ### GET /api/v1/containers/unhealthy - -**Parameters** -* batchNum (optional) +**Parameters** - The batch number (like "page number") of results to return. - Passing 1, will return records 1 to limit. 2 will return - limit + 1 to 2 * limit, etc. - * limit (optional) - Only returns the limited number of results. 
The default limit is 1000. + Only returns the limited number of results. The default limit is 1000. + +* maxContainerId (optional) + + Upper bound for container IDs (exclusive). When specified, returns containers with IDs less + than this value in descending order. Use it for backward pagination. + +* minContainerId (optional) + + Lower bound for container IDs (exclusive). When `maxContainerId` is not specified, returns + containers with IDs greater than this value in ascending order. Use it for forward pagination. **Returns** -Returns the UnhealthyContainerMetadata objects for all the unhealthycontainers. +Returns the UnhealthyContainerMetadata objects for all the unhealthy containers. ```json { @@ -231,23 +237,27 @@ Returns the UnhealthyContainerMetadata objects for all the unhealthycontainers. ``` ### GET /api/v1/containers/unhealthy/:state - + **Parameters** -* batchNum (optional) - - The batch number (like "page number") of results to return. - Passing 1, will return records 1 to limit. 2 will return - limit + 1 to 2 * limit, etc. - * limit (optional) - Only returns the limited number of results. The default limit is 1000. + Only returns the limited number of results. The default limit is 1000. + +* maxContainerId (optional) + + Upper bound for container IDs (exclusive). When specified, returns containers with IDs less + than this value in descending order. Use it for backward pagination. + +* minContainerId (optional) + + Lower bound for container IDs (exclusive). When `maxContainerId` is not specified, returns + containers with IDs greater than this value in ascending order. Use it for forward pagination. **Returns** Returns the UnhealthyContainerMetadata objects for the containers in the given state. -Possible unhealthy container states are `MISSING`, `MIS_REPLICATED`,`UNDER_REPLICATED`, `OVER_REPLICATED`. +Possible unhealthy container states are `MISSING`, `MIS_REPLICATED`, `UNDER_REPLICATED`, `OVER_REPLICATED`. 
The response structure is same as `/containers/unhealthy`. diff --git a/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml b/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml index 64159bf3e008..efa427c09609 100644 --- a/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml +++ b/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml @@ -72,6 +72,24 @@ paths: - Containers summary: Get all Container Metadata information operationId: getContainerInfo + parameters: + - name: prevKey + in: query + description: | + Returns containers with ID greater than the given prevKey (the prevKey container itself is + skipped). Use 0 to start at the beginning. + required: false + schema: + type: integer + format: int64 + default: 0 + - name: limit + in: query + description: Maximum number of containers to return. + required: false + schema: + type: integer + default: 1000 responses: '200': description: Successful operation @@ -79,6 +97,8 @@ paths: application/json: schema: $ref: '#/components/schemas/ContainerMetadata' + '406': + description: Invalid parameters (negative prevKey or limit). /containers/deleted: get: tags: @@ -142,19 +162,33 @@ paths: summary: Get UnhealthyContainerMetadata for all the unhealthy containers operationId: getUnhealthyContainers parameters: - - name: batchNum + - name: limit in: query - description: Size of the batch for the result. It will give us results from **(limit + 1) to (2 * limit)** + description: Maximum number of unhealthy containers to return. required: false schema: type: integer - - name: limit + default: 1000 + - name: maxContainerId in: query - description: Limit of the number of results returned + description: | + Upper bound for container IDs to include (exclusive). When specified, returns containers + with IDs less than this value in descending order. Use for backward pagination. 
required: false schema: type: integer - default: 1000 + format: int64 + default: 0 + - name: minContainerId + in: query + description: | + Lower bound for container IDs to include (exclusive). When `maxContainerId` is not specified, + returns containers with IDs greater than this value in ascending order. Use for forward pagination. + required: false + schema: + type: integer + format: int64 + default: 0 responses: '200': description: Successful operation @@ -176,19 +210,33 @@ paths: schema: type: string example: MISSING - - name: batchNum + - name: limit in: query - description: Size of the batch for the result. It will give us results from **(limit + 1) to (2 * limit)** + description: Maximum number of unhealthy containers to return. required: false schema: type: integer - - name: limit + default: 1000 + - name: maxContainerId in: query - description: Limit of the number of results returned + description: | + Upper bound for container IDs to include (exclusive). When specified, returns containers + with IDs less than this value in descending order. Use for backward pagination. required: false schema: type: integer - default: 1000 + format: int64 + default: 0 + - name: minContainerId + in: query + description: | + Lower bound for container IDs to include (exclusive). When `maxContainerId` is not specified, + returns containers with IDs greater than this value in ascending order. Use for forward pagination. + required: false + schema: + type: integer + format: int64 + default: 0 responses: '200': description: Successful operation @@ -1131,6 +1179,17 @@ components: misReplicatedCount: type: integer example: 0 + replicaMismatchCount: + type: integer + example: 0 + firstKey: + type: integer + description: Smallest container ID present in this page. Use with `maxContainerId` for backward pagination. + example: 1 + lastKey: + type: integer + description: Largest container ID present in this page. Use as `minContainerId` for the next forward page. 
+ example: 42 containers: type: array items: From edf795890afe178317d543669a677d3d8d9ab1af Mon Sep 17 00:00:00 2001 From: Chi-Hsuan Huang Date: Mon, 11 May 2026 23:38:32 +0800 Subject: [PATCH 04/18] HDDS-14643. [docs] Document /containers/unhealthy/export and /containers/deleted endpoints --- .../docs/content/interface/ReconApi.md | 106 +++++++++ .../static/swagger-resources/recon-api.yaml | 202 +++++++++++++++++- 2 files changed, 307 insertions(+), 1 deletion(-) diff --git a/hadoop-hdds/docs/content/interface/ReconApi.md b/hadoop-hdds/docs/content/interface/ReconApi.md index aa367a588a4e..f73e69128c63 100644 --- a/hadoop-hdds/docs/content/interface/ReconApi.md +++ b/hadoop-hdds/docs/content/interface/ReconApi.md @@ -128,6 +128,8 @@ this page: pass it back as `prevKey` to continue paginating. ### GET /api/v1/containers/missing +> **Deprecated.** Use `/api/v1/containers/unhealthy/MISSING` instead. + **Parameters** * limit (optional) @@ -261,6 +263,75 @@ Possible unhealthy container states are `MISSING`, `MIS_REPLICATED`, `UNDER_REPL The response structure is same as `/containers/unhealthy`. +### GET /api/v1/containers/unhealthy/export + +**Returns** + +Lists every unhealthy-container export job currently tracked by Recon, in any status. +Items are `ExportJob` objects (see schema below). + +```json +[ + { + "jobId": "4f7a8b9c-1234-5678-9abc-def012345678", + "state": "MISSING", + "status": "RUNNING", + "submittedAt": 1718640123456, + "startedAt": 1718640124000, + "completedAt": 0, + "totalRecords": 250, + "estimatedTotal": 1000, + "fileName": "", + "errorMessage": null, + "progressPercent": 25, + "queuePosition": 0, + "downloadCount": 0, + "downloadsRemaining": 3 + } +] +``` + +### POST /api/v1/containers/unhealthy/export + +**Parameters** + +* state (required) + + One of `MISSING`, `MIS_REPLICATED`, `UNDER_REPLICATED`, `OVER_REPLICATED`. + +**Returns** + +Submits a new CSV export job and returns the `ExportJob` with the assigned `jobId`. 
+The job initially has `status: QUEUED`. + +* `400 Bad Request`: `state` is missing or not a valid unhealthy state. +* `429 Too Many Requests`: the export queue is full; retry later. Body: `{ "error": "Too Many Requests", "message": "" }`. + +### GET /api/v1/containers/unhealthy/export/:jobId + +**Returns** + +Returns the current `ExportJob` for the given `jobId`. `404 Not Found` if no job has that id. + +### GET /api/v1/containers/unhealthy/export/:jobId/download + +**Returns** + +Streams the TAR archive produced by the export job. Response `Content-Type` is `application/x-tar` with +a `Content-Disposition: attachment` header carrying the export filename. + +* `404 Not Found`: `jobId` is unknown or the on-disk file was removed. +* `409 Conflict`: the job has not reached `COMPLETED` status yet. +* `429 Too Many Requests`: the per-job download limit has been reached. Body matches `RateLimitedError`. + +### DELETE /api/v1/containers/unhealthy/export/:jobId + +**Returns** + +Cancels the export job. `200 OK` with empty body on success. `404 Not Found` if the job cannot be +cancelled (for example, it has already reached a terminal state). + + ### GET /api/v1/containers/mismatch **Returns** @@ -316,6 +387,41 @@ list of keys mapped to such DELETED state containers. ] ``` +### GET /api/v1/containers/deleted + +**Parameters** + +* limit (optional) + + Maximum number of DELETED containers to return. Default 1000. + +* prevKey (optional) + + Previous container ID to skip. Use the last returned `containerId` to fetch the next page. + Default 0. + +**Returns** + +Returns all DELETED containers in SCM along with their pipeline and replication info. 
+ +```json +[ + { + "containerId": 12, + "pipelineID": { "id": "1202e6bb-b7c1-4a85-8067-61374b069adb" }, + "containerState": "DELETED", + "stateEnterTime": 1716123456789, + "lastUsed": 1716123456789, + "replicationConfig": { + "replicationType": "RATIS", + "replicationFactor": "THREE", + "replicationNodes": 3 + }, + "replicationFactor": "THREE" + } +] +``` + ### GET /api/v1/keys/open diff --git a/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml b/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml index efa427c09609..ae1e741cfff6 100644 --- a/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml +++ b/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml @@ -105,6 +105,22 @@ paths: - Containers summary: Return all DELETED containers in SCM operationId: getSCMDeletedContainers + parameters: + - name: limit + in: query + description: Maximum number of DELETED containers to return. + required: false + schema: + type: integer + default: 1000 + - name: prevKey + in: query + description: Previous container ID to skip. Use 0 to start at the beginning. + required: false + schema: + type: integer + format: int64 + default: 0 responses: 200: description: Successful operation @@ -117,6 +133,8 @@ paths: tags: - Containers summary: Get the MissingContainerMetadata for all missing containers + description: Deprecated. Use `/containers/unhealthy/MISSING` instead. + deprecated: true operationId: getMissingContainers parameters: - name: limit @@ -244,6 +262,115 @@ paths: application/json: schema: $ref: '#/components/schemas/UnhealthyContainerMetadata' + /containers/unhealthy/export: + get: + tags: + - Container Export + summary: List all unhealthy-container export jobs (any status). 
+ operationId: listUnhealthyExportJobs + responses: + '200': + description: Successful operation + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/ExportJob' + post: + tags: + - Container Export + summary: Start an async CSV export job for unhealthy containers in the given state. + operationId: startUnhealthyExport + parameters: + - name: state + in: query + required: true + description: One of **MISSING**, **MIS_REPLICATED**, **UNDER_REPLICATED**, **OVER_REPLICATED**. + schema: + type: string + responses: + '200': + description: Job submitted; returns the ExportJob with assigned jobId. + content: + application/json: + schema: + $ref: '#/components/schemas/ExportJob' + '400': + description: Missing or invalid state parameter. + '429': + description: Too many concurrent export jobs; try again later. + content: + application/json: + schema: + $ref: '#/components/schemas/RateLimitedError' + /containers/unhealthy/export/{jobId}: + get: + tags: + - Container Export + summary: Get the current status of an export job. + operationId: getUnhealthyExportStatus + parameters: + - name: jobId + in: path + required: true + schema: + type: string + responses: + '200': + description: Job found; returns the ExportJob with current status and progress. + content: + application/json: + schema: + $ref: '#/components/schemas/ExportJob' + '404': + description: Job not found. + delete: + tags: + - Container Export + summary: Cancel a queued or running export job. + operationId: cancelUnhealthyExport + parameters: + - name: jobId + in: path + required: true + schema: + type: string + responses: + '200': + description: Cancel request accepted (empty body). + '404': + description: Job not found or already in a terminal state. + /containers/unhealthy/export/{jobId}/download: + get: + tags: + - Container Export + summary: Download the TAR archive for a completed export job. 
+ operationId: downloadUnhealthyExport + parameters: + - name: jobId + in: path + required: true + schema: + type: string + responses: + '200': + description: TAR archive stream. Content-Disposition includes the export filename. + content: + application/x-tar: + schema: + type: string + format: binary + '404': + description: Job or export file not found. + '409': + description: Job has not reached COMPLETED status yet. + '429': + description: Maximum download limit for this job has been reached. + content: + application/json: + schema: + $ref: '#/components/schemas/RateLimitedError' /containers/mismatch: get: tags: @@ -1034,7 +1161,7 @@ components: properties: containerId: type: integer - pipelineId: + pipelineID: type: object properties: id: @@ -2285,3 +2412,76 @@ components: example: - 1599159384.455 - "5" + ExportJob: + type: object + properties: + jobId: + type: string + example: 4f7a8b9c-1234-5678-9abc-def012345678 + state: + type: string + description: The unhealthy-container state being exported (MISSING, MIS_REPLICATED, UNDER_REPLICATED, OVER_REPLICATED). + example: MISSING + status: + type: string + enum: [QUEUED, RUNNING, COMPLETED, FAILED] + example: RUNNING + submittedAt: + type: integer + description: Epoch millis when the job was submitted. + example: 1718640123456 + startedAt: + type: integer + description: Epoch millis when the worker started the job. 0 while still queued. + example: 1718640124000 + completedAt: + type: integer + description: Epoch millis when the job reached COMPLETED or FAILED. 0 while not yet terminal. + example: 0 + totalRecords: + type: integer + description: Records written so far. + example: 250 + estimatedTotal: + type: integer + description: Estimated total records for progress reporting. `-1` when unknown. + example: 1000 + fileName: + type: string + description: Name of the export TAR file (no path). Empty until COMPLETED. 
+ example: unhealthy_MISSING_4f7a8b9c.tar + errorMessage: + type: string + nullable: true + description: Populated only when status is FAILED. + progressPercent: + type: integer + description: Derived from totalRecords / estimatedTotal. 0 when estimatedTotal is unknown. + example: 25 + queuePosition: + type: integer + description: 0 for jobs that are not QUEUED. Otherwise 1-based position in the queue. + example: 0 + downloadCount: + type: integer + example: 0 + downloadsRemaining: + type: integer + example: 3 + maxDownloads: + type: integer + description: Maximum number of times this export can be downloaded. + example: 3 + downloadAllowed: + type: boolean + description: Whether the export currently has at least one download remaining. + example: true + RateLimitedError: + type: object + properties: + error: + type: string + example: Too Many Requests + message: + type: string + example: This export has reached its maximum download limit of 3. From d63c489895e9fc31649ccc7341a5eb128d4feef3 Mon Sep 17 00:00:00 2001 From: Chi-Hsuan Huang Date: Mon, 11 May 2026 23:38:41 +0800 Subject: [PATCH 05/18] HDDS-14643. [docs] Refresh /datanodes endpoint docs (example, /remove shape, schema) --- .../docs/content/interface/ReconApi.md | 105 +++++++++++------- .../static/swagger-resources/recon-api.yaml | 55 +++++---- 2 files changed, 97 insertions(+), 63 deletions(-) diff --git a/hadoop-hdds/docs/content/interface/ReconApi.md b/hadoop-hdds/docs/content/interface/ReconApi.md index f73e69128c63..7f04f4a9e86f 100644 --- a/hadoop-hdds/docs/content/interface/ReconApi.md +++ b/hadoop-hdds/docs/content/interface/ReconApi.md @@ -1014,35 +1014,41 @@ No parameters. Returns all the datanodes in the cluster. 
```json - { - "totalCount": 4, - "datanodes": [{ - "uuid": "f8f8cb45-3ab2-4123", - "hostname": "localhost-1", - "state": "HEALTHY", - "lastHeartbeat": 1605738400544, - "storageReport": { - "capacity": 270429917184, - "used": 358805504, - "remaining": 119648149504 - }, - "pipelines": [{ - "pipelineID": "b9415b20-b9bd-4225", - "replicationType": "RATIS", - "replicationFactor": 3, - "leaderNode": "localhost-2" - }, { - "pipelineID": "3bf4a9e9-69cc-4d20", - "replicationType": "RATIS", - "replicationFactor": 1, - "leaderNode": "localhost-1" - }], - "containers": 17, - "leaderCount": 1 - }, - ... - ] - } +{ + "totalCount": 4, + "datanodes": [ + { + "uuid": "f8f8cb45-3ab2-4123", + "hostname": "localhost-1", + "state": "HEALTHY", + "opState": "IN_SERVICE", + "lastHeartbeat": 1605738400544, + "storageReport": { + "capacity": 270429917184, + "used": 358805504, + "remaining": 270071111680, + "committed": 27007111, + "reserved": 31457280, + "minimumFreeSpace": 20480, + "filesystemCapacity": 270461374464, + "filesystemUsed": 390262784, + "filesystemAvailable": 270071111680 + }, + "pipelines": [ + { "pipelineID": "b9415b20-b9bd-4225", "replicationType": "RATIS", "replicationFactor": 3, "leaderNode": "localhost-2" }, + { "pipelineID": "3bf4a9e9-69cc-4d20", "replicationType": "RATIS", "replicationFactor": 1, "leaderNode": "localhost-1" } + ], + "containers": 17, + "openContainers": 4, + "leaderCount": 1, + "version": "2.0.0", + "setupTime": 1605700000000, + "revision": "abcdef1", + "layoutVersion": 6, + "networkLocation": "/default-rack" + } + ] +} ``` ### PUT /api/v1/datanodes/remove @@ -1054,30 +1060,43 @@ Returns all the datanodes in the cluster. ```json [ "50ca4c95-2ef3-4430-b944-97d2442c3daf" -] +] ``` **Returns** -Returns the list of datanodes which are removed successfully and list of datanodes which were not found. +Returns a `datanodesResponseMap` keyed by the outcome category. Each value is a `DatanodesResponse` +(same shape as `GET /api/v1/datanodes`). 
Categories that have no entries for a given request are +omitted (not present as empty arrays). + +* `removedDatanodes`: successfully removed. +* `failedDatanodes`: pre-checks failed (e.g. node is not DEAD, or still has open containers/pipelines). Includes `totalCount` and a per-uuid `errors` map describing the failure reason; `datanodes` is empty. +* `notFoundDatanodes`: uuid did not match any known datanode. ```json { - "removedNodes": { - "totalCount": 1, - "datanodes": [ - { - "uuid": "50ca4c95-2ef3-4430-b944-97d2442c3daf", - "hostname": "ozone-datanode-4.ozone_default", - "state": "DEAD", - "pipelines": null + "datanodesResponseMap": { + "removedDatanodes": { + "totalCount": 1, + "datanodes": [ + { + "uuid": "50ca4c95-2ef3-4430-b944-97d2442c3daf", + "hostname": "ozone-datanode-4.ozone_default", + "state": "DEAD" + } + ] + }, + "failedDatanodes": { + "totalCount": 1, + "datanodes": [], + "errors": { + "60ca4c95-...": "Open Containers/Pipelines" } - ], - "message": "Success" + } } -} +} ``` - + ## Pipelines ### GET /api/v1/pipelines diff --git a/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml b/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml index ae1e741cfff6..b093e6654172 100644 --- a/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml +++ b/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml @@ -2081,8 +2081,6 @@ components: items: type: object properties: - buildDate: - type: string layoutVersion: type: integer networkLocation: @@ -2134,34 +2132,51 @@ components: containers: type: integer example: 17 + openContainers: + type: integer + example: 4 leaderCount: type: integer example: 1 RemovedDatanodesResponse: type: object + description: | + Wraps the result of a remove-datanodes request. `datanodesResponseMap` is keyed by outcome + category: `removedDatanodes`, `failedDatanodes`, `notFoundDatanodes`. 
Categories with no + entries for this request are omitted (not empty arrays). properties: datanodesResponseMap: type: object properties: removedDatanodes: - type: object - properties: - totalCount: - type: integer - datanodes: - type: array - items: - type: object - properties: - uuid: - type: string - hostname: - type: string - state: - type: string - pipelines: - type: string - nullable: true + $ref: '#/components/schemas/DatanodesResponseEntry' + failedDatanodes: + description: Pre-check failures. `datanodes` is empty; use `totalCount` and `errors`. + allOf: + - $ref: '#/components/schemas/DatanodesResponseEntry' + notFoundDatanodes: + $ref: '#/components/schemas/DatanodesResponseEntry' + DatanodesResponseEntry: + type: object + properties: + totalCount: + type: integer + datanodes: + type: array + items: + type: object + properties: + uuid: + type: string + hostname: + type: string + state: + type: string + errors: + type: object + additionalProperties: + type: string + description: Only present on `failedDatanodes`. Maps uuid to a human-readable failure reason. DatanodesDecommissionInfo: type: object properties: From b0f2df44d015da7e0c9f83e663a21bfc3c16d1e6 Mon Sep 17 00:00:00 2001 From: Chi-Hsuan Huang Date: Mon, 11 May 2026 22:15:59 +0800 Subject: [PATCH 06/18] HDDS-14643. [docs] Document /datanodes/decommission/info[/datanode] endpoints --- .../docs/content/interface/ReconApi.md | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/hadoop-hdds/docs/content/interface/ReconApi.md b/hadoop-hdds/docs/content/interface/ReconApi.md index 7f04f4a9e86f..3f0dc574be81 100644 --- a/hadoop-hdds/docs/content/interface/ReconApi.md +++ b/hadoop-hdds/docs/content/interface/ReconApi.md @@ -1097,6 +1097,62 @@ omitted (not present as empty arrays). } ``` +### GET /api/v1/datanodes/decommission/info + +**Parameters** + +No parameters. + +**Returns** + +Returns info for every datanode currently in the `DECOMMISSIONING` state. 
Each entry wraps the +datanode details, the per-state container list, and decommission metrics from the SCM JMX bean +`Hadoop:service=StorageContainerManager,name=NodeDecommissionMetrics`. + +```json +{ + "DatanodesDecommissionInfo": [ + { + "datanodeDetails": { + "uuid": "f8f8cb45-3ab2-4123", + "hostName": "ozone-datanode-3", + "ipAddress": "10.0.0.13", + "persistedOpState": "DECOMMISSIONING" + }, + "metrics": { + "decommissionStartTime": "2024-05-01T10:00:00Z", + "numOfUnclosedContainers": 2, + "numOfUnclosedPipelines": 0, + "numOfUnderReplicatedContainers": 1 + }, + "containers": { + "OPEN": ["#1234"], + "CLOSED": ["#1235", "#1236"] + } + } + ] +} +``` + +### GET /api/v1/datanodes/decommission/info/datanode + +Returns info for a single decommissioning datanode. Provide either `uuid` or `ipAddress`. If both +are passed, `uuid` wins. Omitting both returns an error. + +**Parameters** + +* uuid (optional) + + UUID of the decommissioning datanode. + +* ipAddress (optional) + + IP address of the decommissioning datanode. Used when `uuid` is not provided. + +**Returns** + +Same shape as `/api/v1/datanodes/decommission/info`, but the array contains at most one entry. + ## Pipelines ### GET /api/v1/pipelines From cf9a93f9d01fb25c48bb487497c664be59999657 Mon Sep 17 00:00:00 2001 From: Chi-Hsuan Huang Date: Mon, 11 May 2026 22:16:29 +0800 Subject: [PATCH 07/18] HDDS-14643. [docs] Refresh /clusterState example with full ClusterStateResponse fields --- .../docs/content/interface/ReconApi.md | 41 ++++++++++++------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/hadoop-hdds/docs/content/interface/ReconApi.md b/hadoop-hdds/docs/content/interface/ReconApi.md index 3f0dc574be81..285b8bf3d63c 100644 --- a/hadoop-hdds/docs/content/interface/ReconApi.md +++ b/hadoop-hdds/docs/content/interface/ReconApi.md @@ -877,20 +877,33 @@ No parameters. Returns a summary of the current state of the Ozone cluster. 
```json - { - "pipelines": 5, - "totalDatanodes": 4, - "healthyDatanodes": 4, - "storageReport": { - "capacity": 1081719668736, - "used": 1309212672, - "remaining": 597361258496 - }, - "containers": 26, - "volumes": 6, - "buckets": 26, - "keys": 25 - } +{ + "pipelines": 5, + "totalDatanodes": 4, + "healthyDatanodes": 4, + "storageReport": { + "capacity": 1081719668736, + "used": 1309212672, + "remaining": 597361258496, + "committed": 27007111, + "reserved": 31457280, + "minimumFreeSpace": 20480, + "filesystemCapacity": 1081730000000, + "filesystemUsed": 1310000000, + "filesystemAvailable": 597361258496 + }, + "containers": 26, + "missingContainers": 0, + "openContainers": 5, + "deletedContainers": 1, + "volumes": 6, + "buckets": 26, + "keys": 25, + "keysPendingDeletion": 0, + "deletedDirs": 0, + "scmServiceId": "scmservice", + "omServiceId": "omservice" +} ``` ## Volumes (admin only) From c07989e589b9669d8de5016738623ed84d58f176 Mon Sep 17 00:00:00 2001 From: Chi-Hsuan Huang Date: Mon, 11 May 2026 23:39:16 +0800 Subject: [PATCH 08/18] HDDS-14643. [docs] Refresh /keys/open and /keys/deletePending examples --- .../docs/content/interface/ReconApi.md | 175 ++++++++---------- 1 file changed, 80 insertions(+), 95 deletions(-) diff --git a/hadoop-hdds/docs/content/interface/ReconApi.md b/hadoop-hdds/docs/content/interface/ReconApi.md index 285b8bf3d63c..70a9a0e3e3be 100644 --- a/hadoop-hdds/docs/content/interface/ReconApi.md +++ b/hadoop-hdds/docs/content/interface/ReconApi.md @@ -436,55 +436,63 @@ Returns all DELETED containers in SCM along with their pipeline and replication Only returns the limited number of results. The default limit is 1000. +* startPrefix (optional) + + Restricts the listing to keys matching this prefix. Must be at bucket level or deeper + (e.g. `/vol1/bucket1` or `/vol1/bucket1/dir1`); shallower prefixes return `400 Bad Request`. + +* includeFso (optional) + + Boolean, default `false`. Include keys/files from FSO buckets in the result. 
+ +* includeNonFso (optional) + + Boolean, default `false`. Include keys/files from non-FSO (OBS / LEGACY) buckets. + +If neither `includeFso` nor `includeNonFso` is `true`, the response will be empty. + **Returns** -Returns set of keys/files which are open. +Returns set of keys/files which are open. FSO and non-FSO keys are reported in separate arrays. ```json { "lastKey": "/vol1/fso-bucket/dir1/dir2/file2", - "replicatedTotal": 13824, - "unreplicatedTotal": 4608, - "entities": [ + "replicatedDataSize": 13824, + "unreplicatedDataSize": 4608, + "status": "OK", + "fso": [ { - "path": "/vol1/bucket1/key1", - "keyState": "Open", + "key": "/-9223372036854775552/-9223372036854774016/file1", + "path": "/vol1/fso-bucket/dir1/file1", "inStateSince": 1667564193026, "size": 1024, "replicatedSize": 3072, - "unreplicatedSize": 1024, - "replicationType": "RATIS", - "replicationFactor": "THREE" - }, - { - "path": "/vol1/bucket1/key2", - "keyState": "Open", - "inStateSince": 1667564193026, - "size": 512, - "replicatedSize": 1536, - "unreplicatedSize": 512, - "replicationType": "RATIS", - "replicationFactor": "THREE" - }, + "replicationInfo": { + "replicationFactor": "THREE", + "requiredNodes": 3, + "replicationType": "RATIS" + }, + "creationTime": 1667564000000, + "modificationTime": 1667564193026, + "isKey": true + } + ], + "nonFSO": [ { - "path": "/vol1/fso-bucket/dir1/file1", - "keyState": "Open", + "key": "/vol1/bucket1/key1", + "path": "/vol1/bucket1/key1", "inStateSince": 1667564193026, "size": 1024, "replicatedSize": 3072, - "unreplicatedSize": 1024, - "replicationType": "RATIS", - "replicationFactor": "THREE" - }, - { - "path": "/vol1/fso-bucket/dir1/dir2/file2", - "keyState": "Open", - "inStateSince": 1667564193026, - "size": 2048, - "replicatedSize": 6144, - "unreplicatedSize": 2048, - "replicationType": "RATIS", - "replicationFactor": "THREE" + "replicationInfo": { + "replicationFactor": "THREE", + "requiredNodes": 3, + "replicationType": "RATIS" + }, + "creationTime": 
1667564000000, + "modificationTime": 1667564193026, + "isKey": true } ] } @@ -504,45 +512,37 @@ Returns set of keys/files which are open. Only returns the limited number of results. The default limit is 1000. +* startPrefix (optional) + + Restricts the listing to keys matching this prefix. Must be at bucket level or deeper + (e.g. `/vol1/bucket1` or `/vol1/bucket1/dir1`); shallower prefixes return `400 Bad Request`. + **Returns** -Returns set of keys/files pending for deletion. +Returns the set of keys/files pending deletion, paired with aggregated size totals. Each item in +`deletedKeyInfo` is a `RepeatedOmKeyInfo` (a wrapper around one or more `OmKeyInfo` entries). ```json { "lastKey": "sampleVol/bucketOne/key_one", - "replicatedTotal": -1530804718628866300, - "unreplicatedTotal": -1530804718628866300, - "deletedkeyinfo": [ + "replicatedDataSize": 600000, + "unreplicatedDataSize": 200000, + "deletedKeyInfo": [ { "omKeyInfoList": [ { - "metadata": {}, - "objectID": 0, - "updateID": 0, - "parentObjectID": 0, "volumeName": "sampleVol", "bucketName": "bucketOne", "keyName": "key_one", - "dataSize": -1530804718628866300, - "keyLocationVersions": [], - "creationTime": 0, - "modificationTime": 0, + "dataSize": 200000, + "replicatedSize": 600000, "replicationConfig": { - "replicationFactor": "ONE", - "requiredNodes": 1, - "replicationType": "STANDALONE" + "replicationFactor": "THREE", + "requiredNodes": 3, + "replicationType": "RATIS" }, - "fileChecksum": null, - "fileName": "key_one", - "acls": [], - "path": "0/key_one", - "file": false, - "latestVersionLocations": null, - "replicatedSize": -1530804718628866300, - "fileEncryptionInfo": null, - "objectInfo": "OMKeyInfo{volume='sampleVol', bucket='bucketOne', key='key_one', dataSize='-1530804718628866186', creationTime='0', objectID='0', parentID='0', replication='STANDALONE/ONE', fileChecksum='null}", - "updateIDset": false + "creationTime": 1717000000000, + "modificationTime": 1717100000000 } ] } @@ -567,45 +567,30 @@ 
Returns set of keys/files pending for deletion. **Returns** - Returns set of directories pending for deletion. +Returns the set of directories pending for deletion. Each entry in `deletedDirInfo` is a +`KeyEntityInfo` describing one pending-delete directory (not a `RepeatedOmKeyInfo` like +`/keys/deletePending`). ```json { - "lastKey": "vol1/bucket1/bucket1/dir1", - "replicatedTotal": -1530804718628866300, - "unreplicatedTotal": -1530804718628866300, - "deletedkeyinfo": [ + "lastKey": "/vol1/bucket1/dir1", + "replicatedDataSize": 13824, + "unreplicatedDataSize": 4608, + "deletedDirInfo": [ { - "omKeyInfoList": [ - { - "metadata": {}, - "objectID": 0, - "updateID": 0, - "parentObjectID": 0, - "volumeName": "sampleVol", - "bucketName": "bucketOne", - "keyName": "key_one", - "dataSize": -1530804718628866300, - "keyLocationVersions": [], - "creationTime": 0, - "modificationTime": 0, - "replicationConfig": { - "replicationFactor": "ONE", - "requiredNodes": 1, - "replicationType": "STANDALONE" - }, - "fileChecksum": null, - "fileName": "key_one", - "acls": [], - "path": "0/key_one", - "file": false, - "latestVersionLocations": null, - "replicatedSize": -1530804718628866300, - "fileEncryptionInfo": null, - "objectInfo": "OMKeyInfo{volume='sampleVol', bucket='bucketOne', key='key_one', dataSize='-1530804718628866186', creationTime='0', objectID='0', parentID='0', replication='STANDALONE/ONE', fileChecksum='null}", - "updateIDset": false - } - ] + "key": "/-9223372036854775552/-9223372036854774016/dir1", + "path": "/vol1/bucket1/dir1", + "inStateSince": 1717000000000, + "size": 4608, + "replicatedSize": 13824, + "replicationInfo": { + "replicationFactor": "THREE", + "requiredNodes": 3, + "replicationType": "RATIS" + }, + "creationTime": 1716900000000, + "modificationTime": 1716999999999, + "isKey": false } ], "status": "OK" From a5b964224f0ac5f00d551def8ddff47bb2f39949 Mon Sep 17 00:00:00 2001 From: Chi-Hsuan Huang Date: Mon, 11 May 2026 23:39:28 +0800 Subject: [PATCH 
09/18] HDDS-14643. [docs] Document new /keys summary and listKeys endpoints --- .../docs/content/interface/ReconApi.md | 120 ++++++++++++++ .../static/swagger-resources/recon-api.yaml | 149 ++++++++++++++++++ 2 files changed, 269 insertions(+) diff --git a/hadoop-hdds/docs/content/interface/ReconApi.md b/hadoop-hdds/docs/content/interface/ReconApi.md index 70a9a0e3e3be..0fac12aefc64 100644 --- a/hadoop-hdds/docs/content/interface/ReconApi.md +++ b/hadoop-hdds/docs/content/interface/ReconApi.md @@ -498,6 +498,36 @@ Returns set of keys/files which are open. FSO and non-FSO keys are reported in s } ``` +### GET /api/v1/keys/open/summary + +**Returns** + +Returns a flat summary of all currently-open keys across the cluster. + +```json +{ + "totalOpenKeys": 8, + "totalReplicatedDataSize": 90000, + "totalUnreplicatedDataSize": 30000 +} +``` + +### GET /api/v1/keys/open/mpu/summary + +**Returns** + +Returns a flat summary of all currently-open multipart-upload keys across the cluster. Note that +the unreplicated total is reported as `totalDataSize` (not `totalUnreplicatedDataSize`): the +naming differs from `/keys/open/summary`. + +```json +{ + "totalOpenMPUKeys": 2, + "totalReplicatedDataSize": 90000, + "totalDataSize": 30000 +} +``` + ### GET /api/v1/keys/deletePending @@ -597,6 +627,96 @@ Returns the set of directories pending for deletion. Each entry in `deletedDirIn } ``` +### GET /api/v1/keys/deletePending/summary + +**Returns** + +Returns a flat summary of all keys pending deletion across the cluster. + +```json +{ + "totalDeletedKeys": 8, + "totalReplicatedDataSize": 90000, + "totalUnreplicatedDataSize": 30000 +} +``` + +### GET /api/v1/keys/deletePending/dirs/summary + +**Returns** + +Returns the total count of directories pending deletion. + +```json +{ + "totalDeletedDirectories": 5 +} +``` + +### GET /api/v1/keys/listKeys + +**Parameters** + +* startPrefix (required) + + Bucket-level or deeper prefix (e.g. `/vol1/bucket1` or `/vol1/bucket1/dir1`). 
Shallower prefixes + return `400 Bad Request`. + +* replicationType (optional) + + Filter by replication type (e.g. `RATIS`, `EC`). + +* creationDate (optional) + + Filter by creation date; only keys created on or after this date are returned. + +* keySize (optional) + + Filter to keys with data size at least this many bytes. Default 0. + +* prevKey (optional) + + Pagination cursor. Pass back the `lastKey` from the previous response to continue iteration. + +* limit (optional) + + Maximum number of keys to return. Default 1000. + +**Returns** + +Returns committed keys (and files in FSO buckets) under the given prefix. + +* `200 OK` with a `ListKeysResponse` body. +* `204 No Content` when no keys matched the given filters. +* `400 Bad Request` when `startPrefix` is missing or shallower than bucket level. +* `503 Service Unavailable` while Recon is still bootstrapping OM DB; response body status is `INITIALIZING`. + +```json +{ + "status": "OK", + "path": "/vol1/bucket1", + "replicatedDataSize": 600000, + "unReplicatedDataSize": 200000, + "lastKey": "/vol1/bucket1/dir1/file42", + "keys": [ + { + "key": "/vol1/bucket1/dir1/file42", + "path": "/vol1/bucket1/dir1/file42", + "size": 1048576, + "replicatedSize": 3145728, + "replicationInfo": { + "replicationFactor": "THREE", + "requiredNodes": 3, + "replicationType": "RATIS" + }, + "creationTime": 1717000000000, + "modificationTime": 1717100000000, + "isKey": true + } + ] +} +``` + ## Blocks Metadata (admin only) ### GET /api/v1/blocks/deletePending diff --git a/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml b/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml index b093e6654172..aacc5d97e84f 100644 --- a/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml +++ b/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml @@ -554,6 +554,19 @@ paths: application/json: schema: $ref: '#/components/schemas/OpenKeysSummary' + 
/keys/open/mpu/summary: + get: + tags: + - Keys + summary: Returns the summary of all open multipart-upload keys + operationId: getOpenMPUKeySummary + responses: + '200': + description: Successful operation + content: + application/json: + schema: + $ref: '#/components/schemas/OpenMPUKeysSummary' /keys/deletePending: get: @@ -643,6 +656,70 @@ paths: properties: totalDeletedDirectories: type: integer + /keys/listKeys: + get: + tags: + - Keys + summary: List committed keys under a prefix with optional filters. + operationId: listKeys + parameters: + - name: startPrefix + in: query + required: true + description: | + Bucket-level or deeper prefix (e.g. `/vol1/bucket1` or `/vol1/bucket1/dir1`). + Shallower prefixes return `400 Bad Request`. + schema: + type: string + - name: replicationType + in: query + required: false + description: Filter by replication type (e.g. `RATIS`, `EC`). + schema: + type: string + - name: creationDate + in: query + required: false + description: Filter by creation date (only keys created on or after this date are returned). + schema: + type: string + - name: keySize + in: query + required: false + description: Filter to keys with data size at least this many bytes. + schema: + type: integer + default: 0 + - name: prevKey + in: query + required: false + description: Pagination cursor. Pass back the `lastKey` from the previous response. + schema: + type: string + - name: limit + in: query + required: false + description: Maximum number of keys to return. + schema: + type: integer + default: 1000 + responses: + '200': + description: Successful operation + content: + application/json: + schema: + $ref: '#/components/schemas/ListKeysResponse' + '204': + description: No keys matched the given filters. + '400': + description: Missing or shallower-than-bucket `startPrefix`. + '503': + description: Recon is still bootstrapping OM DB; retry later. Response status is `INITIALIZING`. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/ListKeysResponse' /containers/{id}/keys: get: tags: @@ -1512,6 +1589,18 @@ components: type: integer totalOpenKeys: type: integer + OpenMPUKeysSummary: + type: object + description: | + Note that the unreplicated total is reported as `totalDataSize` (not + `totalUnreplicatedDataSize`). This naming differs from `OpenKeysSummary`. + properties: + totalOpenMPUKeys: + type: integer + totalReplicatedDataSize: + type: integer + totalDataSize: + type: integer OpenKeys: type: object required: ['lastKey', 'replicatedDataSize', 'unreplicatedDataSize', 'status'] @@ -1730,6 +1819,66 @@ components: type: integer localID: type: integer + ListKeysResponse: + type: object + properties: + status: + type: string + example: OK + description: One of `OK`, `INITIALIZING`. `INITIALIZING` accompanies a 503 response while Recon is still bootstrapping OM DB. + path: + type: string + description: The startPrefix that was queried. + example: /vol1/bucket1 + replicatedDataSize: + type: integer + example: 600000 + unReplicatedDataSize: + type: integer + example: 200000 + lastKey: + type: string + description: Pagination cursor. Pass back as `prevKey` for the next page. + example: /vol1/bucket1/dir1/file42 + keys: + type: array + items: + type: object + properties: + key: + type: string + description: Internal table key (`/volumeId/bucketId/parentId/keyName` for FSO buckets). + path: + type: string + description: Human-readable full path. 
+ example: /vol1/bucket1/dir1/file42 + size: + type: integer + example: 1048576 + replicatedSize: + type: integer + example: 3145728 + replicationInfo: + type: object + properties: + replicationFactor: + type: string + example: THREE + requiredNodes: + type: integer + example: 3 + replicationType: + type: string + example: RATIS + creationTime: + type: integer + example: 1717000000000 + modificationTime: + type: integer + example: 1717100000000 + isKey: + type: boolean + example: true DeletePendingKeys: type: object properties: From 2f583008e7f07ac8eea128ccdf0b1f8566c0ac72 Mon Sep 17 00:00:00 2001 From: Chi-Hsuan Huang Date: Mon, 11 May 2026 23:39:28 +0800 Subject: [PATCH 10/18] HDDS-14643. [docs] Document Recon /storageDistribution and /pendingDeletion endpoints --- .../docs/content/interface/ReconApi.md | 124 +++++++- .../static/swagger-resources/recon-api.yaml | 297 ++++++++++++++++++ 2 files changed, 420 insertions(+), 1 deletion(-) diff --git a/hadoop-hdds/docs/content/interface/ReconApi.md b/hadoop-hdds/docs/content/interface/ReconApi.md index 0fac12aefc64..a9ac249ffd63 100644 --- a/hadoop-hdds/docs/content/interface/ReconApi.md +++ b/hadoop-hdds/docs/content/interface/ReconApi.md @@ -1433,4 +1433,126 @@ Example: /api/v1/metrics/query?query=ratis_leader_election_electionCount } } ``` - + +## Storage Distribution (admin only) + +### GET /api/v1/storageDistribution + +**Parameters** + +No parameters. + +**Returns** + +Aggregated storage capacity distribution across the cluster, including the global storage hierarchy +(filesystem capacity, Ozone capacity, used/free/reserved/committed space), namespace totals, a +breakdown of used space (open vs finalized), and per-datanode storage reports. + +`500 Internal Server Error` (text/plain body) is returned if the report cannot be produced. 
+ +```json +{ + "globalStorage": { + "totalFileSystemCapacity": 270461374464, + "totalReservedSpace": 31457280, + "totalOzoneCapacity": 270429917184, + "totalOzoneUsedSpace": 358805504, + "totalOzoneFreeSpace": 270071111680, + "totalOzoneCommittedSpace": 27007111, + "totalMinimumFreeSpace": 20480 + }, + "globalNamespace": { + "totalUsedSpace": 500000000, + "totalKeys": 10000 + }, + "usedSpaceBreakdown": { + "openKeyBytes": { + "openKeyAndFileBytes": 13824, + "multipartOpenKeyBytes": 4096, + "totalOpenKeyBytes": 17920 + }, + "finalizedKeyBytes": 450000000 + }, + "dataNodeUsage": [ + { + "datanodeUuid": "841be80f-0454-47df-b676", + "hostName": "ozone-datanode-1", + "capacity": 270429917184, + "used": 358805504, + "remaining": 270071111680, + "committed": 27007111, + "minimumFreeSpace": 20480, + "reserved": 31457280, + "filesystemCapacity": 270461374464, + "filesystemUsed": 390262784, + "filesystemAvailable": 270071111680 + } + ] +} +``` + +### GET /api/v1/storageDistribution/download + +**Parameters** + +No parameters. + +**Returns** + +Triggers or polls a background per-datanode metrics collection. The response varies by collection +state: + +* `200 OK` (`text/csv`) when collection is FINISHED. The CSV columns are HostName, Datanode UUID, + Filesystem Capacity, Filesystem Used Space, Filesystem Remaining Space, Ozone Capacity, Ozone Used + Space, Ozone Remaining Space, PreAllocated Container Space, Reserved Space, Minimum Free Space, + Pending Block Size. A `Content-Disposition: attachment` header carries the file name. +* `202 Accepted` (`application/json`, body matches `DataNodeMetricsServiceResponse`) when collection + is NOT_STARTED or IN_PROGRESS. Poll the endpoint again until status is FINISHED. +* `500 Internal Server Error` (`text/plain`) if collection is marked FINISHED but the metrics data + is missing. + +## Pending Deletion (admin only) + +### GET /api/v1/pendingDeletion + +Returns pending-deletion statistics for one of the three Ozone components. 
+ +**Parameters** + +* component (required) + + One of `scm`, `om`, `dn`. Selects the source whose pending-deletion data should be returned. + +* limit (optional) + + Maximum number of per-datanode entries to return. Only applies when `component=dn`. Must be at + least 1. + +**Returns** + +The response body depends on `component`: + +* `component=scm` + * `200 OK` with a `ScmPendingDeletion` object (`totalBlocksize`, `totalReplicatedBlockSize`, + `totalBlocksCount`). + * `204 No Content` if SCM has no pending-deletion summary yet. +* `component=om` + * `200 OK` with a map keyed by category (typical keys: `pendingDirectorySize`, + `pendingKeySize`). Values are byte counts. +* `component=dn` + * `200 OK` with a `DataNodeMetricsServiceResponse` body when the background metrics collection + has FINISHED. + * `202 Accepted` with the same shape while collection is NOT_STARTED or IN_PROGRESS; poll until + `status` becomes `FINISHED`. + +`400 Bad Request` (text/plain) is returned when `component` is missing/invalid, or when +`component=dn` and `limit < 1`. 
+ +```json +{ + "totalBlocksize": 10485760, + "totalReplicatedBlockSize": 31457280, + "totalBlocksCount": 500 +} +``` + diff --git a/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml b/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml index aacc5d97e84f..1f7ecff429f7 100644 --- a/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml +++ b/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml @@ -1099,6 +1099,118 @@ paths: application/json: schema: $ref: '#/components/schemas/MetricsQuery' + /storageDistribution: + get: + tags: + - Storage Distribution + summary: Retrieves storage capacity distribution across datanodes including global storage, namespace, and used space breakdown + operationId: getStorageDistribution + responses: + '200': + description: Successful Operation + content: + application/json: + schema: + $ref: '#/components/schemas/StorageCapacityDistributionResponse' + '500': + description: Internal server error while retrieving storage distribution + content: + text/plain: + schema: + type: string + /pendingDeletion: + get: + tags: + - Pending Deletion + summary: Returns pending deletion information for the specified component (scm, om, or dn) + operationId: getPendingDeletionByComponent + description: | + Returns pending deletion data for a specific component: + - **scm**: Returns block-level pending deletion stats from the Storage Container Manager. + - **om**: Returns a map of pending deletion sizes (pendingDirectorySize, pendingKeySize) from the Ozone Manager. + - **dn**: Triggers or polls a background metrics collection task across all datanodes. Returns **HTTP 202** if collection is in progress, or **HTTP 200** with per-datanode pending block sizes if finished. + parameters: + - name: component + in: query + description: Component to query. One of `scm`, `om`, or `dn`. 
+ example: scm + required: true + schema: + type: string + enum: + - scm + - om + - dn + - name: limit + in: query + description: Maximum number of datanode results to return (only applicable when component=dn). + example: 10 + required: false + schema: + type: integer + minimum: 1 + responses: + '200': + description: | + Successful Operation. Response schema depends on the `component` parameter: + - **scm**: `ScmPendingDeletion` + - **om**: `OmPendingDeletion` + - **dn**: `DataNodeMetricsServiceResponse` (only when collection is FINISHED) + content: + application/json: + schema: + oneOf: + - $ref: '#/components/schemas/ScmPendingDeletion' + - $ref: '#/components/schemas/OmPendingDeletion' + - $ref: '#/components/schemas/DataNodeMetricsServiceResponse' + '202': + description: Datanode metrics collection is still in progress or not yet started (only for component=dn). + content: + application/json: + schema: + $ref: '#/components/schemas/DataNodeMetricsServiceResponse' + '204': + description: No SCM pending-deletion summary available (only for component=scm). + '400': + description: | + Missing/invalid `component` (must be one of `scm`, `om`, `dn`), or `limit` is less than 1 when `component=dn`. + content: + text/plain: + schema: + type: string + /storageDistribution/download: + get: + tags: + - Storage Distribution + summary: Downloads per-datanode storage and pending deletion statistics as a CSV file + operationId: downloadDataNodeStorageDistribution + description: | + Triggers or polls a background metrics collection task across all datanodes. + - If collection is **not yet finished**, returns **HTTP 202** with a JSON status response. 
+ - If collection is **finished**, returns **HTTP 200** with a downloadable CSV file containing + per-datanode stats: HostName, Datanode UUID, Filesystem Capacity, Filesystem Used Space, + Filesystem Remaining Space, Ozone Capacity, Ozone Used Space, Ozone Remaining Space, + PreAllocated Container Space, Reserved Space, Minimum Free Space, Pending Block Size. + responses: + '200': + description: CSV file with storage and pending deletion statistics per datanode + content: + text/csv: + schema: + type: string + format: binary + '202': + description: Metrics collection is still in progress or has not started; returns current collection status + content: + application/json: + schema: + $ref: '#/components/schemas/DataNodeMetricsServiceResponse' + '500': + description: Internal server error, metrics data missing despite FINISHED collection status + content: + text/plain: + schema: + type: string components: schemas: Volumes: @@ -2649,3 +2761,188 @@ components: message: type: string example: This export has reached its maximum download limit of 3. + StorageCapacityDistributionResponse: + type: object + description: Aggregated storage capacity distribution report for the cluster + properties: + globalStorage: + $ref: '#/components/schemas/GlobalStorageReport' + globalNamespace: + $ref: '#/components/schemas/GlobalNamespaceReport' + usedSpaceBreakdown: + $ref: '#/components/schemas/UsedSpaceBreakDown' + dataNodeUsage: + type: array + description: Per-datanode storage usage reports + items: + $ref: '#/components/schemas/DataNodeStorageReport' + GlobalStorageReport: + type: object + description: | + Aggregated storage metrics across all datanodes in the cluster. 
+ + **Storage Hierarchy:** + - `totalFileSystemCapacity` = `totalOzoneCapacity` + `totalReservedSpace` + - `totalOzoneCapacity` = `totalOzoneUsedSpace` + `totalOzoneFreeSpace` + properties: + totalFileSystemCapacity: + type: integer + format: int64 + description: Total OS-reported filesystem capacity across all datanodes (bytes) + example: 270461374464 + totalReservedSpace: + type: integer + format: int64 + description: Space reserved and not available for Ozone allocation (bytes) + example: 31457280 + totalOzoneCapacity: + type: integer + format: int64 + description: Portion of filesystem capacity available for Ozone, equal to filesystem capacity minus reserved space (bytes) + example: 270429917184 + totalOzoneUsedSpace: + type: integer + format: int64 + description: Space currently consumed by Ozone data (bytes) + example: 358805504 + totalOzoneFreeSpace: + type: integer + format: int64 + description: Remaining allocatable space within Ozone capacity (bytes) + example: 270071111680 + totalOzoneCommittedSpace: + type: integer + format: int64 + description: Space pre-allocated for containers but not yet fully utilized (bytes) + example: 27007111 + totalMinimumFreeSpace: + type: integer + format: int64 + description: Minimum free space that must be maintained as per configuration (bytes) + example: 20480 + GlobalNamespaceReport: + type: object + description: High-level metadata summary of the global namespace + properties: + totalUsedSpace: + type: integer + format: int64 + description: | + Total space utilized in the namespace (bytes). Includes committed data, + open keys, and data pending deletion. 
+ example: 500000000 + totalKeys: + type: integer + format: int64 + description: Total number of keys (files) in the namespace across all volumes and buckets + example: 10000 + UsedSpaceBreakDown: + type: object + description: Breakdown of used storage space by lifecycle category + properties: + openKeyBytes: + $ref: '#/components/schemas/OpenKeyBytesInfo' + finalizedKeyBytes: + type: integer + format: int64 + description: Space occupied by written (closed) keys with replica overhead (bytes) + example: 450000000 + OpenKeyBytesInfo: + type: object + description: Breakdown of storage space occupied by open (uncommitted) keys + properties: + openKeyAndFileBytes: + type: integer + format: int64 + description: Total replicated bytes for open non-multipart keys and files + example: 13824 + multipartOpenKeyBytes: + type: integer + format: int64 + description: Total replicated bytes for in-progress multipart upload keys + example: 4096 + totalOpenKeyBytes: + type: integer + format: int64 + description: Sum of openKeyAndFileBytes and multipartOpenKeyBytes + example: 17920 + DataNodeMetricsServiceResponse: + type: object + description: Response from a background per-datanode metrics collection task + properties: + status: + type: string + enum: + - NOT_STARTED + - IN_PROGRESS + - FINISHED + - FAILED + description: Current status of the metric collection task + example: FINISHED + totalPendingDeletionSize: + type: integer + format: int64 + description: Total size of blocks pending deletion across all queried datanodes (bytes) + example: 1048576 + pendingDeletionPerDataNode: + type: array + nullable: true + description: Per-datanode pending deletion metrics; null if collection is not finished + items: + $ref: '#/components/schemas/DatanodePendingDeletionMetrics' + totalNodesQueried: + type: integer + description: Total number of datanodes queried during the collection task + example: 4 + totalNodeQueriesFailed: + type: integer + format: int64 + description: Number of datanode 
queries that failed during collection + example: 0 + DatanodePendingDeletionMetrics: + type: object + description: Pending deletion block metrics for a single datanode + properties: + hostName: + type: string + description: Hostname of the datanode + example: ozone-datanode-1 + datanodeUuid: + type: string + description: UUID of the datanode + example: 841be80f-0454-47df-b676-a1234567890a + pendingBlockSize: + type: integer + format: int64 + description: Total size of blocks pending deletion on this datanode (bytes) + example: 262144 + ScmPendingDeletion: + type: object + description: Block-level pending deletion statistics from the Storage Container Manager + properties: + totalBlocksize: + type: integer + format: int64 + description: Total unreplicated size of all blocks pending deletion in SCM (bytes) + example: 10485760 + totalReplicatedBlockSize: + type: integer + format: int64 + description: Total replicated size of all blocks pending deletion in SCM (bytes) + example: 31457280 + totalBlocksCount: + type: integer + format: int64 + description: Total number of blocks pending deletion in SCM + example: 500 + OmPendingDeletion: + type: object + description: | + Map of pending deletion sizes by category at the OM level (values in bytes). + Common keys: `pendingDirectorySize`, `pendingKeySize`. + additionalProperties: + type: integer + format: int64 + example: + pendingDirectorySize: 204800 + pendingKeySize: 1048576 From 3f50573c2575210577071cdca8307d331e41a833 Mon Sep 17 00:00:00 2001 From: Chi-Hsuan Huang Date: Mon, 11 May 2026 23:39:28 +0800 Subject: [PATCH 11/18] HDDS-14643. 
[docs] Document /heatmap, /features/disabledFeatures, /triggerdbsync endpoints --- .../docs/content/interface/ReconApi.md | 81 ++++++++++++ .../static/swagger-resources/recon-api.yaml | 122 ++++++++++++++++++ 2 files changed, 203 insertions(+) diff --git a/hadoop-hdds/docs/content/interface/ReconApi.md b/hadoop-hdds/docs/content/interface/ReconApi.md index a9ac249ffd63..b8afe08d271f 100644 --- a/hadoop-hdds/docs/content/interface/ReconApi.md +++ b/hadoop-hdds/docs/content/interface/ReconApi.md @@ -1556,3 +1556,84 @@ The response body depends on `component`: } ``` +## Heat Map (admin only) + +Read-access heatmap data is feature-gated. If the HeatMap feature is listed by +`/api/v1/features/disabledFeatures`, `/api/v1/heatmap/readaccess` returns `404 Not Found`. + +### GET /api/v1/heatmap/readaccess + +**Parameters** + +* startDate (optional) + + Look-back window for access aggregation. Default `24H`. + +* entityType (optional) + + Entity granularity. Default `key`. + +* path (optional) + + Restrict the heatmap to this path prefix. + +**Returns** + +A nested `EntityReadAccessHeatMap` tree. The root represents `/`; children represent volumes, then +buckets, then directories, then keys. Each node carries `size`, `accessCount`, +`minAccessCount`/`maxAccessCount`, and a normalized `color` value. + +```json +{ + "label": "root", + "path": "/", + "size": 12345678, + "accessCount": 1000, + "minAccessCount": 0, + "maxAccessCount": 250, + "color": 0.5, + "children": [ + { + "label": "vol1", + "path": "/vol1", + "size": 8345678, + "accessCount": 750, + "color": 0.75, + "children": [] + } + ] +} +``` + +### GET /api/v1/heatmap/healthCheck + +**Returns** + +Health-check response from the configured HeatMap provider. The body shape depends on the provider +implementation. + +## Features (admin only) + +### GET /api/v1/features/disabledFeatures + +**Returns** + +JSON array of feature enum names that are currently disabled. The only feature name in use today +is `HEATMAP`. 
Useful for the UI to decide whether to show or grey out feature-gated controls. + +```json +["HEATMAP"] +``` + +## Admin Utilities (admin only) + +### GET /api/v1/triggerdbsync/om + +**Returns** + +Requests Recon to start an immediate sync from the Ozone Manager DB. Returns a boolean indicating +whether the sync request was accepted by the OM service provider. + +```json +true +``` diff --git a/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml b/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml index 1f7ecff429f7..6ecf3f34a61c 100644 --- a/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml +++ b/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml @@ -1099,6 +1099,94 @@ paths: application/json: schema: $ref: '#/components/schemas/MetricsQuery' + /heatmap/readaccess: + get: + tags: + - Heat Map + summary: Returns the top-N prefixes by read access as a tree of `EntityReadAccessHeatMap` nodes + operationId: getReadAccessHeatMap + description: | + Heatmap responses are feature-gated. If the HeatMap feature is disabled (see + `/features/disabledFeatures`), this route returns **404 Not Found**. + parameters: + - name: startDate + in: query + required: false + description: Look-back window for access aggregation. Default `24H`. + schema: + type: string + default: "24H" + - name: entityType + in: query + required: false + description: Entity granularity. Default `key`. + schema: + type: string + default: key + - name: path + in: query + required: false + description: Restrict heatmap to this path prefix. + schema: + type: string + responses: + '200': + description: Successful operation + content: + application/json: + schema: + $ref: '#/components/schemas/EntityReadAccessHeatMap' + '404': + description: HeatMap feature is disabled. + '500': + description: HeatMap provider failure. 
+ /heatmap/healthCheck: + get: + tags: + - Heat Map + summary: Health check for the configured HeatMap provider + operationId: getHeatMapHealthCheck + responses: + '200': + description: Health check result. Body depends on the provider implementation. + content: + application/json: + schema: + type: object + /features/disabledFeatures: + get: + tags: + - Features + summary: Lists Recon features that are currently disabled + operationId: getDisabledFeatures + description: | + Returned strings match the enum constant names from `FeatureProvider.Feature` + (currently the only candidate is `HEATMAP`). + responses: + '200': + description: Array of disabled feature names (may be empty). + content: + application/json: + schema: + type: array + items: + type: string + example: + - HEATMAP + /triggerdbsync/om: + get: + tags: + - Admin Utilities + summary: Triggers an immediate OM DB sync from Recon + operationId: triggerOMDBSync + responses: + '200': + description: Boolean indicating whether the sync request was accepted. + content: + application/json: + schema: + type: boolean + example: true /storageDistribution: get: tags: @@ -2946,3 +3034,37 @@ components: example: pendingDirectorySize: 204800 pendingKeySize: 1048576 + EntityReadAccessHeatMap: + type: object + description: | + Nested tree node used by `/heatmap/readaccess`. The root has `label: "root"` and `path: "/"`; + children represent volumes, then buckets, then directories, then keys. + properties: + label: + type: string + example: vol1 + path: + type: string + example: /vol1 + size: + type: integer + format: int64 + description: Aggregate size in bytes of this entity. + accessCount: + type: integer + format: int64 + description: Access count for this entity within the queried time window. + minAccessCount: + type: integer + format: int64 + maxAccessCount: + type: integer + format: int64 + color: + type: number + format: double + description: Normalized color value (heatmap intensity). 
+ children: + type: array + items: + $ref: '#/components/schemas/EntityReadAccessHeatMap' From ff2882ebbd21fe4045cd1d0507b38c5f4b172f50 Mon Sep 17 00:00:00 2001 From: Chi-Hsuan Huang Date: Mon, 11 May 2026 22:49:32 +0800 Subject: [PATCH 12/18] HDDS-14643. [docs] Generalize /metrics path to /{api} to match Java @PathParam --- .../ozonedoc/static/swagger-resources/recon-api.yaml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml b/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml index 6ecf3f34a61c..bfbd5471b2f8 100644 --- a/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml +++ b/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml @@ -1077,13 +1077,21 @@ paths: application/json: schema: $ref: '#/components/schemas/ContainerUtilization' - /metrics/query: + /metrics/{api}: get: tags: - Metrics summary: This is a proxy endpoint for Prometheus, and helps to fetch different metrics for Ozone operationId: getMetricsResponse parameters: + - name: api + in: path + required: true + description: | + The Prometheus HTTP API endpoint to invoke (for example `query` or `query_range`). + schema: + type: string + default: query - name: query in: query description: The query in a Prometheus query format for which to fetch results From f010e6394d0f6e3770ec11a349ed7e9f295a48c6 Mon Sep 17 00:00:00 2001 From: Chi-Hsuan Huang Date: Tue, 12 May 2026 00:21:38 +0800 Subject: [PATCH 13/18] HDDS-14643. 
[docs] Drop meaningless default on /metrics/{api} path parameter --- .../themes/ozonedoc/static/swagger-resources/recon-api.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml b/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml index bfbd5471b2f8..c46d39792f4d 100644 --- a/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml +++ b/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml @@ -1089,9 +1089,10 @@ paths: required: true description: | The Prometheus HTTP API endpoint to invoke (for example `query` or `query_range`). + On the Java side the segment falls back to `query` when absent, but in OpenAPI a path + parameter is always required, so callers must pass a value. schema: type: string - default: query - name: query in: query description: The query in a Prometheus query format for which to fetch results From 536f945961f71e182c2a05f82e74aaf903c88c26 Mon Sep 17 00:00:00 2001 From: Chi-Hsuan Huang Date: Tue, 12 May 2026 00:21:51 +0800 Subject: [PATCH 14/18] HDDS-14643. [docs] Restore truncation marker on /datanodes example --- hadoop-hdds/docs/content/interface/ReconApi.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hadoop-hdds/docs/content/interface/ReconApi.md b/hadoop-hdds/docs/content/interface/ReconApi.md index b8afe08d271f..08bfaadf0100 100644 --- a/hadoop-hdds/docs/content/interface/ReconApi.md +++ b/hadoop-hdds/docs/content/interface/ReconApi.md @@ -1164,7 +1164,8 @@ Returns all the datanodes in the cluster. "revision": "abcdef1", "layoutVersion": 6, "networkLocation": "/default-rack" - } + }, + ... ] } ``` From 20080fa0222a4f85a1a569313c4bc5cf47161d19 Mon Sep 17 00:00:00 2001 From: Chi-Hsuan Huang Date: Tue, 12 May 2026 00:22:12 +0800 Subject: [PATCH 15/18] HDDS-14643. 
[docs] Distinguish aggregate vs per-key totals in /keys/deletePending example --- hadoop-hdds/docs/content/interface/ReconApi.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/hadoop-hdds/docs/content/interface/ReconApi.md b/hadoop-hdds/docs/content/interface/ReconApi.md index 08bfaadf0100..4d4e4324334a 100644 --- a/hadoop-hdds/docs/content/interface/ReconApi.md +++ b/hadoop-hdds/docs/content/interface/ReconApi.md @@ -555,8 +555,8 @@ Returns the set of keys/files pending deletion, paired with aggregated size tota ```json { "lastKey": "sampleVol/bucketOne/key_one", - "replicatedDataSize": 600000, - "unreplicatedDataSize": 200000, + "replicatedDataSize": 1800000, + "unreplicatedDataSize": 600000, "deletedKeyInfo": [ { "omKeyInfoList": [ @@ -575,7 +575,8 @@ Returns the set of keys/files pending deletion, paired with aggregated size tota "modificationTime": 1717100000000 } ] - } + }, + ... ], "status": "OK" } From 99cee715dd6197ccd926912137b17374769f5282 Mon Sep 17 00:00:00 2001 From: Chi-Hsuan Huang Date: Wed, 20 May 2026 20:58:11 +0800 Subject: [PATCH 16/18] HDDS-14643. [docs] Document /containers/quasiClosed and /triggerdbsync/scm/snapshot{,/status,/cancel} --- .../docs/content/interface/ReconApi.md | 117 +++++++++ .../static/swagger-resources/recon-api.yaml | 231 ++++++++++++++++++ 2 files changed, 348 insertions(+) diff --git a/hadoop-hdds/docs/content/interface/ReconApi.md b/hadoop-hdds/docs/content/interface/ReconApi.md index 4d4e4324334a..ad2c51f4b459 100644 --- a/hadoop-hdds/docs/content/interface/ReconApi.md +++ b/hadoop-hdds/docs/content/interface/ReconApi.md @@ -163,6 +163,58 @@ Returns the MissingContainerMetadata objects for all the missing containers. } ``` +### GET /api/v1/containers/quasiClosed + +**Parameters** + +* limit (optional) + + Maximum number of containers to return. Default is 1000. + +* minContainerId (optional) + + Cursor. Returns containers with ID greater than this value, in ascending order. 
Pass the + previous response's `lastKey` to fetch the next page. Default is 0. + +**Returns** + +Returns containers currently in the `QUASI_CLOSED` lifecycle state. `quasiClosedCount` is the +cluster-wide total (not just the current page). When the page is empty, both `firstKey` and +`lastKey` echo back the `minContainerId` cursor. + +```json +{ + "quasiClosedCount": 42, + "firstKey": 100, + "lastKey": 199, + "containers": [ + { + "containerID": 100, + "pipelineID": "88646d32-a1aa-4e1a-a8d5-aa1e7dd3f5cc", + "keys": 17, + "stateEnterTime": 1718640123456, + "expectedReplicaCount": 3, + "actualReplicaCount": 2, + "replicas": [ + { + "containerID": 100, + "datanodeUuid": "841be80f-0454-47df-b676", + "datanodeHost": "localhost-1", + "firstSeenTime": 1605724047057, + "lastSeenTime": 1605731201301, + "lastBcsId": 123, + "state": "QUASI_CLOSED" + } + ] + } + ] +} +``` + +Responses: + +* `400 Bad Request`: `limit` or `minContainerId` is negative. + ### GET /api/v1/containers/:id/replicaHistory **Parameters** @@ -1639,3 +1691,68 @@ whether the sync request was accepted by the OM service provider. ```json true ``` + +### POST /api/v1/triggerdbsync/scm/snapshot + +**Returns** + +Starts a one-shot SCM DB snapshot sync in the background. Idempotent. The response always carries +the current `ScmDbSnapshotSyncStatus` so callers can distinguish "accepted and started" from +"rejected because another sync is already in progress". + +* `202 Accepted`: sync accepted and started. Body has `accepted: true`. +* `409 Conflict`: another SCM DB sync is already running. Body has `accepted: false`. + +```json +{ + "accepted": true, + "status": "IN_PROGRESS", + "message": "SCM DB snapshot sync started." +} +``` + +### GET /api/v1/triggerdbsync/scm/snapshot/status + +**Returns** + +Current status of the triggered SCM DB snapshot sync. Always returns 200, even when no sync has +ever run (status will be `IDLE`, phase `NONE`, `startedAt`/`finishedAt` zero). 
+ +* `status`: one of `IDLE`, `IN_PROGRESS`, `SUCCESS`, `FAILED`, `CANCELLED`. +* `phase`: one of `NONE`, `DOWNLOADING_CHECKPOINT`, `INITIALIZING_DB`, `SWAPPING_DB`, + `COMPLETED`, `FAILED`, `CANCELLED`. +* `cancelAllowed`: true only while in `DOWNLOADING_CHECKPOINT`. Once the phase advances to + `INITIALIZING_DB`, cancellation is no longer honored. +* `durationMs`: elapsed time in millis; for a running sync, computed against `now()`. + +```json +{ + "status": "IN_PROGRESS", + "phase": "DOWNLOADING_CHECKPOINT", + "startedAt": 1718640123456, + "finishedAt": 0, + "durationMs": 12345, + "cancelAllowed": true, + "lastError": null +} +``` + +### POST /api/v1/triggerdbsync/scm/snapshot/cancel + +**Returns** + +Cancels an in-progress SCM DB snapshot sync. Only honored while `status == IN_PROGRESS` and +`cancelAllowed == true` (see `/triggerdbsync/scm/snapshot/status`). + +* `200 OK`: cancellation accepted and the sync has been cancelled. Body has `cancelled: true`. +* `409 Conflict`: no sync is running, or the sync has passed the cancellable phase. Body has + `cancelled: false` and `message` explains which. + +```json +{ + "cancelled": true, + "status": "CANCELLED", + "phase": "CANCELLED", + "message": "SCM DB snapshot sync cancelled." +} +``` diff --git a/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml b/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml index c46d39792f4d..51aa479f8c08 100644 --- a/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml +++ b/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml @@ -151,6 +151,43 @@ paths: application/json: schema: $ref: '#/components/schemas/MissingContainerMetadata' + /containers/quasiClosed: + get: + tags: + - Containers + summary: List containers in QUASI_CLOSED state, paginated by container ID. + operationId: getQuasiClosedContainers + parameters: + - name: limit + in: query + description: Maximum number of containers to return. 
+ required: false + schema: + type: integer + default: 1000 + minimum: 0 + - name: minContainerId + in: query + description: Cursor; return containers with ID greater than this value, in ascending order. + required: false + schema: + type: integer + format: int64 + default: 0 + minimum: 0 + responses: + '200': + description: Successful operation + content: + application/json: + schema: + $ref: '#/components/schemas/QuasiClosedContainersResponse' + '400': + description: '`limit` or `minContainerId` is negative.' + content: + text/plain: + schema: + type: string /containers/{id}/replicaHistory: get: tags: @@ -1196,6 +1233,66 @@ paths: schema: type: boolean example: true + /triggerdbsync/scm/snapshot: + post: + tags: + - Admin Utilities + summary: Trigger an SCM DB snapshot sync from SCM to Recon. + description: | + Starts a one-shot SCM DB snapshot sync in the background. Idempotent: if a sync is + already in progress the request is rejected with **409 Conflict**. The response body + carries the current `ScmDbSnapshotSyncStatus` so the caller can distinguish "accepted + and started" from "rejected because another sync is running". + operationId: triggerSCMDBSnapshotSync + responses: + '202': + description: Sync accepted and started. + content: + application/json: + schema: + $ref: '#/components/schemas/ScmDbSnapshotTriggerResponse' + '409': + description: Another SCM DB sync is already running. + content: + application/json: + schema: + $ref: '#/components/schemas/ScmDbSnapshotTriggerResponse' + /triggerdbsync/scm/snapshot/status: + get: + tags: + - Admin Utilities + summary: Get the current status of an SCM DB snapshot sync. + operationId: getSCMDBSnapshotSyncStatus + responses: + '200': + description: Current status (always returned, even when no sync is running). 
+ content: + application/json: + schema: + $ref: '#/components/schemas/ScmDbSnapshotStatusResponse' + /triggerdbsync/scm/snapshot/cancel: + post: + tags: + - Admin Utilities + summary: Cancel an in-progress SCM DB snapshot sync. + description: | + Cancellation is only honored while the sync is `IN_PROGRESS` and still in a cancellable + phase (before `INITIALIZING_DB`). The response body's `cancelled` flag indicates whether + the cancel actually took effect. + operationId: cancelSCMDBSnapshotSync + responses: + '200': + description: Cancellation accepted; the sync has been cancelled. + content: + application/json: + schema: + $ref: '#/components/schemas/ScmDbSnapshotCancelResponse' + '409': + description: No sync is running, or the sync has passed the cancellable phase. + content: + application/json: + schema: + $ref: '#/components/schemas/ScmDbSnapshotCancelResponse' /storageDistribution: get: tags: @@ -1639,6 +1736,60 @@ components: type: array items: $ref: "#/components/schemas/ReplicaHistory" + QuasiClosedContainerMetadata: + type: object + properties: + containerID: + type: integer + format: int64 + example: 42 + pipelineID: + type: string + nullable: true + example: 88646d32-a1aa-4e1a-a8d5-aa1e7dd3f5cc + keys: + type: integer + format: int64 + example: 17 + stateEnterTime: + type: integer + format: int64 + description: Epoch millis when the container entered QUASI_CLOSED per SCM. + example: 1718640123456 + expectedReplicaCount: + type: integer + format: int64 + example: 3 + actualReplicaCount: + type: integer + format: int64 + example: 2 + replicas: + type: array + items: + $ref: '#/components/schemas/ReplicaHistory' + QuasiClosedContainersResponse: + type: object + properties: + quasiClosedCount: + type: integer + format: int64 + description: Total number of containers in QUASI_CLOSED state across the cluster. 
+ example: 42 + firstKey: + type: integer + format: int64 + description: Container ID of the first item in `containers`; equals `minContainerId` when the page is empty. + example: 100 + lastKey: + type: integer + format: int64 + description: Container ID of the last item in `containers`; pass as `minContainerId` to fetch the next page. + example: 199 + containers: + type: array + items: + $ref: '#/components/schemas/QuasiClosedContainerMetadata' MismatchedContainers: type: object properties: @@ -3077,3 +3228,83 @@ components: type: array items: $ref: '#/components/schemas/EntityReadAccessHeatMap' + ScmDbSnapshotSyncStatus: + type: string + description: Overall state of a triggered SCM DB snapshot sync. + enum: + - IDLE + - IN_PROGRESS + - SUCCESS + - FAILED + - CANCELLED + ScmDbSnapshotSyncPhase: + type: string + description: | + Sub-phase of the active sync. Used to decide whether cancellation is still possible + (cancellable up to and including `DOWNLOADING_CHECKPOINT`; not cancellable from + `INITIALIZING_DB` onwards). + enum: + - NONE + - DOWNLOADING_CHECKPOINT + - INITIALIZING_DB + - SWAPPING_DB + - COMPLETED + - FAILED + - CANCELLED + ScmDbSnapshotTriggerResponse: + type: object + properties: + accepted: + type: boolean + description: Whether the trigger request actually started a new sync. + example: true + status: + $ref: '#/components/schemas/ScmDbSnapshotSyncStatus' + message: + type: string + example: SCM DB snapshot sync started. + ScmDbSnapshotStatusResponse: + type: object + properties: + status: + $ref: '#/components/schemas/ScmDbSnapshotSyncStatus' + phase: + $ref: '#/components/schemas/ScmDbSnapshotSyncPhase' + startedAt: + type: integer + format: int64 + description: Epoch millis when the current/last sync started; `0` if never run. + example: 1718640123456 + finishedAt: + type: integer + format: int64 + description: Epoch millis when the current/last sync ended; `0` while still running. 
+ example: 0 + durationMs: + type: integer + format: int64 + description: Elapsed time in millis; for a running sync, computed against `now()`. + example: 12345 + cancelAllowed: + type: boolean + description: True only while still in a cancellable phase. + example: true + lastError: + type: string + nullable: true + description: Failure message from the last sync, if any. + example: null + ScmDbSnapshotCancelResponse: + type: object + properties: + cancelled: + type: boolean + description: Whether the cancel actually took effect. + example: true + status: + $ref: '#/components/schemas/ScmDbSnapshotSyncStatus' + phase: + $ref: '#/components/schemas/ScmDbSnapshotSyncPhase' + message: + type: string + example: SCM DB snapshot sync cancelled. From 5acb1ea58bf81bff89467653443423dec1340fa7 Mon Sep 17 00:00:00 2001 From: Chi-Hsuan Huang Date: Wed, 20 May 2026 20:59:25 +0800 Subject: [PATCH 17/18] HDDS-14643. [docs] Split RateLimitedError so download-limit 429 has its own schema --- hadoop-hdds/docs/content/interface/ReconApi.md | 2 +- .../static/swagger-resources/recon-api.yaml | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/hadoop-hdds/docs/content/interface/ReconApi.md b/hadoop-hdds/docs/content/interface/ReconApi.md index ad2c51f4b459..07667c68c5e2 100644 --- a/hadoop-hdds/docs/content/interface/ReconApi.md +++ b/hadoop-hdds/docs/content/interface/ReconApi.md @@ -374,7 +374,7 @@ a `Content-Disposition: attachment` header carrying the export filename. * `404 Not Found`: `jobId` is unknown or the on-disk file was removed. * `409 Conflict`: the job has not reached `COMPLETED` status yet. -* `429 Too Many Requests`: the per-job download limit has been reached. Body matches `RateLimitedError`. +* `429 Too Many Requests`: the per-job download limit has been reached. Body: `{ "error": "Download limit reached", "message": "" }` (schema `DownloadLimitReachedError`). 
### DELETE /api/v1/containers/unhealthy/export/:jobId diff --git a/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml b/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml index 51aa479f8c08..41244abcc7d1 100644 --- a/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml +++ b/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml @@ -407,7 +407,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/RateLimitedError' + $ref: '#/components/schemas/DownloadLimitReachedError' /containers/mismatch: get: tags: @@ -3006,6 +3006,19 @@ components: error: type: string example: Too Many Requests + message: + type: string + example: Export queue is full; please retry later. + DownloadLimitReachedError: + type: object + description: | + Returned by `GET /containers/unhealthy/export/{jobId}/download` with HTTP 429 when the + per-job download limit has been reached. Same shape as `RateLimitedError` but with a + distinct `error` discriminator string so clients can branch on it. + properties: + error: + type: string + example: Download limit reached message: type: string example: This export has reached its maximum download limit of 3. From 2952f579b458cf9b28a6b953231ac8eb6b4195bd Mon Sep 17 00:00:00 2001 From: Chi-Hsuan Huang Date: Wed, 20 May 2026 21:00:29 +0800 Subject: [PATCH 18/18] HDDS-14643. [docs] Mark startPrefix optional on /keys/listKeys to match Java @DefaultValue --- hadoop-hdds/docs/content/interface/ReconApi.md | 7 ++++--- .../ozonedoc/static/swagger-resources/recon-api.yaml | 7 +++++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/hadoop-hdds/docs/content/interface/ReconApi.md b/hadoop-hdds/docs/content/interface/ReconApi.md index 07667c68c5e2..c338908b33f4 100644 --- a/hadoop-hdds/docs/content/interface/ReconApi.md +++ b/hadoop-hdds/docs/content/interface/ReconApi.md @@ -710,10 +710,11 @@ Returns the total count of directories pending deletion. 
**Parameters** -* startPrefix (required) +* startPrefix (optional, but effectively required) - Bucket-level or deeper prefix (e.g. `/vol1/bucket1` or `/vol1/bucket1/dir1`). Shallower prefixes - return `400 Bad Request`. + Bucket-level or deeper prefix (e.g. `/vol1/bucket1` or `/vol1/bucket1/dir1`). At the HTTP level the + parameter is optional (defaults to `/`), but the handler rejects anything shallower than + bucket level with `400 Bad Request`, so in practice callers must supply one. * replicationType (optional) diff --git a/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml b/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml index 41244abcc7d1..f67d2d70a935 100644 --- a/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml +++ b/hadoop-hdds/docs/themes/ozonedoc/static/swagger-resources/recon-api.yaml @@ -702,12 +702,15 @@ paths: parameters: - name: startPrefix in: query - required: true + required: false description: | Bucket-level or deeper prefix (e.g. `/vol1/bucket1` or `/vol1/bucket1/dir1`). - Shallower prefixes return `400 Bad Request`. + At the HTTP level the parameter is optional (defaults to `/`), but the handler rejects + anything shallower than bucket level with `400 Bad Request`, so in practice + callers must supply one. schema: type: string + default: / - name: replicationType in: query required: false