From ee598c01b5f56db46af43bed59fedaf83818b5a2 Mon Sep 17 00:00:00 2001
From: JophieQu
Date: Thu, 18 Sep 2025 15:53:17 +0800
Subject: [PATCH 1/4] feat: support pprof profiling

---
 pprof.graphqls | 174 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 174 insertions(+)
 create mode 100644 pprof.graphqls

diff --git a/pprof.graphqls b/pprof.graphqls
new file mode 100644
index 0000000..fe591ff
--- /dev/null
+++ b/pprof.graphqls
@@ -0,0 +1,174 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Request to create a pprof task
+input PprofTaskCreationRequest {
+    # Define the service to execute the task
+    serviceId: ID!
+    # Define which instances need to execute tasks
+    serviceInstanceIds: [String!]!
+    # Define the duration of this task
+    duration: Int!
+    # Define which event types this task needs to collect.
+    events: PprofEventType!
+    # Define the period of the pprof dump
+    dumpPeriod: Int!
+}
+
+# PprofTaskCreationResult is the result of the task creation request
+type PprofTaskCreationResult {
+    # Code defines the status of the response, i.e. success or failure.
+    code: PprofTaskCreationType!
+    # ErrorReason gives detailed reason for the exception, if the code returned represents a kind of failure.
+    errorReason: String
+    # Task id, if code is SUCCESS.
+    id: String
+}
+
+# Pprof task creation type
+enum PprofTaskCreationType {
+    # Task created successfully
+    SUCCESS
+    # Task creation failed due to argument errors
+    ARGUMENT_ERROR
+    # The current service already has a pprof task executing
+    ALREADY_PROFILING_ERROR
+}
+
+# Request to query pprof task list
+input PprofTaskListRequest {
+    # ServiceId associated with the task
+    serviceId: ID!
+    # Time Range
+    queryDuration: Duration
+    # Limit defines the number of the tasks to be returned.
+    limit: Int
+}
+
+# Request to query the flame graph analysis of a task
+input PprofAnalyzationRequest {
+    # Define which task to analyze
+    taskId: ID!
+    # InstanceIds defines the instances to be included for analysis
+    instanceIds: [String!]!
+}
+
+# Define pprof task list result
+type PprofTaskListResult {
+    # If it is null or empty, it means the task is created successfully, otherwise it gets the creation error reason
+    errorReason: String
+
+    # Tasks is a list of pprof tasks belonging to the specific service
+    tasks: [PprofTask!]
+}
+
+# Define pprof task data
+# The field definitions are the same as in PprofTaskCreationRequest
+type PprofTask {
+    id: String!
+    serviceId: String!
+    serviceInstanceIds: [String!]!
+    createTime: Long!
+    events: PprofEventType!
+    duration: Int!
+    dumpPeriod: Int!
+}
+
+# Define the flame graph results produced by pprof
+type PprofStackTree {
+    elements: [PprofStackElement!]
+}
+
+# Define an element of the analyzed stack tree
+type PprofStackElement {
+    # Id is the identity of the stack element
+    id: ID!
+    # ParentId is the identity of the parent stack element. Stack elements are organized as a tree.
+    parentId: ID!
+    # Method signatures in tree nodes
+    codeSignature: String!
+    # The total number of samples of the current tree node, including child nodes
+    total: Long!
+    # The number of samples of the current tree node itself, excluding samples of its children
+    self: Long!
+}
+
+# Define the analysis results of the task
+type PprofAnalyzation {
+    # The tree-structured data required to display the flame graph
+    tree: PprofStackTree
+}
+
+# Defines task progress, including task logs, success and failure instances
+type PprofTaskProgress {
+    # All task execution logs of the current task
+    logs: [PprofTaskLog!]
+    # ErrorInstanceIds gives instances that failed to execute the task
+    errorInstanceIds: [ID]
+    # SuccessInstanceIds gives instances that have executed the task successfully
+    successInstanceIds: [ID]
+}
+
+# Define the log of a task executed by an instance
+type PprofTaskLog {
+    # The task id
+    id: String!
+    # InstanceId is the id of the instance which reported this task log
+    instanceId: ID!
+    instanceName: String!
+
+    operationType: PprofTaskLogOperationType!
+    operationTime: Long!
+}
+
+# Define the execution progress of the task
+enum PprofTaskLogOperationType {
+    # NOTIFIED means the task has been issued to the Agent
+    NOTIFIED,
+    # EXECUTION_FINISHED means the Agent has finished the execution
+    EXECUTION_FINISHED
+    # PPROF_UPLOAD_FILE_TOO_LARGE_ERROR means the Agent has finished the task but the target file is too large to be received by the OAP server
+    PPROF_UPLOAD_FILE_TOO_LARGE_ERROR
+    # EXECUTION_TASK_ERROR means potential execution error caused by the Agent
+    EXECUTION_TASK_ERROR
+}
+
+# Defines which event types pprof needs to collect
+enum PprofEventType {
+    CPU
+    HEAP
+    BLOCK
+    MUTEX
+    GOROUTINE
+    THREADCREATE
+    ALLOCS
+}
+
+extend type Mutation {
+    # Create a new pprof task
+    createPprofTask(pprofTaskCreationRequest: PprofTaskCreationRequest!): PprofTaskCreationResult!
+}
+
+extend type Query {
+    # Query the task list, sorted in descending order by creation time
+    queryPprofTaskList(request: PprofTaskListRequest!): PprofTaskListResult!
+    # Query task progress, including task logs
+    queryPprofTaskProgress(taskId: String!): PprofTaskProgress!
+    # Query the flame graph produced by pprof
+    queryPprofAnalyze(request: PprofAnalyzationRequest!): PprofAnalyzation!
+}
+

From 6c7aa211e9d3fd8f81d7e238150862493e993bcd Mon Sep 17 00:00:00 2001
From: JophieQu
Date: Fri, 19 Sep 2025 16:48:54 +0800
Subject: [PATCH 2/4] fix: modify event description of duration and dumpPeriod

---
 pprof.graphqls | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/pprof.graphqls b/pprof.graphqls
index fe591ff..0f4497c 100644
--- a/pprof.graphqls
+++ b/pprof.graphqls
@@ -21,12 +21,14 @@ input PprofTaskCreationRequest {
     serviceId: ID!
     # Define which instances need to execute tasks
     serviceInstanceIds: [String!]!
-    # Define the duration of this task
-    duration: Int!
+    # Define the duration of this task in minutes (required for CPU, BLOCK, MUTEX events)
+    duration: Int
     # Define which event types this task needs to collect.
     events: PprofEventType!
-    # Define the period of the pprof dump
-    dumpPeriod: Int!
+    # Define the period of the pprof dump (required for BLOCK, MUTEX events)
+    # For the BLOCK event, it is the sampling rate: on average, one blocking event is sampled per `rate` nanoseconds spent blocked. The default value is 0, which means sampling is turned off. When the value is 1, all block events will be sampled.
+    # For the MUTEX event, it is the sampling rate: on average, one mutex contention event is sampled per `rate` occurrences. The default value is 0, which means sampling is turned off. When the value is 1, all mutex events will be sampled.
+    dumpPeriod: Int
 }
 
 # PprofTaskCreationResult is the result of the task creation request

From 7a1ed534c2a95cad986d8bf6bc8273aa4d84ad70 Mon Sep 17 00:00:00 2001
From: JophieQu
Date: Mon, 22 Sep 2025 15:38:40 +0800
Subject: [PATCH 3/4] fix

---
 pprof.graphqls | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pprof.graphqls b/pprof.graphqls
index 0f4497c..88e48d9 100644
--- a/pprof.graphqls
+++ b/pprof.graphqls
@@ -71,7 +71,7 @@ input PprofAnalyzationRequest {
 
 # Define pprof task list result
 type PprofTaskListResult {
-    # If it is null or empty, it means the task is created successfully, otherwise it gets the creation error reason
+    # If null or empty, it means no error occurred while fetching the task list
     errorReason: String
 
     # Tasks is a list of pprof tasks belonging to the specific service
@@ -86,8 +86,8 @@ type PprofTask {
     serviceInstanceIds: [String!]!
     createTime: Long!
     events: PprofEventType!
-    duration: Int!
-    dumpPeriod: Int!
+    duration: Int
+    dumpPeriod: Int
 }
 
 # Define the flame graph results produced by pprof

From 4c89cd7de03e15a66f6d965e56f2b2f9a369f8c5 Mon Sep 17 00:00:00 2001
From: JophieQu
Date: Mon, 22 Sep 2025 16:20:44 +0800
Subject: [PATCH 4/4] fix

---
 pprof.graphqls | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pprof.graphqls b/pprof.graphqls
index 88e48d9..5d8467c 100644
--- a/pprof.graphqls
+++ b/pprof.graphqls
@@ -71,7 +71,7 @@ input PprofAnalyzationRequest {
 
 # Define pprof task list result
 type PprofTaskListResult {
-    # If null or empty, it means no error occurred while fetching the task list
+    # ErrorReason gives detailed reason for the exception, if the task list returned represents failure.
     errorReason: String
 
     # Tasks is a list of pprof tasks belonging to the specific service