From c7085fb78c1e882cdc44da653cf2aa5a299f8bd0 Mon Sep 17 00:00:00 2001 From: Rohit Yanamadala Date: Sun, 23 Nov 2025 00:50:58 -0800 Subject: [PATCH 1/9] Update GitHub Action --- .github/workflows/stale-bot.yml | 27 +- .../adk_stale_agent/PROMPT_INSTRUCTION.txt | 100 ++- contributing/samples/adk_stale_agent/agent.py | 758 +++++++++--------- contributing/samples/adk_stale_agent/main.py | 133 ++- .../samples/adk_stale_agent/settings.py | 8 +- contributing/samples/adk_stale_agent/utils.py | 246 +++++- 6 files changed, 783 insertions(+), 489 deletions(-) diff --git a/.github/workflows/stale-bot.yml b/.github/workflows/stale-bot.yml index 882cb7b432..1fe5665c16 100644 --- a/.github/workflows/stale-bot.yml +++ b/.github/workflows/stale-bot.yml @@ -1,57 +1,44 @@ -# .github/workflows/stale-issue-auditor.yml - -# Best Practice: Always have a 'name' field at the top. name: ADK Stale Issue Auditor -# The 'on' block defines the triggers. on: - # The 'workflow_dispatch' trigger allows manual runs. workflow_dispatch: - # The 'schedule' trigger runs the bot on a timer. schedule: # This runs at 6:00 AM UTC (e.g., 10 PM PST). - cron: '0 6 * * *' -# The 'jobs' block contains the work to be done. jobs: - # A unique ID for the job. + run-agent: + timeout-minutes: 60 audit-stale-issues: - # The runner environment. runs-on: ubuntu-latest - # Permissions for the job's temporary GITHUB_TOKEN. - # These are standard and syntactically correct. permissions: issues: write contents: read - # The sequence of steps for the job. steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: '3.11' - name: Install dependencies - # The '|' character allows for multi-line shell commands. run: | python -m pip install --upgrade pip pip install requests google-adk - name: Run Auditor Agent Script - # The 'env' block for setting environment variables. env: GITHUB_TOKEN: ${{ secrets.ADK_TRIAGE_AGENT }} GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} - OWNER: google - REPO: adk-python - ISSUES_PER_RUN: 100 + OWNER: ${{ github.repository_owner }} + REPO: ${{ github.event.repository.name }} + CONCURRENCY_LIMIT: 3 LLM_MODEL_NAME: "gemini-2.5-flash" PYTHONPATH: contributing/samples - # The final 'run' command. run: python -m adk_stale_agent.main \ No newline at end of file diff --git a/contributing/samples/adk_stale_agent/PROMPT_INSTRUCTION.txt b/contributing/samples/adk_stale_agent/PROMPT_INSTRUCTION.txt index bb31889b23..1c83aa662f 100644 --- a/contributing/samples/adk_stale_agent/PROMPT_INSTRUCTION.txt +++ b/contributing/samples/adk_stale_agent/PROMPT_INSTRUCTION.txt @@ -1,40 +1,68 @@ -You are a highly intelligent and transparent repository auditor for '{OWNER}/{REPO}'. -Your job is to analyze all open issues and report on your findings before taking any action. +You are a highly intelligent repository auditor for '{OWNER}/{REPO}'. +Your job is to analyze a specific issue and report findings before taking action. **Primary Directive:** Ignore any events from users ending in `[bot]`. -**Reporting Directive:** For EVERY issue you analyze, you MUST output a concise, human-readable summary, starting with "Analysis for Issue #[number]:". +**Reporting Directive:** Output a concise summary starting with "Analysis for Issue #[number]:". + +**THRESHOLDS:** +- Stale Threshold: {stale_threshold_days} days. +- Close Threshold: {close_threshold_days} days. **WORKFLOW:** -1. 
**Context Gathering**: Call `get_repository_maintainers` and `get_all_open_issues`. -2. **Per-Issue Analysis**: For each issue, call `get_issue_state`, passing in the maintainers list. -3. **Decision & Reporting**: Based on the summary from `get_issue_state`, follow this strict decision tree in order. - ---- **DECISION TREE & REPORTING TEMPLATES** --- - -**STEP 1: CHECK FOR ACTIVITY (IS THE ISSUE ACTIVE?)** -- **Condition**: Was the last human action NOT from a maintainer? (i.e., `last_human_commenter_is_maintainer` is `False`). -- **Action**: The author or a third party has acted. The issue is ACTIVE. - - **Report and Action**: If '{STALE_LABEL_NAME}' is present, report: "Analysis for Issue #[number]: Issue is ACTIVE. The last action was a [action type] by a non-maintainer. To get the [action type], you MUST use the value from the 'last_human_action_type' field in the summary you received from the tool." Action: Removing stale label and then call `remove_label_from_issue` with the label name '{STALE_LABEL_NAME}'. Otherwise, report: "Analysis for Issue #[number]: Issue is ACTIVE. No stale label to remove. Action: None." -- **If this condition is met, stop processing this issue.** - -**STEP 2: IF PENDING, MANAGE THE STALE LIFECYCLE.** -- **Condition**: The last human action WAS from a maintainer (`last_human_commenter_is_maintainer` is `True`). The issue is PENDING. -- **Action**: You must now determine the correct state. - - - **First, check if the issue is already STALE.** - - **Condition**: Is the `'{STALE_LABEL_NAME}'` label present in `current_labels`? - - **Action**: The issue is STALE. Your only job is to check if it should be closed. - - **Get Time Difference**: Call `calculate_time_difference` with the `stale_label_applied_at` timestamp. - - **Decision & Report**: If `hours_passed` > **{CLOSE_HOURS_AFTER_STALE_THRESHOLD}**: Report "Analysis for Issue #[number]: STALE. Close threshold met ({CLOSE_HOURS_AFTER_STALE_THRESHOLD} hours) with no author activity." Action: Closing issue and then call `close_as_stale`. Otherwise, report "Analysis for Issue #[number]: STALE. Close threshold not yet met. Action: None." - - - **ELSE (the issue is PENDING but not yet stale):** - - **Analyze Intent**: Semantically analyze the `last_maintainer_comment_text`. Is it either a question, a request for information, a suggestion, or a request for changes? - - **If YES (it is either a question, a request for information, a suggestion, or a request for changes)**: - - **CRITICAL CHECK**: Now, you must verify the author has not already responded. Compare the `last_author_event_time` and the `last_maintainer_comment_time`. - - **IF the author has NOT responded** (i.e., `last_author_event_time` is older than `last_maintainer_comment_time` or is null): - - **Get Time Difference**: Call `calculate_time_difference` with the `last_maintainer_comment_time`. - - **Decision & Report**: If `hours_passed` > **{STALE_HOURS_THRESHOLD}**: Report "Analysis for Issue #[number]: PENDING. Stale threshold met ({STALE_HOURS_THRESHOLD} hours)." Action: Marking as stale and then call `add_stale_label_and_comment` and if label name '{REQUEST_CLARIFICATION_LABEL}' is missing then call `add_label_to_issue` with the label name '{REQUEST_CLARIFICATION_LABEL}'. Otherwise, report: "Analysis for Issue #[number]: PENDING. Stale threshold not met. Action: None." - - **ELSE (the author HAS responded)**: - - **Report**: "Analysis for Issue #[number]: PENDING, but author has already responded to the last maintainer request. Action: None." 
- - **If NO (it is not a request):** - - **Report**: "Analysis for Issue #[number]: PENDING. Maintainer's last comment was not a request. Action: None." \ No newline at end of file +1. **Context Gathering**: Call `get_issue_state`. +2. **Decision**: Follow this strict decision tree using the data returned by the tool. + +--- **DECISION TREE** --- + +**STEP 1: CHECK IF ALREADY STALE** +- **Condition**: Is `is_stale` (from tool) **True**? +- **Action**: + - **Check Role**: Look at `last_action_role`. + + - **IF 'author' OR 'other_user'**: + - **Context**: The user has responded. The issue is now ACTIVE. + - **Action 1**: Call `remove_label_from_issue` with '{STALE_LABEL_NAME}'. + - **Action 2 (ALERT CHECK)**: Look at `maintainer_alert_needed`. + - **IF True**: User edited description silently. + -> **Action**: Call `alert_maintainer_of_edit`. + - **IF False**: User commented normally. No alert needed. + - **Report**: "Analysis for Issue #[number]: ACTIVE. User activity detected. Removed stale label." + + - **IF 'maintainer'**: + - **Check Time**: Check `days_since_stale_label`. + - **If `days_since_stale_label` > {close_threshold_days}**: + - **Action**: Call `close_as_stale`. + - **Report**: "Analysis for Issue #[number]: STALE. Close threshold met. Closing." + - **Else**: + - **Report**: "Analysis for Issue #[number]: STALE. Waiting for close threshold. No action." + +**STEP 2: CHECK IF ACTIVE (NOT STALE)** +- **Condition**: `is_stale` is **False**. +- **Action**: + - **Check Role**: If `last_action_role` is 'author' or 'other_user': + - **Context**: The issue is Active. + - **Action (ALERT CHECK)**: Look at `maintainer_alert_needed`. + - **IF True**: The user edited the description silently, and we haven't alerted yet. + -> **Action**: Call `alert_maintainer_of_edit`. + -> **Report**: "Analysis for Issue #[number]: ACTIVE. Silent update detected (Description Edit). Alerted maintainer." + - **IF False**: + -> **Report**: "Analysis for Issue #[number]: ACTIVE. Last action was by user. No action." + + - **Check Role**: If `last_action_role` is 'maintainer': + - **Proceed to STEP 3.** + +**STEP 3: ANALYZE MAINTAINER INTENT** +- **Context**: The last person to act was a Maintainer. +- **Action**: Read the text in `last_comment_text`. + - **Question Check**: Does the text ask a question, request clarification, ask for logs, or suggest trying a fix? + - **Time Check**: Is `days_since_activity` > {stale_threshold_days}? + + - **DECISION**: + - **IF (Question == YES) AND (Time == YES)**: + - **Action**: Call `add_stale_label_and_comment`. + - **Check**: If '{REQUEST_CLARIFICATION_LABEL}' is not in `current_labels`, call `add_label_to_issue` for it. + - **Report**: "Analysis for Issue #[number]: STALE. Maintainer asked question [days_since_activity] days ago. Marking stale." + - **IF (Question == YES) BUT (Time == NO)**: + - **Report**: "Analysis for Issue #[number]: PENDING. Maintainer asked question, but threshold not met yet. No action." + - **IF (Question == NO)** (e.g., "I am working on this"): + - **Report**: "Analysis for Issue #[number]: ACTIVE. Maintainer gave status update (not a question). No action." 
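For reference, the decision tree above can be read as deterministic control flow over the summary returned by `get_issue_state`. The sketch below is illustrative only and is not part of this patch: it assumes a `state` dict containing the fields the prompt names (`is_stale`, `last_action_role`, `maintainer_alert_needed`, `days_since_stale_label`, `days_since_activity`, `last_comment_text`), and it stubs out the one step the agent performs semantically with the LLM, namely deciding whether the maintainer's last comment is a question or request.

```python
# Illustrative sketch only, not part of this patch. The real agent delegates
# these branches to the LLM; field names follow the summary returned by
# `get_issue_state`, and `looks_like_request` stands in for the LLM's
# semantic "is this a question/request?" judgement.
from typing import Any, Dict, Optional

STALE_DAYS = 7.0   # assumed default for {stale_threshold_days}
CLOSE_DAYS = 7.0   # assumed default for {close_threshold_days}


def looks_like_request(comment: Optional[str]) -> bool:
    """Crude stand-in for the LLM's intent analysis."""
    return bool(comment) and "?" in comment


def decide(state: Dict[str, Any]) -> str:
    role = state["last_action_role"]
    if state["is_stale"]:
        if role in ("author", "other_user"):
            # STEP 1: the user responded, so the issue is active again.
            return ("remove_stale_label_and_alert"
                    if state["maintainer_alert_needed"]
                    else "remove_stale_label")
        if state["days_since_stale_label"] > CLOSE_DAYS:
            return "close_as_stale"
        return "no_action_waiting_for_close_threshold"
    if role in ("author", "other_user"):
        # STEP 2: active issue; alert only on a silent description edit.
        return ("alert_maintainer_of_edit"
                if state["maintainer_alert_needed"]
                else "no_action")
    # STEP 3: a maintainer acted last; check intent and idle time.
    if (looks_like_request(state["last_comment_text"])
            and state["days_since_activity"] > STALE_DAYS):
        return "add_stale_label_and_comment"
    return "no_action"
```

For example, a non-stale issue whose last actor is a maintainer asking a question 10 days ago would map to "add_stale_label_and_comment" under these assumed thresholds.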
\ No newline at end of file diff --git a/contributing/samples/adk_stale_agent/agent.py b/contributing/samples/adk_stale_agent/agent.py index abcb128288..5693e050eb 100644 --- a/contributing/samples/adk_stale_agent/agent.py +++ b/contributing/samples/adk_stale_agent/agent.py @@ -12,423 +12,437 @@ # See the License for the specific language governing permissions and # limitations under the License. -from datetime import datetime -from datetime import timezone import logging import os -from typing import Any - -from adk_stale_agent.settings import CLOSE_HOURS_AFTER_STALE_THRESHOLD -from adk_stale_agent.settings import GITHUB_BASE_URL -from adk_stale_agent.settings import ISSUES_PER_RUN -from adk_stale_agent.settings import LLM_MODEL_NAME -from adk_stale_agent.settings import OWNER -from adk_stale_agent.settings import REPO -from adk_stale_agent.settings import REQUEST_CLARIFICATION_LABEL -from adk_stale_agent.settings import STALE_HOURS_THRESHOLD -from adk_stale_agent.settings import STALE_LABEL_NAME -from adk_stale_agent.utils import delete_request -from adk_stale_agent.utils import error_response -from adk_stale_agent.utils import get_request -from adk_stale_agent.utils import patch_request -from adk_stale_agent.utils import post_request import dateutil.parser +from datetime import datetime, timezone +from typing import Any, List, Dict, Optional + +from adk_stale_agent.settings import ( + GITHUB_BASE_URL, + OWNER, + REPO, + LLM_MODEL_NAME, + STALE_LABEL_NAME, + REQUEST_CLARIFICATION_LABEL, + STALE_HOURS_THRESHOLD, + CLOSE_HOURS_AFTER_STALE_THRESHOLD, +) +from adk_stale_agent.utils import ( + post_request, + delete_request, + patch_request, + error_response, + get_request, +) from google.adk.agents.llm_agent import Agent from requests.exceptions import RequestException logger = logging.getLogger("google_adk." + __name__) -# --- Primary Tools for the Agent --- +# --- Constants --- +BOT_ALERT_SIGNATURE = "**Notification:** The author has updated the issue description" +# --- Global Cache --- +_MAINTAINERS_CACHE: Optional[List[str]] = None -def load_prompt_template(filename: str) -> str: - """Loads the prompt text file from the same directory as this script. - Args: - filename: The name of the prompt file to load. +def _get_cached_maintainers() -> List[str]: + """ + Fetches the list of repository maintainers once and caches it. - Returns: - The content of the file as a string. - """ - file_path = os.path.join(os.path.dirname(__file__), filename) + Returns: + List[str]: A list of GitHub usernames with push access. + """ + global _MAINTAINERS_CACHE + if _MAINTAINERS_CACHE is not None: + return _MAINTAINERS_CACHE - with open(file_path, "r") as f: - return f.read() + logger.info("Initializing Maintainers Cache...") + try: + url = f"{GITHUB_BASE_URL}/repos/{OWNER}/{REPO}/collaborators" + params = {"permission": "push"} + data = get_request(url, params) + _MAINTAINERS_CACHE = [u["login"] for u in data] + logger.info(f"Cached {len(_MAINTAINERS_CACHE)} maintainers.") + except Exception as e: + logger.error(f"Failed to fetch maintainers: {e}") + _MAINTAINERS_CACHE = [] + return _MAINTAINERS_CACHE -PROMPT_TEMPLATE = load_prompt_template("PROMPT_INSTRUCTION.txt") +def load_prompt_template(filename: str) -> str: + """ + Loads the raw text content of a prompt file. + Args: + filename (str): The name of the file (e.g., 'PROMPT_INSTRUCTION.txt'). -def get_repository_maintainers() -> dict[str, Any]: - """ - Fetches the list of repository collaborators with 'push' (write) access or higher. 
- This should only be called once per run. - - Returns: - A dictionary with the status and a list of maintainer usernames, or an - error dictionary. - """ - logger.debug("Fetching repository maintainers with push access...") - try: - url = f"{GITHUB_BASE_URL}/repos/{OWNER}/{REPO}/collaborators" - params = {"permission": "push"} - collaborators_data = get_request(url, params) - - maintainers = [user["login"] for user in collaborators_data] - logger.info(f"Found {len(maintainers)} repository maintainers.") - logger.debug(f"Maintainer list: {maintainers}") - - return {"status": "success", "maintainers": maintainers} - except RequestException as e: - logger.error(f"Failed to fetch repository maintainers: {e}", exc_info=True) - return error_response(f"Error fetching repository maintainers: {e}") - - -def get_all_open_issues() -> dict[str, Any]: - """Fetches a batch of the oldest open issues for an audit. - - Returns: - A dictionary containing the status and a list of open issues, or an error - dictionary. - """ - logger.info( - f"Fetching a batch of {ISSUES_PER_RUN} oldest open issues for audit..." - ) - url = f"{GITHUB_BASE_URL}/repos/{OWNER}/{REPO}/issues" - params = { - "state": "open", - "sort": "created", - "direction": "asc", - "per_page": ISSUES_PER_RUN, - } - try: - items = get_request(url, params) - logger.info(f"Found {len(items)} open issues to audit.") - return {"status": "success", "items": items} - except RequestException as e: - logger.error(f"Failed to fetch open issues: {e}", exc_info=True) - return error_response(f"Error fetching all open issues: {e}") - - -def get_issue_state(item_number: int, maintainers: list[str]) -> dict[str, Any]: - """Analyzes an issue's complete history to create a comprehensive state summary. - - This function acts as the primary "detective" for the agent. It performs the - complex, deterministic work of fetching and parsing an issue's full history, - allowing the LLM agent to focus on high-level semantic decision-making. - - It is designed to be highly robust by fetching the complete, multi-page history - from the GitHub `/timeline` API. By handling pagination correctly, it ensures - that even issues with a very long history (more than 100 events) are analyzed - in their entirety, preventing incorrect decisions based on incomplete data. - - Args: - item_number (int): The number of the GitHub issue or pull request to analyze. - maintainers (list[str]): A dynamically fetched list of GitHub usernames to be - considered maintainers. This is used to categorize actors found in - the issue's history. - - Returns: - A dictionary that serves as a clean, factual report summarizing the - issue's state. On failure, it returns a dictionary with an 'error' status. - """ - try: - # Fetch core issue data and prepare for timeline fetching. - logger.debug(f"Fetching full timeline for issue #{item_number}...") - issue_url = f"{GITHUB_BASE_URL}/repos/{OWNER}/{REPO}/issues/{item_number}" - issue_data = get_request(issue_url) - - # Fetch All pages from the timeline API to build a complete history. 
- timeline_url_base = f"{issue_url}/timeline" - timeline_data = [] - page = 1 - - while True: - paginated_url = f"{timeline_url_base}?per_page=100&page={page}" - logger.debug(f"Fetching timeline page {page} for issue #{item_number}...") - events_page = get_request(paginated_url) - if not events_page: - break - timeline_data.extend(events_page) - if len(events_page) < 100: - break - page += 1 - - logger.debug( - f"Fetched a total of {len(timeline_data)} timeline events across" - f" {page-1} page(s) for issue #{item_number}." - ) + Returns: + str: The file content. + """ + file_path = os.path.join(os.path.dirname(__file__), filename) + with open(file_path, "r") as f: + return f.read() - # Initialize key variables for the analysis. - issue_author = issue_data.get("user", {}).get("login") - current_labels = [label["name"] for label in issue_data.get("labels", [])] - - # Filter and sort all events into a clean, chronological history of human activity. - human_events = [] - for event in timeline_data: - actor = event.get("actor", {}).get("login") - timestamp_str = event.get("created_at") or event.get("submitted_at") - - if not actor or not timestamp_str or actor.endswith("[bot]"): - continue - - event["parsed_time"] = dateutil.parser.isoparse(timestamp_str) - human_events.append(event) - - human_events.sort(key=lambda e: e["parsed_time"]) - - # Find the most recent, relevant events by iterating backwards. - last_maintainer_comment = None - stale_label_event_time = None - - for event in reversed(human_events): - if ( - not last_maintainer_comment - and event.get("actor", {}).get("login") in maintainers - and event.get("event") == "commented" - ): - last_maintainer_comment = event - - if ( - not stale_label_event_time - and event.get("event") == "labeled" - and event.get("label", {}).get("name") == STALE_LABEL_NAME - ): - stale_label_event_time = event["parsed_time"] - - if last_maintainer_comment and stale_label_event_time: - break - - last_author_action = next( - ( - e - for e in reversed(human_events) - if e.get("actor", {}).get("login") == issue_author - ), - None, - ) - # Build and return the final summary report for the LLM agent. 
- last_human_event = human_events[-1] if human_events else None - last_human_actor = ( - last_human_event.get("actor", {}).get("login") - if last_human_event - else None - ) - - return { - "status": "success", - "issue_author": issue_author, - "current_labels": current_labels, - "last_maintainer_comment_text": ( - last_maintainer_comment.get("body") - if last_maintainer_comment - else None - ), - "last_maintainer_comment_time": ( - last_maintainer_comment["parsed_time"].isoformat() - if last_maintainer_comment - else None - ), - "last_author_event_time": ( - last_author_action["parsed_time"].isoformat() - if last_author_action - else None - ), - "last_author_action_type": ( - last_author_action.get("event") if last_author_action else "unknown" - ), - "last_human_action_type": ( - last_human_event.get("event") if last_human_event else "unknown" - ), - "last_human_commenter_is_maintainer": ( - last_human_actor in maintainers if last_human_actor else False - ), - "stale_label_applied_at": ( - stale_label_event_time.isoformat() - if stale_label_event_time - else None - ), - } - - except RequestException as e: - logger.error( - f"Failed to fetch comprehensive issue state for #{item_number}: {e}", - exc_info=True, - ) - return error_response( - f"Error getting comprehensive issue state for #{item_number}: {e}" - ) +PROMPT_TEMPLATE = load_prompt_template("PROMPT_INSTRUCTION.txt") -def calculate_time_difference(timestamp_str: str) -> dict[str, Any]: - """Calculates the difference in hours between a UTC timestamp string and now. - - Args: - timestamp_str: An ISO 8601 formatted timestamp string. - - Returns: - A dictionary with the status and the time difference in hours, or an error - dictionary. - """ - try: - if not timestamp_str: - return error_response("Input timestamp is empty.") - event_time = dateutil.parser.isoparse(timestamp_str) - current_time_utc = datetime.now(timezone.utc) - time_difference = current_time_utc - event_time - hours_passed = time_difference.total_seconds() / 3600 - return {"status": "success", "hours_passed": hours_passed} - except (dateutil.parser.ParserError, TypeError) as e: - logger.error( - "Error calculating time difference for timestamp" - f" '{timestamp_str}': {e}", - exc_info=True, - ) - return error_response(f"Error calculating time difference: {e}") +def get_issue_state(item_number: int) -> Dict[str, Any]: + """ + Retrieves the comprehensive state of a GitHub issue using GraphQL. + + This function constructs a unified timeline of comments, body edits, + renames, and reopens to determine who the *absolute last* actor was. + It handles 'Ghost Edits' (description updates without comments) and + prevents spamming alerts if the bot has already notified maintainers. + + Args: + item_number (int): The GitHub issue number. + + Returns: + Dict[str, Any]: A dictionary containing: + - last_action_role (str): 'author', 'maintainer', or 'other_user'. + - is_stale (bool): Whether the issue is currently marked stale. + - maintainer_alert_needed (bool): True if a silent edit needs an alert. + - days_since_activity (float): Days since the last human action. + - ... and other metadata for the LLM. + """ + maintainers = _get_cached_maintainers() + + # GraphQL Query: Fetches Comments, Edits, and Timeline Events in one go. + query = """ + query($owner: String!, $name: String!, $number: Int!) { + repository(owner: $owner, name: $name) { + issue(number: $number) { + author { login } + createdAt + labels(first: 20) { nodes { name } } + + # 1. 
Comments (Fetch last 30 to scan for previous bot alerts) + comments(last: 30) { + nodes { + author { login } + body + createdAt + lastEditedAt + } + } + + # 2. Description Edits (Fetch last 10) + userContentEdits(last: 10) { + nodes { + editor { login } + editedAt + } + } + + # 3. Timeline Events (Renames, Reopens, Labels) + timelineItems(itemTypes: [LABELED_EVENT, RENAMED_TITLE_EVENT, REOPENED_EVENT], last: 20) { + nodes { + __typename + ... on LabeledEvent { + createdAt + actor { login } + label { name } + } + ... on RenamedTitleEvent { + createdAt + actor { login } + } + ... on ReopenedEvent { + createdAt + actor { login } + } + } + } + } + } + } + """ + + variables = {"owner": OWNER, "name": REPO, "number": item_number} + + try: + response = post_request( + f"{GITHUB_BASE_URL}/graphql", {"query": query, "variables": variables} + ) + + if "errors" in response: + msg = response["errors"][0]["message"] + return error_response(f"GraphQL Error: {msg}") + + data = response.get("data", {}).get("repository", {}).get("issue", {}) + if not data: + return error_response(f"Issue #{item_number} not found.") + + # --- Data Parsing --- + issue_author = data.get("author", {}).get("login") + labels_list = [l["name"] for l in data.get("labels", {}).get("nodes", [])] + + # We build a unified list of ALL events to replay history chronologically. + history = [] + last_bot_alert_time = None + + # 1. Baseline: Issue Creation + history.append({ + "type": "created", + "actor": issue_author, + "time": dateutil.parser.isoparse(data["createdAt"]), + "data": None, + }) + + # 2. Process Comments + for c in data.get("comments", {}).get("nodes", []): + actor = c.get("author", {}).get("login") + c_body = c.get("body", "") + c_time = dateutil.parser.isoparse(c.get("createdAt")) + + # Check if the bot has already alerted about a silent edit in this thread + if BOT_ALERT_SIGNATURE in c_body: + if last_bot_alert_time is None or c_time > last_bot_alert_time: + last_bot_alert_time = c_time + + # Add human comments to history + if actor and not actor.endswith("[bot]"): + e_time = c.get("lastEditedAt") + # Use edit time if available, otherwise creation time + actual_time = dateutil.parser.isoparse(e_time) if e_time else c_time + history.append({ + "type": "commented", + "actor": actor, + "time": actual_time, + "data": c_body, + }) + + # 3. Process Body Edits + for e in data.get("userContentEdits", {}).get("nodes", []): + actor = e.get("editor", {}).get("login") + if actor and not actor.endswith("[bot]"): + history.append({ + "type": "edited_description", + "actor": actor, + "time": dateutil.parser.isoparse(e.get("editedAt")), + "data": None, + }) + + # 4. 
Process Timeline Events (Labels, Renames, Reopens) + label_events = [] + for t in data.get("timelineItems", {}).get("nodes", []): + etype = t.get("__typename") + actor = t.get("actor", {}).get("login") + time_val = dateutil.parser.isoparse(t.get("createdAt")) + + # Store stale label events separately for timing calculations + if etype == "LabeledEvent": + if t.get("label", {}).get("name") == STALE_LABEL_NAME: + label_events.append(time_val) + continue + + if actor and not actor.endswith("[bot]"): + pretty_type = ( + "renamed_title" if etype == "RenamedTitleEvent" else "reopened" + ) + history.append({ + "type": pretty_type, + "actor": actor, + "time": time_val, + "data": None, + }) + + # --- History Replay (Chronological Sort) --- + history.sort(key=lambda x: x["time"]) + + last_action_role = "author" # Default start state + last_activity_time = history[0]["time"] + last_action_type = "created" + last_comment_text = None + + logger.debug(f"--- Activity Trace for #{item_number} ---") + + for event in history: + actor = event["actor"] + etype = event["type"] + + # Determine Role + role = "other_user" + if actor == issue_author: + role = "author" + elif actor in maintainers: + role = "maintainer" + + # Log the event trace for debugging + logger.debug( + f" [{event['time'].strftime('%m-%d %H:%M')}] " + f"{etype.upper()} by {actor} ({role})" + ) + + # Update State (The last event in the list wins) + last_action_role = role + last_activity_time = event["time"] + last_action_type = etype + + if etype == "commented": + last_comment_text = event["data"] + else: + last_comment_text = None + + # --- Spam Prevention / Alert Logic --- + maintainer_alert_needed = False + # If the User edited the description (silent action) and we haven't alerted AFTER that edit... + if ( + last_action_role in ["author", "other_user"] + and last_action_type == "edited_description" + ): + if last_bot_alert_time and last_bot_alert_time > last_activity_time: + maintainer_alert_needed = False + logger.info( + f"#{item_number}: Silent edit detected, but Bot already alerted at " + f"{last_bot_alert_time.strftime('%m-%d %H:%M')}. No spam." + ) + else: + maintainer_alert_needed = True + logger.info(f"#{item_number}: Silent edit detected. 
Alert needed.") + + # --- Final Metric Calculations --- + current_time = datetime.now(timezone.utc) + days_since_activity = ( + current_time - last_activity_time + ).total_seconds() / 86400 + + is_stale = STALE_LABEL_NAME in labels_list + days_since_stale_label = 0.0 + if is_stale and label_events: + # Calculate time from the MOST RECENT application of the stale label + latest_label_time = max(label_events) + days_since_stale_label = ( + current_time - latest_label_time + ).total_seconds() / 86400 + + logger.debug( + f" -> FINAL VERDICT: Last Actor = {last_action_role.upper()}, " + f"Idle = {days_since_activity:.2f} days" + ) + + return { + "status": "success", + "last_action_role": last_action_role, + "last_action_type": last_action_type, + "maintainer_alert_needed": maintainer_alert_needed, + "is_stale": is_stale, + "days_since_activity": days_since_activity, + "days_since_stale_label": days_since_stale_label, + "last_comment_text": last_comment_text, + "current_labels": labels_list, + "stale_threshold_days": STALE_HOURS_THRESHOLD / 24, + "close_threshold_days": CLOSE_HOURS_AFTER_STALE_THRESHOLD / 24, + } + + except RequestException as e: + return error_response(f"Network Error: {e}") + + +# --- Tool Definitions --- def add_label_to_issue(item_number: int, label_name: str) -> dict[str, Any]: - """Adds a specific label to an issue. - - Args: - item_number: The issue number. - label_name: The name of the label to add. - - Returns: - A dictionary indicating the status of the operation. - """ - logger.debug(f"Adding label '{label_name}' to issue #{item_number}.") - url = f"{GITHUB_BASE_URL}/repos/{OWNER}/{REPO}/issues/{item_number}/labels" - try: - post_request(url, [label_name]) - logger.info( - f"Successfully added label '{label_name}' to issue #{item_number}." - ) - return {"status": "success"} - except RequestException as e: - logger.error(f"Failed to add '{label_name}' to issue #{item_number}: {e}") - return error_response(f"Error adding label: {e}") - - -def remove_label_from_issue( - item_number: int, label_name: str -) -> dict[str, Any]: - """Removes a specific label from an issue or PR. - - Args: - item_number: The issue number. - label_name: The name of the label to remove. - - Returns: - A dictionary indicating the status of the operation. - """ - logger.debug(f"Removing label '{label_name}' from issue #{item_number}.") - url = f"{GITHUB_BASE_URL}/repos/{OWNER}/{REPO}/issues/{item_number}/labels/{label_name}" - try: - delete_request(url) - logger.info( - f"Successfully removed label '{label_name}' from issue #{item_number}." 
- ) - return {"status": "success"} - except RequestException as e: - logger.error( - f"Failed to remove '{label_name}' from issue #{item_number}: {e}" - ) - return error_response(f"Error removing label: {e}") + """Adds a label to the issue.""" + logger.debug(f"Adding label '{label_name}' to issue #{item_number}.") + url = f"{GITHUB_BASE_URL}/repos/{OWNER}/{REPO}/issues/{item_number}/labels" + try: + post_request(url, [label_name]) + return {"status": "success"} + except RequestException as e: + return error_response(f"Error adding label: {e}") + + +def remove_label_from_issue(item_number: int, label_name: str) -> dict[str, Any]: + """Removes a label from the issue.""" + logger.debug(f"Removing label '{label_name}' from issue #{item_number}.") + url = f"{GITHUB_BASE_URL}/repos/{OWNER}/{REPO}/issues/{item_number}/labels/{label_name}" + try: + delete_request(url) + return {"status": "success"} + except RequestException as e: + return error_response(f"Error removing label: {e}") def add_stale_label_and_comment(item_number: int) -> dict[str, Any]: - """Adds the 'stale' label to an issue and posts a comment explaining why. - - Args: - item_number: The issue number. - - Returns: - A dictionary indicating the status of the operation. - """ - logger.debug(f"Adding stale label and comment to issue #{item_number}.") - comment = ( - "This issue has been automatically marked as stale because it has not" - " had recent activity after a maintainer requested clarification. It" - " will be closed if no further activity occurs within" - f" {CLOSE_HOURS_AFTER_STALE_THRESHOLD / 24:.0f} days." - ) - try: - post_request( - f"{GITHUB_BASE_URL}/repos/{OWNER}/{REPO}/issues/{item_number}/comments", - {"body": comment}, + """Marks the issue as stale with a comment and label.""" + comment = ( + "This issue has been automatically marked as stale because it has not" + " had recent activity after a maintainer requested clarification. It" + " will be closed if no further activity occurs within" + f" {CLOSE_HOURS_AFTER_STALE_THRESHOLD / 24:.0f} days." ) - post_request( - f"{GITHUB_BASE_URL}/repos/{OWNER}/{REPO}/issues/{item_number}/labels", - [STALE_LABEL_NAME], + try: + post_request( + f"{GITHUB_BASE_URL}/repos/{OWNER}/{REPO}/issues/{item_number}/comments", + {"body": comment}, + ) + post_request( + f"{GITHUB_BASE_URL}/repos/{OWNER}/{REPO}/issues/{item_number}/labels", + [STALE_LABEL_NAME], + ) + return {"status": "success"} + except RequestException as e: + return error_response(f"Error marking issue as stale: {e}") + + +def alert_maintainer_of_edit(item_number: int) -> dict[str, Any]: + """Posts a comment alerting maintainers of a silent description update.""" + comment = ( + "**Notification:** The author has updated the issue description. " + "Maintainers, please review." ) - logger.info(f"Successfully marked issue #{item_number} as stale.") - return {"status": "success"} - except RequestException as e: - logger.error( - f"Failed to mark issue #{item_number} as stale: {e}", exc_info=True - ) - return error_response(f"Error marking issue as stale: {e}") + try: + post_request( + f"{GITHUB_BASE_URL}/repos/{OWNER}/{REPO}/issues/{item_number}/comments", + {"body": comment}, + ) + return {"status": "success"} + except RequestException as e: + return error_response(f"Error posting alert: {e}") def close_as_stale(item_number: int) -> dict[str, Any]: - """Posts a final comment and closes an issue or PR as stale. - - Args: - item_number: The issue number. - - Returns: - A dictionary indicating the status of the operation. 
- """ - logger.debug(f"Closing issue #{item_number} as stale.") - comment = ( - "This has been automatically closed because it has been marked as stale" - f" for over {CLOSE_HOURS_AFTER_STALE_THRESHOLD / 24:.0f} days." - ) - try: - post_request( - f"{GITHUB_BASE_URL}/repos/{OWNER}/{REPO}/issues/{item_number}/comments", - {"body": comment}, - ) - patch_request( - f"{GITHUB_BASE_URL}/repos/{OWNER}/{REPO}/issues/{item_number}", - {"state": "closed"}, + """Closes the issue as not planned/stale.""" + comment = ( + "This has been automatically closed because it has been marked as stale" + f" for over {CLOSE_HOURS_AFTER_STALE_THRESHOLD / 24:.0f} days." ) - logger.info(f"Successfully closed issue #{item_number} as stale.") - return {"status": "success"} - except RequestException as e: - logger.error( - f"Failed to close issue #{item_number} as stale: {e}", exc_info=True - ) - return error_response(f"Error closing issue: {e}") - - -# --- Agent Definition --- + try: + post_request( + f"{GITHUB_BASE_URL}/repos/{OWNER}/{REPO}/issues/{item_number}/comments", + {"body": comment}, + ) + patch_request( + f"{GITHUB_BASE_URL}/repos/{OWNER}/{REPO}/issues/{item_number}", + {"state": "closed"}, + ) + return {"status": "success"} + except RequestException as e: + return error_response(f"Error closing issue: {e}") root_agent = Agent( model=LLM_MODEL_NAME, name="adk_repository_auditor_agent", - description=( - "Audits open issues to manage their state based on conversation" - " history." - ), + description="Audits open issues.", instruction=PROMPT_TEMPLATE.format( OWNER=OWNER, REPO=REPO, STALE_LABEL_NAME=STALE_LABEL_NAME, REQUEST_CLARIFICATION_LABEL=REQUEST_CLARIFICATION_LABEL, - STALE_HOURS_THRESHOLD=STALE_HOURS_THRESHOLD, - CLOSE_HOURS_AFTER_STALE_THRESHOLD=CLOSE_HOURS_AFTER_STALE_THRESHOLD, + stale_threshold_days=STALE_HOURS_THRESHOLD / 24, + close_threshold_days=CLOSE_HOURS_AFTER_STALE_THRESHOLD / 24, ), tools=[ + alert_maintainer_of_edit, + get_issue_state, add_label_to_issue, + remove_label_from_issue, add_stale_label_and_comment, - calculate_time_difference, close_as_stale, - get_all_open_issues, - get_issue_state, - get_repository_maintainers, - remove_label_from_issue, ], -) +) \ No newline at end of file diff --git a/contributing/samples/adk_stale_agent/main.py b/contributing/samples/adk_stale_agent/main.py index f6fba3fba0..e91ee72cf3 100644 --- a/contributing/samples/adk_stale_agent/main.py +++ b/contributing/samples/adk_stale_agent/main.py @@ -17,8 +17,12 @@ import time from adk_stale_agent.agent import root_agent -from adk_stale_agent.settings import OWNER -from adk_stale_agent.settings import REPO +from adk_stale_agent.settings import OWNER, REPO, STALE_HOURS_THRESHOLD, CONCURRENCY_LIMIT +from adk_stale_agent.utils import ( + get_api_call_count, + get_old_open_issue_numbers, + reset_api_call_count, +) from google.adk.cli.utils import logs from google.adk.runners import InMemoryRunner from google.genai import types @@ -26,49 +30,112 @@ logs.setup_adk_logger(level=logging.INFO) logger = logging.getLogger("google_adk." 
+ __name__) -APP_NAME = "adk_stale_agent_app" -USER_ID = "adk_stale_agent_user" +APP_NAME = "stale_bot_app" +USER_ID = "stale_bot_user" - -async def main(): - """Initializes and runs the stale issue agent.""" - logger.info("--- Starting Stale Agent Run ---") +async def process_single_issue(issue_number: int): + """Processes a single GitHub issue and logs its metrics.""" + issue_start_time = time.time() + # Reset counter for each individual issue to get isolated metrics + reset_api_call_count() + + logger.info(f"Processing Issue #{issue_number}...") + runner = InMemoryRunner(agent=root_agent, app_name=APP_NAME) session = await runner.session_service.create_session( user_id=USER_ID, app_name=APP_NAME ) + prompt_text = f"Audit Issue #{issue_number}." + prompt_message = types.Content(role="user", parts=[types.Part(text=prompt_text)]) - prompt_text = ( - "Find and process all open issues to manage staleness according to your" - " rules." + try: + async for event in runner.run_async( + user_id=USER_ID, session_id=session.id, new_message=prompt_message + ): + if ( + event.content + and event.content.parts + and hasattr(event.content.parts[0], "text") + ): + text = event.content.parts[0].text + if text: + logger.info(f"#{issue_number} Decision: {text[:150]}...") + except Exception as e: + logger.error(f"Error processing issue #{issue_number}: {e}") + + # --- Logging is now inside this function --- + issue_duration = time.time() - issue_start_time + issue_api_calls = get_api_call_count() + + logger.info( + f"Issue #{issue_number} finished in {issue_duration:.2f} seconds " + f"with {issue_api_calls} API calls." ) - logger.info(f"Initial Agent Prompt: {prompt_text}\n") - prompt_message = types.Content( - role="user", parts=[types.Part(text=prompt_text)] + # Return metrics for final summary + return issue_duration, issue_api_calls + + +async def main(): + """Main function to run the stale issue bot concurrently.""" + logger.info(f"--- Starting Stale Bot for {OWNER}/{REPO} ---") + logger.info(f"Concurrency level set to {CONCURRENCY_LIMIT}") + + reset_api_call_count() + filter_days = STALE_HOURS_THRESHOLD / 24 + + all_issues = get_old_open_issue_numbers(OWNER, REPO, days_old=filter_days) + total_count = len(all_issues) + search_api_calls = get_api_call_count() + + if total_count == 0: + logger.info("No issues matched the criteria. Run finished.") + return + + logger.info( + f"Found {total_count} issues to process. " + f"(Initial search used {search_api_calls} API calls)." 
) - async for event in runner.run_async( - user_id=USER_ID, session_id=session.id, new_message=prompt_message - ): - if ( - event.content - and event.content.parts - and hasattr(event.content.parts[0], "text") - ): - # Print the agent's "thoughts" and actions for logging purposes - logger.debug(f"** {event.author} (ADK): {event.content.parts[0].text}") + total_processing_time = 0 + total_issue_api_calls = 0 + processed_count = 0 + + # --- Concurrency Logic --- + # Process the list in chunks of size CONCURRENCY_LIMIT + for i in range(0, total_count, CONCURRENCY_LIMIT): + chunk = all_issues[i:i + CONCURRENCY_LIMIT] + + # Create a list of tasks for the current chunk + tasks = [process_single_issue(issue_num) for issue_num in chunk] + + logger.info(f"--- Starting chunk {i//CONCURRENCY_LIMIT + 1}: Processing issues {chunk} ---") + + # Run the tasks in the chunk concurrently + results = await asyncio.gather(*tasks) + + # Aggregate the results from the chunk + for duration, api_calls in results: + total_processing_time += duration + total_issue_api_calls += api_calls + processed_count += len(chunk) - logger.info(f"--- Stale Agent Run Finished---") + logger.info(f"--- Finished chunk. Processed so far: {processed_count}/{total_count} ---") + + # A small delay between chunks to be respectful to the GitHub API + if (i + CONCURRENCY_LIMIT) < total_count: + time.sleep(1.5) + + total_api_calls_for_run = search_api_calls + total_issue_api_calls + avg_time_per_issue = total_processing_time / total_count if total_count > 0 else 0 + + logger.info("--- Stale Agent Run Finished ---") + logger.info(f"Successfully processed {processed_count} issues.") + logger.info(f"Total API calls made this run: {total_api_calls_for_run}") + logger.info(f"Average time per issue: {avg_time_per_issue:.2f} seconds.") if __name__ == "__main__": start_time = time.time() - logger.info(f"Initializing stale agent for repository: {OWNER}/{REPO}") - logger.info("-" * 80) - asyncio.run(main()) - - logger.info("-" * 80) - end_time = time.time() - duration = end_time - start_time - logger.info(f"Script finished in {duration:.2f} seconds.") + duration = time.time() - start_time + logger.info(f"Full audit finished in {duration/60:.2f} minutes.") \ No newline at end of file diff --git a/contributing/samples/adk_stale_agent/settings.py b/contributing/samples/adk_stale_agent/settings.py index 1b71e451f3..b7f727452d 100644 --- a/contributing/samples/adk_stale_agent/settings.py +++ b/contributing/samples/adk_stale_agent/settings.py @@ -33,7 +33,6 @@ REQUEST_CLARIFICATION_LABEL = "request clarification" # --- THRESHOLDS IN HOURS --- -# These values can be overridden in a .env file for rapid testing (e.g., STALE_HOURS_THRESHOLD=1) # Default: 168 hours (7 days) # The number of hours of inactivity after a maintainer comment before an issue is marked as stale. STALE_HOURS_THRESHOLD = float(os.getenv("STALE_HOURS_THRESHOLD", 168)) @@ -44,6 +43,7 @@ os.getenv("CLOSE_HOURS_AFTER_STALE_THRESHOLD", 168) ) -# --- BATCH SIZE CONFIGURATION --- -# The maximum number of oldest open issues to process in a single run of the bot. -ISSUES_PER_RUN = int(os.getenv("ISSUES_PER_RUN", 100)) +# --- Performance Configuration --- +# The number of issues to process concurrently. 
+# Higher values are faster but increase the immediate rate of API calls +CONCURRENCY_LIMIT = int(os.getenv("CONCURRENCY_LIMIT", 3)) \ No newline at end of file diff --git a/contributing/samples/adk_stale_agent/utils.py b/contributing/samples/adk_stale_agent/utils.py index 0efb051f72..56d7a3cded 100644 --- a/contributing/samples/adk_stale_agent/utils.py +++ b/contributing/samples/adk_stale_agent/utils.py @@ -12,48 +12,246 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any +import logging +from datetime import datetime, timedelta, timezone +import dateutil.parser +from typing import Any, Dict, List, Optional -from adk_stale_agent.settings import GITHUB_TOKEN import requests +from adk_stale_agent.settings import GITHUB_TOKEN, STALE_HOURS_THRESHOLD +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry +# --- Module-level logger setup --- +logger = logging.getLogger("google_adk." + __name__) + +# --- API Call Counter for Monitoring --- +_api_call_count = 0 + + +def get_api_call_count() -> int: + """Returns the total number of API calls made since the last reset.""" + return _api_call_count + + +def reset_api_call_count() -> None: + """Resets the global API call counter to zero.""" + global _api_call_count + _api_call_count = 0 + + +def _increment_api_call_count() -> None: + """Atomically increments the global API call counter.""" + global _api_call_count + _api_call_count += 1 + + +# --- Production-Ready HTTP Session with Exponential Backoff --- + +# Configure the retry strategy. This implements exponential backoff automatically. +# - total=6: Allow up to 6 total retries. +# - backoff_factor=2: A key factor for exponential delay. The time between retries +# will be {backoff_factor} * (2 ** ({number_of_retries} - 1)). +# e.g., waits for [2s, 4s, 8s, 16s, 32s] between retries. +# - status_forcelist: A set of HTTP status codes that will trigger a retry. +# These are common codes for temporary server errors or rate limiting. +retry_strategy = Retry( + total=6, + backoff_factor=2, + status_forcelist=[429, 500, 502, 503, 504], + allowed_methods=["HEAD", "GET", "POST", "PUT", "DELETE", "OPTIONS", "TRACE", "PATCH"], +) + +# Create an adapter with the retry strategy. +adapter = HTTPAdapter(max_retries=retry_strategy) + +# Create a single, reusable Session object for the entire application. +# This is crucial for performance as it enables connection pooling. _session = requests.Session() + +# Mount the adapter to the session for both http and https protocols. +_session.mount("https://", adapter) +_session.mount("http://", adapter) + +# Set common headers for all requests made with this session. _session.headers.update({ "Authorization": f"token {GITHUB_TOKEN}", "Accept": "application/vnd.github.v3+json", }) -def get_request(url: str, params: dict[str, Any] | None = None) -> Any: - """Sends a GET request to the GitHub API.""" - response = _session.get(url, params=params or {}, timeout=60) - response.raise_for_status() - return response.json() +def get_request(url: str, params: Optional[Dict[str, Any]] = None) -> Any: + """ + Sends a GET request to the GitHub API with configured retries. + + Args: + url: The URL endpoint to send the request to. + params: An optional dictionary of URL parameters. + + Returns: + The JSON response from the API as a dictionary or list. + + Raises: + requests.exceptions.RequestException: For network errors or HTTP status + codes that are not resolved by retries. 
+ """ + _increment_api_call_count() + try: + response = _session.get(url, params=params or {}, timeout=60) + response.raise_for_status() # Raise an exception for HTTP error codes + return response.json() + except requests.exceptions.RequestException as e: + logger.error(f"GET request failed for {url}: {e}") + raise def post_request(url: str, payload: Any) -> Any: - """Sends a POST request to the GitHub API.""" - response = _session.post(url, json=payload, timeout=60) - response.raise_for_status() - return response.json() + """ + Sends a POST request to the GitHub API with configured retries. + + Args: + url: The URL endpoint to send the request to. + payload: The JSON payload to send with the request. + + Returns: + The JSON response from the API as a dictionary or list. + + Raises: + requests.exceptions.RequestException: For network errors or HTTP status + codes that are not resolved by retries. + """ + _increment_api_call_count() + try: + response = _session.post(url, json=payload, timeout=60) + response.raise_for_status() + return response.json() + except requests.exceptions.RequestException as e: + logger.error(f"POST request failed for {url}: {e}") + raise def patch_request(url: str, payload: Any) -> Any: - """Sends a PATCH request to the GitHub API.""" - response = _session.patch(url, json=payload, timeout=60) - response.raise_for_status() - return response.json() + """ + Sends a PATCH request to the GitHub API with configured retries. + + Args: + url: The URL endpoint to send the request to. + payload: The JSON payload to send with the request. + + Returns: + The JSON response from the API as a dictionary or list. + + Raises: + requests.exceptions.RequestException: For network errors or HTTP status + codes that are not resolved by retries. + """ + _increment_api_call_count() + try: + response = _session.patch(url, json=payload, timeout=60) + response.raise_for_status() + return response.json() + except requests.exceptions.RequestException as e: + logger.error(f"PATCH request failed for {url}: {e}") + raise def delete_request(url: str) -> Any: - """Sends a DELETE request to the GitHub API.""" - response = _session.delete(url, timeout=60) - response.raise_for_status() - if response.status_code == 204: - return {"status": "success"} - return response.json() + """ + Sends a DELETE request to the GitHub API with configured retries. + + Args: + url: The URL endpoint to send the request to. + + Returns: + A success dictionary for 204 status, otherwise the JSON response. + + Raises: + requests.exceptions.RequestException: For network errors or HTTP status + codes that are not resolved by retries. + """ + _increment_api_call_count() + try: + response = _session.delete(url, timeout=60) + response.raise_for_status() + if response.status_code == 204: + return {"status": "success", "message": "Deletion successful."} + return response.json() + except requests.exceptions.RequestException as e: + logger.error(f"DELETE request failed for {url}: {e}") + raise + + +def error_response(error_message: str) -> Dict[str, Any]: + """ + Creates a standardized error response dictionary for tool outputs. + + Args: + error_message: A descriptive message of the error that occurred. + + Returns: + A dictionary containing the error status and message. + """ + return {"status": "error", "message": error_message} + + +def get_old_open_issue_numbers( + owner: str, repo: str, days_old: Optional[float] = None +) -> List[int]: + """ + Finds open issues older than the precise `days_old` threshold. 
+ + This function first fetches ALL open issues from the repository and then + applies a precise, client-side filter to find the ones that are + older than the specified threshold. + """ + if days_old is None: + days_old = STALE_HOURS_THRESHOLD / 24 + + # 1. Calculate the PRECISE cutoff time in UTC. + now_utc = datetime.now(timezone.utc) + precise_cutoff_datetime = now_utc - timedelta(days=days_old) + + # 2. Build a query to get ALL open issues. The date filter is removed. + query = f"repo:{owner}/{repo} is:issue state:open" + logger.info(f"Fetching all open issues from '{owner}/{repo}'...") + + all_open_issues = [] + page = 1 + url = "https://api.github.com/search/issues" + + # Stage 1: Fetch all open issues via API + while True: + params = {"q": query, "per_page": 100, "page": page} + try: + data = get_request(url, params=params) + items = data.get("items", []) + if not items: + break + + all_open_issues.extend(items) + + if len(items) < 100: + break + page += 1 + except requests.exceptions.RequestException as e: + logger.error(f"GitHub search failed on page {page}: {e}") + break + + logger.info( + f"Fetched {len(all_open_issues)} total open issues. " + f"Now filtering for those created before: {precise_cutoff_datetime.isoformat()}" + ) + + # Stage 2: Apply the precise time filter in Python + final_issue_numbers = [] + for item in all_open_issues: + if "pull_request" in item: + continue + + issue_creation_time = dateutil.parser.isoparse(item["created_at"]) + if issue_creation_time < precise_cutoff_datetime: + final_issue_numbers.append(item["number"]) -def error_response(error_message: str) -> dict[str, Any]: - """Creates a standardized error dictionary for the agent.""" - return {"status": "error", "message": error_message} + logger.info(f"Found {len(final_issue_numbers)} issues that are older than the threshold.") + return final_issue_numbers \ No newline at end of file From 7583b15bf8cbe249c3aeb3a59d058d10bfa30447 Mon Sep 17 00:00:00 2001 From: Rohit Yanamadala Date: Sun, 23 Nov 2025 00:56:14 -0800 Subject: [PATCH 2/9] Update GitHub Action --- .github/workflows/stale-bot.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/stale-bot.yml b/.github/workflows/stale-bot.yml index 1fe5665c16..6cb397d9f6 100644 --- a/.github/workflows/stale-bot.yml +++ b/.github/workflows/stale-bot.yml @@ -35,8 +35,8 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.ADK_TRIAGE_AGENT }} GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} - OWNER: ${{ github.repository_owner }} - REPO: ${{ github.event.repository.name }} + OWNER: ryanaiagent + REPO: adk-python CONCURRENCY_LIMIT: 3 LLM_MODEL_NAME: "gemini-2.5-flash" PYTHONPATH: contributing/samples From e32c54381d3f294c824adbf2dbdc54a8d37ae098 Mon Sep 17 00:00:00 2001 From: Rohit Yanamadala Date: Sun, 23 Nov 2025 00:58:37 -0800 Subject: [PATCH 3/9] Update GitHub Action --- .github/workflows/stale-bot.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/stale-bot.yml b/.github/workflows/stale-bot.yml index 6cb397d9f6..159abacac4 100644 --- a/.github/workflows/stale-bot.yml +++ b/.github/workflows/stale-bot.yml @@ -8,10 +8,9 @@ on: - cron: '0 6 * * *' jobs: - run-agent: - timeout-minutes: 60 audit-stale-issues: runs-on: ubuntu-latest + timeout-minutes: 60 permissions: issues: write From 0a3b5727dace16b490d976cb8a2e10ed6836c5e4 Mon Sep 17 00:00:00 2001 From: Rohit Yanamadala Date: Mon, 24 Nov 2025 11:52:46 -0800 Subject: [PATCH 4/9] Feat:Refactor all files --- .github/workflows/stale-bot.yml | 4 +- 
.../samples/adk_stale_agent/README.md | 86 ++- contributing/samples/adk_stale_agent/agent.py | 662 ++++++++++-------- contributing/samples/adk_stale_agent/main.py | 159 +++-- .../samples/adk_stale_agent/settings.py | 2 +- contributing/samples/adk_stale_agent/utils.py | 347 ++++----- contributing/samples/gepa/experiment.py | 1 - contributing/samples/gepa/run_experiment.py | 1 - 8 files changed, 700 insertions(+), 562 deletions(-) diff --git a/.github/workflows/stale-bot.yml b/.github/workflows/stale-bot.yml index 159abacac4..6948b56459 100644 --- a/.github/workflows/stale-bot.yml +++ b/.github/workflows/stale-bot.yml @@ -4,7 +4,7 @@ on: workflow_dispatch: schedule: - # This runs at 6:00 AM UTC (e.g., 10 PM PST). + # This runs at 6:00 AM UTC (10 PM PST) - cron: '0 6 * * *' jobs: @@ -34,7 +34,7 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.ADK_TRIAGE_AGENT }} GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} - OWNER: ryanaiagent + OWNER: ${{ github.repository_owner }} REPO: adk-python CONCURRENCY_LIMIT: 3 LLM_MODEL_NAME: "gemini-2.5-flash" diff --git a/contributing/samples/adk_stale_agent/README.md b/contributing/samples/adk_stale_agent/README.md index 17b427d77c..afc47b11cc 100644 --- a/contributing/samples/adk_stale_agent/README.md +++ b/contributing/samples/adk_stale_agent/README.md @@ -1,65 +1,89 @@ # ADK Stale Issue Auditor Agent -This directory contains an autonomous agent designed to audit a GitHub repository for stale issues, helping to maintain repository hygiene and ensure that all open items are actionable. +This directory contains an autonomous, **GraphQL-powered** agent designed to audit a GitHub repository for stale issues. It maintains repository hygiene by ensuring all open items are actionable and responsive. -The agent operates as a "Repository Auditor," proactively scanning all open issues rather than waiting for a specific trigger. It uses a combination of deterministic Python tools and the semantic understanding of a Large Language Model (LLM) to make intelligent decisions about the state of a conversation. +Unlike traditional "Stale Bots" that only look at timestamps, this agent uses a **Unified History Trace** and an **LLM (Large Language Model)** to understand the *context* of a conversation. It distinguishes between a maintainer asking a question (stale candidate) vs. a maintainer providing a status update (active). --- ## Core Logic & Features -The agent's primary goal is to identify issues where a maintainer has requested information from the author, and to manage the lifecycle of that issue based on the author's response (or lack thereof). +The agent operates as a "Repository Auditor," proactively scanning open issues using a high-efficiency decision tree. -**The agent follows a precise decision tree:** +### 1. Smart State Verification (GraphQL) +Instead of making multiple expensive API calls, the agent uses a single **GraphQL** query per issue to reconstruct the entire history of the conversation. It combines: +* **Comments** +* **Description/Body Edits** ("Ghost Edits") +* **Title Renames** +* **State Changes** (Reopens) -1. **Audits All Open Issues:** On each run, the agent fetches a batch of the oldest open issues in the repository. -2. **Identifies Pending Issues:** It analyzes the full timeline of each issue to see if the last human action was a comment from a repository maintainer. -3. **Semantic Intent Analysis:** If the last comment was from a maintainer, the agent uses the LLM to determine if the comment was a **question or a request for clarification**. -4. 
**Marks as Stale:** If the maintainer's question has gone unanswered by the author for a configurable period (e.g., 7 days), the agent will: - * Apply a `stale` label to the issue. - * Post a comment notifying the author that the issue is now considered stale and will be closed if no further action is taken. - * Proactively add a `request clarification` label if it's missing, to make the issue's state clear. -5. **Handles Activity:** If any non-maintainer (the author or a third party) comments on an issue, the agent will automatically remove the `stale` label, marking the issue as active again. -6. **Closes Stale Issues:** If an issue remains in the `stale` state for another configurable period (e.g., 7 days) with no new activity, the agent will post a final comment and close the issue. +It sorts these events chronologically to determine the **Last Active Actor**. -### Self-Configuration +### 2. The "Last Actor" Rule +The agent follows a precise logic flow based on who acted last: -A key feature of this agent is its ability to self-configure. It does not require a hard-coded list of maintainer usernames. On each run, it uses the GitHub API to dynamically fetch the list of users with write access to the repository, ensuring its logic is always based on the current team. +* **If Author/User acted last:** The issue is **ACTIVE**. + * This includes comments, title changes, and *silent* description edits. + * **Action:** The agent immediately removes the `stale` label. + * **Silent Update Alert:** If the user edited the description but *did not* comment, the agent posts a specific alert: *"Notification: The author has updated the issue description..."* to ensure maintainers are notified (since GitHub does not trigger notifications for body edits). + * **Spam Prevention:** The agent checks if it has already alerted about a specific silent edit to avoid spamming the thread. + +* **If Maintainer acted last:** The issue is **POTENTIALLY STALE**. + * The agent passes the text of the maintainer's last comment to the LLM. + +### 3. Semantic Intent Analysis (LLM) +If the maintainer was the last person to speak, the LLM analyzes the comment text to determine intent: +* **Question/Request:** "Can you provide logs?" / "Please try v2.0." + * **Verdict:** **STALE** (Waiting on Author). + * **Action:** If the time threshold is met, the agent adds the `stale` label. It also checks for the `request clarification` label and adds it if missing. +* **Status Update:** "We are working on a fix." / "Added to backlog." + * **Verdict:** **ACTIVE** (Waiting on Maintainer). + * **Action:** No action taken. The issue remains open without stale labels. + +### 4. Lifecycle Management +* **Marking Stale:** After `STALE_HOURS_THRESHOLD` (default: 7 days) of inactivity following a maintainer's question. +* **Closing:** After `CLOSE_HOURS_AFTER_STALE_THRESHOLD` (default: 7 days) of continued inactivity while marked stale. + +--- + +## Performance & Safety + +* **GraphQL Optimized:** Fetches comments, edits, labels, and timeline events in a single network request to minimize latency and API quota usage. +* **Search API Filtering:** Uses the GitHub Search API to pre-filter issues created recently, ensuring the bot doesn't waste cycles analyzing brand-new issues. +* **Rate Limit Aware:** Includes intelligent sleeping and retry logic (exponential backoff) to handle GitHub API rate limits (HTTP 429) gracefully. +* **Execution Metrics:** Logs the time taken and API calls consumed for every issue processed. 
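The retry behaviour described above is implemented in `utils.py` with a shared `requests.Session` and urllib3's `Retry`. A condensed sketch of that setup is shown below; the numeric values match the patch, and the token header is a placeholder:

```python
# Condensed from contributing/samples/adk_stale_agent/utils.py in this patch.
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

retry_strategy = Retry(
    total=6,                  # up to 6 retries per request
    backoff_factor=2,         # roughly 2s, 4s, 8s, 16s, 32s between attempts
    status_forcelist=[429, 500, 502, 503, 504],  # rate limits and transient 5xx
    allowed_methods=["GET", "POST", "PATCH", "DELETE"],
)

session = requests.Session()
session.mount("https://", HTTPAdapter(max_retries=retry_strategy))
session.headers.update({
    "Authorization": "token <GITHUB_TOKEN>",      # placeholder, set from env
    "Accept": "application/vnd.github.v3+json",
})
```

Reusing a single mounted session also gives connection pooling, which keeps per-issue latency low when several issues are processed concurrently.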
--- ## Configuration -The agent is configured entirely via environment variables, which should be set as secrets in the GitHub Actions workflow environment. +The agent is configured via environment variables, typically set as secrets in GitHub Actions. ### Required Secrets | Secret Name | Description | | :--- | :--- | -| `GITHUB_TOKEN` | A GitHub Personal Access Token (PAT) with the required permissions. It's recommended to use a PAT from a dedicated "bot" account. -| `GOOGLE_API_KEY` | An API key for the Google AI (Gemini) model used for the agent's reasoning. - -### Required PAT Permissions - -The `GITHUB_TOKEN` requires the following **Repository Permissions**: -* **Issues**: `Read & write` (to read issues, add labels, comment, and close) -* **Administration**: `Read-only` (to read the list of repository collaborators/maintainers) +| `GITHUB_TOKEN` | A GitHub Personal Access Token (PAT) or Service Account Token with `repo` scope. | +| `GOOGLE_API_KEY` | An API key for the Google AI (Gemini) model used for reasoning. | ### Optional Configuration -These environment variables can be set in the workflow file to override the defaults in `settings.py`. +These variables control the timing thresholds and model selection. | Variable Name | Description | Default | | :--- | :--- | :--- | -| `STALE_HOURS_THRESHOLD` | The number of hours of inactivity after a maintainer's question before an issue is marked as `stale`. | `168` (7 days) | -| `CLOSE_HOURS_AFTER_STALE_THRESHOLD` | The number of hours after being marked `stale` before an issue is closed. | `168` (7 days) | -| `ISSUES_PER_RUN` | The maximum number of oldest open issues to process in a single workflow run. | `100` | -| `LLM_MODEL_NAME`| LLM model to use. | `gemini-2.5-flash` | +| `STALE_HOURS_THRESHOLD` | Hours of inactivity after a maintainer's question before marking as `stale`. | `168` (7 days) | +| `CLOSE_HOURS_AFTER_STALE_THRESHOLD` | Hours after being marked `stale` before the issue is closed. | `168` (7 days) | +| `LLM_MODEL_NAME`| The specific Gemini model version to use. | `gemini-2.5-flash` | +| `OWNER` | Repository owner (auto-detected in Actions). | (Environment dependent) | +| `REPO` | Repository name (auto-detected in Actions). | (Environment dependent) | --- ## Deployment -To deploy this agent, a GitHub Actions workflow file (`.github/workflows/stale-bot.yml`) is included. This workflow runs on a daily schedule and executes the agent's main script. +To deploy this agent, a GitHub Actions workflow file (`.github/workflows/stale-bot.yml`) is recommended. + +### Directory Structure Note +Because this agent resides within the `adk-python` package structure, the workflow must ensure the script is executed correctly to handle imports. -Ensure the necessary repository secrets are configured and the `stale` and `request clarification` labels exist in the repository. \ No newline at end of file diff --git a/contributing/samples/adk_stale_agent/agent.py b/contributing/samples/adk_stale_agent/agent.py index 5693e050eb..9948f26584 100644 --- a/contributing/samples/adk_stale_agent/agent.py +++ b/contributing/samples/adk_stale_agent/agent.py @@ -12,107 +12,135 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from datetime import datetime +from datetime import timezone import logging import os +from typing import Any +from typing import Dict +from typing import List +from typing import Optional + +from adk_stale_agent.settings import CLOSE_HOURS_AFTER_STALE_THRESHOLD +from adk_stale_agent.settings import GITHUB_BASE_URL +from adk_stale_agent.settings import LLM_MODEL_NAME +from adk_stale_agent.settings import OWNER +from adk_stale_agent.settings import REPO +from adk_stale_agent.settings import REQUEST_CLARIFICATION_LABEL +from adk_stale_agent.settings import STALE_HOURS_THRESHOLD +from adk_stale_agent.settings import STALE_LABEL_NAME +from adk_stale_agent.utils import delete_request +from adk_stale_agent.utils import error_response +from adk_stale_agent.utils import get_request +from adk_stale_agent.utils import patch_request +from adk_stale_agent.utils import post_request import dateutil.parser -from datetime import datetime, timezone -from typing import Any, List, Dict, Optional - -from adk_stale_agent.settings import ( - GITHUB_BASE_URL, - OWNER, - REPO, - LLM_MODEL_NAME, - STALE_LABEL_NAME, - REQUEST_CLARIFICATION_LABEL, - STALE_HOURS_THRESHOLD, - CLOSE_HOURS_AFTER_STALE_THRESHOLD, -) -from adk_stale_agent.utils import ( - post_request, - delete_request, - patch_request, - error_response, - get_request, -) from google.adk.agents.llm_agent import Agent from requests.exceptions import RequestException logger = logging.getLogger("google_adk." + __name__) # --- Constants --- -BOT_ALERT_SIGNATURE = "**Notification:** The author has updated the issue description" +# Used to detect if the bot has already posted an alert to avoid spamming. +BOT_ALERT_SIGNATURE = ( + "**Notification:** The author has updated the issue description" +) # --- Global Cache --- _MAINTAINERS_CACHE: Optional[List[str]] = None def _get_cached_maintainers() -> List[str]: - """ - Fetches the list of repository maintainers once and caches it. + """ + Fetches the list of repository maintainers. - Returns: - List[str]: A list of GitHub usernames with push access. - """ - global _MAINTAINERS_CACHE - if _MAINTAINERS_CACHE is not None: - return _MAINTAINERS_CACHE - - logger.info("Initializing Maintainers Cache...") - try: - url = f"{GITHUB_BASE_URL}/repos/{OWNER}/{REPO}/collaborators" - params = {"permission": "push"} - data = get_request(url, params) - _MAINTAINERS_CACHE = [u["login"] for u in data] - logger.info(f"Cached {len(_MAINTAINERS_CACHE)} maintainers.") - except Exception as e: - logger.error(f"Failed to fetch maintainers: {e}") - _MAINTAINERS_CACHE = [] + This function relies on `utils.get_request` for network resilience. + `get_request` is configured with an HTTPAdapter that automatically performs + exponential backoff retries (up to 6 times) for 5xx errors and rate limits. + + If the retries are exhausted or the data format is invalid, this function + raises a RuntimeError to prevent the bot from running with incorrect permissions. + + Returns: + List[str]: A list of GitHub usernames with push access. + + Raises: + RuntimeError: If the API fails after all retries or returns invalid data. 
+ """ + global _MAINTAINERS_CACHE + if _MAINTAINERS_CACHE is not None: return _MAINTAINERS_CACHE + logger.info("Initializing Maintainers Cache...") + + try: + url = f"{GITHUB_BASE_URL}/repos/{OWNER}/{REPO}/collaborators" + params = {"permission": "push"} + + data = get_request(url, params) + + if isinstance(data, list): + _MAINTAINERS_CACHE = [u["login"] for u in data if "login" in u] + logger.info(f"Cached {len(_MAINTAINERS_CACHE)} maintainers.") + return _MAINTAINERS_CACHE + else: + logger.error( + f"Invalid API response format: Expected list, got {type(data)}" + ) + raise ValueError(f"GitHub API returned non-list data: {data}") + + except Exception as e: + logger.critical( + f"FATAL: Failed to verify repository maintainers. Error: {e}" + ) + raise RuntimeError( + "Maintainer verification failed. processing aborted." + ) from e + def load_prompt_template(filename: str) -> str: - """ - Loads the raw text content of a prompt file. + """ + Loads the raw text content of a prompt file. - Args: - filename (str): The name of the file (e.g., 'PROMPT_INSTRUCTION.txt'). + Args: + filename (str): The name of the file (e.g., 'PROMPT_INSTRUCTION.txt'). - Returns: - str: The file content. - """ - file_path = os.path.join(os.path.dirname(__file__), filename) - with open(file_path, "r") as f: - return f.read() + Returns: + str: The file content. + """ + file_path = os.path.join(os.path.dirname(__file__), filename) + with open(file_path, "r") as f: + return f.read() PROMPT_TEMPLATE = load_prompt_template("PROMPT_INSTRUCTION.txt") def get_issue_state(item_number: int) -> Dict[str, Any]: - """ - Retrieves the comprehensive state of a GitHub issue using GraphQL. - - This function constructs a unified timeline of comments, body edits, - renames, and reopens to determine who the *absolute last* actor was. - It handles 'Ghost Edits' (description updates without comments) and - prevents spamming alerts if the bot has already notified maintainers. - - Args: - item_number (int): The GitHub issue number. - - Returns: - Dict[str, Any]: A dictionary containing: - - last_action_role (str): 'author', 'maintainer', or 'other_user'. - - is_stale (bool): Whether the issue is currently marked stale. - - maintainer_alert_needed (bool): True if a silent edit needs an alert. - - days_since_activity (float): Days since the last human action. - - ... and other metadata for the LLM. - """ - maintainers = _get_cached_maintainers() - - # GraphQL Query: Fetches Comments, Edits, and Timeline Events in one go. - query = """ + """ + Retrieves the comprehensive state of a GitHub issue using GraphQL. + + This function constructs a unified timeline of comments, body edits, + renames, and reopens to determine who the *absolute last* actor was. + It handles 'Ghost Edits' (description updates without comments) and + prevents spamming alerts if the bot has already notified maintainers. + + Args: + item_number (int): The GitHub issue number. + + Returns: + Dict[str, Any]: A dictionary containing metadata for the LLM. + - last_action_role (str): 'author', 'maintainer', or 'other_user'. + - is_stale (bool): Whether the issue is currently marked stale. + - maintainer_alert_needed (bool): True if a silent edit needs an alert. + - days_since_activity (float): Days since the last human action. + - last_comment_text (str|None): The text of the last comment, if applicable. + - current_labels (List[str]): List of labels on the issue. 
+ """ + maintainers = _get_cached_maintainers() + + # GraphQL Query: Fetches Comments, Edits, and Timeline Events in one go. + query = """ query($owner: String!, $name: String!, $number: Int!) { repository(owner: $owner, name: $name) { issue(number: $number) { @@ -162,268 +190,302 @@ def get_issue_state(item_number: int) -> Dict[str, Any]: } """ - variables = {"owner": OWNER, "name": REPO, "number": item_number} + variables = {"owner": OWNER, "name": REPO, "number": item_number} - try: - response = post_request( - f"{GITHUB_BASE_URL}/graphql", {"query": query, "variables": variables} - ) + try: + response = post_request( + f"{GITHUB_BASE_URL}/graphql", {"query": query, "variables": variables} + ) + + if "errors" in response: + msg = response["errors"][0]["message"] + return error_response(f"GraphQL Error: {msg}") + + data = response.get("data", {}).get("repository", {}).get("issue", {}) + if not data: + return error_response(f"Issue #{item_number} not found.") + + issue_author = data.get("author", {}).get("login") + labels_list = [l["name"] for l in data.get("labels", {}).get("nodes", [])] + + # Unified list of ALL events to replay history chronologically. + history = [] + last_bot_alert_time = None + + # 1. Baseline: Issue Creation + history.append({ + "type": "created", + "actor": issue_author, + "time": dateutil.parser.isoparse(data["createdAt"]), + "data": None, + }) + + # 2. Process Comments + for c in data.get("comments", {}).get("nodes", []): + if not c: + continue + + actor = c.get("author", {}).get("login") + c_body = c.get("body", "") + c_time = dateutil.parser.isoparse(c.get("createdAt")) + + if BOT_ALERT_SIGNATURE in c_body: + if last_bot_alert_time is None or c_time > last_bot_alert_time: + last_bot_alert_time = c_time + + if actor and not actor.endswith("[bot]"): + e_time = c.get("lastEditedAt") + actual_time = dateutil.parser.isoparse(e_time) if e_time else c_time + history.append({ + "type": "commented", + "actor": actor, + "time": actual_time, + "data": c_body, + }) + + # 3. Process Body Edits + for e in data.get("userContentEdits", {}).get("nodes", []): + if not e: + continue - if "errors" in response: - msg = response["errors"][0]["message"] - return error_response(f"GraphQL Error: {msg}") + actor = e.get("editor", {}).get("login") + if actor and not actor.endswith("[bot]"): + history.append({ + "type": "edited_description", + "actor": actor, + "time": dateutil.parser.isoparse(e.get("editedAt")), + "data": None, + }) - data = response.get("data", {}).get("repository", {}).get("issue", {}) - if not data: - return error_response(f"Issue #{item_number} not found.") + # 4. Process Timeline Events (Labels, Renames, Reopens) + label_events = [] + for t in data.get("timelineItems", {}).get("nodes", []): + if not t: + continue - # --- Data Parsing --- - issue_author = data.get("author", {}).get("login") - labels_list = [l["name"] for l in data.get("labels", {}).get("nodes", [])] + etype = t.get("__typename") + actor = t.get("actor", {}).get("login") + time_val = dateutil.parser.isoparse(t.get("createdAt")) - # We build a unified list of ALL events to replay history chronologically. - history = [] - last_bot_alert_time = None + if etype == "LabeledEvent": + if t.get("label", {}).get("name") == STALE_LABEL_NAME: + label_events.append(time_val) + continue - # 1. 
Baseline: Issue Creation + if actor and not actor.endswith("[bot]"): + pretty_type = ( + "renamed_title" if etype == "RenamedTitleEvent" else "reopened" + ) history.append({ - "type": "created", - "actor": issue_author, - "time": dateutil.parser.isoparse(data["createdAt"]), + "type": pretty_type, + "actor": actor, + "time": time_val, "data": None, }) - # 2. Process Comments - for c in data.get("comments", {}).get("nodes", []): - actor = c.get("author", {}).get("login") - c_body = c.get("body", "") - c_time = dateutil.parser.isoparse(c.get("createdAt")) - - # Check if the bot has already alerted about a silent edit in this thread - if BOT_ALERT_SIGNATURE in c_body: - if last_bot_alert_time is None or c_time > last_bot_alert_time: - last_bot_alert_time = c_time - - # Add human comments to history - if actor and not actor.endswith("[bot]"): - e_time = c.get("lastEditedAt") - # Use edit time if available, otherwise creation time - actual_time = dateutil.parser.isoparse(e_time) if e_time else c_time - history.append({ - "type": "commented", - "actor": actor, - "time": actual_time, - "data": c_body, - }) - - # 3. Process Body Edits - for e in data.get("userContentEdits", {}).get("nodes", []): - actor = e.get("editor", {}).get("login") - if actor and not actor.endswith("[bot]"): - history.append({ - "type": "edited_description", - "actor": actor, - "time": dateutil.parser.isoparse(e.get("editedAt")), - "data": None, - }) - - # 4. Process Timeline Events (Labels, Renames, Reopens) - label_events = [] - for t in data.get("timelineItems", {}).get("nodes", []): - etype = t.get("__typename") - actor = t.get("actor", {}).get("login") - time_val = dateutil.parser.isoparse(t.get("createdAt")) - - # Store stale label events separately for timing calculations - if etype == "LabeledEvent": - if t.get("label", {}).get("name") == STALE_LABEL_NAME: - label_events.append(time_val) - continue - - if actor and not actor.endswith("[bot]"): - pretty_type = ( - "renamed_title" if etype == "RenamedTitleEvent" else "reopened" - ) - history.append({ - "type": pretty_type, - "actor": actor, - "time": time_val, - "data": None, - }) - - # --- History Replay (Chronological Sort) --- - history.sort(key=lambda x: x["time"]) - - last_action_role = "author" # Default start state - last_activity_time = history[0]["time"] - last_action_type = "created" - last_comment_text = None + # --- History Replay (Chronological Sort) --- + history.sort(key=lambda x: x["time"]) + + last_action_role = "author" + last_activity_time = history[0]["time"] + last_action_type = "created" + last_comment_text = None - logger.debug(f"--- Activity Trace for #{item_number} ---") + logger.debug(f"--- Activity Trace for #{item_number} ---") - for event in history: - actor = event["actor"] - etype = event["type"] + for event in history: + actor = event["actor"] + etype = event["type"] - # Determine Role - role = "other_user" - if actor == issue_author: - role = "author" - elif actor in maintainers: - role = "maintainer" + role = "other_user" + if actor == issue_author: + role = "author" + elif actor in maintainers: + role = "maintainer" - # Log the event trace for debugging - logger.debug( - f" [{event['time'].strftime('%m-%d %H:%M')}] " - f"{etype.upper()} by {actor} ({role})" - ) + logger.debug( + f" [{event['time'].strftime('%m-%d %H:%M')}] " + f"{etype.upper()} by {actor} ({role})" + ) - # Update State (The last event in the list wins) - last_action_role = role - last_activity_time = event["time"] - last_action_type = etype + last_action_role = role + 
last_activity_time = event["time"] + last_action_type = etype - if etype == "commented": - last_comment_text = event["data"] - else: - last_comment_text = None + if etype == "commented": + last_comment_text = event["data"] + else: + last_comment_text = None - # --- Spam Prevention / Alert Logic --- + # --- Spam Prevention / Alert Logic --- + maintainer_alert_needed = False + if ( + last_action_role in ["author", "other_user"] + and last_action_type == "edited_description" + ): + if last_bot_alert_time and last_bot_alert_time > last_activity_time: maintainer_alert_needed = False - # If the User edited the description (silent action) and we haven't alerted AFTER that edit... - if ( - last_action_role in ["author", "other_user"] - and last_action_type == "edited_description" - ): - if last_bot_alert_time and last_bot_alert_time > last_activity_time: - maintainer_alert_needed = False - logger.info( - f"#{item_number}: Silent edit detected, but Bot already alerted at " - f"{last_bot_alert_time.strftime('%m-%d %H:%M')}. No spam." - ) - else: - maintainer_alert_needed = True - logger.info(f"#{item_number}: Silent edit detected. Alert needed.") - - # --- Final Metric Calculations --- - current_time = datetime.now(timezone.utc) - days_since_activity = ( - current_time - last_activity_time - ).total_seconds() / 86400 - - is_stale = STALE_LABEL_NAME in labels_list - days_since_stale_label = 0.0 - if is_stale and label_events: - # Calculate time from the MOST RECENT application of the stale label - latest_label_time = max(label_events) - days_since_stale_label = ( - current_time - latest_label_time - ).total_seconds() / 86400 - - logger.debug( - f" -> FINAL VERDICT: Last Actor = {last_action_role.upper()}, " - f"Idle = {days_since_activity:.2f} days" + logger.info( + f"#{item_number}: Silent edit detected, but Bot already alerted at " + f"{last_bot_alert_time.strftime('%m-%d %H:%M')}. No spam." ) + else: + maintainer_alert_needed = True + logger.info(f"#{item_number}: Silent edit detected. 
Alert needed.") + + # --- Final Metric Calculations --- + current_time = datetime.now(timezone.utc) + days_since_activity = ( + current_time - last_activity_time + ).total_seconds() / 86400 + + is_stale = STALE_LABEL_NAME in labels_list + days_since_stale_label = 0.0 + if is_stale and label_events: + latest_label_time = max(label_events) + days_since_stale_label = ( + current_time - latest_label_time + ).total_seconds() / 86400 + + logger.debug( + f" -> FINAL VERDICT: Last Actor = {last_action_role.upper()}, " + f"Idle = {days_since_activity:.2f} days" + ) - return { - "status": "success", - "last_action_role": last_action_role, - "last_action_type": last_action_type, - "maintainer_alert_needed": maintainer_alert_needed, - "is_stale": is_stale, - "days_since_activity": days_since_activity, - "days_since_stale_label": days_since_stale_label, - "last_comment_text": last_comment_text, - "current_labels": labels_list, - "stale_threshold_days": STALE_HOURS_THRESHOLD / 24, - "close_threshold_days": CLOSE_HOURS_AFTER_STALE_THRESHOLD / 24, - } + return { + "status": "success", + "last_action_role": last_action_role, + "last_action_type": last_action_type, + "maintainer_alert_needed": maintainer_alert_needed, + "is_stale": is_stale, + "days_since_activity": days_since_activity, + "days_since_stale_label": days_since_stale_label, + "last_comment_text": last_comment_text, + "current_labels": labels_list, + "stale_threshold_days": STALE_HOURS_THRESHOLD / 24, + "close_threshold_days": CLOSE_HOURS_AFTER_STALE_THRESHOLD / 24, + } - except RequestException as e: - return error_response(f"Network Error: {e}") + except RequestException as e: + return error_response(f"Network Error: {e}") # --- Tool Definitions --- def add_label_to_issue(item_number: int, label_name: str) -> dict[str, Any]: - """Adds a label to the issue.""" - logger.debug(f"Adding label '{label_name}' to issue #{item_number}.") - url = f"{GITHUB_BASE_URL}/repos/{OWNER}/{REPO}/issues/{item_number}/labels" - try: - post_request(url, [label_name]) - return {"status": "success"} - except RequestException as e: - return error_response(f"Error adding label: {e}") - - -def remove_label_from_issue(item_number: int, label_name: str) -> dict[str, Any]: - """Removes a label from the issue.""" - logger.debug(f"Removing label '{label_name}' from issue #{item_number}.") - url = f"{GITHUB_BASE_URL}/repos/{OWNER}/{REPO}/issues/{item_number}/labels/{label_name}" - try: - delete_request(url) - return {"status": "success"} - except RequestException as e: - return error_response(f"Error removing label: {e}") + """ + Adds a label to the issue. + + Args: + item_number (int): The GitHub issue number. + label_name (str): The name of the label to add. + """ + logger.debug(f"Adding label '{label_name}' to issue #{item_number}.") + url = f"{GITHUB_BASE_URL}/repos/{OWNER}/{REPO}/issues/{item_number}/labels" + try: + post_request(url, [label_name]) + return {"status": "success"} + except RequestException as e: + return error_response(f"Error adding label: {e}") + + +def remove_label_from_issue( + item_number: int, label_name: str +) -> dict[str, Any]: + """ + Removes a label from the issue. + + Args: + item_number (int): The GitHub issue number. + label_name (str): The name of the label to remove. 
+ """ + logger.debug(f"Removing label '{label_name}' from issue #{item_number}.") + url = f"{GITHUB_BASE_URL}/repos/{OWNER}/{REPO}/issues/{item_number}/labels/{label_name}" + try: + delete_request(url) + return {"status": "success"} + except RequestException as e: + return error_response(f"Error removing label: {e}") def add_stale_label_and_comment(item_number: int) -> dict[str, Any]: - """Marks the issue as stale with a comment and label.""" - comment = ( - "This issue has been automatically marked as stale because it has not" - " had recent activity after a maintainer requested clarification. It" - " will be closed if no further activity occurs within" - f" {CLOSE_HOURS_AFTER_STALE_THRESHOLD / 24:.0f} days." + """ + Marks the issue as stale with a comment and label. + + Args: + item_number (int): The GitHub issue number. + """ + # Format days cleanly (e.g., "7" instead of "7.0", or "0.5" instead of "0") + days = CLOSE_HOURS_AFTER_STALE_THRESHOLD / 24 + days_str = f"{days:.1f}" if days % 1 != 0 else f"{int(days)}" + + comment = ( + "This issue has been automatically marked as stale because it has not" + " had recent activity after a maintainer requested clarification. It" + " will be closed if no further activity occurs within" + f" {days_str} days." + ) + try: + post_request( + f"{GITHUB_BASE_URL}/repos/{OWNER}/{REPO}/issues/{item_number}/comments", + {"body": comment}, ) - try: - post_request( - f"{GITHUB_BASE_URL}/repos/{OWNER}/{REPO}/issues/{item_number}/comments", - {"body": comment}, - ) - post_request( - f"{GITHUB_BASE_URL}/repos/{OWNER}/{REPO}/issues/{item_number}/labels", - [STALE_LABEL_NAME], - ) - return {"status": "success"} - except RequestException as e: - return error_response(f"Error marking issue as stale: {e}") + post_request( + f"{GITHUB_BASE_URL}/repos/{OWNER}/{REPO}/issues/{item_number}/labels", + [STALE_LABEL_NAME], + ) + return {"status": "success"} + except RequestException as e: + return error_response(f"Error marking issue as stale: {e}") def alert_maintainer_of_edit(item_number: int) -> dict[str, Any]: - """Posts a comment alerting maintainers of a silent description update.""" - comment = ( - "**Notification:** The author has updated the issue description. " - "Maintainers, please review." + """ + Posts a comment alerting maintainers of a silent description update. + + Args: + item_number (int): The GitHub issue number. + """ + # Uses the constant signature to ensure detection logic in get_issue_state works. + comment = f"{BOT_ALERT_SIGNATURE}. Maintainers, please review." + try: + post_request( + f"{GITHUB_BASE_URL}/repos/{OWNER}/{REPO}/issues/{item_number}/comments", + {"body": comment}, ) - try: - post_request( - f"{GITHUB_BASE_URL}/repos/{OWNER}/{REPO}/issues/{item_number}/comments", - {"body": comment}, - ) - return {"status": "success"} - except RequestException as e: - return error_response(f"Error posting alert: {e}") + return {"status": "success"} + except RequestException as e: + return error_response(f"Error posting alert: {e}") def close_as_stale(item_number: int) -> dict[str, Any]: - """Closes the issue as not planned/stale.""" - comment = ( - "This has been automatically closed because it has been marked as stale" - f" for over {CLOSE_HOURS_AFTER_STALE_THRESHOLD / 24:.0f} days." + """ + Closes the issue as not planned/stale. + + Args: + item_number (int): The GitHub issue number. 
+ """ + days = CLOSE_HOURS_AFTER_STALE_THRESHOLD / 24 + days_str = f"{days:.1f}" if days % 1 != 0 else f"{int(days)}" + + comment = ( + "This has been automatically closed because it has been marked as stale" + f" for over {days_str} days." + ) + try: + post_request( + f"{GITHUB_BASE_URL}/repos/{OWNER}/{REPO}/issues/{item_number}/comments", + {"body": comment}, ) - try: - post_request( - f"{GITHUB_BASE_URL}/repos/{OWNER}/{REPO}/issues/{item_number}/comments", - {"body": comment}, - ) - patch_request( - f"{GITHUB_BASE_URL}/repos/{OWNER}/{REPO}/issues/{item_number}", - {"state": "closed"}, - ) - return {"status": "success"} - except RequestException as e: - return error_response(f"Error closing issue: {e}") + patch_request( + f"{GITHUB_BASE_URL}/repos/{OWNER}/{REPO}/issues/{item_number}", + {"state": "closed"}, + ) + return {"status": "success"} + except RequestException as e: + return error_response(f"Error closing issue: {e}") + root_agent = Agent( model=LLM_MODEL_NAME, @@ -438,11 +500,11 @@ def close_as_stale(item_number: int) -> dict[str, Any]: close_threshold_days=CLOSE_HOURS_AFTER_STALE_THRESHOLD / 24, ), tools=[ - alert_maintainer_of_edit, - get_issue_state, add_label_to_issue, - remove_label_from_issue, add_stale_label_and_comment, + alert_maintainer_of_edit, close_as_stale, + get_issue_state, + remove_label_from_issue, ], -) \ No newline at end of file +) diff --git a/contributing/samples/adk_stale_agent/main.py b/contributing/samples/adk_stale_agent/main.py index e91ee72cf3..36a2a691c6 100644 --- a/contributing/samples/adk_stale_agent/main.py +++ b/contributing/samples/adk_stale_agent/main.py @@ -15,14 +15,16 @@ import asyncio import logging import time +from typing import Tuple from adk_stale_agent.agent import root_agent -from adk_stale_agent.settings import OWNER, REPO, STALE_HOURS_THRESHOLD, CONCURRENCY_LIMIT -from adk_stale_agent.utils import ( - get_api_call_count, - get_old_open_issue_numbers, - reset_api_call_count, -) +from adk_stale_agent.settings import CONCURRENCY_LIMIT +from adk_stale_agent.settings import OWNER +from adk_stale_agent.settings import REPO +from adk_stale_agent.settings import STALE_HOURS_THRESHOLD +from adk_stale_agent.utils import get_api_call_count +from adk_stale_agent.utils import get_old_open_issue_numbers +from adk_stale_agent.utils import reset_api_call_count from google.adk.cli.utils import logs from google.adk.runners import InMemoryRunner from google.genai import types @@ -33,22 +35,42 @@ APP_NAME = "stale_bot_app" USER_ID = "stale_bot_user" -async def process_single_issue(issue_number: int): - """Processes a single GitHub issue and logs its metrics.""" - issue_start_time = time.time() - # Reset counter for each individual issue to get isolated metrics - reset_api_call_count() - + +async def process_single_issue(issue_number: int) -> Tuple[float, int]: + """ + Processes a single GitHub issue using the AI agent and logs execution metrics. + + Args: + issue_number (int): The GitHub issue number to audit. + + Returns: + Tuple[float, int]: A tuple containing: + - duration (float): Time taken to process the issue in seconds. + - api_calls (int): The number of API calls made during this specific execution. + + Raises: + Exception: catches generic exceptions to prevent one failure from stopping the batch. 
+ """ + start_time = time.perf_counter() + + start_api_calls = get_api_call_count() + logger.info(f"Processing Issue #{issue_number}...") - - runner = InMemoryRunner(agent=root_agent, app_name=APP_NAME) - session = await runner.session_service.create_session( - user_id=USER_ID, app_name=APP_NAME - ) - prompt_text = f"Audit Issue #{issue_number}." - prompt_message = types.Content(role="user", parts=[types.Part(text=prompt_text)]) + logger.debug(f"#{issue_number}: Initializing runner and session.") try: + runner = InMemoryRunner(agent=root_agent, app_name=APP_NAME) + session = await runner.session_service.create_session( + user_id=USER_ID, app_name=APP_NAME + ) + + prompt_text = f"Audit Issue #{issue_number}." + prompt_message = types.Content( + role="user", parts=[types.Part(text=prompt_text)] + ) + + logger.debug(f"#{issue_number}: Sending prompt to agent.") + async for event in runner.run_async( user_id=USER_ID, session_id=session.id, new_message=prompt_message ): @@ -59,32 +81,48 @@ async def process_single_issue(issue_number: int): ): text = event.content.parts[0].text if text: - logger.info(f"#{issue_number} Decision: {text[:150]}...") + clean_text = text[:150].replace("\n", " ") + logger.info(f"#{issue_number} Decision: {clean_text}...") + except Exception as e: - logger.error(f"Error processing issue #{issue_number}: {e}") - - # --- Logging is now inside this function --- - issue_duration = time.time() - issue_start_time - issue_api_calls = get_api_call_count() + logger.error(f"Error processing issue #{issue_number}: {e}", exc_info=True) + + duration = time.perf_counter() - start_time + + end_api_calls = get_api_call_count() + issue_api_calls = end_api_calls - start_api_calls logger.info( - f"Issue #{issue_number} finished in {issue_duration:.2f} seconds " - f"with {issue_api_calls} API calls." + f"Issue #{issue_number} finished in {duration:.2f}s " + f"with ~{issue_api_calls} API calls." ) - # Return metrics for final summary - return issue_duration, issue_api_calls + + return duration, issue_api_calls async def main(): - """Main function to run the stale issue bot concurrently.""" + """ + Main entry point to run the stale issue bot concurrently. + + Fetches old issues and processes them in batches to respect API rate limits + and concurrency constraints. + """ logger.info(f"--- Starting Stale Bot for {OWNER}/{REPO} ---") logger.info(f"Concurrency level set to {CONCURRENCY_LIMIT}") reset_api_call_count() + filter_days = STALE_HOURS_THRESHOLD / 24 - - all_issues = get_old_open_issue_numbers(OWNER, REPO, days_old=filter_days) + logger.debug(f"Fetching issues older than {filter_days:.2f} days...") + + try: + all_issues = get_old_open_issue_numbers(OWNER, REPO, days_old=filter_days) + except Exception as e: + logger.critical(f"Failed to fetch issue list: {e}", exc_info=True) + return + total_count = len(all_issues) + search_api_calls = get_api_call_count() if total_count == 0: @@ -96,46 +134,59 @@ async def main(): f"(Initial search used {search_api_calls} API calls)." 
) - total_processing_time = 0 + total_processing_time = 0.0 total_issue_api_calls = 0 processed_count = 0 - # --- Concurrency Logic --- # Process the list in chunks of size CONCURRENCY_LIMIT for i in range(0, total_count, CONCURRENCY_LIMIT): - chunk = all_issues[i:i + CONCURRENCY_LIMIT] - - # Create a list of tasks for the current chunk + chunk = all_issues[i : i + CONCURRENCY_LIMIT] + current_chunk_num = i // CONCURRENCY_LIMIT + 1 + + logger.info( + f"--- Starting chunk {current_chunk_num}: Processing issues {chunk} ---" + ) + tasks = [process_single_issue(issue_num) for issue_num in chunk] - - logger.info(f"--- Starting chunk {i//CONCURRENCY_LIMIT + 1}: Processing issues {chunk} ---") - - # Run the tasks in the chunk concurrently + results = await asyncio.gather(*tasks) - # Aggregate the results from the chunk for duration, api_calls in results: - total_processing_time += duration - total_issue_api_calls += api_calls + total_processing_time += duration + total_issue_api_calls += api_calls + processed_count += len(chunk) + logger.info( + f"--- Finished chunk {current_chunk_num}. Progress:" + f" {processed_count}/{total_count} ---" + ) - logger.info(f"--- Finished chunk. Processed so far: {processed_count}/{total_count} ---") - - # A small delay between chunks to be respectful to the GitHub API if (i + CONCURRENCY_LIMIT) < total_count: - time.sleep(1.5) + logger.debug("Sleeping for 1.5s to respect rate limits...") + await asyncio.sleep(1.5) total_api_calls_for_run = search_api_calls + total_issue_api_calls - avg_time_per_issue = total_processing_time / total_count if total_count > 0 else 0 + avg_time_per_issue = ( + total_processing_time / total_count if total_count > 0 else 0 + ) logger.info("--- Stale Agent Run Finished ---") logger.info(f"Successfully processed {processed_count} issues.") logger.info(f"Total API calls made this run: {total_api_calls_for_run}") - logger.info(f"Average time per issue: {avg_time_per_issue:.2f} seconds.") + logger.info( + f"Average processing time per issue: {avg_time_per_issue:.2f} seconds." + ) if __name__ == "__main__": - start_time = time.time() - asyncio.run(main()) - duration = time.time() - start_time - logger.info(f"Full audit finished in {duration/60:.2f} minutes.") \ No newline at end of file + start_time = time.perf_counter() + + try: + asyncio.run(main()) + except KeyboardInterrupt: + logger.warning("Bot execution interrupted manually.") + except Exception as e: + logger.critical(f"Unexpected fatal error: {e}", exc_info=True) + + duration = time.perf_counter() - start_time + logger.info(f"Full audit finished in {duration/60:.2f} minutes.") diff --git a/contributing/samples/adk_stale_agent/settings.py b/contributing/samples/adk_stale_agent/settings.py index b7f727452d..d9ed4b65b9 100644 --- a/contributing/samples/adk_stale_agent/settings.py +++ b/contributing/samples/adk_stale_agent/settings.py @@ -46,4 +46,4 @@ # --- Performance Configuration --- # The number of issues to process concurrently. 
# Higher values are faster but increase the immediate rate of API calls -CONCURRENCY_LIMIT = int(os.getenv("CONCURRENCY_LIMIT", 3)) \ No newline at end of file +CONCURRENCY_LIMIT = int(os.getenv("CONCURRENCY_LIMIT", 3)) diff --git a/contributing/samples/adk_stale_agent/utils.py b/contributing/samples/adk_stale_agent/utils.py index 56d7a3cded..a396c22ac7 100644 --- a/contributing/samples/adk_stale_agent/utils.py +++ b/contributing/samples/adk_stale_agent/utils.py @@ -12,68 +12,84 @@ # See the License for the specific language governing permissions and # limitations under the License. +from datetime import datetime +from datetime import timedelta +from datetime import timezone import logging -from datetime import datetime, timedelta, timezone +import threading +from typing import Any +from typing import Dict +from typing import List +from typing import Optional + +from adk_stale_agent.settings import GITHUB_TOKEN +from adk_stale_agent.settings import STALE_HOURS_THRESHOLD import dateutil.parser -from typing import Any, Dict, List, Optional - import requests -from adk_stale_agent.settings import GITHUB_TOKEN, STALE_HOURS_THRESHOLD from requests.adapters import HTTPAdapter from urllib3.util.retry import Retry -# --- Module-level logger setup --- logger = logging.getLogger("google_adk." + __name__) # --- API Call Counter for Monitoring --- _api_call_count = 0 +_counter_lock = threading.Lock() def get_api_call_count() -> int: - """Returns the total number of API calls made since the last reset.""" + """ + Returns the total number of API calls made since the last reset. + + Returns: + int: The global count of API calls. + """ + with _counter_lock: return _api_call_count def reset_api_call_count() -> None: - """Resets the global API call counter to zero.""" - global _api_call_count + """Resets the global API call counter to zero.""" + global _api_call_count + with _counter_lock: _api_call_count = 0 def _increment_api_call_count() -> None: - """Atomically increments the global API call counter.""" - global _api_call_count + """ + Atomically increments the global API call counter. + Required because the agent may run tools in parallel threads. + """ + global _api_call_count + with _counter_lock: _api_call_count += 1 # --- Production-Ready HTTP Session with Exponential Backoff --- -# Configure the retry strategy. This implements exponential backoff automatically. -# - total=6: Allow up to 6 total retries. -# - backoff_factor=2: A key factor for exponential delay. The time between retries -# will be {backoff_factor} * (2 ** ({number_of_retries} - 1)). -# e.g., waits for [2s, 4s, 8s, 16s, 32s] between retries. -# - status_forcelist: A set of HTTP status codes that will trigger a retry. -# These are common codes for temporary server errors or rate limiting. +# Configure the retry strategy: retry_strategy = Retry( total=6, backoff_factor=2, status_forcelist=[429, 500, 502, 503, 504], - allowed_methods=["HEAD", "GET", "POST", "PUT", "DELETE", "OPTIONS", "TRACE", "PATCH"], + allowed_methods=[ + "HEAD", + "GET", + "POST", + "PUT", + "DELETE", + "OPTIONS", + "TRACE", + "PATCH", + ], ) -# Create an adapter with the retry strategy. adapter = HTTPAdapter(max_retries=retry_strategy) -# Create a single, reusable Session object for the entire application. -# This is crucial for performance as it enables connection pooling. +# Create a single, reusable Session object for connection pooling _session = requests.Session() - -# Mount the adapter to the session for both http and https protocols. 
_session.mount("https://", adapter) _session.mount("http://", adapter) -# Set common headers for all requests made with this session. _session.headers.update({ "Authorization": f"token {GITHUB_TOKEN}", "Accept": "application/vnd.github.v3+json", @@ -81,177 +97,164 @@ def _increment_api_call_count() -> None: def get_request(url: str, params: Optional[Dict[str, Any]] = None) -> Any: - """ - Sends a GET request to the GitHub API with configured retries. + """ + Sends a GET request to the GitHub API with automatic retries. - Args: - url: The URL endpoint to send the request to. - params: An optional dictionary of URL parameters. + Args: + url (str): The URL endpoint. + params (Optional[Dict[str, Any]]): Query parameters. - Returns: - The JSON response from the API as a dictionary or list. + Returns: + Any: The JSON response parsed into a dict or list. - Raises: - requests.exceptions.RequestException: For network errors or HTTP status - codes that are not resolved by retries. - """ - _increment_api_call_count() - try: - response = _session.get(url, params=params or {}, timeout=60) - response.raise_for_status() # Raise an exception for HTTP error codes - return response.json() - except requests.exceptions.RequestException as e: - logger.error(f"GET request failed for {url}: {e}") - raise + Raises: + requests.exceptions.RequestException: If retries are exhausted. + """ + _increment_api_call_count() + try: + response = _session.get(url, params=params or {}, timeout=60) + response.raise_for_status() + return response.json() + except requests.exceptions.RequestException as e: + logger.error(f"GET request failed for {url}: {e}") + raise def post_request(url: str, payload: Any) -> Any: - """ - Sends a POST request to the GitHub API with configured retries. + """ + Sends a POST request to the GitHub API with automatic retries. + + Args: + url (str): The URL endpoint. + payload (Any): The JSON payload. + + Returns: + Any: The JSON response. + """ + _increment_api_call_count() + try: + response = _session.post(url, json=payload, timeout=60) + response.raise_for_status() + return response.json() + except requests.exceptions.RequestException as e: + logger.error(f"POST request failed for {url}: {e}") + raise - Args: - url: The URL endpoint to send the request to. - payload: The JSON payload to send with the request. - Returns: - The JSON response from the API as a dictionary or list. +def patch_request(url: str, payload: Any) -> Any: + """ + Sends a PATCH request to the GitHub API with automatic retries. + + Args: + url (str): The URL endpoint. + payload (Any): The JSON payload. + + Returns: + Any: The JSON response. + """ + _increment_api_call_count() + try: + response = _session.patch(url, json=payload, timeout=60) + response.raise_for_status() + return response.json() + except requests.exceptions.RequestException as e: + logger.error(f"PATCH request failed for {url}: {e}") + raise - Raises: - requests.exceptions.RequestException: For network errors or HTTP status - codes that are not resolved by retries. - """ - _increment_api_call_count() - try: - response = _session.post(url, json=payload, timeout=60) - response.raise_for_status() - return response.json() - except requests.exceptions.RequestException as e: - logger.error(f"POST request failed for {url}: {e}") - raise +def delete_request(url: str) -> Any: + """ + Sends a DELETE request to the GitHub API with automatic retries. + + Args: + url (str): The URL endpoint. + + Returns: + Any: A success dict if 204, else the JSON response. 
+ """ + _increment_api_call_count() + try: + response = _session.delete(url, timeout=60) + response.raise_for_status() + if response.status_code == 204: + return {"status": "success", "message": "Deletion successful."} + return response.json() + except requests.exceptions.RequestException as e: + logger.error(f"DELETE request failed for {url}: {e}") + raise -def patch_request(url: str, payload: Any) -> Any: - """ - Sends a PATCH request to the GitHub API with configured retries. - Args: - url: The URL endpoint to send the request to. - payload: The JSON payload to send with the request. +def error_response(error_message: str) -> Dict[str, Any]: + """ + Creates a standardized error response dictionary for tool outputs. - Returns: - The JSON response from the API as a dictionary or list. + Args: + error_message (str): The error details. - Raises: - requests.exceptions.RequestException: For network errors or HTTP status - codes that are not resolved by retries. - """ - _increment_api_call_count() - try: - response = _session.patch(url, json=payload, timeout=60) - response.raise_for_status() - return response.json() - except requests.exceptions.RequestException as e: - logger.error(f"PATCH request failed for {url}: {e}") - raise + Returns: + Dict[str, Any]: Standardized error object. + """ + return {"status": "error", "message": error_message} -def delete_request(url: str) -> Any: - """ - Sends a DELETE request to the GitHub API with configured retries. +def get_old_open_issue_numbers( + owner: str, repo: str, days_old: Optional[float] = None +) -> List[int]: + """ + Finds open issues older than the specified threshold using server-side filtering. + + OPTIMIZATION: + Instead of fetching ALL issues and filtering in Python (which wastes API calls), + this uses the GitHub Search API `created: Dict[str, Any]: - """ - Creates a standardized error response dictionary for tool outputs. + for item in items: + if "pull_request" not in item: + issue_numbers.append(item["number"]) - Args: - error_message: A descriptive message of the error that occurred. + if len(items) < 100: + break - Returns: - A dictionary containing the error status and message. - """ - return {"status": "error", "message": error_message} + page += 1 + except requests.exceptions.RequestException as e: + logger.error(f"GitHub search failed on page {page}: {e}") + break -def get_old_open_issue_numbers( - owner: str, repo: str, days_old: Optional[float] = None -) -> List[int]: - """ - Finds open issues older than the precise `days_old` threshold. - - This function first fetches ALL open issues from the repository and then - applies a precise, client-side filter to find the ones that are - older than the specified threshold. - """ - if days_old is None: - days_old = STALE_HOURS_THRESHOLD / 24 - - # 1. Calculate the PRECISE cutoff time in UTC. - now_utc = datetime.now(timezone.utc) - precise_cutoff_datetime = now_utc - timedelta(days=days_old) - - # 2. Build a query to get ALL open issues. The date filter is removed. 
- query = f"repo:{owner}/{repo} is:issue state:open" - logger.info(f"Fetching all open issues from '{owner}/{repo}'...") - - all_open_issues = [] - page = 1 - url = "https://api.github.com/search/issues" - - # Stage 1: Fetch all open issues via API - while True: - params = {"q": query, "per_page": 100, "page": page} - try: - data = get_request(url, params=params) - items = data.get("items", []) - if not items: - break - - all_open_issues.extend(items) - - if len(items) < 100: - break - page += 1 - except requests.exceptions.RequestException as e: - logger.error(f"GitHub search failed on page {page}: {e}") - break - - logger.info( - f"Fetched {len(all_open_issues)} total open issues. " - f"Now filtering for those created before: {precise_cutoff_datetime.isoformat()}" - ) - - # Stage 2: Apply the precise time filter in Python - final_issue_numbers = [] - for item in all_open_issues: - if "pull_request" in item: - continue - - issue_creation_time = dateutil.parser.isoparse(item["created_at"]) - - if issue_creation_time < precise_cutoff_datetime: - final_issue_numbers.append(item["number"]) - - logger.info(f"Found {len(final_issue_numbers)} issues that are older than the threshold.") - return final_issue_numbers \ No newline at end of file + logger.info(f"Found {len(issue_numbers)} stale issues.") + return issue_numbers diff --git a/contributing/samples/gepa/experiment.py b/contributing/samples/gepa/experiment.py index 2f5d03a772..f68b349d9c 100644 --- a/contributing/samples/gepa/experiment.py +++ b/contributing/samples/gepa/experiment.py @@ -43,7 +43,6 @@ from tau_bench.types import EnvRunResult from tau_bench.types import RunConfig import tau_bench_agent as tau_bench_agent_lib - import utils diff --git a/contributing/samples/gepa/run_experiment.py b/contributing/samples/gepa/run_experiment.py index cfd850b3a3..1bc4ee58c8 100644 --- a/contributing/samples/gepa/run_experiment.py +++ b/contributing/samples/gepa/run_experiment.py @@ -25,7 +25,6 @@ from absl import flags import experiment from google.genai import types - import utils _OUTPUT_DIR = flags.DEFINE_string( From 2080b4c1f3dba38c76a3645473c53e32d2ad78a4 Mon Sep 17 00:00:00 2001 From: Rohit Yanamadala Date: Mon, 24 Nov 2025 11:58:12 -0800 Subject: [PATCH 5/9] Feat:add stale agent workflow (testing) --- .github/workflows/stale-bot.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/stale-bot.yml b/.github/workflows/stale-bot.yml index 6948b56459..53e1be7c6d 100644 --- a/.github/workflows/stale-bot.yml +++ b/.github/workflows/stale-bot.yml @@ -2,6 +2,9 @@ name: ADK Stale Issue Auditor on: workflow_dispatch: + push: + branches: + - feat/improve-stale-agent schedule: # This runs at 6:00 AM UTC (10 PM PST) From bf6249e75ba7502ec9dba7ee78d384fe06307c97 Mon Sep 17 00:00:00 2001 From: Rohit Yanamadala Date: Mon, 24 Nov 2025 13:15:58 -0800 Subject: [PATCH 6/9] Feat:modify stale agent workflow --- .github/workflows/stale-bot.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/stale-bot.yml b/.github/workflows/stale-bot.yml index 53e1be7c6d..6948b56459 100644 --- a/.github/workflows/stale-bot.yml +++ b/.github/workflows/stale-bot.yml @@ -2,9 +2,6 @@ name: ADK Stale Issue Auditor on: workflow_dispatch: - push: - branches: - - feat/improve-stale-agent schedule: # This runs at 6:00 AM UTC (10 PM PST) From 0e30a65c8cce83944b8db87be3d66c575d9a8432 Mon Sep 17 00:00:00 2001 From: Rohit Yanamadala Date: Mon, 24 Nov 2025 15:38:30 -0800 Subject: [PATCH 7/9] feat: make GraphQL limits and sleep duration configurable 
--- contributing/samples/adk_stale_agent/agent.py | 22 ++++++++++++++----- contributing/samples/adk_stale_agent/main.py | 5 +++-- .../samples/adk_stale_agent/settings.py | 14 ++++++++++++ 3 files changed, 33 insertions(+), 8 deletions(-) diff --git a/contributing/samples/adk_stale_agent/agent.py b/contributing/samples/adk_stale_agent/agent.py index 9948f26584..305bcbeae7 100644 --- a/contributing/samples/adk_stale_agent/agent.py +++ b/contributing/samples/adk_stale_agent/agent.py @@ -29,6 +29,9 @@ from adk_stale_agent.settings import REQUEST_CLARIFICATION_LABEL from adk_stale_agent.settings import STALE_HOURS_THRESHOLD from adk_stale_agent.settings import STALE_LABEL_NAME +from adk_stale_agent.settings import GRAPHQL_COMMENT_LIMIT +from adk_stale_agent.settings import GRAPHQL_EDIT_LIMIT +from adk_stale_agent.settings import GRAPHQL_TIMELINE_LIMIT from adk_stale_agent.utils import delete_request from adk_stale_agent.utils import error_response from adk_stale_agent.utils import get_request @@ -148,8 +151,8 @@ def get_issue_state(item_number: int) -> Dict[str, Any]: createdAt labels(first: 20) { nodes { name } } - # 1. Comments (Fetch last 30 to scan for previous bot alerts) - comments(last: 30) { + # 1. Comments (Fetch last commentLimit) + comments(last: $commentLimit) { nodes { author { login } body @@ -158,8 +161,8 @@ def get_issue_state(item_number: int) -> Dict[str, Any]: } } - # 2. Description Edits (Fetch last 10) - userContentEdits(last: 10) { + # 2. Description Edits (Fetch last editLimit) + userContentEdits(last: $editLimit) { nodes { editor { login } editedAt @@ -167,7 +170,7 @@ def get_issue_state(item_number: int) -> Dict[str, Any]: } # 3. Timeline Events (Renames, Reopens, Labels) - timelineItems(itemTypes: [LABELED_EVENT, RENAMED_TITLE_EVENT, REOPENED_EVENT], last: 20) { + timelineItems(itemTypes: [LABELED_EVENT, RENAMED_TITLE_EVENT, REOPENED_EVENT], last: $timelineLimit) { nodes { __typename ... 
on LabeledEvent { @@ -190,7 +193,14 @@ def get_issue_state(item_number: int) -> Dict[str, Any]: } """ - variables = {"owner": OWNER, "name": REPO, "number": item_number} + variables = { + "owner": OWNER, + "name": REPO, + "number": item_number, + "commentLimit": GRAPHQL_COMMENT_LIMIT, + "editLimit": GRAPHQL_EDIT_LIMIT, + "timelineLimit": GRAPHQL_TIMELINE_LIMIT + } try: response = post_request( diff --git a/contributing/samples/adk_stale_agent/main.py b/contributing/samples/adk_stale_agent/main.py index 36a2a691c6..63e818536a 100644 --- a/contributing/samples/adk_stale_agent/main.py +++ b/contributing/samples/adk_stale_agent/main.py @@ -22,6 +22,7 @@ from adk_stale_agent.settings import OWNER from adk_stale_agent.settings import REPO from adk_stale_agent.settings import STALE_HOURS_THRESHOLD +from adk_stale_agent.settings import SLEEP_BETWEEN_CHUNKS from adk_stale_agent.utils import get_api_call_count from adk_stale_agent.utils import get_old_open_issue_numbers from adk_stale_agent.utils import reset_api_call_count @@ -162,8 +163,8 @@ async def main(): ) if (i + CONCURRENCY_LIMIT) < total_count: - logger.debug("Sleeping for 1.5s to respect rate limits...") - await asyncio.sleep(1.5) + logger.debug(f"Sleeping for {SLEEP_BETWEEN_CHUNKS}s to respect rate limits...") + await asyncio.sleep(SLEEP_BETWEEN_CHUNKS) total_api_calls_for_run = search_api_calls + total_issue_api_calls avg_time_per_issue = ( diff --git a/contributing/samples/adk_stale_agent/settings.py b/contributing/samples/adk_stale_agent/settings.py index d9ed4b65b9..eff5b6267f 100644 --- a/contributing/samples/adk_stale_agent/settings.py +++ b/contributing/samples/adk_stale_agent/settings.py @@ -47,3 +47,17 @@ # The number of issues to process concurrently. # Higher values are faster but increase the immediate rate of API calls CONCURRENCY_LIMIT = int(os.getenv("CONCURRENCY_LIMIT", 3)) + +# --- GraphQL Query Limits --- +# The number of most recent comments to fetch for context analysis. +GRAPHQL_COMMENT_LIMIT = int(os.getenv("GRAPHQL_COMMENT_LIMIT", 30)) + +# The number of most recent description edits to fetch. +GRAPHQL_EDIT_LIMIT = int(os.getenv("GRAPHQL_EDIT_LIMIT", 10)) + +# The number of most recent timeline events (labels, renames, reopens) to fetch. +GRAPHQL_TIMELINE_LIMIT = int(os.getenv("GRAPHQL_TIMELINE_LIMIT", 20)) + +# --- Rate Limiting --- +# Time in seconds to wait between processing chunks. 
+SLEEP_BETWEEN_CHUNKS = float(os.getenv("SLEEP_BETWEEN_CHUNKS", 1.5)) \ No newline at end of file From b1167ddea27d51e5168c9cd8f02471dd47a79937 Mon Sep 17 00:00:00 2001 From: Rohit Yanamadala Date: Mon, 24 Nov 2025 15:51:33 -0800 Subject: [PATCH 8/9] refactor(agent): reuse runner instance and extract date formatter --- contributing/samples/adk_stale_agent/agent.py | 34 ++++++++++++------- contributing/samples/adk_stale_agent/main.py | 6 ++-- .../samples/adk_stale_agent/settings.py | 2 +- 3 files changed, 27 insertions(+), 15 deletions(-) diff --git a/contributing/samples/adk_stale_agent/agent.py b/contributing/samples/adk_stale_agent/agent.py index 305bcbeae7..0a0686f2e0 100644 --- a/contributing/samples/adk_stale_agent/agent.py +++ b/contributing/samples/adk_stale_agent/agent.py @@ -23,15 +23,15 @@ from adk_stale_agent.settings import CLOSE_HOURS_AFTER_STALE_THRESHOLD from adk_stale_agent.settings import GITHUB_BASE_URL +from adk_stale_agent.settings import GRAPHQL_COMMENT_LIMIT +from adk_stale_agent.settings import GRAPHQL_EDIT_LIMIT +from adk_stale_agent.settings import GRAPHQL_TIMELINE_LIMIT from adk_stale_agent.settings import LLM_MODEL_NAME from adk_stale_agent.settings import OWNER from adk_stale_agent.settings import REPO from adk_stale_agent.settings import REQUEST_CLARIFICATION_LABEL from adk_stale_agent.settings import STALE_HOURS_THRESHOLD from adk_stale_agent.settings import STALE_LABEL_NAME -from adk_stale_agent.settings import GRAPHQL_COMMENT_LIMIT -from adk_stale_agent.settings import GRAPHQL_EDIT_LIMIT -from adk_stale_agent.settings import GRAPHQL_TIMELINE_LIMIT from adk_stale_agent.utils import delete_request from adk_stale_agent.utils import error_response from adk_stale_agent.utils import get_request @@ -199,7 +199,7 @@ def get_issue_state(item_number: int) -> Dict[str, Any]: "number": item_number, "commentLimit": GRAPHQL_COMMENT_LIMIT, "editLimit": GRAPHQL_EDIT_LIMIT, - "timelineLimit": GRAPHQL_TIMELINE_LIMIT + "timelineLimit": GRAPHQL_TIMELINE_LIMIT, } try: @@ -383,6 +383,18 @@ def get_issue_state(item_number: int) -> Dict[str, Any]: # --- Tool Definitions --- +def _format_days(hours: float) -> str: + """ + Formats a duration in hours into a clean day string. + + Example: + 168.0 -> "7" + 12.0 -> "0.5" + """ + days = hours / 24 + return f"{days:.1f}" if days % 1 != 0 else f"{int(days)}" + + def add_label_to_issue(item_number: int, label_name: str) -> dict[str, Any]: """ Adds a label to the issue. @@ -426,15 +438,14 @@ def add_stale_label_and_comment(item_number: int) -> dict[str, Any]: Args: item_number (int): The GitHub issue number. """ - # Format days cleanly (e.g., "7" instead of "7.0", or "0.5" instead of "0") - days = CLOSE_HOURS_AFTER_STALE_THRESHOLD / 24 - days_str = f"{days:.1f}" if days % 1 != 0 else f"{int(days)}" + stale_days_str = _format_days(STALE_HOURS_THRESHOLD) + close_days_str = _format_days(CLOSE_HOURS_AFTER_STALE_THRESHOLD) comment = ( "This issue has been automatically marked as stale because it has not" - " had recent activity after a maintainer requested clarification. It" - " will be closed if no further activity occurs within" - f" {days_str} days." + f" had recent activity for {stale_days_str} days after a maintainer" + " requested clarification. It will be closed if no further activity" + f" occurs within {close_days_str} days." ) try: post_request( @@ -476,8 +487,7 @@ def close_as_stale(item_number: int) -> dict[str, Any]: Args: item_number (int): The GitHub issue number. 
""" - days = CLOSE_HOURS_AFTER_STALE_THRESHOLD / 24 - days_str = f"{days:.1f}" if days % 1 != 0 else f"{int(days)}" + days_str = _format_days(CLOSE_HOURS_AFTER_STALE_THRESHOLD) comment = ( "This has been automatically closed because it has been marked as stale" diff --git a/contributing/samples/adk_stale_agent/main.py b/contributing/samples/adk_stale_agent/main.py index 63e818536a..d4fe58dd63 100644 --- a/contributing/samples/adk_stale_agent/main.py +++ b/contributing/samples/adk_stale_agent/main.py @@ -21,8 +21,8 @@ from adk_stale_agent.settings import CONCURRENCY_LIMIT from adk_stale_agent.settings import OWNER from adk_stale_agent.settings import REPO -from adk_stale_agent.settings import STALE_HOURS_THRESHOLD from adk_stale_agent.settings import SLEEP_BETWEEN_CHUNKS +from adk_stale_agent.settings import STALE_HOURS_THRESHOLD from adk_stale_agent.utils import get_api_call_count from adk_stale_agent.utils import get_old_open_issue_numbers from adk_stale_agent.utils import reset_api_call_count @@ -163,7 +163,9 @@ async def main(): ) if (i + CONCURRENCY_LIMIT) < total_count: - logger.debug(f"Sleeping for {SLEEP_BETWEEN_CHUNKS}s to respect rate limits...") + logger.debug( + f"Sleeping for {SLEEP_BETWEEN_CHUNKS}s to respect rate limits..." + ) await asyncio.sleep(SLEEP_BETWEEN_CHUNKS) total_api_calls_for_run = search_api_calls + total_issue_api_calls diff --git a/contributing/samples/adk_stale_agent/settings.py b/contributing/samples/adk_stale_agent/settings.py index eff5b6267f..599c6ef2ea 100644 --- a/contributing/samples/adk_stale_agent/settings.py +++ b/contributing/samples/adk_stale_agent/settings.py @@ -60,4 +60,4 @@ # --- Rate Limiting --- # Time in seconds to wait between processing chunks. -SLEEP_BETWEEN_CHUNKS = float(os.getenv("SLEEP_BETWEEN_CHUNKS", 1.5)) \ No newline at end of file +SLEEP_BETWEEN_CHUNKS = float(os.getenv("SLEEP_BETWEEN_CHUNKS", 1.5)) From 888064eff125ae74f7c3a9ad6c74f98de80243a2 Mon Sep 17 00:00:00 2001 From: Rohit Yanamadala Date: Mon, 24 Nov 2025 16:08:16 -0800 Subject: [PATCH 9/9] refactor(agent): decompose issue analysis logic and standardize formatting --- contributing/samples/adk_stale_agent/agent.py | 393 ++++++++++-------- 1 file changed, 230 insertions(+), 163 deletions(-) diff --git a/contributing/samples/adk_stale_agent/agent.py b/contributing/samples/adk_stale_agent/agent.py index 0a0686f2e0..5235e0352f 100644 --- a/contributing/samples/adk_stale_agent/agent.py +++ b/contributing/samples/adk_stale_agent/agent.py @@ -20,6 +20,7 @@ from typing import Dict from typing import List from typing import Optional +from typing import Tuple from adk_stale_agent.settings import CLOSE_HOURS_AFTER_STALE_THRESHOLD from adk_stale_agent.settings import GITHUB_BASE_URL @@ -119,39 +120,28 @@ def load_prompt_template(filename: str) -> str: PROMPT_TEMPLATE = load_prompt_template("PROMPT_INSTRUCTION.txt") -def get_issue_state(item_number: int) -> Dict[str, Any]: +def _fetch_graphql_data(item_number: int) -> Dict[str, Any]: """ - Retrieves the comprehensive state of a GitHub issue using GraphQL. - - This function constructs a unified timeline of comments, body edits, - renames, and reopens to determine who the *absolute last* actor was. - It handles 'Ghost Edits' (description updates without comments) and - prevents spamming alerts if the bot has already notified maintainers. + Executes the GraphQL query to fetch raw issue data, including comments, + edits, and timeline events. Args: item_number (int): The GitHub issue number. 
Returns: - Dict[str, Any]: A dictionary containing metadata for the LLM. - - last_action_role (str): 'author', 'maintainer', or 'other_user'. - - is_stale (bool): Whether the issue is currently marked stale. - - maintainer_alert_needed (bool): True if a silent edit needs an alert. - - days_since_activity (float): Days since the last human action. - - last_comment_text (str|None): The text of the last comment, if applicable. - - current_labels (List[str]): List of labels on the issue. - """ - maintainers = _get_cached_maintainers() + Dict[str, Any]: The raw 'issue' object from the GraphQL response. - # GraphQL Query: Fetches Comments, Edits, and Timeline Events in one go. + Raises: + RequestException: If the GraphQL query returns errors or the issue is not found. + """ query = """ - query($owner: String!, $name: String!, $number: Int!) { + query($owner: String!, $name: String!, $number: Int!, $commentLimit: Int!, $timelineLimit: Int!) { repository(owner: $owner, name: $name) { issue(number: $number) { author { login } createdAt labels(first: 20) { nodes { name } } - # 1. Comments (Fetch last commentLimit) comments(last: $commentLimit) { nodes { author { login } @@ -161,7 +151,6 @@ def get_issue_state(item_number: int) -> Dict[str, Any]: } } - # 2. Description Edits (Fetch last editLimit) userContentEdits(last: $editLimit) { nodes { editor { login } @@ -169,7 +158,6 @@ def get_issue_state(item_number: int) -> Dict[str, Any]: } } - # 3. Timeline Events (Renames, Reopens, Labels) timelineItems(itemTypes: [LABELED_EVENT, RENAMED_TITLE_EVENT, REOPENED_EVENT], last: $timelineLimit) { nodes { __typename @@ -202,153 +190,209 @@ def get_issue_state(item_number: int) -> Dict[str, Any]: "timelineLimit": GRAPHQL_TIMELINE_LIMIT, } - try: - response = post_request( - f"{GITHUB_BASE_URL}/graphql", {"query": query, "variables": variables} - ) + response = post_request( + f"{GITHUB_BASE_URL}/graphql", {"query": query, "variables": variables} + ) - if "errors" in response: - msg = response["errors"][0]["message"] - return error_response(f"GraphQL Error: {msg}") - - data = response.get("data", {}).get("repository", {}).get("issue", {}) - if not data: - return error_response(f"Issue #{item_number} not found.") - - issue_author = data.get("author", {}).get("login") - labels_list = [l["name"] for l in data.get("labels", {}).get("nodes", [])] - - # Unified list of ALL events to replay history chronologically. - history = [] - last_bot_alert_time = None - - # 1. Baseline: Issue Creation - history.append({ - "type": "created", - "actor": issue_author, - "time": dateutil.parser.isoparse(data["createdAt"]), - "data": None, - }) - - # 2. Process Comments - for c in data.get("comments", {}).get("nodes", []): - if not c: - continue - - actor = c.get("author", {}).get("login") - c_body = c.get("body", "") - c_time = dateutil.parser.isoparse(c.get("createdAt")) - - if BOT_ALERT_SIGNATURE in c_body: - if last_bot_alert_time is None or c_time > last_bot_alert_time: - last_bot_alert_time = c_time - - if actor and not actor.endswith("[bot]"): - e_time = c.get("lastEditedAt") - actual_time = dateutil.parser.isoparse(e_time) if e_time else c_time - history.append({ - "type": "commented", - "actor": actor, - "time": actual_time, - "data": c_body, - }) - - # 3. 
Process Body Edits - for e in data.get("userContentEdits", {}).get("nodes", []): - if not e: - continue - - actor = e.get("editor", {}).get("login") - if actor and not actor.endswith("[bot]"): - history.append({ - "type": "edited_description", - "actor": actor, - "time": dateutil.parser.isoparse(e.get("editedAt")), - "data": None, - }) - - # 4. Process Timeline Events (Labels, Renames, Reopens) - label_events = [] - for t in data.get("timelineItems", {}).get("nodes", []): - if not t: - continue - - etype = t.get("__typename") - actor = t.get("actor", {}).get("login") - time_val = dateutil.parser.isoparse(t.get("createdAt")) - - if etype == "LabeledEvent": - if t.get("label", {}).get("name") == STALE_LABEL_NAME: - label_events.append(time_val) - continue - - if actor and not actor.endswith("[bot]"): - pretty_type = ( - "renamed_title" if etype == "RenamedTitleEvent" else "reopened" - ) - history.append({ - "type": pretty_type, - "actor": actor, - "time": time_val, - "data": None, - }) - - # --- History Replay (Chronological Sort) --- - history.sort(key=lambda x: x["time"]) - - last_action_role = "author" - last_activity_time = history[0]["time"] - last_action_type = "created" - last_comment_text = None - - logger.debug(f"--- Activity Trace for #{item_number} ---") - - for event in history: - actor = event["actor"] - etype = event["type"] - - role = "other_user" - if actor == issue_author: - role = "author" - elif actor in maintainers: - role = "maintainer" - - logger.debug( - f" [{event['time'].strftime('%m-%d %H:%M')}] " - f"{etype.upper()} by {actor} ({role})" + if "errors" in response: + raise RequestException(f"GraphQL Error: {response['errors'][0]['message']}") + + data = response.get("data", {}).get("repository", {}).get("issue", {}) + if not data: + raise RequestException(f"Issue #{item_number} not found.") + + return data + + +def _build_history_timeline( + data: Dict[str, Any], +) -> Tuple[List[Dict[str, Any]], List[datetime], Optional[datetime]]: + """ + Parses raw GraphQL data into a unified, chronologically sorted history list. + Also extracts specific event times needed for logic checks. + + Args: + data (Dict[str, Any]): The raw issue data from `_fetch_graphql_data`. + + Returns: + Tuple[List[Dict], List[datetime], Optional[datetime]]: + - history: A list of normalized event dictionaries sorted by time. + - label_events: A list of timestamps when the stale label was applied. + - last_bot_alert_time: Timestamp of the last bot silent-edit alert (if any). + """ + issue_author = data.get("author", {}).get("login") + history = [] + label_events = [] + last_bot_alert_time = None + + # 1. Baseline: Issue Creation + history.append({ + "type": "created", + "actor": issue_author, + "time": dateutil.parser.isoparse(data["createdAt"]), + "data": None, + }) + + # 2. Process Comments + for c in data.get("comments", {}).get("nodes", []): + if not c: + continue + + actor = c.get("author", {}).get("login") + c_body = c.get("body", "") + c_time = dateutil.parser.isoparse(c.get("createdAt")) + + # Track bot alerts for spam prevention + if BOT_ALERT_SIGNATURE in c_body: + if last_bot_alert_time is None or c_time > last_bot_alert_time: + last_bot_alert_time = c_time + + if actor and not actor.endswith("[bot]"): + # Use edit time if available, otherwise creation time + e_time = c.get("lastEditedAt") + actual_time = dateutil.parser.isoparse(e_time) if e_time else c_time + history.append({ + "type": "commented", + "actor": actor, + "time": actual_time, + "data": c_body, + }) + + # 3. 
Process Body Edits ("Ghost Edits") + for e in data.get("userContentEdits", {}).get("nodes", []): + if not e: + continue + actor = e.get("editor", {}).get("login") + if actor and not actor.endswith("[bot]"): + history.append({ + "type": "edited_description", + "actor": actor, + "time": dateutil.parser.isoparse(e.get("editedAt")), + "data": None, + }) + + # 4. Process Timeline Events + for t in data.get("timelineItems", {}).get("nodes", []): + if not t: + continue + + etype = t.get("__typename") + actor = t.get("actor", {}).get("login") + time_val = dateutil.parser.isoparse(t.get("createdAt")) + + if etype == "LabeledEvent": + if t.get("label", {}).get("name") == STALE_LABEL_NAME: + label_events.append(time_val) + continue + + if actor and not actor.endswith("[bot]"): + pretty_type = ( + "renamed_title" if etype == "RenamedTitleEvent" else "reopened" ) + history.append({ + "type": pretty_type, + "actor": actor, + "time": time_val, + "data": None, + }) - last_action_role = role - last_activity_time = event["time"] - last_action_type = etype + # Sort chronologically + history.sort(key=lambda x: x["time"]) + return history, label_events, last_bot_alert_time - if etype == "commented": - last_comment_text = event["data"] - else: - last_comment_text = None - # --- Spam Prevention / Alert Logic --- - maintainer_alert_needed = False - if ( - last_action_role in ["author", "other_user"] - and last_action_type == "edited_description" - ): - if last_bot_alert_time and last_bot_alert_time > last_activity_time: - maintainer_alert_needed = False - logger.info( - f"#{item_number}: Silent edit detected, but Bot already alerted at " - f"{last_bot_alert_time.strftime('%m-%d %H:%M')}. No spam." - ) - else: - maintainer_alert_needed = True - logger.info(f"#{item_number}: Silent edit detected. Alert needed.") +def _replay_history_to_find_state( + history: List[Dict[str, Any]], maintainers: List[str], issue_author: str +) -> Dict[str, Any]: + """ + Replays the unified event history to determine the absolute last actor and their role. - # --- Final Metric Calculations --- + Args: + history (List[Dict]): Chronologically sorted list of events. + maintainers (List[str]): List of maintainer usernames. + issue_author (str): Username of the issue author. + + Returns: + Dict[str, Any]: A dictionary containing the last state of the issue: + - last_action_role (str): 'author', 'maintainer', or 'other_user'. + - last_activity_time (datetime): Timestamp of the last human action. + - last_action_type (str): The type of the last action (e.g., 'commented'). + - last_comment_text (Optional[str]): The text of the last comment. 
+ """ + last_action_role = "author" + last_activity_time = history[0]["time"] + last_action_type = "created" + last_comment_text = None + + for event in history: + actor = event["actor"] + etype = event["type"] + + role = "other_user" + if actor == issue_author: + role = "author" + elif actor in maintainers: + role = "maintainer" + + last_action_role = role + last_activity_time = event["time"] + last_action_type = etype + + # Only store text if it was a comment (resets on other events like labels/edits) + if etype == "commented": + last_comment_text = event["data"] + else: + last_comment_text = None + + return { + "last_action_role": last_action_role, + "last_activity_time": last_activity_time, + "last_action_type": last_action_type, + "last_comment_text": last_comment_text, + } + + +def get_issue_state(item_number: int) -> Dict[str, Any]: + """ + Retrieves the comprehensive state of a GitHub issue using GraphQL. + + This function orchestrates the fetching, parsing, and analysis of the issue's + history to determine if it is stale, active, or pending maintainer review. + + Args: + item_number (int): The GitHub issue number. + + Returns: + Dict[str, Any]: A comprehensive state dictionary for the LLM agent. + Contains keys such as 'last_action_role', 'is_stale', 'days_since_activity', + and 'maintainer_alert_needed'. + """ + try: + maintainers = _get_cached_maintainers() + + # 1. Fetch + raw_data = _fetch_graphql_data(item_number) + + issue_author = raw_data.get("author", {}).get("login") + labels_list = [ + l["name"] for l in raw_data.get("labels", {}).get("nodes", []) + ] + + # 2. Parse & Sort + history, label_events, last_bot_alert_time = _build_history_timeline( + raw_data + ) + + # 3. Analyze (Replay) + state = _replay_history_to_find_state(history, maintainers, issue_author) + + # 4. Final Calculations & Alert Logic current_time = datetime.now(timezone.utc) days_since_activity = ( - current_time - last_activity_time + current_time - state["last_activity_time"] ).total_seconds() / 86400 + # Stale Checks is_stale = STALE_LABEL_NAME in labels_list days_since_stale_label = 0.0 if is_stale and label_events: @@ -357,20 +401,38 @@ def get_issue_state(item_number: int) -> Dict[str, Any]: current_time - latest_label_time ).total_seconds() / 86400 + # Silent Edit Alert Logic + maintainer_alert_needed = False + if ( + state["last_action_role"] in ["author", "other_user"] + and state["last_action_type"] == "edited_description" + ): + if ( + last_bot_alert_time + and last_bot_alert_time > state["last_activity_time"] + ): + logger.info( + f"#{item_number}: Silent edit detected, but Bot already alerted. No" + " spam." + ) + else: + maintainer_alert_needed = True + logger.info(f"#{item_number}: Silent edit detected. 
Alert needed.") + logger.debug( - f" -> FINAL VERDICT: Last Actor = {last_action_role.upper()}, " - f"Idle = {days_since_activity:.2f} days" + f"#{item_number} VERDICT: Role={state['last_action_role']}, " + f"Idle={days_since_activity:.2f}d" ) return { "status": "success", - "last_action_role": last_action_role, - "last_action_type": last_action_type, + "last_action_role": state["last_action_role"], + "last_action_type": state["last_action_type"], "maintainer_alert_needed": maintainer_alert_needed, "is_stale": is_stale, "days_since_activity": days_since_activity, "days_since_stale_label": days_since_stale_label, - "last_comment_text": last_comment_text, + "last_comment_text": state["last_comment_text"], "current_labels": labels_list, "stale_threshold_days": STALE_HOURS_THRESHOLD / 24, "close_threshold_days": CLOSE_HOURS_AFTER_STALE_THRESHOLD / 24, @@ -378,6 +440,11 @@ def get_issue_state(item_number: int) -> Dict[str, Any]: except RequestException as e: return error_response(f"Network Error: {e}") + except Exception as e: + logger.error( + f"Unexpected error analyzing #{item_number}: {e}", exc_info=True + ) + return error_response(f"Analysis Error: {e}") # --- Tool Definitions ---