Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Backport #49314 to 23.4: Fallback auth gh api #49333

Merged
merged 1 commit into from Apr 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
53 changes: 47 additions & 6 deletions tests/ci/build_download_helper.py
Expand Up @@ -6,11 +6,12 @@
import sys
import time
from pathlib import Path
from typing import Any, Callable, List, Optional
from typing import Any, Callable, List

import requests # type: ignore

from ci_config import CI_CONFIG
from get_robot_token import ROBOT_TOKEN, get_best_robot_token

DOWNLOAD_RETRIES_COUNT = 5

Expand All @@ -24,22 +25,62 @@ def get_with_retries(
logging.info(
"Getting URL with %i tries and sleep %i in between: %s", retries, sleep, url
)
exc = None # type: Optional[Exception]
exc = Exception("A placeholder to satisfy typing and avoid nesting")
for i in range(retries):
try:
response = requests.get(url, **kwargs)
response.raise_for_status()
break
return response
except Exception as e:
if i + 1 < retries:
logging.info("Exception '%s' while getting, retry %i", e, i + 1)
time.sleep(sleep)

exc = e
else:
raise Exception(exc)

return response
raise exc


def get_gh_api(
    url: str,
    retries: int = DOWNLOAD_RETRIES_COUNT,
    sleep: int = 3,
    **kwargs: Any,
) -> requests.Response:
    """Request the GitHub API, falling back to token auth on rate limiting.

    The request is sent as-is (without adding auth) by default. If GitHub
    answers with "403 rate limit exceeded", an ``Authorization`` header built
    from ``get_best_robot_token()`` is added and the request is repeated via
    ``get_with_retries``. When ``get_robot_token.ROBOT_TOKEN`` has already
    been populated, the header is added before the first attempt.

    Args:
        url: URL to request.
        retries: number of attempts, both for the initial loop and for the
            final ``get_with_retries`` call.
        sleep: seconds passed to ``get_with_retries`` as the pause between
            its tries.
        **kwargs: forwarded to ``get_with_retries``. A caller-supplied
            ``headers`` mapping is copied before the auth header is added,
            and an existing ``Authorization`` entry is never overwritten.

    Returns:
        The successful ``requests.Response``.

    Raises:
        Exception: whatever ``get_with_retries`` raises; only
            ``requests.HTTPError`` is handled inside the first loop.
    """
    # ROBOT_TOKEN is rebound inside get_best_robot_token() via `global`, so it
    # must be read through the module at call time. A name bound with
    # `from get_robot_token import ROBOT_TOKEN` keeps the import-time value.
    import get_robot_token as grt  # pylint:disable=import-outside-toplevel

    def set_auth_header() -> None:
        # Work on a copy so the token never leaks into the caller's dict;
        # `or {}` also tolerates an explicit headers=None.
        headers = dict(kwargs.get("headers") or {})
        if "Authorization" not in headers:
            headers["Authorization"] = f"Bearer {get_best_robot_token()}"
        kwargs["headers"] = headers

    if grt.ROBOT_TOKEN is not None:
        set_auth_header()

    for _ in range(retries):
        try:
            response = get_with_retries(url, 1, sleep, **kwargs)
            response.raise_for_status()
            return response
        except requests.HTTPError as exc:
            if (
                exc.response is not None
                and exc.response.status_code == 403
                and b"rate limit exceeded" in exc.response.content
            ):
                logging.warning(
                    "Received rate limit exception, setting the auth header and retry"
                )
                set_auth_header()
                break

    # Reached after a rate-limit fallback or when every attempt above failed
    # with an HTTP error; the last error, if any, propagates from here.
    return get_with_retries(url, retries, sleep, **kwargs)


def get_build_name_for_check(check_name: str) -> str:
Expand Down
4 changes: 2 additions & 2 deletions tests/ci/env_helper.py
@@ -1,7 +1,7 @@
import os
from os import path as p

from build_download_helper import get_with_retries
from build_download_helper import get_gh_api

module_dir = p.abspath(p.dirname(__file__))
git_root = p.abspath(p.join(module_dir, "..", ".."))
Expand Down Expand Up @@ -46,7 +46,7 @@ def GITHUB_JOB_ID() -> str:
jobs = []
page = 1
while not _GITHUB_JOB_ID:
response = get_with_retries(
response = get_gh_api(
f"https://api.github.com/repos/{GITHUB_REPOSITORY}/"
f"actions/runs/{GITHUB_RUN_ID}/jobs?per_page=100&page={page}"
)
Expand Down
24 changes: 16 additions & 8 deletions tests/ci/get_robot_token.py
@@ -1,6 +1,7 @@
#!/usr/bin/env python3
import logging
from dataclasses import dataclass
from typing import Optional

import boto3 # type: ignore
from github import Github
Expand All @@ -20,15 +21,20 @@ def get_parameter_from_ssm(name, decrypt=True, client=None):
return client.get_parameter(Name=name, WithDecryption=decrypt)["Parameter"]["Value"]


ROBOT_TOKEN = None # type: Optional[Token]


def get_best_robot_token(token_prefix_env_name="github_robot_token_"):
global ROBOT_TOKEN
if ROBOT_TOKEN is not None:
return ROBOT_TOKEN.value
client = boto3.client("ssm", region_name="us-east-1")
parameters = client.describe_parameters(
ParameterFilters=[
{"Key": "Name", "Option": "BeginsWith", "Values": [token_prefix_env_name]}
]
)["Parameters"]
assert parameters
token = None

for token_name in [p["Name"] for p in parameters]:
value = get_parameter_from_ssm(token_name, True, client)
Expand All @@ -38,15 +44,17 @@ def get_best_robot_token(token_prefix_env_name="github_robot_token_"):
user = gh.get_user()
rest, _ = gh.rate_limiting
logging.info("Get token with %s remaining requests", rest)
if token is None:
token = Token(user, value, rest)
if ROBOT_TOKEN is None:
ROBOT_TOKEN = Token(user, value, rest)
continue
if token.rest < rest:
token.user, token.value, token.rest = user, value, rest
if ROBOT_TOKEN.rest < rest:
ROBOT_TOKEN.user, ROBOT_TOKEN.value, ROBOT_TOKEN.rest = user, value, rest

assert token
assert ROBOT_TOKEN
logging.info(
"User %s with %s remaining requests is used", token.user.login, token.rest
"User %s with %s remaining requests is used",
ROBOT_TOKEN.user.login,
ROBOT_TOKEN.rest,
)

return token.value
return ROBOT_TOKEN.value
12 changes: 6 additions & 6 deletions tests/ci/pr_info.py
Expand Up @@ -6,7 +6,7 @@

from unidiff import PatchSet # type: ignore

from build_download_helper import get_with_retries
from build_download_helper import get_gh_api
from env_helper import (
GITHUB_REPOSITORY,
GITHUB_SERVER_URL,
Expand Down Expand Up @@ -45,7 +45,7 @@ def get_pr_for_commit(sha, ref):
f"https://api.github.com/repos/{GITHUB_REPOSITORY}/commits/{sha}/pulls"
)
try:
response = get_with_retries(try_get_pr_url, sleep=RETRY_SLEEP)
response = get_gh_api(try_get_pr_url, sleep=RETRY_SLEEP)
data = response.json()
our_prs = [] # type: List[Dict]
if len(data) > 1:
Expand Down Expand Up @@ -105,7 +105,7 @@ def __init__(
# workflow completed event, used for PRs only
if "action" in github_event and github_event["action"] == "completed":
self.sha = github_event["workflow_run"]["head_sha"]
prs_for_sha = get_with_retries(
prs_for_sha = get_gh_api(
f"https://api.github.com/repos/{GITHUB_REPOSITORY}/commits/{self.sha}"
"/pulls",
sleep=RETRY_SLEEP,
Expand All @@ -117,7 +117,7 @@ def __init__(
self.number = github_event["pull_request"]["number"]
if pr_event_from_api:
try:
response = get_with_retries(
response = get_gh_api(
f"https://api.github.com/repos/{GITHUB_REPOSITORY}"
f"/pulls/{self.number}",
sleep=RETRY_SLEEP,
Expand Down Expand Up @@ -159,7 +159,7 @@ def __init__(
self.user_login = github_event["pull_request"]["user"]["login"]
self.user_orgs = set([])
if need_orgs:
user_orgs_response = get_with_retries(
user_orgs_response = get_gh_api(
github_event["pull_request"]["user"]["organizations_url"],
sleep=RETRY_SLEEP,
)
Expand Down Expand Up @@ -255,7 +255,7 @@ def fetch_changed_files(self):
raise TypeError("The event does not have diff URLs")

for diff_url in self.diff_urls:
response = get_with_retries(
response = get_gh_api(
diff_url,
sleep=RETRY_SLEEP,
)
Expand Down