# GitHub Client Implementation

Required imports

In [9]:
import abc
from typing import Optional, Any, Dict, Union, List, Iterator, Tuple, ItemsView
from types import TracebackType
from typing_extensions import Self
import urllib
import json
import requests
from requests import Response
from requests.utils import get_encoding_from_headers
from requests.models import CaseInsensitiveDict
import urllib3
from urllib3.util import Retry
from urllib3.connectionpool import ConnectionPool
from urllib3.exceptions import MaxRetryError
from urllib3.response import HTTPResponse
import io
import time
from datetime import datetime, timezone

import logging
# Logger shared by the code below (used by GithubRetry for its retry/backoff messages).
logger = logging.getLogger('my_logger')

# Configure the root logger with the logging module's defaults.
logging.basicConfig()

Defining required constants

In [51]:
# Shared constants for the GitHub client.
#
# Header names are stored in lowercase: `Github.__send_request` lowercases the
# response header names before they are looked up, so a mixed-case name would
# never match there.
Consts = {
    # Defaults for the `Github` constructor arguments.
    'DEFAULT_BASE_URL': 'https://api.github.com',
    'DEFAULT_TIMEOUT': 15,  # seconds
    'DEFAULT_USER_AGENT': 'Github API client by Github:@ibraym',
    'DEFAULT_PER_PAGE': 30,
    'DEFAULT_SECONDS_BETWEEN_REQUESTS': 1,
    # Default `secondary_rate_wait` of `GithubRetry`, in seconds.
    'DEFAULT_SECONDARY_RATE_WAIT': 60,
    # Rate-limit response header names (lowercase).
    'headerRateRemaining': 'x-ratelimit-remaining',
    'headerRateLimit': 'x-ratelimit-limit',
    'headerRateReset': 'x-ratelimit-reset',
    # GitHub media type strings; the raw and html ones are matched against the
    # response `content-type` to decide whether the body is returned undecoded.
    'headerRawJSON': 'application/vnd.github.raw+json',
    'headerHtmlJSON': 'application/vnd.github.html+json',
    'headerObjectJSON': 'application/vnd.github.object+json',
}


Mimic the httplib connection and response objects by defining two classes:
   -  `RequestsResponse` class: it handles the response object to extract the status, headers and text fields. Also, it provides two methods to get the headers and read the response text.
   - `HTTPSRequestsConnectionClass` class: It is a wrapper around the requests library in Python, designed to mimic the behavior of an httplib connection object. This class offers features like retries, connection pooling, and HTTPS connection management. It provides three methods:
      - `request` method: It prepares a request with the specified HTTP method (verb), URL (url), body (input), and headers (headers).
      - `getresponse` method: It executes the previously prepared HTTP request using the session object.
      - `close` method: It closes the session, releasing any resources (e.g., connections in the pool).

In [11]:
class RequestsResponse:
    """
    Thin adapter that exposes a `requests.Response` through the small
    `httplib`-style surface used by the client.

    Attributes:
        status (int): HTTP status code copied from the wrapped response.
        headers (requests.structures.CaseInsensitiveDict): Headers of the wrapped response.
        text (str): Body of the wrapped response as text.
    """
    def __init__(self, r: requests.Response):
        """
        Capture status, headers and body text of ``r``.

        Args:
            r (requests.Response): The response to adapt.
        """
        # Read in the same order as before: status, headers, then the body text.
        self.status, self.headers, self.text = r.status_code, r.headers, r.text

    def getheaders(self) -> ItemsView[str, str]:
        """
        Expose the response headers as (name, value) pairs.

        Returns:
            ItemsView[str, str]: View over the stored headers.
        """
        header_pairs = self.headers.items()
        return header_pairs

    def read(self) -> str:
        """
        Hand back the body captured at construction time.

        Returns:
            str: The response body text.
        """
        body = self.text
        return body

def noopAuth(request: requests.models.PreparedRequest) -> requests.models.PreparedRequest:
    """
    Authentication hook that leaves the request untouched.

    It is installed as the session's ``auth`` callable by
    `HTTPSRequestsConnectionClass`, which sets its own headers explicitly.

    Args:
        request (requests.models.PreparedRequest): The prepared request.

    Returns:
        requests.models.PreparedRequest: The very same object, unmodified.
    """
    untouched = request
    return untouched

class HTTPSRequestsConnectionClass:
    """
    Mimics an `httplib` connection object using the `requests` library.

    A request is first recorded with `request()` and only sent when
    `getresponse()` is called.

    Attributes:
        host (str): The target host for the connection.
        port (int): The port number for the connection (default is 443 for HTTPS).
        protocol (str): The protocol used, fixed to "https".
        timeout (Optional[int]): The timeout for requests, if any.
        verify: Value of the ``verify`` keyword argument (default is True); passed
            through unchanged to `requests` for SSL verification.
        session (requests.Session): The requests session object used for connections.
        retry (Union[int, Retry]): The retry configuration for the HTTPAdapter.
        pool_size (int): The maximum number of connections for the pool.
        adapter (requests.adapters.HTTPAdapter): Adapter mounted for "https://" URLs.
    """

    # The docstring must be the first statement of the class body, otherwise it is
    # not stored in `__doc__`; the attribute annotation therefore follows it.
    retry: Union[int, Retry]

    def __init__(
        self,
        host: str,
        port: Optional[int] = None,
        strict: bool = False,
        timeout: Optional[int] = None,
        retry: Optional[Union[int, Retry]] = None,
        pool_size: Optional[int] = None,
        **kwargs: Any,
    ) -> None:
        """
        Initializes the HTTPSRequestsConnectionClass with the given configuration.

        Args:
            host (str): The target host for the connection.
            port (Optional[int]): The port for the connection. Defaults to 443.
            strict (bool): Unused parameter included for API compatibility.
            timeout (Optional[int]): The request timeout in seconds.
            retry (Optional[Union[int, Retry]]): Retry configuration or number of retries.
                Defaults to `requests.adapters.DEFAULT_RETRIES`.
            pool_size (Optional[int]): The maximum size of the connection pool.
                Defaults to `requests.adapters.DEFAULT_POOLSIZE`.
            **kwargs (Any): Additional arguments; only ``verify`` is read.
        """
        self.port = port if port else 443
        self.host = host
        self.protocol = "https"
        self.timeout = timeout
        self.verify = kwargs.get("verify", True)
        self.session = requests.Session()

        # Authorization is supplied through explicit headers, so the session-level
        # auth hook is replaced by a no-op.
        self.session.auth = noopAuth

        self.retry = requests.adapters.DEFAULT_RETRIES if retry is None else retry
        self.pool_size = requests.adapters.DEFAULT_POOLSIZE if pool_size is None else pool_size

        self.adapter = requests.adapters.HTTPAdapter(
            max_retries=self.retry,
            pool_connections=self.pool_size,
            pool_maxsize=self.pool_size,
        )
        self.session.mount("https://", self.adapter)

    def request(
        self,
        verb: str,
        url: str,
        input: Optional[Union[str, io.BufferedReader]],
        headers: Dict[str, str],
    ) -> None:
        """
        Prepares a request to be executed.

        Nothing is sent here; the values are stored until `getresponse()` is called.

        Args:
            verb (str): The HTTP method (e.g., "GET", "POST").
            url (str): The URL path for the request.
            input (Optional[Union[str, io.BufferedReader]]): The request body, if any.
            headers (Dict[str, str]): The headers for the request.
        """
        self.verb = verb
        self.url = url
        self.input = input
        self.headers = headers

    def getresponse(self) -> RequestsResponse:
        """
        Executes the prepared request and returns the response.

        `request()` must have been called first, since this method reads the
        values it stored. Redirects are not followed.

        Returns:
            RequestsResponse: The wrapped response object.
        """
        # Pick the session method matching the verb, e.g. "GET" -> session.get.
        send = getattr(self.session, self.verb.lower())
        target = f"{self.protocol}://{self.host}:{self.port}{self.url}"
        r = send(
            target,
            headers=self.headers,
            data=self.input,
            timeout=self.timeout,
            verify=self.verify,
            allow_redirects=False,
        )
        return RequestsResponse(r)

    def close(self) -> None:
        """
        Closes the session and cleans up resources.
        """
        self.session.close()

To handle authentication for HTTP requests, we use an abstract base class `Auth` to standardize the structure of authentication mechanisms, and a concrete implementation `Token` to provide authentication using a single, constant token.

In [12]:
class Auth(abc.ABC):
    """
    Common interface for every authentication method.

    Subclasses provide `token_type` and `token`; `authentication` combines
    them into the HTTP ``Authorization`` header.
    """

    @property
    @abc.abstractmethod
    def token_type(self) -> str:
        """
        Kind of auth token, e.g. Bearer or Basic.

        :return: token type

        """

    @property
    @abc.abstractmethod
    def token(self) -> str:
        """
        Auth token exactly as it is placed in the HTTP Authorization header.

        :return: token

        """

    def authentication(self, headers: dict) -> None:
        """
        Write the ``Authorization`` entry into ``headers`` (modified in place).

        :param headers: request headers to update
        """
        # token_type is read before token, then both are joined by a single space.
        headers["Authorization"] = "{} {}".format(self.token_type, self.token)


class Token(Auth):
    """
    Authentication with one fixed token, sent with the ``token`` scheme.
    """

    def __init__(self, token: str):
        """
        Store the token after checking it is a non-empty string.

        :param token: the token value
        """
        assert isinstance(token, str)
        assert len(token) >= 1
        self._token = token

    @property
    def token_type(self) -> str:
        """Scheme placed in front of the token in the Authorization header."""
        scheme = "token"
        return scheme

    @property
    def token(self) -> str:
        """The token given at construction time."""
        return self._token

GithubRetry class is a Github-specific implementation of `urllib3.Retry`.
Github requests are retry-able when the response provides a `"Retry-After"` header, or the content indicates a rate limit error.

The class includes methods to distinguish between different types of rate-limit errors:
   - `isRateLimitError`: Combines checks for both primary and secondary rate-limit errors.
   - `isPrimaryRateLimitError`: Detects "API rate limit exceeded" errors in the response message.
   - `isSecondaryRateLimitError`: Detects other messages associated with secondary rate limits, such as "please retry your request again later."

Retry Logic: The `increment` method is overridden to handle retries when specific rate-limit conditions are encountered.
   - Enhanced Error Handling:
      - If the response status is 403, the method inspects the response for rate-limit messages or Retry-After headers.
      - When a rate-limit error is detected:
         - Primary Rate Limit: Uses the `X-RateLimit-Reset` header to determine how long to back off.
         - Secondary Rate Limit: Uses `secondary_rate_wait` for backoff time.
      - The calculated backoff is set for the next retry attempt using a custom `get_backoff_time`.
   - Fallbacks: If no retry conditions are met, the method either raises an appropriate exception or proceeds with standard retry logic.

In [35]:
class GithubRetry(Retry):
    """
    A Github-specific implementation of `urllib3.Retry`.

    In addition to the statuses given by the caller (5xx by default), 403 responses
    are considered for retry. A 403 is retried when it carries a ``Retry-After``
    header or when the ``message`` of its JSON body indicates a primary or
    secondary rate limit error.
    """

    # The datetime class is reached through this (name-mangled) class attribute
    # rather than the module global -- presumably so it can be substituted in tests.
    __datetime = datetime

    def __init__(self, secondary_rate_wait: float = Consts['DEFAULT_SECONDARY_RATE_WAIT'], **kwargs: Any) -> None:
        """
        :param secondary_rate_wait: seconds to wait before retrying secondary rate limit errors
        :param kwargs: see urllib3.Retry for more arguments
        """
        self.secondary_rate_wait = secondary_rate_wait

        # Accept any iterable (list, set, tuple, ...) or None for status_forcelist and
        # add 403 only once: `new()` feeds the stored list back into this constructor,
        # so an unconditional append would add one more 403 on every retry.
        forcelist = kwargs.get("status_forcelist", range(500, 600))
        forcelist = [] if forcelist is None else list(forcelist)
        if 403 not in forcelist:
            forcelist.append(403)
        kwargs["status_forcelist"] = forcelist

        kwargs["allowed_methods"] = kwargs.get("allowed_methods", Retry.DEFAULT_ALLOWED_METHODS.union({"GET", "POST"}))
        super().__init__(**kwargs)

    def new(self, **kw: Any) -> Self:
        """Create the follow-up retry object, carrying `secondary_rate_wait` over to it."""
        kw.update(dict(secondary_rate_wait=self.secondary_rate_wait))
        return super().new(**kw)

    def isRateLimitError(self, message: str) -> bool:
        """Return True if ``message`` indicates a primary or a secondary rate limit error."""
        return self.isPrimaryRateLimitError(message) or self.isSecondaryRateLimitError(message)

    def isPrimaryRateLimitError(self, message: str) -> bool:
        """Return True if ``message`` starts with "api rate limit exceeded" (case-insensitive)."""
        if not message:
            return False

        return message.lower().startswith("api rate limit exceeded")

    def isSecondaryRateLimitError(self, message: str) -> bool:
        """Return True if ``message`` matches one of the known secondary rate limit texts (case-insensitive)."""
        if not message:
            return False

        message = message.lower()
        return message.startswith("you have exceeded a secondary rate limit") or message.endswith(
            (
                "please retry your request again later.",
                "please wait a few minutes before you try again.",
            )
        )

    def _rate_limit_backoff(self, message: str, response: HTTPResponse) -> float:
        """Return the backoff in seconds that the rate limit response itself calls for."""
        if not self.isPrimaryRateLimitError(message):
            # secondary rate limit: wait for the configured number of seconds
            return self.secondary_rate_wait

        # primary rate limit: wait at least until the reset time announced by the server
        backoff = 0.0
        value = response.headers.get(Consts['headerRateReset'])
        if value and value.isdigit():
            reset = self.__datetime.fromtimestamp(int(value), timezone.utc)
            delta = reset - self.__datetime.now(timezone.utc)
            resetBackoff = delta.total_seconds()

            if resetBackoff > 0:
                logger.debug(f"Reset occurs in {str(delta)} ({value} / {reset})")

            # plus 1s as it is not clear when in that second the reset occurs
            backoff = resetBackoff + 1
        return backoff

    def increment(
        self,
        method: Optional[str] = None,
        url: Optional[str] = None,
        response: Optional[HTTPResponse] = None,
        error: Optional[Exception] = None,
        _pool: Optional[ConnectionPool] = None,
        _stacktrace: Optional[TracebackType] = None,
    ) -> Retry:
        """
        Return the retry object for the next attempt, with a rate-limit aware backoff.

        403 responses without a ``Retry-After`` header are inspected: for rate limit
        errors the returned retry object reports a backoff of at least the time
        required by the rate limit. All other cases use the standard
        `urllib3.Retry.increment` logic.

        :raises MaxRetryError: when no retries are left (raised by urllib3.Retry.increment)
        :raises RuntimeError: when the 403 response body cannot be inspected, or when the
            403 is not a rate limit error or the backoff cannot be determined (the
            original error is attached as the cause)
        """
        if response and response.status == 403:
            if "Retry-After" in response.headers:
                # Sleeping 'Retry-After' seconds is implemented in urllib3.Retry.sleep() and called by urllib3
                logger.info(f'Retrying after {response.headers.get("Retry-After")} seconds')
            else:
                # to identify retry-able methods, we inspect the response body
                try:
                    content = self.get_content(response, url)  # type: ignore
                    content = json.loads(content)  # type: ignore
                    message = content.get("message")  # type: ignore
                except Exception as e:
                    raise RuntimeError("Failed to inspect response message") from e

                try:
                    if self.isRateLimitError(message):
                        # check early that we are retrying at all
                        retry = super().increment(method, url, response, error, _pool, _stacktrace)

                        # we backoff primary rate limit at least until X-RateLimit-Reset,
                        # we backoff secondary rate limit for secondary_rate_wait seconds
                        backoff = self._rate_limit_backoff(message, response)

                        # we backoff at least retry's next backoff
                        retry_backoff = retry.get_backoff_time()
                        if retry_backoff > backoff:
                            if backoff > 0:
                                logger.debug(
                                    f"Retry backoff of {retry_backoff}s exceeds "
                                    f"required rate limit backoff of {backoff}s".replace(".0s", "s"),
                                )
                            backoff = retry_backoff

                        def get_backoff_time() -> float:
                            return backoff

                        logger.info(
                            f"Setting next backoff to {backoff}s".replace(".0s", "s"),
                        )
                        # urllib3 asks the retry object for its backoff; make it report ours
                        retry.get_backoff_time = get_backoff_time  # type: ignore
                        return retry

                    logger.debug(
                        "Response message does not indicate retry-able error",
                    )
                    # NOTE(review): this exception is wrapped by the generic handler below,
                    # so callers see a RuntimeError whose cause carries status and body.
                    raise Exception(f'{response.status} {content}')  # type: ignore
                except MaxRetryError:
                    raise
                except Exception as e:
                    raise RuntimeError("Failed to determine retry backoff") from e

        # retry the request as usual
        return super().increment(method, url, response, error, _pool, _stacktrace)

    @staticmethod
    def get_content(resp: HTTPResponse, url: str) -> bytes:
        """
        Read the body of a raw urllib3 response by wrapping it in a `requests.Response`.

        :param resp: the raw urllib3 response
        :param url: the URL the response belongs to
        :return: the response body
        """
        # logic taken from HTTPAdapter.build_response (requests.adapters)
        response = Response()

        # Fallback to None if there's no status_code, for whatever reason.
        response.status_code = getattr(resp, "status", None)  # type: ignore

        # Make headers case-insensitive.
        response.headers = CaseInsensitiveDict(getattr(resp, "headers", {}))

        # Set encoding.
        response.encoding = get_encoding_from_headers(response.headers)
        response.raw = resp
        response.reason = response.raw.reason  # type: ignore

        response.url = url

        return response.content

Utility functions to manipulate URLs and parse HTTP headers, particularly for adding query parameters to URLs and extracting useful links from HTTP response headers.

In [43]:
def add_parameters_to_url(
    url: str,
    parameters: Dict[str, Any],
) -> str:
    """
    Add query parameters to a given URL.

    The query string is placed before any fragment of the URL, so
    ``https://host/path#frag`` becomes ``https://host/path?key=value#frag``.

    :param url: The base URL to which parameters will be added.
    :param parameters: A dictionary of parameters to add to the URL.
                       Existing parameters in the URL will be overwritten if they have the same keys.
                       A list value produces one ``key=value`` pair per element.
    :return: The URL with the updated query parameters.
    """
    # Imported here so the function does not depend on some other package
    # having loaded the `urllib.parse` submodule as a side effect.
    import urllib.parse

    parts = urllib.parse.urlparse(url)
    # union parameters in url with given parameters, the latter have precedence
    merged = urllib.parse.parse_qs(parts.query)
    for key, value in parameters.items():
        merged[key] = value if isinstance(value, list) else [value]
    pairs = [(key, item) for key, items in merged.items() for item in items]

    # Let urlunparse assemble the URL so the query ends up before the fragment;
    # an empty query yields no "?" at all.
    return urllib.parse.urlunparse(parts._replace(query=urllib.parse.urlencode(pairs)))

def parseLinkHeader(headers: Dict[str, Union[str, int]]) -> Dict[str, str]:
    """
    Parse the `Link` header from HTTP response headers.

    The header is expected under the lowercase key ``link`` and in the form
    ``<url>; rel="next", <url>; rel="last"``. Entries that are not of the form
    ``<url>`` followed by a ``rel=...`` parameter are skipped.

    :param headers: A dictionary containing HTTP response headers.
    :return: A dictionary mapping relation types (e.g., "next", "prev") to URLs.
             Empty when there is no usable ``link`` header.
    """
    links: Dict[str, str] = {}
    if "link" not in headers or not isinstance(headers["link"], str):
        return links

    for entry in headers["link"].split(", "):
        target, *params = entry.split("; ")
        # the relation may be preceded by other parameters, so search for it
        rel_param = next((p for p in params if p.startswith("rel=")), None)
        if rel_param is None or not (target.startswith("<") and target.endswith(">")):
            # empty or malformed entry: nothing to record
            continue
        links[rel_param[len("rel="):].strip('"')] = target[1:-1]
    return links

def is_iso_format(date_string: str) -> bool:
    """
    Tell whether ``date_string`` has the form ``YYYY-MM-DDTHH:MM:SSZ``.

    :param date_string: The text to check.
    :return: True if the text parses with that exact format, False otherwise.
    """
    try:
        datetime.strptime(date_string, "%Y-%m-%dT%H:%M:%SZ")
    except ValueError:
        # strptime rejects anything that does not match the format
        return False
    else:
        return True

The Github class provides a Python client to interact with the GitHub API v3, enabling authenticated access to endpoints, handling paginated responses, and enforcing API rate limits. It simplifies making API requests and managing authentication, retries, and response parsing.

## Documentation for the `Github` Class

The `Github` class provides a Python client to interact with the GitHub API v3, enabling authenticated access to endpoints, handling paginated responses, and enforcing API rate limits. It simplifies making API requests and managing authentication, retries, and response parsing.

---

### **Constructor: `__init__`**
Initializes a `Github` instance.

#### Parameters:
- **auth** (`Auth`): An `Auth` instance for API authentication.
- **base_url** (`str`, optional): Base URL for the GitHub API. Default: `Consts['DEFAULT_BASE_URL']`.
- **timeout** (`int`, optional): Timeout for API requests in seconds. Default: `Consts['DEFAULT_TIMEOUT']`.
- **user_agent** (`str`, optional): User-agent string to identify the client. Default: `Consts['DEFAULT_USER_AGENT']`.
- **per_page** (`int`, optional): Number of items per page for paginated responses. Default: `Consts['DEFAULT_PER_PAGE']`.
- **verify** (`bool | str`, optional): SSL verification. Can be `True`, `False`, or a CA_BUNDLE file path.
- **retry** (`int | Retry | None`, optional): Retry strategy for failed requests. Default: `default_retry`.
- **seconds_between_requests** (`float | None`, optional): Delay between consecutive requests to prevent rate-limiting. Default: `Consts['DEFAULT_SECONDS_BETWEEN_REQUESTS']`.

---

### **Methods**

#### **Private Methods**

- **`__getConnection()`**
  - Configures and returns an HTTPS connection object if not already initialized.
  - Ensures proper retries, timeout, and SSL verification settings.

- **`__deferRequest()`**
  - Implements delay logic to enforce the minimum time interval between consecutive API requests.

- **`__send_request(method, url, headers, input=None)`**
  - Sends an HTTP request to the API.
  - **Parameters**:
    - `method` (`str`): HTTP method (e.g., "GET", "POST").
    - `url` (`str`): Target URL.
    - `headers` (`Dict[str, str]`): HTTP headers.
    - `input` (`Optional[Any]`): Request body or payload.
  - **Returns**: A tuple of HTTP status, response headers, and response content.

- **`__makeAbsoluteUrl(url)`**
  - Converts a relative URL to an absolute URL based on the base URL.

- **`__check_response(status, responseHeaders, output)`**
  - Validates the HTTP response and decodes JSON data if applicable.
  - Raises an exception for errors (status codes >= 400).

- **`__get(url, parameters=None, headers=None)`**
  - Sends a `GET` request to the API.
  - **Parameters**:
    - `url` (`str`): Target URL.
    - `parameters` (`Optional[Dict[str, Any]]`): Query parameters.
    - `headers` (`Optional[Dict[str, str]]`): HTTP headers.
  - **Returns**: A tuple of response headers and decoded response data.

#### **Pagination Methods**

- **`_getPage(data, headers)`**
  - Extracts items from a paginated API response.
  - Updates the `__nextUrl` for subsequent pagination requests.
  - **Parameters**:
    - `data` (`Any`): Response data.
    - `headers` (`Optional[Dict[str, Union[str, int]]]`): HTTP headers containing pagination links.
  - **Returns**: A list of items.

- **`paginator(url, params=None, headers=None)`**
  - Provides an iterator for paginated API results.
  - **Parameters**:
    - `url` (`str`): API endpoint URL.
    - `params` (`Optional[Dict[str, Any]]`): Query parameters.
    - `headers` (`Optional[Dict[str, Union[str, int]]]`): HTTP headers.
  - **Yields**: Items from all paginated responses.

#### **Public Methods**

- **`close()`**
  - Closes the connection to the API server.

- **`search_repositories(query, sort=None, order=None, **qualifiers)`**
  - Searches repositories using the GitHub API.
  - **Parameters**:
    - `query` (`str`): Search keywords.
    - `sort` (`Optional[str]`): Sorting criteria (`'stars'`, `'forks'`, `'updated'`).
    - `order` (`Optional[str]`): Sort order (`'asc'`, `'desc'`).
    - `qualifiers` (`**kwargs`): Additional search qualifiers (e.g., language, topic).
  - **Returns**: An iterator for repository search results.

- **`commits(owner: str, repo: str, sha: Optional[str] = None, path: Optional[str] = None, author: Optional[str] = None, committer: Optional[str] = None, since: Optional[str] = None, until: Optional[str] = None)`**
  - Retrieve a list of commits for a repository.
  - **Parameters**:
    - `owner`: GitHub username or organization owning the repository.
    - `repo`: Repository name.
    - `sha` (optional): Branch/commit SHA to start listing commits from.
    - `path` (optional): Restrict results to commits affecting the specified file/directory.
    - `author` (optional): Filter by author (username or email).
    - `committer` (optional): Filter by committer (username or email).
    - `since` (optional): ISO 8601 string to filter commits after a date.
    - `until` (optional): ISO 8601 string to filter commits before a date.
  - **Returns**: Iterator over commit objects.

- **`contents(owner: str, repo: str, path: str, ref: Optional[str] = None, content_type: Optional[str] = None)`**
  - Retrieve the content of a file or directory in a repository.
  - **Parameters**:
    - `owner`: GitHub username or organization owning the repository.
    - `repo`: Repository name.
    - `path`: File/directory path within the repository.
    - `ref` (optional): Commit/branch/tag name (default: repo's default branch).
    - `content_type` (optional): Desired response format ('raw', 'html', 'object').
  - **Returns**: Iterator over dictionaries representing the file or directory contents, or raw content as a string.

---

### **Key Features**
- **Authentication**: Integrates with the `Auth` class to handle token-based authentication.
- **Rate Limit Management**: Tracks and enforces API rate limits to avoid throttling.
- **Retry Strategy**: Handles retries for network errors and rate-limit responses.
- **Pagination**: Simplifies handling of paginated API responses.
- **Extensibility**: Designed for integration with additional API endpoints and functionalities.

In [181]:
# Retry policy built with GithubRetry's default arguments; it is the default
# value of the `retry` parameter of `Github.__init__` below.
default_retry = GithubRetry()

class Github:
    """
    The main class to access the GitHub API v3.
    Provides methods for performing authenticated API requests and managing paginated responses.
    """

    def __init__(
        self,
        auth: Auth,
        base_url: str = Consts['DEFAULT_BASE_URL'],
        timeout: int = Consts['DEFAULT_TIMEOUT'],
        user_agent: str = Consts['DEFAULT_USER_AGENT'],
        per_page: int = Consts['DEFAULT_PER_PAGE'],
        verify: bool | str = True,
        retry: int | Retry | None = default_retry,
        seconds_between_requests: float | None = Consts['DEFAULT_SECONDS_BETWEEN_REQUESTS'],
    )-> None:
        """
        Set up the client: validate the arguments and remember the connection settings.

        No connection is opened here; it is created on the first request.

        :param auth: `Auth` instance used to authenticate requests.
        :param base_url: Base URL of the GitHub API; its scheme must be https.
        :param timeout: Request timeout in seconds.
        :param user_agent: User agent string sent with every request (required).
        :param per_page: Page size for paginated responses.
        :param verify: SSL verification (`True`, `False`, or a path to a CA_BUNDLE file).
        :param retry: Retry configuration, an integer or a `Retry` object.
        :param seconds_between_requests: Minimum delay between two requests.
        """
        # --- argument validation ---
        assert isinstance(auth, Auth), auth
        assert isinstance(timeout, int), timeout
        assert user_agent is None or isinstance(user_agent, str), user_agent
        assert isinstance(per_page, int), per_page
        assert isinstance(verify, (bool, str)), verify
        assert retry is None or isinstance(retry, (int, urllib3.util.Retry)), retry
        assert seconds_between_requests is None or seconds_between_requests >= 0

        parsed_base = urllib.parse.urlparse(base_url)
        assert parsed_base.scheme == 'https'
        assert user_agent is not None # github now requires a user-agent.

        # --- where requests go ---
        self.__base_url = base_url
        self.__hostname = parsed_base.hostname
        self.__port = parsed_base.port
        self.__prefix = parsed_base.path

        # --- how requests are sent ---
        self.__auth = auth
        self.__userAgent = user_agent
        self.__verify = verify
        self.__timeout = timeout
        self.__retry = retry
        self.__seconds_between_requests = seconds_between_requests
        self.__connection = None
        # time of the last request, keyed by the HTTP method it used
        self.__last_requests: Dict[str, float] = {}

        # --- public state ---
        self.rate_limiting = (-1, -1)
        self.rate_limiting_resettime = 0
        self.per_page = per_page

    def __getConnection(self):
        """
        Give back the stored connection, or build a new one when none is stored.

        A newly built connection is only returned, not stored; the caller keeps it.

        :return: HTTPS connection object configured with host, port, retry, timeout and verify.
        """
        if self.__connection is None:
            settings = dict(
                retry=self.__retry,
                timeout=self.__timeout,
                verify=self.__verify,
            )
            return HTTPSRequestsConnectionClass(self.__hostname, self.__port, **settings)

        return self.__connection

    def __deferRequest(self) -> None:
        """
        Sleep until the configured minimum interval since the latest request has passed.

        With no interval configured the sleep lasts zero seconds.
        """
        # most recent request time over all HTTP methods (0 if nothing was sent yet)
        latest = max(self.__last_requests.values(), default=0)

        if self.__seconds_between_requests:
            earliest_allowed = latest + self.__seconds_between_requests
        else:
            earliest_allowed = 0

        now = datetime.now(timezone.utc).timestamp()
        time.sleep(max(earliest_allowed - now, 0))

    def __send_request(
        self,
        method: str,
        url: str,
        headers: Dict[str, str],
        input: Optional[Any] = None,
    ):
        """
        Wait for the request interval, then send one HTTP request over the connection.

        The time of the request is recorded under ``method`` whether or not it succeeds.

        :param method: HTTP method (e.g., "GET", "POST").
        :param url: The target URL for the request.
        :param headers: Dictionary of HTTP headers for the request.
        :param input: Optional payload or body for the request.
        :return: Tuple of status, response headers (names lowercased), and response content.
        """
        self.__deferRequest()

        try:
            conn = self.__connection = self.__getConnection()

            conn.request(method, url, input, headers)
            reply = conn.getresponse()

            lowered_headers = {name.lower(): value for name, value in reply.getheaders()}
            return reply.status, lowered_headers, reply.read()
        finally:
            self.__last_requests[method] = datetime.now(timezone.utc).timestamp()

    def __makeAbsoluteUrl(self, url: str) -> str:
        """
        Turn ``url`` into the path (plus query) to request from the API host.

        A URL starting with "/" gets the base URL's path prefix prepended. Any other
        URL must point at an accepted host, path and port; its path and query are kept.

        :param url: Relative or absolute URL.
        :return: Path, including the query string if there is one.
        """
        if url.startswith("/"):
            return f"{self.__prefix}{url}"

        parsed = urllib.parse.urlparse(url)
        accepted_hosts = [
            self.__hostname,
            "uploads.github.com",
            "status.github.com",
            "github.com",
        ]
        accepted_paths = (self.__prefix, "/api/", "/login/oauth")

        assert parsed.hostname in accepted_hosts, parsed.hostname
        assert parsed.path.startswith(accepted_paths), parsed.path
        assert parsed.port == self.__port, parsed.port

        if parsed.query != "":
            return f"{parsed.path}?{parsed.query}"
        return parsed.path

    def __check_response(
        self,
        status: int,
        responseHeaders: Dict[str, Any],
        output: str,
    ) -> Tuple[Dict[str, Any], Any]:
        """
        Check the API response for errors and decode the response content.

        :param status: HTTP status code.
        :param responseHeaders: Dictionary of HTTP response headers (lowercase names).
        :param output: Raw response content (bytes are decoded as UTF-8).
        :return: Tuple of the response headers and the data. The data is the undecoded
                 text for raw/html media types, ``None`` for an empty body, and the
                 decoded JSON otherwise.
        :raises Exception: when the status code is 400 or above.
        :raises ValueError: when the body is not valid JSON.
        """
        data = output.decode('utf-8') if isinstance(output, bytes) else output

        if status >= 400:
            raise Exception(f'{status} {data}')

        # raw and html media types are returned as-is, they are not JSON documents
        content_type = responseHeaders.get('content-type', '')
        if Consts['headerRawJSON'] in content_type or Consts['headerHtmlJSON'] in content_type:
            return responseHeaders, data

        if not data:
            # Always return a pair: callers unpack the result into (headers, data).
            return responseHeaders, None

        return responseHeaders, json.loads(data)


    def __get(self,
        url: str,
        parameters: Optional[Dict[str, Any]] = None,
        headers: Optional[Dict[str, str]] = None,
    ) -> Tuple[Dict[str, Any], Any]:
        """
        Perform a GET request to the GitHub API.

        Authorization and User-Agent headers are added to a copy of ``headers``; the
        caller's dictionary is left untouched. The rate limit information found in
        the response headers is stored in `rate_limiting` and `rate_limiting_resettime`.

        :param url: Target URL for the request.
        :param parameters: Optional query parameters for the request.
        :param headers: Optional HTTP headers for the request.
        :return: Tuple containing response headers and data.
        """
        # work on a copy so the caller's headers are not modified
        request_headers = dict(headers) if headers else {}
        if self.__auth is not None:
            self.__auth.authentication(request_headers)
        request_headers['User-Agent'] = self.__userAgent

        url = self.__makeAbsoluteUrl(url)
        url = add_parameters_to_url(url, parameters or {})

        status, responseHeaders, output = self.__send_request('get', url, request_headers)

        # The response header names were lowercased by __send_request, so the
        # configured names are lowercased as well before they are looked up.
        remaining_key = Consts['headerRateRemaining'].lower()
        limit_key = Consts['headerRateLimit'].lower()
        reset_key = Consts['headerRateReset'].lower()

        if remaining_key in responseHeaders and limit_key in responseHeaders:
            self.rate_limiting = (
                int(float(responseHeaders[remaining_key])),
                int(float(responseHeaders[limit_key])),
            )
        if reset_key in responseHeaders:
            self.rate_limiting_resettime = int(float(responseHeaders[reset_key]))

        return self.__check_response(status, responseHeaders, output)

    def paginator(self,
            url: str,
            params: Optional[Dict[str, Any]] = None,
            headers: Optional[Dict[str, Union[str, int]]] = None) -> Iterator[Dict] | str:
        """
        Create a generator to iterate over paginated results.

        Pages are fetched lazily, following the ``next`` link of each response
        until no further page is advertised.

        :param url: API endpoint URL.
        :param params: Query parameters for the request. The dictionary is
            copied, so the caller's object is never modified.
        :param headers: HTTP headers sent with every page request.
        :return: Iterator yielding items from all pages. A raw/HTML response
            is yielded once as a single text value; a JSON object with an
            ``items`` key yields its items; any other JSON object is yielded
            once as a whole; a JSON list yields its non-``None`` elements.
        """
        # Copy so adding ``per_page`` does not modify the caller's dictionary.
        nextParams: Dict[str, Any] = dict(params) if params else {}
        nextUrl: Optional[str] = url
        if self.per_page != 30:
            nextParams['per_page'] = self.per_page
        while nextUrl is not None:
            # Keep request and response headers apart: the request headers
            # (e.g. Accept) must be sent unchanged with every page.
            requestHeaders = dict(headers) if headers is not None else None
            responseHeaders, data = self.__get(nextUrl, nextParams, requestHeaders)
            if 'content-type' in responseHeaders:
                contentType = responseHeaders['content-type']
                if Consts['headerRawJSON'] in contentType or \
                        Consts['headerHtmlJSON'] in contentType:
                    # Raw/HTML content is one text document; stop here so the
                    # string is not iterated character by character below.
                    yield data
                    return
            data = data if data else []
            nextUrl = None
            if len(data) > 0:
                links = parseLinkHeader(responseHeaders)
                if "next" in links:
                    nextUrl = links["next"]
            # The "next" link is used as-is, so no extra parameters are sent
            # from the second page on.
            nextParams = {}
            if isinstance(data, dict):
                if 'items' not in data:
                    # A single JSON object: yield it whole rather than
                    # iterating over its keys.
                    yield data
                    continue
                data = data['items']
            yield from (element for element in data if element is not None)


    def close(self) -> None:
        """
        Close the connection object held by this client.
        """
        connection = self.__connection
        connection.close()

    def search_repositories(
        self,
        query: str,
        sort: Optional[str] = None,
        order: Optional[str] = None,
        qualifiers: Optional[Dict] = None,
    ) -> Iterator[Dict]:
        """
        Search repositories.

        :calls: `GET /search/repositories <https://docs.github.com/en/rest/reference/search>`
        :param query: string; free-text part of the search (may be empty when
            qualifiers are given)
        :param sort: string ('stars', 'forks', 'updated')
        :param order: string ('asc', 'desc')
        :param qualifiers: dict query qualifiers; each entry is appended to
            the query as ``name:value``. ``None`` means no qualifiers.
        :return: The iterator produced by ``paginator`` for the search request.
        :raises AssertionError: If an argument has an unexpected type or value,
            or if both ``query`` and ``qualifiers`` are empty.
        """
        assert isinstance(query, str), query
        url_parameters = dict()
        if sort is not None:
            assert sort in ("stars", "forks", "updated"), sort
            url_parameters["sort"] = sort
        if order is not None:
            assert order in ("asc", "desc"), order
            url_parameters["order"] = order

        query_chunks = [query] if query else []
        # ``qualifiers`` defaults to None, so fall back to an empty mapping
        # instead of calling ``.items()`` on None.
        query_chunks.extend(
            f"{qualifier}:{value}"
            for qualifier, value in (qualifiers or {}).items()
        )

        url_parameters["q"] = " ".join(query_chunks)
        assert url_parameters["q"], "need at least one qualifier"

        return self.paginator(
            "/search/repositories",
            url_parameters,
        )

    def commits(
        self,
        owner: str,
        repo: str,
        sha: Optional[str] = None,
        path: Optional[str] = None,
        author: Optional[str] = None,
        committer: Optional[str] = None,
        since: Optional[str] = None,
        until: Optional[str] = None,
    ) -> Iterator[Dict]:
        """
        List the commits of a repository.

        Every optional argument that is not ``None`` is validated and sent as
        a query parameter of the same name to ``/repos/{owner}/{repo}/commits``.

        :param owner: The GitHub username or organization that owns the repository.
        :param repo: The name of the repository.
        :param sha: Optional. The SHA or branch to start listing commits from.
        :param path: Optional. Only commits touching this file or directory path.
        :param author: Optional. GitHub username or email address of the commit author.
        :param committer: Optional. GitHub username or email address of the committer.
        :param since: Optional. ISO 8601 date string; only commits after this date.
        :param until: Optional. ISO 8601 date string; only commits before this date.
        :return: The iterator produced by ``paginator``; each item represents a commit object.

        **Example Usage:**

        ```python
        commits = github.commits(
            owner="octocat",
            repo="Hello-World",
            sha="main",
            since="2023-01-01T00:00:00Z",
            until="2023-12-31T23:59:59Z",
        )
        for commit in commits:
            print(commit["sha"])
        ```

        **Notes:**
        - `since` and `until` must be strings accepted by `is_iso_format`
          (e.g. `"2023-01-01T00:00:00Z"`).
        - Arguments are checked with `assert`; an invalid value raises
          `AssertionError` carrying the offending value.
        - Pagination is handled by the returned iterator.
        """
        assert isinstance(owner, str), owner
        assert isinstance(repo, str), repo

        query_args = {}

        # Plain string filters, checked in the order of the signature.
        plain_filters = (
            ("sha", sha),
            ("path", path),
            ("author", author),
            ("committer", committer),
        )
        for key, text in plain_filters:
            if text is None:
                continue
            assert isinstance(text, str), text
            query_args[key] = text

        # Date filters must additionally be ISO 8601 formatted.
        for key, stamp in (("since", since), ("until", until)):
            if stamp is None:
                continue
            assert isinstance(stamp, str) and is_iso_format(stamp), stamp
            query_args[key] = stamp

        endpoint = f"/repos/{owner}/{repo}/commits"
        return self.paginator(endpoint, query_args)

    def contents(
        self,
        owner: str,
        repo: str,
        path: str,
        ref: Optional[str] = None,
        content_type: Optional[str] = None,
    ) -> Iterator[Dict] | str:
        """
        Retrieve the content of a file or directory in a repository.

        :param owner: The GitHub username or organization that owns the repository.
        :param repo: The name of the repository.
        :param path: The path to the file or directory within the repository.
        :param ref: Optional. The name of the commit/branch/tag. Defaults to the repository's default branch (usually `main`).
        :param content_type: Optional. Specifies the format of the returned content.
                            Must be one of `'raw'`, `'html'`, or `'object'`.
                            - `'raw'`: Returns raw content.
                            - `'html'`: Returns content rendered as HTML.
                            - `'object'`: Returns a JSON representation of the object.
        :return: An iterator over dictionaries, where each dictionary represents the content of the specified path.

        **Notes:**
        - The `path` parameter can refer to either a file or a directory.
        - If `ref` is not provided, the method retrieves content from the repository's default branch.
        - Use `content_type` to control how the content is returned (raw bytes, HTML, or JSON object).
        - This method supports paginated responses for directories containing multiple items.

        **Headers:**
        - If `content_type` is specified, custom `Accept` headers are added to define the desired response format.
        - For `'raw'`, the header `application/vnd.github.raw` is used.
        - For `'html'`, the header `application/vnd.github.html` is used.
        - For `'object'`, the header `application/vnd.github.object` is used.
        """
        assert isinstance(owner, str), owner
        assert isinstance(repo, str), repo
        assert isinstance(path, str), path
        url_parameters = dict()
        if ref is not None:
            assert isinstance(ref, str), ref
            url_parameters["ref"] = ref
        headers: Optional[Dict[str, str]] = None
        if content_type is not None:
            assert content_type in ['raw', 'html', 'object'], content_type
            if content_type == 'raw':
                headers = {'Accept': Consts['headerRawJSON']}
            if content_type == 'html':
                headers = {'Accept': Consts['headerHtmlJSON']}
            if content_type == 'object':
                headers = {'Accept': Consts['headerObjectJSON']}

        return self.paginator(
            f"/repos/{owner}/{repo}/contents/{path}",
            url_parameters,
            headers=headers,
        )

# Data extraction examples:

### Adding an access token

In [182]:
# Authenticate with an access token: replace the placeholder below with a
# real token before running the following cells.
ACCESS_TOKEN = 'YOUR_API_KEY_HERE'
auth = Token(ACCESS_TOKEN)

### Defining the client

In [183]:
# First create a Github client instance that authenticates with `auth`:
g = Github(auth=auth)

### Search repositories

In [184]:
# Search for repositories that mention `OpenAI` in their README file and have
# at least 10000 followers. Each qualifier is appended to the query as
# `name:value`, e.g. `in:readme`.
repos = g.search_repositories(query='OpenAI', qualifiers={'in':'readme', 'followers': '>=10000'})
for repo in repos:
    print(repo)

{'id': 599394820, 'node_id': 'R_kgDOI7oKBA', 'name': 'chatgpt-web', 'full_name': 'Chanzhaoyu/chatgpt-web', 'private': False, 'owner': {'login': 'Chanzhaoyu', 'id': 24789441, 'node_id': 'MDQ6VXNlcjI0Nzg5NDQx', 'avatar_url': 'https://avatars.githubusercontent.com/u/24789441?v=4', 'gravatar_id': '', 'url': 'https://api.github.com/users/Chanzhaoyu', 'html_url': 'https://github.com/Chanzhaoyu', 'followers_url': 'https://api.github.com/users/Chanzhaoyu/followers', 'following_url': 'https://api.github.com/users/Chanzhaoyu/following{/other_user}', 'gists_url': 'https://api.github.com/users/Chanzhaoyu/gists{/gist_id}', 'starred_url': 'https://api.github.com/users/Chanzhaoyu/starred{/owner}{/repo}', 'subscriptions_url': 'https://api.github.com/users/Chanzhaoyu/subscriptions', 'organizations_url': 'https://api.github.com/users/Chanzhaoyu/orgs', 'repos_url': 'https://api.github.com/users/Chanzhaoyu/repos', 'events_url': 'https://api.github.com/users/Chanzhaoyu/events{/privacy}', 'received_events_u

### List commits

In [185]:
# List up to 10 commits from the official Linux kernel repo
owner = 'torvalds'
repo = 'linux'
commits = g.commits(owner=owner, repo=repo)
count = 0
for commit in commits:
    # `count` is the number of commits printed so far; stop once 10 have been
    # shown (a `> 10` check would let an eleventh commit through).
    if count >= 10:
        break
    print(commit)
    count += 1

{'sha': '7eef7e306d3c40a0c5b9ff6adc9b273cc894dbd5', 'node_id': 'C_kwDOACN7MtoAKDdlZWY3ZTMwNmQzYzQwYTBjNWI5ZmY2YWRjOWIyNzNjYzg5NGRiZDU', 'commit': {'author': {'name': 'Linus Torvalds', 'email': 'torvalds@linux-foundation.org', 'date': '2024-11-26T02:54:00Z'}, 'committer': {'name': 'Linus Torvalds', 'email': 'torvalds@linux-foundation.org', 'date': '2024-11-26T02:54:00Z'}, 'message': 'Merge tag \'for-6.13/dm-changes\' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm\n\nPull device mapper updates from Mikulas Patocka:\n\n - remove unused functions and variables\n\n - rate-limit error messages in syslog\n\n - fix typo\n\n - remove u64 alignment requirement for murmurhash\n\n - reset bi_ioprio to the default for dm-vdo\n\n - add support for get_unique_id\n\n - Add missing destroy_work_on_stack() to dm-thin\n\n - use kmalloc to allocate power-of-two sized buffers in bufio\n\n* tag \'for-6.13/dm-changes\' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/li

### List contents

In [186]:
# List the top-level contents of this repository (an empty path requests
# `/repos/{owner}/{repo}/contents/`)
owner = 'ibraym'
repo = 'Data-Source-API-Analyst-Test'
path = ''
contents = g.contents(owner=owner, repo=repo, path=path)

# Each yielded item is one entry (file or directory) of the listing
for content in contents:
    print(content)

{'name': 'README.md', 'path': 'README.md', 'sha': '0ad91889ad2a4f2a75bbef8f3642d280a59f2e32', 'size': 3444, 'url': 'https://api.github.com/repos/ibraym/Data-Source-API-Analyst-Test/contents/README.md?ref=master', 'html_url': 'https://github.com/ibraym/Data-Source-API-Analyst-Test/blob/master/README.md', 'git_url': 'https://api.github.com/repos/ibraym/Data-Source-API-Analyst-Test/git/blobs/0ad91889ad2a4f2a75bbef8f3642d280a59f2e32', 'download_url': 'https://raw.githubusercontent.com/ibraym/Data-Source-API-Analyst-Test/master/README.md', 'type': 'file', '_links': {'self': 'https://api.github.com/repos/ibraym/Data-Source-API-Analyst-Test/contents/README.md?ref=master', 'git': 'https://api.github.com/repos/ibraym/Data-Source-API-Analyst-Test/git/blobs/0ad91889ad2a4f2a75bbef8f3642d280a59f2e32', 'html': 'https://github.com/ibraym/Data-Source-API-Analyst-Test/blob/master/README.md'}}
{'name': 'content', 'path': 'content', 'sha': '2716a8a77c115292e2022aa22b02cf0806693932', 'size': 0, 'url': 'ht

In [190]:
# List the contents of the `content/docs` directory of this repository
owner = 'ibraym'
repo = 'Data-Source-API-Analyst-Test'
path = '/content/docs'
contents = g.contents(owner=owner, repo=repo, path=path)
for content in contents:
    print(content)

{'name': 'auth.md', 'path': 'content/docs/auth.md', 'sha': '1ab51dac3a4f9e6d0d700570cf8f5e734be78538', 'size': 2730, 'url': 'https://api.github.com/repos/ibraym/Data-Source-API-Analyst-Test/contents/content/docs/auth.md?ref=master', 'html_url': 'https://github.com/ibraym/Data-Source-API-Analyst-Test/blob/master/content/docs/auth.md', 'git_url': 'https://api.github.com/repos/ibraym/Data-Source-API-Analyst-Test/git/blobs/1ab51dac3a4f9e6d0d700570cf8f5e734be78538', 'download_url': 'https://raw.githubusercontent.com/ibraym/Data-Source-API-Analyst-Test/master/content/docs/auth.md', 'type': 'file', '_links': {'self': 'https://api.github.com/repos/ibraym/Data-Source-API-Analyst-Test/contents/content/docs/auth.md?ref=master', 'git': 'https://api.github.com/repos/ibraym/Data-Source-API-Analyst-Test/git/blobs/1ab51dac3a4f9e6d0d700570cf8f5e734be78538', 'html': 'https://github.com/ibraym/Data-Source-API-Analyst-Test/blob/master/content/docs/auth.md'}}
{'name': 'commits.md', 'path': 'content/docs/co

In [188]:
# Get the raw contents of a single file (content_type='raw' sends the raw
# media type in the Accept header)
owner = 'ibraym'
repo = 'Data-Source-API-Analyst-Test'
path = '/README.md'
contents = g.contents(owner=owner, repo=repo, path=path, content_type='raw')
# The first yielded item is the complete file text, so stop after it
for content in contents:
  print(content)
  break

# Data-Source-API-Analyst-Test

The goal of this repository is to provide an analysis of [GitHub API](https://docs.github.com/en/rest?apiVersion=2022-11-28) including authentication methods, requests logic, pagination, rate limits and error handling.

## Scope
Based on the client needs, we are interested in the following endpoints:

1. [Search Repositories (public)](https://docs.github.com/en/rest/search/search?apiVersion=2022-11-28#search-repositories): \
[GitHub API](https://docs.github.com/en/rest?apiVersion=2022-11-28) provides the `/search` endpoint that can be used search for specific item. We can search for `commits`, `issues`, `labels`, ...etc. To search for repositories, we should use `/search/repositories`.

2. [Commits](https://docs.github.com/en/rest/commits/commits?apiVersion=2022-11-28): To get a list of commits of a specific `repo` owned by a specific user `owner` from [GitHub API](https://docs.github.com/en/rest?apiVersion=2022-11-28), we should use `/repos/{owner}/{rep