In [49]:
import getpass
import math
import os
import time
from typing import List, Dict, Optional, Any, NoReturn

import requests

In [50]:
class GitHubAPI:
    """
    Small client for the GitHub REST API (repository search, commits, contents).

    Every HTTP call goes through :meth:`_get`, which applies a timeout and
    transparently retries requests rejected by GitHub's rate limits.
    """

    # GitHub caps pages at 100 items no matter how large `per_page` is.
    MAX_PER_PAGE: int = 100
    # The search endpoint only exposes the first 1000 matches of a query.
    SEARCH_RESULT_CAP: int = 1000

    # Class-level defaults so instances created without __init__ still work.
    timeout: float = 30.0
    max_retries: int = 3

    def __init__(self, token: str, timeout: float = 30.0, max_retries: int = 3) -> None:
        """
        Initialize the GitHubAPI instance with a Personal Access Token.

        :param token: GitHub Personal Access Token for authentication.
        :param timeout: Seconds to wait for each HTTP request before giving up.
        :param max_retries: How many times a rate-limited request is retried.
        """
        self.base_url: str = "https://api.github.com"
        self.headers: Dict[str, str] = {
            "Authorization": f"Bearer {token}",
            "Accept": "application/vnd.github+json"
        }
        self.timeout = timeout
        self.max_retries = max_retries

    @staticmethod
    def _rate_limit_wait(response: "requests.Response") -> Optional[float]:
        """
        Work out how long to pause before retrying a rate-limited response.

        :param response: The response object from the API call.
        :return: Seconds to wait (never negative), or None when the response
                 is not a rate-limit rejection.
        """
        # GitHub signals both primary and secondary limits with 403 or 429.
        if response.status_code not in (403, 429):
            return None

        headers = response.headers

        # Secondary limits carry an explicit Retry-After (in seconds).
        retry_after = headers.get("Retry-After")
        if retry_after is not None:
            try:
                return max(float(retry_after), 0.0)
            except (TypeError, ValueError):
                pass  # Not a plain number; fall back to the reset header.

        # Primary limit: quota exhausted until the epoch time in X-RateLimit-Reset.
        if str(headers.get("X-RateLimit-Remaining", "")).strip() == "0":
            try:
                reset_at = float(headers.get("X-RateLimit-Reset", ""))
            except (TypeError, ValueError):
                reset_at = time.time()
            # Clamp at zero: a reset time already in the past must not produce
            # a negative sleep. The extra second is a small safety cushion.
            return max(reset_at - time.time(), 0.0) + 1

        # A 403/429 for some other reason (e.g. missing permissions).
        return None

    def _handle_rate_limit(self, response: "requests.Response") -> "requests.Response":
        """
        Handle rate limits by waiting until the limit is lifted, if exceeded.

        :param response: The response object from the API call.
        :return: The same response object, after any required wait.
        """
        wait_time = self._rate_limit_wait(response)
        if wait_time is not None:
            print(f"Rate limit exceeded. Waiting for {math.ceil(wait_time)} seconds...")
            time.sleep(wait_time)
        return response

    def _get(self, url: str, params: Optional[Dict[str, Any]] = None) -> "requests.Response":
        """
        Send a GET request, retrying while GitHub reports a rate limit.

        :param url: Absolute URL to request.
        :param params: Optional query-string parameters.
        :return: The final response (successful, failed, or still rate-limited
                 once the retry budget is used up).
        """
        retries_left = max(self.max_retries, 0)
        while True:
            response = requests.get(
                url, headers=self.headers, params=params, timeout=self.timeout
            )
            if retries_left <= 0 or self._rate_limit_wait(response) is None:
                return response
            self._handle_rate_limit(response)
            retries_left -= 1

    def search_repositories(
        self, query: str, sort: str = "stars", order: str = "desc",
        per_page: int = 10, max_repos: int = 100, delay: float = 1.0
    ) -> List[Dict[str, Any]]:
        """
        Search for public repositories with pagination, limiting results and adding delays.

        :param query: Search query string.
        :param sort: Sort by (e.g., "stars", "forks", "updated").
        :param order: Order of results ("asc" or "desc").
        :param per_page: Number of results per page (values above 100 are clamped).
        :param max_repos: Maximum number of repositories to retrieve.
        :param delay: Time in seconds to wait between page queries.
        :return: List of repository data (empty if the first request fails).
        :raises ValueError: If ``per_page`` is less than 1.
        """
        if per_page < 1:
            raise ValueError("per_page must be a positive integer")
        if max_repos < 1:
            return []

        url = f"{self.base_url}/search/repositories"
        page_size = min(per_page, self.MAX_PER_PAGE)
        wanted = min(max_repos, self.SEARCH_RESULT_CAP)
        params: Dict[str, Any] = {
            "q": query, "sort": sort, "order": order,
            "per_page": page_size, "page": 1,
        }

        all_results: List[Dict[str, Any]] = []
        page = 1
        while len(all_results) < wanted:
            params["page"] = page
            response = self._get(url, params)

            if response.status_code != 200:
                print(f"Error: {response.status_code}, {response.text}")
                break

            data = response.json()
            items = data.get("items", [])
            all_results.extend(items)

            # Never ask for more than the query actually matches.
            wanted = min(wanted, data.get("total_count", 0))

            # A short page is the last page; also stop once enough is collected.
            if len(items) < page_size or len(all_results) >= wanted:
                break

            page += 1
            # Only pause when another page is really going to be fetched.
            time.sleep(delay)

        return all_results[:max_repos]

    def list_commits(
        self, repo_owner: str, repo_name: str, per_page: int = 10,
        max_commits: Optional[int] = None
    ) -> List[Dict[str, Any]]:
        """
        List commits for a specific repository with pagination.

        :param repo_owner: Owner of the repository.
        :param repo_name: Name of the repository.
        :param per_page: Number of commits to fetch per page (values above 100 are clamped).
        :param max_commits: Optional cap on the number of commits returned;
                            None (the default) fetches the full history.
        :return: List of commits (possibly partial if a request fails).
        :raises ValueError: If ``per_page`` is less than 1.
        """
        if per_page < 1:
            raise ValueError("per_page must be a positive integer")

        url = f"{self.base_url}/repos/{repo_owner}/{repo_name}/commits"
        # Compare page length against what GitHub will really send back,
        # otherwise per_page > 100 looks like a short (final) first page.
        page_size = min(per_page, self.MAX_PER_PAGE)

        all_results: List[Dict[str, Any]] = []
        page = 1
        while max_commits is None or len(all_results) < max_commits:
            response = self._get(url, {"per_page": page_size, "page": page})

            if response.status_code != 200:
                print(f"Error: {response.status_code}, {response.text}")
                break

            data = response.json()
            all_results.extend(data)
            if len(data) < page_size:  # No more pages
                break
            page += 1

        return all_results if max_commits is None else all_results[:max_commits]

    def get_content(self, repo_owner: str, repo_name: str, path: str) -> Optional[Any]:
        """
        Get metadata or content of a file or folder in a repository.

        :param repo_owner: Owner of the repository.
        :param repo_name: Name of the repository.
        :param path: Path to the file or folder.
        :return: The decoded JSON body (per the GitHub contents API this is a
                 dictionary for a file and a list of dictionaries for a
                 folder), or None if an error occurs.
        """
        url = f"{self.base_url}/repos/{repo_owner}/{repo_name}/contents/{path}"
        response = self._get(url)

        if response.status_code == 200:
            return response.json()

        print(f"Error: {response.status_code}, {response.text}")
        return None

    @staticmethod
    def exit_with_error(message: str) -> NoReturn:
        """
        Exit the program with an error message.

        :param message: The error message to display.
        :raises SystemExit: Always, with exit status 1.
        """
        print(f"Error: {message}")
        # Raise directly instead of calling the `exit` builtin: under
        # IPython/Jupyter `exit` is replaced by an object that requests a
        # kernel shutdown rather than raising, so code after the call would
        # keep running.
        raise SystemExit(1)

# Using the API (a GitHub PAT is required)

In [None]:
# Initialize token.
# Prefer the GITHUB_TOKEN environment variable so the secret never has to be
# typed or pasted into the notebook itself.
token: str = os.environ.get("GITHUB_TOKEN", "").strip()

if not token:
    # getpass hides the typed value, whereas input() would echo the PAT into
    # the cell output that gets saved with the notebook.
    token = getpass.getpass("Enter your GitHub Personal Access Token (PAT): ").strip()
    if not token:
        GitHubAPI.exit_with_error("Personal Access Token is required.")

# Initialize GitHub API client
github_api = GitHubAPI(token)

# Example: Search for repositories

In [52]:
# Search settings: query text, page size, overall cap and pause between pages (seconds).
query, per_page, max_repos, delay_time = "machine learning", 10, 30, 1

# Fetch up to `max_repos` matching repositories, one page at a time.
repos = github_api.search_repositories(
    query=query,
    per_page=per_page,
    max_repos=max_repos,
    delay=delay_time,
)

# Summary line first, then one line per repository.
print(f"Total repositories fetched: {len(repos)}")
for repo in repos:
    print(f"Repo Name: {repo['name']}, Stars: {repo['stargazers_count']}, Url: {repo['html_url']}")

Total repositories fetched: 30
Repo Name: tensorflow, Stars: 186553, Url: https://github.com/tensorflow/tensorflow
Repo Name: transformers, Stars: 135533, Url: https://github.com/huggingface/transformers
Repo Name: ML-For-Beginners, Stars: 69919, Url: https://github.com/microsoft/ML-For-Beginners
Repo Name: funNLP, Stars: 69377, Url: https://github.com/fighting41love/funNLP
Repo Name: awesome-machine-learning, Stars: 66118, Url: https://github.com/josephmisiti/awesome-machine-learning
Repo Name: scikit-learn, Stars: 60253, Url: https://github.com/scikit-learn/scikit-learn
Repo Name: gradio, Stars: 34210, Url: https://github.com/gradio-app/gradio
Repo Name: C-Plus-Plus, Stars: 30831, Url: https://github.com/TheAlgorithms/C-Plus-Plus
Repo Name: linkedin-skill-assessments-quizzes, Stars: 28514, Url: https://github.com/Ebazhanov/linkedin-skill-assessments-quizzes
Repo Name: netron, Stars: 28262, Url: https://github.com/lutzroeder/netron
Repo Name: machine-learning-for-software-engineers, S

# Example: List commits

In [53]:
# Repository to inspect and how many commits to request per page.
repo_owner, repo_name = "octocat", "hello-world"
per_page = 5

commits = github_api.list_commits(repo_owner, repo_name, per_page)

# Nothing is printed when no commits came back.
for commit in commits or ():
    print(f"Commit SHA: {commit['sha']},\n Commit message: {commit['commit']['message']}")

Commit SHA: 7fd1a60b01f91b314f59955a4e4d4e80d8edf11d,
 Commit message: Merge pull request #6 from Spaceghost/patch-1

New line at end of file.
Commit SHA: 762941318ee16e59dabbacb1b4049eec22f0d303,
 Commit message: New line at end of file. --Signed off by Spaceghost
Commit SHA: 553c2077f0edc3d5dc5d17262f6aa498e69d6f8e,
 Commit message: first commit


# Example: Get content

In [54]:
# Repository and the path inside it whose metadata/content is requested.
repo_owner, repo_name, path = "octocat", "hello-world", "README"

content = github_api.get_content(repo_owner, repo_name, path)

# get_content returns None on failure, in which case nothing is shown.
if content:
    print(content)

{'name': 'README', 'path': 'README', 'sha': '980a0d5f19a64b4b30a87d4206aade58726b60e3', 'size': 13, 'url': 'https://api.github.com/repos/octocat/Hello-World/contents/README?ref=master', 'html_url': 'https://github.com/octocat/Hello-World/blob/master/README', 'git_url': 'https://api.github.com/repos/octocat/Hello-World/git/blobs/980a0d5f19a64b4b30a87d4206aade58726b60e3', 'download_url': 'https://raw.githubusercontent.com/octocat/Hello-World/master/README', 'type': 'file', 'content': 'SGVsbG8gV29ybGQhCg==\n', 'encoding': 'base64', '_links': {'self': 'https://api.github.com/repos/octocat/Hello-World/contents/README?ref=master', 'git': 'https://api.github.com/repos/octocat/Hello-World/git/blobs/980a0d5f19a64b4b30a87d4206aade58726b60e3', 'html': 'https://github.com/octocat/Hello-World/blob/master/README'}}
