-
Notifications
You must be signed in to change notification settings - Fork 0
/
github.py
127 lines (97 loc) · 4.34 KB
/
github.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
"""Interface to GitHub."""
import requests
def _get_github_api_url(repo: str) -> str:
"""Get GitHub API URL for a repository, accepting a flexible range of inputs.
Args:
repo (str): Repository in the form "user/repo" or full repository URL.
Returns:
str: GitHub issues API URL for the repository.
"""
if repo.startswith("https://api.github.com/repos/"):
# Assume it's already a GitHub API URL
if repo.endswith("/"):
repo = repo[:-1]
return repo
# Assume it's a GitHub repository URL and accept a flexible range of inputs
# Normalize the URL
if repo.startswith("https://github.com/"):
repo = repo.replace("https://github.com/", "")
if repo.endswith(".git"):
repo = repo[:-4]
if repo.endswith("/"):
repo = repo[:-1]
# Create the GitHub API URL from the normalized URL
if "/" in repo:
return f"https://api.github.com/repos/{repo}"
raise ValueError("Invalid repository format. Must be in the form 'user/repo' or full repository URL.")
def _invoke_github_api(repo: str, endpoint: str) -> dict:
"""Invoke the GitHub API for a repository.
Args:
repo (str): Repository in the form "user/repo" or "https://..."
endpoint (str): API endpoint and its parameters, e.g. "issues/1234".
"""
url = _get_github_api_url(repo)
if endpoint:
url = f"{url}/{endpoint}"
response = requests.get(url, timeout=10)
response.raise_for_status()
return response.json()
def get_issue(repo: str, issue_id: str = "") -> dict:
"""Get a specific issue from GitHub.
Args:
repo (str): Repository in the form "user/repo" or the full URL to the issue, e.g.
"https://github.com/qjebbs/vscode-plantuml/issues/255".
issue_id (int): Issue number. Optional if the URL already contains the issue number.
Returns:
dict: Issue data.
"""
if "/issues/" in repo:
# Assume it's already a fully-formed GitHub issue API URL
return _invoke_github_api(repo, "")
return _invoke_github_api(repo, f"issues/{issue_id}")
def get_issue_comments(issue: dict) -> dict:
"""Get comments for a specific issue.
Args:
issue (dict): Issue data, as returned by GitHub (the JSON response).
Returns:
dict: Comments data.
"""
url = issue["comments_url"]
# Request the maximum number of comments per page
# Note that pagination is not supported - if there are more than 100 comments, only the first 100 will be returned
url += "?per_page=100"
return _invoke_github_api(url, "")
def parse_issue(issue: dict) -> str:
"""Parse issue data returned by GitHub into a text format.
The goal is to translate the JSON into a text format that has only the information we need to pass to an LLM. The
shorter format helps the LLM understand the data better and saves tokens (and thus cost).
The text includes some annotations to help guide the LLM. For example, using delimiters to indicate the start and
end of the body text, and using a prefix to indicate the start of each field. This is based on previous experiments
with LLMs. It may need to be adjusted for different LLMs. Other LLMs may not need as much guidance.
Args:
issue (dict): Issue data, as returned by GitHub (the JSON response).
Returns:
str: Parsed issue data.
"""
parsed = f"Title: {issue['title']}\n"
parsed += f"Body (between '''):\n'''\n{issue['body']}\n'''\n"
parsed += f"Submitted by: {issue['user']['login']}\n"
parsed += f"Submitted on: {issue['created_at']}\n"
parsed += f"Submitter association: {issue['author_association']}\n"
parsed += f"State: {issue['state']}\n"
parsed += f"Labels: {', '.join([label['name'] for label in issue['labels']])}\n"
return parsed
def parse_comments(comments: dict) -> str:
"""Parse comments data returned by GitHub into a text format.
See comments for parse_issue() for more details.
Args:
comments (dict): Comments data, as returned by GitHub (the JSON response).
Returns:
str: Parsed comments data.
"""
parsed = ""
for comment in comments:
parsed += f"Comment by: {comment['user']['login']}\n"
parsed += f"Comment on: {comment['created_at']}\n"
parsed += f"Body (between '''):\n'''\n{comment['body']}\n'''\n"
return parsed