Use fetch-issues.py to fetch issues

httpwg · Oct 19, 2015 · b69b534 · b69b534
1 parent 36ff6d8
commit b69b534
Show file tree

Hide file tree

Showing 3 changed files with 133 additions and 2 deletions.
diff --git a/Makefile b/Makefile
@@ -124,5 +124,6 @@ endif
 endif
 
 # backup issues
-issues:
-	curl https://api.github.com/repos/httpwg/http-extensions/issues?state=open > issues.json
+# Declared phony so the recipe runs on every `make issues.json`, even when an
+# issues.json file from an earlier run already exists.
+.PHONY: issues.json
+issues.json:
+	python lib/fetch-issues.py > issues.json
diff --git a/lib/fetch-issues.py b/lib/fetch-issues.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python
+
+"""
+Fetches all issues (open and closed) for the configured repository from the
+Github API v3, following pagination links, and prints them to stdout as JSON.
+Requests are sent without authentication, so the repository must be public.
+"""
+
+import json
+from parse_link import parse_link_value
+import requests
+import sys
+
+# Repository whose issues are fetched.
+repo = 'httpwg/http-extensions'  # format is username/repo
+# URL of the first page of the issue listing; state=all asks for closed
+# issues as well as open ones.
+repo_url = 'https://api.github.com/repos/%s/issues?state=all' % repo
+
+def getIssues(url, issues=None):
+    """
+    Fetch the page of issues at `url`, then every following page, and return
+    all of them as a single flat list.
+
+    url    -- URL of the first page to request.
+    issues -- list that results are added to (it is modified in place);
+              a new list is created when this is None.
+
+    Each requested URL is reported on stderr. Raises requests.HTTPError when
+    a response carries an error status.
+    """
+    sys.stderr.write("* %s\n" % url)
+    # Compare against None so that an empty list supplied by the caller is
+    # still used as the accumulator instead of being silently replaced.
+    if issues is None:
+        issues = []
+    res = requests.get(url)
+    # Stop on error responses instead of trying to treat the error body as
+    # a page of issues.
+    res.raise_for_status()
+    # Every page is a JSON array; extend (not append) keeps the result flat.
+    issues.extend(res.json())
+
+    if 'link' in res.headers:
+        links = parse_link_value(res.headers['link'])
+        rel_next = rel_last = None
+        for link, params in links.items():
+            rel = params.get('rel')
+            if rel == 'next':
+                rel_next = link
+            elif rel == 'last':
+                rel_last = link
+        # Prefer the "next" page; fall back to "last" only when no "next"
+        # link was advertised.
+        if rel_next:
+            getIssues(rel_next, issues)
+        elif rel_last:
+            getIssues(rel_last, issues)
+    return issues
+
+# Fetch every page of issues and write the result to stdout as JSON.
+issues = getIssues(repo_url)
+# A parenthesised single-argument print works as both the Python 2 print
+# statement and the Python 3 print function.
+print(json.dumps(issues, indent=1))
+
+
diff --git a/lib/parse_link.py b/lib/parse_link.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python
+
+"""
+HTTP Link Header Parsing
+Simple routines to parse and manipulate Link headers.
+"""
+
+__license__ = """
+Copyright (c) 2009 Mark Nottingham
+ 
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+ 
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+ 
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+import re
+
+# Regular-expression fragments, combined below with %-formatting; the
+# %(NAME)s placeholders are filled in from the names already defined here.
+#
+# TOKEN: one or more characters, none of which is ( ) < > @ , ; : \ " /
+# [ ] ? = { } space or tab. The quantifier is non-greedy.
+TOKEN = r'(?:[^\(\)<>@,;:\\"/\[\]\?={} \t]+?)'
+# QUOTED_STRING: a double-quoted string; a backslash-escaped quote inside it
+# does not terminate the string.
+QUOTED_STRING = r'(?:"(?:\\"|[^"])*")'
+# PARAMETER: a token, optionally followed by "=" and a token or quoted-string.
+PARAMETER = r'(?:%(TOKEN)s(?:=(?:%(TOKEN)s|%(QUOTED_STRING)s))?)' % locals()
+# LINK: "<...>" followed by any number of ";"-introduced parameters. The
+# parameter after a ";" is itself optional.
+LINK = r'<[^>]*>\s*(?:;\s*%(PARAMETER)s?\s*)*' % locals()
+# COMMA: one or more commas, with optional whitespace around them.
+COMMA = r'(?:\s*(?:,\s*)+)'
+# LINK_SPLIT: a LINK that is followed by COMMA or by the end of the string.
+# The follower is a lookahead, so it is not part of the match.
+LINK_SPLIT = r'%s(?=%s|\s*$)' % (LINK, COMMA)
+
+def _unquotestring(instr):
+    """
+    Remove the surrounding double quotes from a quoted-string and undo its
+    backslash escapes. Any other input is returned unchanged.
+
+    An empty string, or a string that is just a single '"', is not a
+    complete quoted-string and is returned as-is.
+    """
+    # The length check keeps instr[0] from raising IndexError on '' and stops
+    # a lone quote from being counted as both the opening and closing quote.
+    if len(instr) >= 2 and instr[0] == instr[-1] == '"':
+        instr = instr[1:-1]
+        # Replace each backslash escape with the character it protects.
+        instr = re.sub(r'\\(.)', r'\1', instr)
+    return instr
+def _splitstring(instr, item, split):
+    """
+    Return every match of the regex `item` in `instr` that is followed by the
+    regex `split` or by the end of the string, with surrounding whitespace
+    stripped from each match. A false `instr` yields an empty list.
+    """
+    if instr:
+        pattern = r'%s(?=%s|\s*$)' % (item, split)
+        return [piece.strip() for piece in re.findall(pattern, instr)]
+    return []
+
+# Compiled once at import time; parse_link_value uses it to find each link
+# in a header value.
+link_splitter = re.compile(LINK_SPLIT)
+
+def parse_link_value(instr):
+    """
+    Parse the value of a Link header and return a dictionary whose keys are
+    link URLs and whose values are dictionaries of the parameters for their
+    associated links.
+
+    The value may contain several comma-separated links; each one gets its
+    own entry. If the same URL occurs more than once, the last one wins.
+    An empty or missing value gives an empty dictionary.
+
+    Parameter names are lower-cased, quoted parameter values are unquoted,
+    and a parameter that has no value is mapped to None.
+
+    Note that internationalised parameters (e.g., title*) are
+    NOT percent-decoded.
+
+    Also, only the last instance of a given parameter will be included.
+
+    For example,
+
+    >>> parse_link_value('</foo>; rel="self"; title*=utf-8\'de\'letztes%20Kapitel')
+    {'/foo': {'title*': "utf-8'de'letztes%20Kapitel", 'rel': 'self'}}
+
+    """
+    out = {}
+    if not instr:
+        return out
+    for link in link_splitter.findall(instr):
+        # Every match has the form '<url>' + parameters; cut at the first '>'.
+        url, params = link.strip().split(">", 1)
+        url = url[1:]  # drop the leading '<'
+        param_dict = {}
+        for param in _splitstring(params, PARAMETER, r"\s*;\s*"):
+            name, sep, value = param.partition("=")
+            if sep:
+                param_dict[name.lower()] = _unquotestring(value)
+            else:
+                # Bare parameter with no "=value" part.
+                param_dict[name.lower()] = None
+        out[url] = param_dict
+    return out
+
+
+if __name__ == '__main__':
+    # Command-line use: parse the header value given as the first argument
+    # and print the resulting dictionary.
+    import sys
+    if len(sys.argv) > 1:
+        # A parenthesised single-argument print works as both the Python 2
+        # print statement and the Python 3 print function.
+        print(parse_link_value(sys.argv[1]))