Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
executable file 310 lines (278 sloc) 11.2 KB
#!/usr/bin/env python
"""
This script uses the GitHub API (via PyGithub) to list all of a user's
repositories (optionally including forks), searches each repository for
RepoStatus information in ``.repostatus.org``, ``repostatus.org``, or any file
in the root of the repository starting with ``readme`` (case-insensitive),
and outputs a listing of repos and their statuses.

Requirements
------------

* Python 2.7 or newer.
* pygithub - `pip install pygithub`
* requests - `pip install requests`

Usage
-----

You'll need to set your GitHub API token in your git config;
use `git config --global github.token <your token>` to set it
if not already present.

Copyright and License
---------------------

Copyright 2014-2018 Jason Antman
Free for any use provided that patches are submitted back to me.

The latest version of this script can be found at:
(the URL is missing from this copy of the file)

Changelog
---------

2018-04-01 jantman:
- Give user some help if non-standard library modules can't be imported
- Make RepoStatusOrg_GitHub_Checker a new-style class
- Update summary at top of this docstring
- Add -F/--fail-on-unknown option
- Python3 fixes
2016-05-18 jantman:
- add links to repo in HTML output
2016-05-17 jantman:
- add JSON and HTML output options
2014-12-25 jantman:
- initial script
"""
import sys
import argparse
import logging
import re
import subprocess
from base64 import b64decode
import json
from datetime import datetime
import requests
except ImportError:
'ERROR importing "requests". If it is not installed, please '
'"pip install requests"\n'
from github import Github
except ImportError:
'ERROR importing "github". If it is not installed, please '
'"pip install pygithub"\n'
# Log line layout: level, source file and line, right-aligned function name
# (padded to 20 columns), then the message itself.
FORMAT = (
    "[%(levelname)s %(filename)s:%(lineno)s - "
    "%(funcName)20s() ] %(message)s"
)
# Configure the root logger once at import time; INFO is the default level.
logging.basicConfig(format=FORMAT, level=logging.INFO)
class RepoStatusOrg_GitHub_Checker(object):
    """
    Check a user's GitHub repos for repostatus.org status identifiers.

    NOTE(review): this class was rebuilt from a copy of the file that had lost
    its indentation and a number of tokens and lines. Every spot where the
    original text could not be read back from what survived is marked with
    NOTE(review) below and should be confirmed against the upstream script.
    """

    # Root-level file names starting with "readme", in any letter case.
    readme_re = re.compile(r'^readme.*$', flags=re.I)
    # Badge URL; group 1 is the badge version, group 2 is the status name.
    url_re = re.compile(r'http[s]?:\/\/.*repostatus\.org\/badges\/(.+)\/(.+)\.svg', flags=re.I)
    # NOTE(review): both file names were blanked out in the damaged copy;
    # these values are an assumption based on the project name - confirm.
    _status_files = ('.repostatus.org', 'repostatus.org')

    def __init__(self, verbose=False):
        """
        Read the GitHub API token from the global git config and connect.

        :param verbose: when True, log this class's debug messages
        :type verbose: bool
        :raises SystemExit: (exit code 1) if ``git config`` reports no token
        """
        self.logger = logging.getLogger(self.__class__.__name__)
        if verbose:
            # NOTE(review): the statement under ``if verbose:`` was missing;
            # raising this logger to DEBUG is the assumed intent.
            self.logger.setLevel(logging.DEBUG)
        # try to get GitHub credentials
        try:
            token = subprocess.check_output(['git', 'config', '--global', 'github.token']).strip()
        except subprocess.CalledProcessError:
            self.logger.error("ERROR: no github token found. Set 'git config --global github.token' to your API token.")
            raise SystemExit(1)
        # check_output() returns bytes on Python 3; the API client wants text
        if isinstance(token, bytes):
            token = token.decode()
        # NOTE(review): this writes the API token to the debug log.
        self.logger.debug("got github token: {t}".format(t=token))
        self.logger.debug("connecting to GitHub API")
        self.g = Github(login_or_token=token)

    def check(self, github_user, include_forks=False):
        """
        Check all repositories of a given GitHub user (or organization) for
        status identifiers.

        Also stores the resolved user name on ``self.username``.

        :param github_user: github user or organization to check repos for,
          or None for logged in user
        :type github_user: string
        :param include_forks: when False, repositories that are forks are
          skipped and left out of the result
        :type include_forks: bool
        :returns: dictionary of repo name to 2-tuple (version, status name),
          or to None if no status was found for that repo
        :rtype: dict
        """
        res = {}
        if github_user is None:
            github_user = self.g.get_user().login
        self.username = github_user
        self.logger.debug("checking repos for user {u}".format(u=github_user))
        user = self.g.get_user(github_user)
        if user.type == 'Organization':
            self.logger.debug("user is an Organization; using organization instead")
            user = self.g.get_organization(user.login)
        self.logger.debug("user has {r} public repos and {p} owned private repos".format(r=user.public_repos, p=user.owned_private_repos))
        repos = user.get_repos()
        count = 0
        forks = 0
        for repo in repos:
            if repo.fork and not include_forks:
                self.logger.debug("ignoring fork: {r}".format(r=repo.name))
                forks += 1
                continue
            count += 1
            self.logger.debug("checking repo {r}".format(r=repo.name))
            candidates = self._find_candidate_files(repo)
            self.logger.debug("found {c} candidate files".format(c=len(candidates)))
            # No candidate files means there is nothing to fetch or search.
            status = None
            if candidates:
                status = self._find_status_for_files(repo, candidates)
            if status is not None:
                self.logger.debug("found status {s} for repo {r}".format(s=status, r=repo.name))
            else:
                self.logger.debug("found no status for repo {r}".format(r=repo.name))
            res[repo.name] = status
        self.logger.debug("checked {c} repos for user; ignored {f} forks".format(c=count, f=forks))
        return res

    def _find_status_for_files(self, repo, flist):
        """
        Given a list of files to search, returns the version
        and status name of the first matching status identifier URL found;
        searches the files in list order. Returns None if no match found

        :param repo: repository to check
        :type repo: github.Repository.Repository
        :param flist: list of files to search through, in order
        :type flist: list of strings (file paths)
        :rtype: 2-tuple (version, status name) or None
        """
        for f in flist:
            # NOTE(review): newer PyGithub releases may expose this as
            # get_contents() instead - confirm against the installed version.
            content = repo.get_file_contents(f)
            if content.encoding != 'base64':
                # NOTE(review): the name of this logging call was missing in
                # the damaged copy; warning level is an assumption.
                self.logger.warning(
                    "unknown encoding '%s' on file %s in repository %s",
                    content.encoding, content.path, repo.name
                )
                continue
            s = b64decode(content.content)
            if isinstance(s, bytes):
                # Decode tolerantly: a stray non-UTF-8 byte in a README must
                # not abort the whole run.
                s = s.decode('utf-8', 'replace')
            res = self.url_re.search(s)
            if res is not None:
                self.logger.debug("Match found in {f}: {u}".format(f=content.path, u=res.group(0)))
                return (res.group(1), res.group(2))
        return None

    def _find_candidate_files(self, repo):
        """
        Return a list of all files in the top directory/path of the repository
        which should be examined for a repostatus identifier.
        List is in the order they should be checked.

        :param repo: repository to check
        :type repo: github.Repository.Repository
        :rtype: list of string filenames
        """
        # Only regular files in the repository root are considered.
        files = [x.name for x in repo.get_dir_contents('/') if x.type == 'file']
        # README-style files, sorted lexicographically ignoring case
        readmes = sorted(
            (fname for fname in files if self.readme_re.match(fname)),
            key=lambda x: x.lower()
        )
        # NOTE(review): the statements that added the dedicated status files
        # were missing in the damaged copy; checking them before any README
        # is an assumption - confirm the intended order.
        dedicated = [fname for fname in self._status_files if fname in files]
        return dedicated + readmes
def parse_args(argv):
    """
    parse command line arguments/options

    :param argv: argument strings to parse, without the program name
      (e.g. ``sys.argv[1:]``)
    :type argv: list of strings
    :returns: parsed options with attributes ``verbose``, ``user``, ``forks``,
      ``format`` and ``fail_on_unknown``
    :rtype: argparse.Namespace
    """
    p = argparse.ArgumentParser(description=' GitHub parser')
    p.add_argument('-v', '--verbose', dest='verbose', action='store_true', default=False,
                   help='verbose output (internal debugging).')
    p.add_argument('-u', '--user', dest='user', type=str, default=None,
                   help='GitHub user or organization to check repos for; defaults to current user')
    p.add_argument('-f', '--forks', dest='forks', action='store_true', default=False,
                   help='also include forks')
    p.add_argument('-o', '--output-format', dest='format', action='store',
                   choices=['text', 'json', 'html'],
                   default='text', help='output format - (text|json|html) - default "text"')
    p.add_argument('-F', '--fail-on-unknown', dest='fail_on_unknown',
                   action='store_true', default=False,
                   help='exit 1 if any repos have an unknown status')
    return p.parse_args(argv)
def htmlout(output, username):
    """
    Render the results as an HTML page with one table row per repository.

    Rows are sorted by repository name and each name links to the repository
    on GitHub.

    NOTE(review): the HTML template was badly damaged in the copy this was
    rebuilt from. Every URL in it was blanked (the empty ``""`` attributes
    below are kept exactly as they survived), the structural tags and the
    second column header were missing, and the body of the trailing script
    block was lost. Restore those from the upstream script; until then the
    page renders as a plain, unsorted-by-click table.

    :param output: repository name to status string
    :type output: dict
    :param username: GitHub user or organization that owns the repositories
    :type username: string
    :returns: the complete HTML document
    :rtype: string
    """
    out = """
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "">
<html xmlns="">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=windows-1252">
<title> parse results for {user}</title>
<script src="" type="text/javascript"></script>
<script src="" type="text/javascript"></script>
<script src="" type="text/javascript"></script>
<link rel="stylesheet" href="" />
</head>
<body>
<table id="myTable" class="tablesorter">
<thead>
<tr>
<th>Repo Name</th>
<th>Status</th>
</tr>
</thead>
<tbody>
{tbody}</tbody>
</table>
<p><em>Generated by parser at {dt}</em></p>
{script}
</body>
</html>
"""
    # Substituted into the template as a value, so any literal braces in the
    # script would not need escaping for str.format().
    # NOTE(review): the original script body is missing - restore it.
    script = """
<script type="text/javascript">
</script>
"""
    row = " <tr><td><a href=\"%s\">%s</a></td><td>%s</td></tr>\n"
    # The link target was blanked in the damaged copy; the two arguments
    # (owner, repo) match the standard GitHub repository URL.
    tbody = ''.join(
        row % ('https://github.com/%s/%s' % (username, repo), repo, output[repo])
        for repo in sorted(output)
    )
    # NOTE(review): the timestamp expression was truncated after ``dt =``;
    # local time in ISO 8601 format is an assumption.
    dt = datetime.now().isoformat()
    out = out.format(user=username, dt=dt, tbody=tbody, script=script)
    return out
# Script entry point: check the repos, print the listing in the requested
# format, then log a summary and optionally fail on unknown statuses.
# NOTE(review): several statements here were missing or truncated in the copy
# this was rebuilt from; each assumed piece is marked below.
if __name__ == "__main__":
    args = parse_args(sys.argv[1:])
    # initialize the class
    checker = RepoStatusOrg_GitHub_Checker(verbose=args.verbose)
    # run the check
    results = checker.check(args.user, include_forks=args.forks)
    total = 0
    unknown = []
    output = {}
    maxlen = 0
    for repo in results:
        if results[repo] is None:
            # NOTE(review): this branch body was missing; recording the repo
            # as unknown and the 'UNKNOWN' label are assumptions.
            unknown.append(repo)
            s = 'UNKNOWN'
        else:
            # results hold (version, status name); only the name is shown
            s = results[repo][1]
        total += 1
        output[repo] = s
        if len(repo) > maxlen:
            maxlen = len(repo)
    if args.format == 'html':
        print(htmlout(output, checker.username))
    elif args.format == 'json':
        # NOTE(review): the JSON branch body was missing; a plain dump of
        # the name-to-status mapping is an assumption.
        print(json.dumps(output))
    else:
        # text
        # left-align repo names in a column one wider than the longest name
        fs = '{:<%d} {}' % ( maxlen + 1 )
        for repo in sorted(output):
            print(fs.format(repo, output[repo]))
    # NOTE(review): the logging call names on the next two messages were
    # missing; the checker's logger at info level is an assumption.
    checker.logger.info(
        "Found %d repos, %d with unknown status", total, len(unknown)
    )
    if len(unknown) > 0:
        checker.logger.info('Unknown repos: %s', unknown)
        if args.fail_on_unknown:
            raise SystemExit(1)