In [1]:
import os

from github import Github

## You should authenticate with GitHub using your token, which can be done with `Github('token')`

In [2]:
# Authenticate with a personal access token read from the GITHUB_TOKEN
# environment variable, so the token itself never has to be typed into the
# notebook. When the variable is unset or empty, None is passed and the
# client is anonymous, as it was with the bare Github() call.
g = Github(os.environ.get("GITHUB_TOKEN") or None)

## Get the openshift organization

In [3]:
# Look up the 'openshift' organization through the GitHub client `g`.
org = g.get_organization('openshift')

In [2]:
# Will hold the clone URLs of the organization's repositories that use Go.
repo_list = []

In [15]:
# Will hold one {"name": <clone URL>} record per entry of repo_list.
repo_list_json = []

## Get the repository list

In [5]:
# Request the organization's repositories; the result is iterated once to
# filter by language.
org_repos = org.get_repos()

In [9]:
# Record the clone URL of every repository whose language breakdown lists Go.
repo_list.extend(
    repository.clone_url
    for repository in org_repos
    if 'Go' in repository.get_languages()
)

In [11]:
len(repo_list)

192

In [1]:
import json

In [13]:
# Persist the collected clone URLs to repo-list.json as a JSON array.
with open('repo-list.json', 'w') as out_file:
    out_file.write(json.dumps(repo_list))

In [16]:
# Wrap each clone URL in a {"name": ...} record. The list is rebuilt from
# scratch on every execution, so re-running this cell cannot append a second
# copy of every record to a list left over from an earlier run.
repo_list_json = [{"name": clone_url} for clone_url in repo_list]

In [18]:
# Overwrite repo-list.json with the {"name": ...} records.
with open('repo-list.json', 'w') as out_file:
    out_file.write(json.dumps(repo_list_json))

## Read the report generated by the depcheck tool

In [50]:
# Parse report.json; from here on repo_list refers to the parsed report
# entries.
with open('report.json', 'r') as report_file:
    repo_list = json.loads(report_file.read())

In [51]:
repo_list[0]

{'digest': '782f4967', 'name': 'github.com/davecgh/go-spew', 'version': ''}

In [52]:
len(repo_list)

12677

In [54]:
# Will hold the distinct repository identifiers taken from the report entries.
repo_set = set()

## An entry can also directly contain a repository. We use it when it is present; otherwise we use the package name as the repository name for now. The custom logic that maps a package name to its repository can be found in the upstream.go file.

In [55]:
# Prefer an entry's explicit 'repository'; fall back to its 'name' when the
# repository is missing or empty.
for entry in repo_list:
    repo_set.add(entry.get('repository') or entry.get('name'))

In [56]:
len(repo_set)

1815

## We write the list of potential repositories to a file, which is read by upstream.go

In [15]:
# Write the candidate repositories to repo-list.txt, one per line.
with open('repo-list.txt', 'w') as out_file:
    out_file.writelines("%s\n" % candidate for candidate in repo_set)

In [59]:
# Rebuild repo_set from gh-repo-list.txt. Lines are stored verbatim, so each
# entry keeps whatever line ending it had in the file.
repo_set = set()
with open('gh-repo-list.txt', 'r') as in_file:
    repo_set.update(in_file.readlines())

In [62]:
len(repo_set)

680

In [60]:
# Third '/'-separated field of every entry, i.e. the host part when the entry
# has the form scheme://host/...
prefix_set = set(entry.split('/')[2] for entry in repo_set)

In [61]:
prefix_set

{'bitbucket.org',
 'code.googlesource.com',
 'github.com',
 'go.googlesource.com',
 'launchpad.net'}

In [63]:
# Write exactly one repository per line. Entries that were read back from a
# file normally still carry their newline, but a file's last line may not;
# because a set is iterated in arbitrary order, such an entry would otherwise
# be glued to whichever entry is written after it. Normalising to a single
# trailing newline avoids that and leaves well-formed entries unchanged.
with open('openshift-repo-list.txt', 'w') as out_file:
    for repo in repo_set:
        out_file.write("%s\n" % repo.rstrip('\n'))

## All the go.googlesource.com packages are mirrored on github.com/golang/{package_name}, but their issues are tracked at https://github.com/golang/go

## Replace the URLs manually instead of programmatically, since that's easier

## Also, both of the launchpad.net packages are deprecated, so we don't care about them right now. They have been moved to gopkg.in, and if the manifest file is updated we will get the upstream repositories to monitor.

## For both of the code.googlesource.com packages, I had to find the upstream manually. We need to find an automated way of doing that, but since there are only two packages, we don't care for now.

### Let's combine the parent repositories and their dependent repositories so that we have a final list

In [75]:
def _strip_git_suffix(url):
    """Return `url` without a trailing '.git'; any other URL is returned unchanged."""
    return url[:-4] if url.endswith('.git') else url


child_repo_set = set()
parent_repo_set = set()

# Dependency ("child") repositories: keep only the entries containing 'https'.
with open('openshift-repo-list.txt', 'r') as f:
    for line in f:
        # strip() removes the newline and also any stray spaces or tabs, which
        # would otherwise hide a trailing '.git' from the suffix check.
        line = line.strip()
        if 'https' in line:
            child_repo_set.add(_strip_git_suffix(line))

# Organization ("parent") repositories: the 'name' of every record in
# repo-list.json. The suffix is removed with the same helper as above, so a
# name that does not end in '.git' is kept intact instead of losing its last
# four characters.
with open('repo-list.json', 'r') as f:
    repo_list = json.load(f)
    for repo in repo_list:
        parent_repo_set.add(_strip_git_suffix(repo['name']))

In [78]:
# Every repository that appears in either the child or the parent set.
final_set = child_repo_set | parent_repo_set

In [79]:
len(final_set)

851

In [81]:
# Emit the consolidated repository list, one entry per line.
with open('openshift-repo-list-consolidated.txt', 'w') as out_file:
    out_file.writelines("%s\n" % repo_url for repo_url in final_set)