# Generate Deep Review stats for templated use in the Meta Review

Note that some statistics are generated from the deep-review git submodule and correspond to the version specified by `commit`. Other statistics are generated from the GitHub API and reflect the repository's status at runtime, as specified by `creation_time_utc`.

In [1]:
import collections
import datetime
import json
import pathlib

import pandas
import requests
import yaml

In [2]:
# Ordered container for every statistic; insertion order is the order of the
# keys in the JSON written at the end of the notebook.
stats = collections.OrderedDict()
# Record when the runtime (GitHub API) statistics were collected.
# datetime.utcnow() is deprecated as of Python 3.12, so take an aware UTC time
# and drop the tzinfo: the stored string keeps the same offset-free ISO 8601
# form (the key name already states that the value is UTC).
stats['creation_time_utc'] = (
    datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None).isoformat()
)

## Deep Review git repository stats

In [3]:
# Read the tab-separated commit table and preview its last two rows.
commit_df = pandas.read_table(
    'commits.tsv',
    sep='\t',  # the read_table default, spelled out
)
commit_df.tail(2)

Unnamed: 0,commit,author_name,author_email,committer_name,authored_datetime,committed_datetime,summary,count,merge,parents,words_added,words_deleted,characters_added,characters_deleted
753,9c2d9c26b8fd40c321a80b854e2e74677cd38c79,Anthony Gitter,agitter@users.noreply.github.com,GitHub,2018-03-05 16:23:27-06:00,2018-03-05 16:23:27-06:00,Remove newline (#830),754,0,fafc92abf2716166060a22a3bc60d123011d3e5f,36,1,320,9
754,63d2468883ea69ad7ad638c39efab0fcbe026298,Anthony Gitter,agitter@users.noreply.github.com,Casey Greene,2018-03-06 07:17:34-06:00,2018-03-06 08:17:34-05:00,Remove duplicate reference (#831),755,0,9c2d9c26b8fd40c321a80b854e2e74677cd38c79,26,1,241,46


In [4]:
# Commit of the deep-review submodule that the git-derived statistics describe.
# NOTE(review): this takes the final row of commit_df, presumably the most
# recent commit -- confirm that commits.tsv is ordered oldest to newest.
stats['commit'] = commit_df['commit'].iloc[-1]

In [5]:
# Total number of rows in the commit table (merge commits included).
stats['total_commits'] = len(commit_df.index)

In [6]:
# Keep commits that added or deleted at least one character and are not merges,
# then record how many there are.
writing_commit_df = commit_df.query(
    "(characters_added > 0 or characters_deleted > 0) and merge == 0"
)
stats['manuscript_commits'] = len(writing_commit_df.index)

### Number of formal Deep Review authors

In [7]:
# Load the manuscript metadata from the deep-review submodule.
# yaml.safe_load is used instead of yaml.load: calling yaml.load without an
# explicit Loader is deprecated in PyYAML 5.1 and raises a TypeError in
# PyYAML 6, and the default loader could construct arbitrary Python objects.
path = pathlib.Path('deep-review/content/metadata.yaml')
with path.open() as read_file:
    metadata = yaml.safe_load(read_file)

In [8]:
# The 'author_info' entry of the metadata is counted as the list of formal
# authors (presumably one item per author -- verify against metadata.yaml).
authors = metadata['author_info']
stats.update(deep_review_authors=len(authors))

Authors who first contributed after May 28, 2017 ([date](https://github.com/greenelab/meta-review/issues/23#issuecomment-338833911) of first submission) but before January 19, 2018 ([date](https://github.com/greenelab/deep-review/issues/810#issue-290029515) of resubmission). Note that author names have not been standardized here.

In [9]:
# Names of authors with a manuscript-modifying commit up to the first
# submission, and of those with one between first submission and resubmission.
# NOTE(review): commits.tsv is read without date parsing, so these filters are
# presumably lexicographic string comparisons; a timestamp such as
# '2017-05-28 10:00:00-05:00' then sorts after '2017-05-28' and falls into the
# post-submission window -- confirm this boundary handling is intended.
pre_submission_authors = set(
    writing_commit_df.query("authored_datetime <= '2017-05-28'")['author_name']
)
post_submission_authors = set(
    writing_commit_df.query("'2017-05-28' < authored_datetime <= '2018-01-19'")['author_name']
)
# Authors seen only in the post-submission window
new_authors = post_submission_authors.difference(pre_submission_authors)
new_authors

{'Amr Alexandari',
 'Austin Huang',
 'Christopher Lavender',
 'David Slochower',
 'Michael Hoffman',
 'Michael Zietz',
 'Ruibang Luo',
 'Simina M. Boca',
 'Srini Turaga',
 'Venkat Malladi',
 'Yanjun Qi',
 'Zhiyong Lu'}

In [10]:
# Number of distinct author names that appear only in the post-submission window
stats['deep_review_post_submission_authors'] = len(new_authors)

## GitHub repo stats

In [11]:
# Fetch repository-level metadata (star and fork counts) from the GitHub API.
# https://developer.github.com/v3/repos/#get
response = requests.get('https://api.github.com/repos/greenelab/deep-review')
# Stop with the HTTP error itself (e.g. a rate-limit response) instead of an
# opaque KeyError when the expected fields are missing from an error payload.
response.raise_for_status()
result = response.json()
stats['github_stars'] = result['stargazers_count']
stats['github_forks'] = result['forks_count']

### Number of pull requests

In [12]:
def github_issue_search(query):
    """
    Search issues and pull requests on GitHub and return the decoded JSON body.

    The final request URL is printed so the exact query is recorded in the
    notebook output. Results are requested sorted by creation date, ascending.

    Parameters
    ----------
    query : str
        Search string using GitHub issue-search qualifiers, for example
        'repo:greenelab/deep-review type:pr is:merged'.

    Returns
    -------
    dict
        Decoded JSON response. Callers in this notebook read 'total_count'.

    Raises
    ------
    requests.HTTPError
        If GitHub answers with a 4xx or 5xx status (e.g. when rate limited).
    RuntimeError
        If the status is any other non-200 code, or if GitHub flags the
        response as having incomplete results.

    https://developer.github.com/v3/search/#search-issues
    https://help.github.com/articles/searching-issues-and-pull-requests/
    """
    url = 'https://api.github.com/search/issues'
    params = {
        'q': query,
        'sort': 'created',
        'order': 'asc',
    }
    response = requests.get(url, params=params)
    print(response.url)
    # Explicit exceptions rather than bare asserts: asserts carry no
    # diagnostics here and are removed entirely when Python runs with -O.
    if response.status_code != 200:
        response.raise_for_status()  # raises HTTPError for 4xx/5xx
        raise RuntimeError(
            'Unexpected HTTP status {} from {}'.format(response.status_code, response.url)
        )
    result = response.json()
    if result['incomplete_results']:
        raise RuntimeError(
            'GitHub returned incomplete search results for query: {}'.format(query)
        )
    return result

In [13]:
# Pull requests that were merged
result = github_issue_search(
    query='repo:greenelab/deep-review type:pr is:merged',
)
stats['merged_pull_requests'] = result['total_count']

https://api.github.com/search/issues?q=repo%3Agreenelab%2Fdeep-review+type%3Apr+is%3Amerged&sort=created&order=asc


In [14]:
# Pull requests that were closed without being merged
result = github_issue_search(
    query='repo:greenelab/deep-review type:pr is:unmerged state:closed',
)
stats['declined_pull_requests'] = result['total_count']

https://api.github.com/search/issues?q=repo%3Agreenelab%2Fdeep-review+type%3Apr+is%3Aunmerged+state%3Aclosed&sort=created&order=asc


In [15]:
# Pull requests that are still open
result = github_issue_search(
    query='repo:greenelab/deep-review type:pr state:open',
)
stats['open_pull_requests'] = result['total_count']

https://api.github.com/search/issues?q=repo%3Agreenelab%2Fdeep-review+type%3Apr+state%3Aopen&sort=created&order=asc


In [16]:
# Issues (excluding pull requests) that are still open
result = github_issue_search(
    query='repo:greenelab/deep-review type:issue state:open',
)
stats['open_issues'] = result['total_count']

https://api.github.com/search/issues?q=repo%3Agreenelab%2Fdeep-review+type%3Aissue+state%3Aopen&sort=created&order=asc


In [17]:
# Issues (excluding pull requests) that have been closed
result = github_issue_search(
    query='repo:greenelab/deep-review type:issue state:closed',
)
stats['closed_issues'] = result['total_count']

https://api.github.com/search/issues?q=repo%3Agreenelab%2Fdeep-review+type%3Aissue+state%3Aclosed&sort=created&order=asc


## Write stats

In [18]:
# Serialize the collected statistics as indented JSON, keeping the insertion
# order of the keys, and echo the text into the notebook output.
stats_str = json.dumps(
    stats,
    indent=2,
    sort_keys=False,  # the default, spelled out: keys stay in insertion order
)
print(stats_str)

{
  "creation_time_utc": "2018-06-14T18:24:51.421326",
  "commit": "63d2468883ea69ad7ad638c39efab0fcbe026298",
  "total_commits": 755,
  "manuscript_commits": 349,
  "deep_review_authors": 36,
  "deep_review_post_submission_authors": 12,
  "github_stars": 616,
  "github_forks": 165,
  "merged_pull_requests": 315,
  "declined_pull_requests": 22,
  "open_pull_requests": 0,
  "open_issues": 322,
  "closed_issues": 215
}


In [19]:
# Write the JSON text to deep-review-stats.json, relative to the working directory.
path = pathlib.Path('deep-review-stats.json')
# write_text returns the number of characters written; as the last expression
# of the cell, that count is what the notebook displays.
path.write_text(stats_str)

420