# Generate Deep Review stats for templated use in the Meta Review

Note that some statistics are generated from the deep-review git submodule and correspond to the version specified by `commit`. Other statistics are generated from the GitHub API and reflect the repository's status at runtime, as specified by `creation_time_utc`.

In [47]:
import collections
import datetime
import json
import pathlib

import pandas
import requests
import yaml
import wget

In [48]:
# Ordered container for every statistic; insertion order is the order of the
# keys in the JSON written at the end of the notebook.
stats = collections.OrderedDict()
# datetime.utcnow() is deprecated (Python 3.12+). Take an aware UTC timestamp
# and drop the tzinfo so isoformat() keeps the offset-free layout
# (e.g. 2021-04-28T16:40:32.756920).
now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
stats['creation_time_utc'] = now.isoformat()
stats['creation_date_pretty'] = f'{now:%B %d, %Y}'

## Deep Review git repository stats

In [49]:
# Per-commit table, tab-separated (read_table defaults to a tab delimiter).
commit_df = pandas.read_table('commits.tsv')
# Preview the two last rows of the table.
commit_df.tail(2)

Unnamed: 0,commit,author_name,author_email,committer_name,authored_datetime,committed_datetime,summary,count,merge,parents,words_added,words_deleted,characters_added,characters_deleted
4406,889a954939a92b177c86efd5154f240cf2620d05,Halie Rando,halie.rando@pennmedicine.upenn.edu,Halie Rando,2021-04-26,2021-04-26 19:17:23-04:00,responses to @agitter's structural suggestions,4681.0,0.0,a824ec6b91a77bf839ce561c83db5f221613f9ad,1198.0,954.0,9057.0,7595.0
4407,05acaf59cd3a3701df8ce0ad113773e154774d82,Halie Rando,halie.rando@cuanschutz.edu,GitHub,2021-04-27,2021-04-27 07:59:36-04:00,Merge pull request #939 from rando2/methods,4682.0,1.0,"89dff08a5bb813c1bbc224275a347891ac8a532d, 889a...",2365.0,1491.0,17380.0,12011.0


In [50]:
# Record the hash in the last row of the commit table as the repository
# state (deep-review submodule) these statistics correspond to.
stats['commit'] = commit_df['commit'].iat[-1]

In [51]:
# One row per commit, so the row count is the total number of commits.
stats['total_commits'] = commit_df.shape[0]

In [52]:
# Count non-merge commits that added or deleted at least one character of
# the manuscript markdown source.
changed_text = (commit_df['characters_added'] > 0) | (commit_df['characters_deleted'] > 0)
not_a_merge = commit_df['merge'] == 0
writing_commit_df = commit_df[changed_text & not_a_merge]
stats['manuscript_commits'] = len(writing_commit_df)

### Number of formal Deep Review authors

In [53]:
# Fetch the manuscript metadata (title, keywords, author list, ...) from the
# master branch of the review repository.
metadataURL = "https://raw.githubusercontent.com/greenelab/covid19-review/master/content/metadata.yaml"
metadata_response = requests.get(metadataURL)
# Stop on an HTTP error instead of parsing an error page as YAML.
metadata_response.raise_for_status()
# Keep a local copy as before, but always overwrite it: wget.download() saves
# under a new numbered name when the target already exists, so a re-run would
# have re-read the stale metadata.yaml.
metadataDownload = "metadata.yaml"
pathlib.Path(metadataDownload).write_bytes(metadata_response.content)
# safe_load: the document is downloaded content, and yaml.load() without an
# explicit Loader is deprecated (and rejected by PyYAML >= 6).
metadata = yaml.safe_load(metadata_response.content)
print(metadata)

{'title': 'SARS-CoV-2 and COVID-19: An Evolving Review of Diagnostics and Therapeutics', 'keywords': ['covid-19', 'coronavirus', 'pandemic', 'viral infection'], 'lang': 'en-US', 'authors': [{'github': 'rando2', 'name': 'Halie M. Rando', 'initials': 'HMR', 'orcid': '0000-0001-7688-1770', 'twitter': 'tamefoxtime', 'email': 'halie.rando@cuanschutz.edu', 'contributions': ['Project Administration', 'Writing - Original Draft', 'Writing - Review & Editing', 'Methodology'], 'code of conduct': {'confirmed': True}, 'affiliations': ['Department of Systems Pharmacology and Translational Therapeutics, University of Pennsylvania, Philadelphia, Pennsylvania, United States of America', 'Department of Biochemistry and Molecular Genetics, University of Colorado School of Medicine, Aurora, Colorado, United States of America', 'Center for Health AI, University of Colorado School of Medicine, Aurora, Colorado, United States of America'], 'coi': {'string': 'None', 'lastapproved': '2021-01-20'}, 'funders': [

In [54]:
# Each entry under the 'authors' key of the metadata is one formal author.
authors = metadata['authors']
author_count = len(authors)
stats['review_authors'] = author_count

## GitHub repo stats

In [55]:
# Repository-level counters from the GitHub "get a repository" endpoint.
# https://developer.github.com/v3/repos/#get
response = requests.get('https://api.github.com/repos/greenelab/covid19-review')
# Fail here with the HTTP error rather than with a KeyError below when GitHub
# answers with an error payload (for example when the request is rate limited).
response.raise_for_status()
result = response.json()
stats['github_stars'] = result['stargazers_count']
stats['github_forks'] = result['forks_count']

### Number of pull requests

In [56]:
def github_issue_search(query, timeout=30):
    """
    Search issues and pull requests on GitHub and return the decoded result.

    https://developer.github.com/v3/search/#search-issues
    https://help.github.com/articles/searching-issues-and-pull-requests/

    Parameters
    ----------
    query : str
        Search string in GitHub's issue-search syntax, sent as the `q`
        parameter. Results are requested sorted by creation date, ascending.
    timeout : float
        Seconds to wait for GitHub before giving up (passed to requests.get).

    Returns
    -------
    dict
        Parsed JSON body of the response.

    Raises
    ------
    requests.HTTPError
        If the response status code is not 200.
    RuntimeError
        If the response reports `incomplete_results`.
    """
    url = 'https://api.github.com/search/issues'
    params = {
        'q': query,
        'sort': 'created',
        'order': 'asc',
    }
    response = requests.get(url, params=params, timeout=timeout)
    # Echo the final URL so the exact query is visible in the cell output.
    print(response.url)
    # Explicit checks instead of `assert`: assertions are stripped under
    # `python -O` and carry no diagnostic message.
    if response.status_code != 200:
        raise requests.HTTPError(
            f'GitHub search returned HTTP {response.status_code}: {response.text[:200]}',
            response=response,
        )
    result = response.json()
    if result['incomplete_results']:
        raise RuntimeError(f'GitHub returned incomplete results for query: {query}')
    return result

In [57]:
# Pull requests that were merged
merged_pr_query = 'repo:greenelab/covid19-review type:pr is:merged'
result = github_issue_search(merged_pr_query)
stats['merged_pull_requests'] = result['total_count']

https://api.github.com/search/issues?q=repo%3Agreenelab%2Fcovid19-review+type%3Apr+is%3Amerged&sort=created&order=asc


In [58]:
# Pull requests that were closed without being merged
declined_pr_query = 'repo:greenelab/covid19-review type:pr is:unmerged state:closed'
result = github_issue_search(declined_pr_query)
stats['declined_pull_requests'] = result['total_count']

https://api.github.com/search/issues?q=repo%3Agreenelab%2Fcovid19-review+type%3Apr+is%3Aunmerged+state%3Aclosed&sort=created&order=asc


In [59]:
# Pull requests that are still open
open_pr_query = 'repo:greenelab/covid19-review type:pr state:open'
result = github_issue_search(open_pr_query)
stats['open_pull_requests'] = result['total_count']

https://api.github.com/search/issues?q=repo%3Agreenelab%2Fcovid19-review+type%3Apr+state%3Aopen&sort=created&order=asc


In [60]:
# Issues that are still open
open_issue_query = 'repo:greenelab/covid19-review type:issue state:open'
result = github_issue_search(open_issue_query)
stats['open_issues'] = result['total_count']

https://api.github.com/search/issues?q=repo%3Agreenelab%2Fcovid19-review+type%3Aissue+state%3Aopen&sort=created&order=asc


In [61]:
# Issues that have been closed
closed_issue_query = 'repo:greenelab/covid19-review type:issue state:closed'
result = github_issue_search(closed_issue_query)
stats['closed_issues'] = result['total_count']

https://api.github.com/search/issues?q=repo%3Agreenelab%2Fcovid19-review+type%3Aissue+state%3Aclosed&sort=created&order=asc


## Write stats

In [62]:
# Serialize the collected statistics as JSON indented by two spaces, and show
# the text in the cell output.
stats_encoder = json.JSONEncoder(indent=2)
stats_str = stats_encoder.encode(stats)
print(stats_str)

{
  "creation_time_utc": "2021-04-28T16:40:32.756920",
  "creation_date_pretty": "April 28, 2021",
  "commit": "05acaf59cd3a3701df8ce0ad113773e154774d82",
  "total_commits": 4408,
  "manuscript_commits": 2509,
  "review_authors": 49,
  "github_stars": 105,
  "github_forks": 77,
  "merged_pull_requests": 507,
  "declined_pull_requests": 27,
  "open_pull_requests": 9,
  "open_issues": 318,
  "closed_issues": 83
}


In [63]:
# Write the serialized statistics to disk; the cell output is write_text's
# return value (the number of characters written).
stats_filename = 'covid19-review-stats.json'
path = pathlib.Path(stats_filename)
path.write_text(stats_str)

414