# Generate stats for templated use in the Meta Review

Note that some statistics are generated from the git submodule, and correspond to the version specified by `commit`. Other statistics are generated from the GitHub API and reflect the repository's status at runtime, as specified by `creation_time_utc`.

In [1]:
import collections
import datetime
import json
import pathlib
import os

import pandas
import requests
import yaml
import wget

In [2]:
# Ordered container for every statistic; insertion order is the order of the
# keys in the JSON that is written at the end of the notebook.
stats = collections.OrderedDict()
# datetime.utcnow() is deprecated (Python 3.12+). Take an aware UTC timestamp
# and strip the tzinfo so the ISO string keeps its offset-free format.
now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
stats['creation_time_utc'] = now.isoformat()
stats['creation_date_pretty'] = f'{now:%B %d, %Y}'

## Git repository stats

In [3]:
# Load the tab-separated per-commit table and preview its last two rows
commits_path = 'commits.tsv'
commit_df = pandas.read_csv(commits_path, sep='\t')
commit_df.tail(n=2)

Unnamed: 0,commit,author_name,author_email,committer_name,authored_datetime,committed_datetime,summary,count,merge,parents,words_added,words_deleted,characters_added,characters_deleted
5041,48203f74d8283154d866d14aef2c7e87b337cd67,Halie Rando,halie.rando@cuanschutz.edu,GitHub,2021-09-09,2021-09-09 18:05:28-04:00,Merge branch 'master' into jf-edits,5316.0,1.0,"78418ae1e8e3f4c84f2d97310745ffd8d61d02d7, 2bcf...",529.0,0.0,3952.0,93.0
5042,89adbc3fed98ebf280331011aef5bf804bcbf77c,Anthony Gitter,agitter@users.noreply.github.com,GitHub,2021-09-10,2021-09-09 21:31:23-05:00,Merge pull request #1025 from rando2/jf-edits,5317.0,1.0,"2bcf287241085c340b0346af1b798cd9c84ac113, 4820...",1092.0,539.0,8315.0,4086.0


In [4]:
# Commit hash in the last row of the commit table, i.e. the repository
# version that the git-derived statistics correspond to
latest_commit = commit_df['commit'].iloc[-1]
stats['commit'] = latest_commit

In [5]:
# One row per commit, so the row count is the total number of commits
stats['total_commits'] = commit_df.shape[0]

In [6]:
# Count the non-merge commits that modified the manuscript markdown source,
# i.e. those that added or deleted at least one character
changed_text = (commit_df['characters_added'] > 0) | (commit_df['characters_deleted'] > 0)
not_merge = commit_df['merge'] == 0
writing_commit_df = commit_df[changed_text & not_merge]
stats['manuscript_commits'] = writing_commit_df.shape[0]

## GitHub repo stats

In [7]:
# Fetch repository-level metadata (stars, forks) from the GitHub REST API.
# https://developer.github.com/v3/repos/#get
response = requests.get(
    'https://api.github.com/repos/greenelab/covid19-review',
    timeout=30,  # avoid hanging the notebook forever if GitHub does not answer
)
# Stop here on an HTTP error (e.g. rate limiting) instead of failing below
# with an uninformative KeyError on the error payload.
response.raise_for_status()
result = response.json()
stats['github_stars'] = result['stargazers_count']
stats['github_forks'] = result['forks_count']

### Number of pull requests and issues

In [8]:
def github_issue_search(query, timeout=30):
    """
    Search issues and pull requests on GitHub.

    Sends `query` to the GitHub issue-search endpoint, sorted by creation
    date in ascending order, prints the final request URL, and returns the
    decoded JSON response (callers read its 'total_count' field).

    Parameters
    ----------
    query : str
        Search string in GitHub's issue-search syntax.
    timeout : float
        Seconds to wait for GitHub before giving up, so a stalled
        connection cannot hang the notebook indefinitely.

    Raises
    ------
    AssertionError
        If the HTTP status is not 200 or GitHub flags the result as
        incomplete.

    https://developer.github.com/v3/search/#search-issues
    https://help.github.com/articles/searching-issues-and-pull-requests/
    """
    url = 'https://api.github.com/search/issues'
    params = {
        'q': query,
        'sort': 'created',
        'order': 'asc',
    }
    response = requests.get(url, params=params, timeout=timeout)
    print(response.url)
    # Include the status and body so failures such as rate limiting are diagnosable
    assert response.status_code == 200, (
        f'GitHub search returned HTTP {response.status_code}: {response.text}'
    )
    result = response.json()
    assert not result['incomplete_results'], (
        f'GitHub returned incomplete results for query: {query}'
    )
    return result

In [9]:
# Pull requests that have been merged
merged_pr_query = 'repo:greenelab/covid19-review type:pr is:merged'
result = github_issue_search(query=merged_pr_query)
stats['merged_pull_requests'] = result['total_count']

https://api.github.com/search/issues?q=repo%3Agreenelab%2Fcovid19-review+type%3Apr+is%3Amerged&sort=created&order=asc


In [10]:
# Pull requests that were closed without being merged
declined_pr_query = 'repo:greenelab/covid19-review type:pr is:unmerged state:closed'
result = github_issue_search(query=declined_pr_query)
stats['declined_pull_requests'] = result['total_count']

https://api.github.com/search/issues?q=repo%3Agreenelab%2Fcovid19-review+type%3Apr+is%3Aunmerged+state%3Aclosed&sort=created&order=asc


In [11]:
# Pull requests that are still open
open_pr_query = 'repo:greenelab/covid19-review type:pr state:open'
result = github_issue_search(query=open_pr_query)
stats['open_pull_requests'] = result['total_count']

https://api.github.com/search/issues?q=repo%3Agreenelab%2Fcovid19-review+type%3Apr+state%3Aopen&sort=created&order=asc


In [12]:
# Issues that are still open
open_issue_query = 'repo:greenelab/covid19-review type:issue state:open'
result = github_issue_search(query=open_issue_query)
stats['open_issues'] = result['total_count']

https://api.github.com/search/issues?q=repo%3Agreenelab%2Fcovid19-review+type%3Aissue+state%3Aopen&sort=created&order=asc


In [13]:
# Issues that have been closed
closed_issue_query = 'repo:greenelab/covid19-review type:issue state:closed'
result = github_issue_search(query=closed_issue_query)
stats['closed_issues'] = result['total_count']

https://api.github.com/search/issues?q=repo%3Agreenelab%2Fcovid19-review+type%3Aissue+state%3Aclosed&sort=created&order=asc


## Write stats

In [14]:
# Render the collected statistics as 2-space-indented JSON and echo the text
stats_str = json.dumps(obj=stats, indent=2)
print(stats_str)

{
  "creation_time_utc": "2021-09-13T22:28:41.603097",
  "creation_date_pretty": "September 13, 2021",
  "commit": "89adbc3fed98ebf280331011aef5bf804bcbf77c",
  "total_commits": 5043,
  "manuscript_commits": 2886,
  "github_stars": 112,
  "github_forks": 76,
  "merged_pull_requests": 575,
  "declined_pull_requests": 33,
  "open_pull_requests": 6,
  "open_issues": 333,
  "closed_issues": 88
}


In [15]:
# Save the JSON text next to the notebook; the cell displays the number of
# characters written
path = pathlib.Path('covid19-review-stats.json')
with path.open('w') as stats_file:
    n_chars = stats_file.write(stats_str)
n_chars

394

In [20]:
# Clean up the temporary metadata file. No visible cell in this notebook
# creates it, so tolerate its absence to keep Restart & Run All working.
try:
    os.remove("metadata.yaml")
except FileNotFoundError:
    pass