# Downloading issues and pull requests.

This document provides a basis for researching GitHub data locally.  It uses a repository's metadata to access its GitHub issues and pull requests.

In [1]:
    import pandas; import requests; from functools import partial
    from requests_cache import install_cache
    # NOTE(review): presumably makes the `requests` calls below cache their
    # responses under the name 'github-data', so re-running cells does not hit
    # the API again — confirm against the requests_cache documentation.
    install_cache('github-data')

In [19]:
    def get(url, **params):
        """Send a GET request to ``url`` with ``params`` as its query string.

        When the ``GITHUB`` environment variable is set, its value is sent as a
        GitHub token in the ``Authorization`` header.

        url: address to request.
        **params: query parameters; values may be any type ``requests`` can
            encode (strings, numbers, ...).

        Returns the response object produced by ``requests.get``.
        """
        import os

        token = os.environ.get('GITHUB')
        # GitHub rejects tokens passed as an ``access_token`` query parameter,
        # so the token travels in the Authorization header instead. This also
        # keeps it out of the request URL.
        headers = {'Authorization': 'token ' + token} if token else None
        # Let requests build and percent-encode the query string exactly once;
        # appending a hand-built copy to the URL sent every parameter twice.
        return requests.get(url, params=params, headers=headers)

In [20]:
    def info(x, **params):
        """Fetch the metadata of repository ``x`` as a ``pandas.Series``.

        x: value interpolated into ``https://api.github.com/repos/{}``
            (e.g. ``'jupyterlab/jupyterlab'``).
        **params: forwarded unchanged to ``get``.

        Returns a Series built from the decoded JSON body of the response.
        """
        repo_url = "https://api.github.com/repos/{}".format(x)
        response = get(repo_url, **params)
        return pandas.Series(response.json())

In [21]:
    def events(action, x, **params):
        """Download one page of a repository collection as a ``pandas.DataFrame``.

        action: prefix of the metadata key to follow; the URL is read from the
            ``{action}_url`` entry of ``info(x)`` (e.g. ``'pulls'``, ``'issues'``).
        x: repository identifier passed to ``info``.
        **params: query parameters forwarded to ``get`` (state, page, ...).

        Returns a DataFrame built from the decoded JSON body of the response.
        """
        import re

        template = info(x).loc[f'{action}_url']
        # The metadata URLs are URI templates such as ``.../pulls{/number}``.
        # Dropping every ``{...}`` expression yields the collection URL for any
        # template variable, not only ``/number`` (e.g. ``commits`` uses
        # ``{/sha}``, which previously raised KeyError).
        url = re.sub(r'\{[^{}]*\}', '', template)
        return pandas.DataFrame(get(url, **params).json())

In [22]:
    def pulls(
        project: str, 
        iter: int=3, 
        state: ('open', 'closed', 'all')='closed'
    ) -> iter:
        """Yield the pull requests of ``project``, one DataFrame per page.

        project: repository identifier passed to ``events``
            (e.g. ``'jupyterlab/jupyterlab'``).
        iter: maximum number of pages to download.
        state: pull request state to request: 'open', 'closed' or 'all'.

        Yields the DataFrame returned by ``events`` for each page, starting at
        page 1, and stops early after the first page that is not full.
        """
        # GitHub returns 30 items per page by default; a shorter page is the last.
        page_size = 30
        # Pages are 1-based, so the upper bound must be iter + 1 to really
        # download ``iter`` pages (``range(1, iter)`` stopped one page short).
        for page in range(1, iter + 1):
            result = events('pulls', project, state=state, page=str(page))
            yield result
            if len(result) < page_size:
                break


In [23]:
    def issues(
        project: str, 
        iter: int=3, 
        state: ('open', 'closed', 'all')='closed'
    ) -> iter:
        """Yield the issues of ``project``, one DataFrame per page.

        project: repository identifier passed to ``events``
            (e.g. ``'jupyterlab/jupyterlab'``).
        iter: maximum number of pages to download.
        state: issue state to request: 'open', 'closed' or 'all'.

        Yields the DataFrame returned by ``events`` for each page, starting at
        page 1, and stops early after the first page that is not full.
        """
        # GitHub returns 30 items per page by default; a shorter page is the last.
        page_size = 30
        # Pages are 1-based, so the upper bound must be iter + 1 to really
        # download ``iter`` pages (``range(1, iter)`` stopped one page short).
        for page in range(1, iter + 1):
            result = events('issues', project, state=state, page=str(page))
            yield result
            if len(result) < page_size:
                break


In [4]:
    def test_jupyterlab():
        """Smoke test: pull requests of jupyterlab/jupyterlab concatenate to a non-empty frame."""
        pages = list(pulls('jupyterlab/jupyterlab'))
        combined = pandas.concat(pages)
        assert len(combined)

In [None]:
    if __name__ == '__main__':
        # `!` is an IPython shell escape, so this cell is only valid under
        # IPython/Jupyter. It runs pytest (through ipython) against this notebook.
        # NOTE(review): presumably relies on a pytest plugin that can collect
        # tests from .ipynb files — confirm which one is expected.
        !ipython -m pytest -- 2018-08-25-Moving-github-data-into-dataframes.ipynb