Documentation for the GitHub API can be found here https://developer.github.com/v3/


Also exploring the PyGitHub module found here https://github.com/PyGithub/PyGithub

### GitHub Native API

Let's explore GitHub's API first before testing the Python module.

The information we need for MVP consists of

- username
- commit history, broken down by day/hour
- languages used in repos

For stretch goal

- popular user owned repositories
- popular repositories that the user contributed to but didn't author

Other items?

- user avatar

In [32]:
# imports
import requests
import json
import pandas as pd
import numpy as np

In [38]:
# get user based on username
def get_user(username):
    """
    Returns JSON information for a given GitHub username
    
    Parameters
    ----------
    username : str
        GitHub username
    
    Returns
    -------
    response : json
        JSON object containing user information
    """
    r = requests.get('https://api.github.com/users/%s' % username)
    
    if r.ok:
        return r.json()
    else: 
        raise ValueError('GitHub user %s not found' % username)
        
# testing
get_user('zangell44')

{'login': 'zangell44',
 'id': 42625717,
 'node_id': 'MDQ6VXNlcjQyNjI1NzE3',
 'avatar_url': 'https://avatars0.githubusercontent.com/u/42625717?v=4',
 'gravatar_id': '',
 'url': 'https://api.github.com/users/zangell44',
 'html_url': 'https://github.com/zangell44',
 'followers_url': 'https://api.github.com/users/zangell44/followers',
 'following_url': 'https://api.github.com/users/zangell44/following{/other_user}',
 'gists_url': 'https://api.github.com/users/zangell44/gists{/gist_id}',
 'starred_url': 'https://api.github.com/users/zangell44/starred{/owner}{/repo}',
 'subscriptions_url': 'https://api.github.com/users/zangell44/subscriptions',
 'organizations_url': 'https://api.github.com/users/zangell44/orgs',
 'repos_url': 'https://api.github.com/users/zangell44/repos',
 'events_url': 'https://api.github.com/users/zangell44/events{/privacy}',
 'received_events_url': 'https://api.github.com/users/zangell44/received_events',
 'type': 'User',
 'site_admin': False,
 'name': 'Zach Angell',
 'c

In [23]:
# get commit history from events
# THIS WILL NOT WORK SINCE EVENTS HAVE A LIMITED HISTORY
# THE LOGIC IS SIMILAR FOR GETTING EVENTS SPECIFIC TO A REPO
def get_commits(username):
    """
    Returns commit history for a given GitHub user
    
    Parameters
    ----------
    username : str
        GitHub username
    
    Returns
    -------
    commits : list
        List of commits including the repository and date/time
    """
    # get user info 
    user_info = get_user(username)
    # get events for user from Events API
    events = requests.get('https://api.github.com/users/%s/events' % username)
    
    # filter through events to get only commits
    commits = []
    for event in events.json():
        # filter down to push events
        if event['type'] == 'PushEvent':
            
            # repo name for commits
            repo = event['repo']['name']
            
            # loop through commits to get more detailed information
            for c in event['payload']['commits']:
                # check if this user made the commit
                if c['author']['name'] == username and c['distinct']:
                    commits.append(c)

    # include in commits list a dictionary containing
    # the following for each commit
    # 1. date and time
    # 2. repository to which the commit was made
    
    return commits

# testing
get_commits('zangell44')

zangell44/DS-Unit-3-Sprint-4-Productization-and-Cloud
zangell44/DS-Unit-3-Sprint-4-Productization-and-Cloud
zangell44/TwitOff
zangell44/TwitOff
zangell44/TwitOff
zangell44/TwitOff
zangell44/TwitOff
zangell44/TwitOff
zangell44/TwitOff
zangell44/TwitOff
zangell44/TwitOff
zangell44/TwitOff


[{'sha': '108e5d5e4f13aea8f0d4e94e6ea86d7e20255624',
  'author': {'email': '42625717+zangell44@users.noreply.github.com',
   'name': 'zangell44'},
  'message': 'sprint challenge commited',
  'distinct': True,
  'url': 'https://api.github.com/repos/zangell44/DS-Unit-3-Sprint-4-Productization-and-Cloud/commits/108e5d5e4f13aea8f0d4e94e6ea86d7e20255624'},
 {'sha': '77024f615dbd434b5b015c91459f7908897e442e',
  'author': {'email': '42625717+zangell44@users.noreply.github.com',
   'name': 'zangell44'},
  'message': 'pulling in aarons edits',
  'distinct': True,
  'url': 'https://api.github.com/repos/zangell44/DS-Unit-3-Sprint-4-Productization-and-Cloud/commits/77024f615dbd434b5b015c91459f7908897e442e'},
 {'sha': '8df2560d1e4a19acf380747f4da1ca18717f7ded',
  'author': {'email': '42625717+zangell44@users.noreply.github.com',
   'name': 'zangell44'},
  'message': 'flask sql alchemy included in pipenv',
  'distinct': True,
  'url': 'https://api.github.com/repos/zangell44/TwitOff/commits/8df2560d1

In [53]:
def get_commit_info(repository, commit_sha):
    """
    Gets more specific information for a given commit id
    """
    r = requests.get('https://api.github.com/repos/%s/git/commits/%s' 
                     % (repository, commit_sha))
    if r.ok:
        return r.json()
    else:
        raise ValueError('Could not find commit id % for repository %s'
                        % (commit_sha, repository))
        
def get_commit_date(repository, commit_sha, username=None):
    """
    Gets the commit timestamp and checks if it was committed
    by a specific user, if provided
    """
    commit_info = get_commit_info(repository, commit_sha)
    if username:
        if username != commit_info['committer']['name']:
            # TODO better error handling
            return None
    return commit_info['committer']['date']

# testing
get_commit_date('zangell44/TwitOff', 'c826db0ee919a917c2aa328de5a45c6b9f010267', 'zangell44')

'2019-03-07T17:23:04Z'

In [55]:
# parse repositories for commits
def get_repo_commits(username):
    """
    Gets list of user repositories
    """
    # get repo url from username
    repo_url = get_user(username)['repos_url']
    repos = requests.get(repo_url)
    
    repo_info = []
    
    # iterate through repos
    for r in repos.json():
        # track the name, stars, forks
        r_dict = {}
        r_dict['full_name'] = r['full_name']
        # for each repo, look through events
        commits = []
        events = requests.get(r['events_url'])
        # find push events that are
        # filter down to push events
        for e in events.json():
            if e['type'] == 'PushEvent':
                # loop through commits to get more detailed information
                for c in e['payload']['commits']:
                    # check if this user made the commit
                    if c['distinct']:
                        print (r['full_name'], c['sha'])
                        commits.append(get_commit_date(r['full_name'], 
                                                       c['sha'],
                                                       username))
        
        r_dict['commits'] = commits
        
        # add dict to list of repos
        repo_info.append(r_dict)
    
    # 1. authored by the username
    # 2. distinct
    # if (1) and (2), add an item with repo name and time stamp
    # time stamp can be found using get_commit_info()
    
    return repo_info

get_repo_commits('zangell44')

ValueError: GitHub user zangell44 not found

In [15]:
import requests
events = requests.get('https://api.github.com/users/jreback/events/public').json()
len(events)

30

In [56]:
requests.get('https://api.github.com/repos/zangell44/AB-Demo/events').json()

{'message': "API rate limit exceeded for 24.34.52.56. (But here's the good news: Authenticated requests get a higher rate limit. Check out the documentation for more details.)",
 'documentation_url': 'https://developer.github.com/v3/#rate-limiting'}