Documentation for the GitHub API can be found here https://developer.github.com/v3/


Also exploring the PyGitHub module found here https://github.com/PyGithub/PyGithub

### GitHub Native API

Let's explore GitHub's API first before testing the Python module.

The information we need for MVP consists of

- username
- commit history, broken down by day/hour
- languages used in repos

For stretch goal

- popular user owned repositories
- popular repositories that the user contributed to but didn't author

Other items?

- user avatar

In [163]:
# imports
import requests
import json
import dateutil.parser

# env vars
HEADERS = headers = {'Authorization': 'token ' + TOKEN}

In [143]:
# get user based on username
def get_user(username):
    """
    Returns JSON information for a given GitHub username
    
    Parameters
    ----------
    username : str
        GitHub username
    
    Returns
    -------
    response : json
        JSON object containing user information
    """
    r = requests.get('https://api.github.com/users/%s' % username,
                    headers=HEADERS)
    if r.ok:
        return r.json()
    else: 
        raise ValueError('GitHub user %s not found' % username)
        
# testing
# get_user('zangell44')

In [144]:
# get commit history from events
# THIS WILL NOT WORK SINCE EVENTS HAVE A LIMITED HISTORY
# THE LOGIC IS SIMILAR FOR GETTING EVENTS SPECIFIC TO A REPO
def get_commits(username):
    """
    Returns commit history for a given GitHub user
    
    Parameters
    ----------
    username : str
        GitHub username
    
    Returns
    -------
    commits : list
        List of commits including the repository and date/time
    """
    # get user info 
    user_info = get_user(username)
    # get events for user from Events API
    events = requests.get('https://api.github.com/users/%s/events' % username)
    
    # filter through events to get only commits
    commits = []
    for event in events.json():
        # filter down to push events
        if event['type'] == 'PushEvent':
            
            # repo name for commits
            repo = event['repo']['name']
            
            # loop through commits to get more detailed information
            for c in event['payload']['commits']:
                # check if this user made the commit
                if c['author']['name'] == username and c['distinct']:
                    commits.append(c)

    # include in commits list a dictionary containing
    # the following for each commit
    # 1. date and time
    # 2. repository to which the commit was made
    
    return commits

# testing
# get_commits('zangell44')

In [145]:
def get_commit_info(repository, commit_sha):
    """
    Gets more specific information for a given commit id
    """
    r = requests.get('https://api.github.com/repos/%s/git/commits/%s' 
                     % (repository, commit_sha),
                    headers=HEADERS)
    if r.ok:
        return r.json()
    else:
        raise ValueError('Could not find commit id % for repository %s'
                        % (commit_sha, repository))
        
def get_commit_date(repository, commit_sha, username=None):
    """
    Gets the commit timestamp and checks if it was committed
    by a specific user, if provided
    """
    commit_info = get_commit_info(repository, commit_sha)
    if username:
        if username != commit_info['committer']['name']:
            # TODO better error handling
            return None
    return commit_info['committer']['date']

# testing
# get_commit_date('zangell44/TwitOff', 'c826db0ee919a917c2aa328de5a45c6b9f010267', 'zangell44')

In [176]:
# parse repositories for commits
def get_repo_commits(username):
    """
    Gets commits made to user repositories
    """
    # get repo url from username
    repo_url = get_user(username)['repos_url'] + '?simple=yes&per_page=100&page=1'
    res = requests.get(repo_url,headers=HEADERS)
    # make sure we have all repos if count
    # exceeds 30
    repos = res.json()
    while 'next' in res.links.keys():
        res = requests.get(res.links['next']['url'],headers=HEADERS)
        repos.extend(res.json())
    
    commit_summary = {}
    commit_summary['day'], commit_summary['hour'] = {}, {}
    for i in range(0,7):
        commit_summary['day'][i] = 0
    for i in range(0,24):
        commit_summary['hour'][i] = 0
    
    # iterate through repos
    for r in repos:
        # for each repo, look through the commits
        commit_url = ('https://api.github.com/repos/%s/commits?author=%s'
                    % (r['full_name'], username))
        commits = requests.get(commit_url,
                               headers=HEADERS).json()
        # loop through commits to get timestamps
        for c in commits:
            date = dateutil.parser.parse(c['commit']['author']['date'])
            commit_summary['day'][date.weekday()] += 1
            commit_summary['hour'][date.hour] += 1
    
    day_new = {}
    for k, d in zip(range(0,7), ['Monday', 'Tuesday', 'Wednesday',
                             'Thursday', 'Friday', 'Saturday',
                             'Sunday']):
        day_new[d] = commit_summary['day'][k]
    
    commit_summary['day'] = day_new
    
    return commit_summary

get_repo_commits('zangell44')

{'day': {'Monday': 38,
  'Tuesday': 19,
  'Wednesday': 18,
  'Thursday': 46,
  'Friday': 36,
  'Saturday': 1,
  'Sunday': 1},
 'hour': {0: 16,
  1: 1,
  2: 6,
  3: 2,
  4: 0,
  5: 0,
  6: 0,
  7: 0,
  8: 0,
  9: 0,
  10: 0,
  11: 0,
  12: 0,
  13: 1,
  14: 0,
  15: 3,
  16: 4,
  17: 20,
  18: 26,
  19: 4,
  20: 6,
  21: 17,
  22: 22,
  23: 31}}

In [174]:
d = {}
d[1] = 2
d[1]

2

In [146]:
def get_user_summary(username):
    """
    Quickly returns a summary of user data.
    """
    # get repo url from username
    user_url = get_user(username)
    repo_url = user_url['repos_url'] + '?simple=yes&per_page=100&page=1'
    res = requests.get(repo_url,headers=HEADERS)
    # make sure we have all repos if count
    # exceeds 30
    repos = res.json()
    while 'next' in res.links.keys():
        res = requests.get(res.links['next']['url'],headers=HEADERS)
        repos.extend(res.json())
        
    # set up data structure to be returned
    data = {}
    data['user'] = username
    data['avatar'] = user_url['avatar_url']
    data['repos'] = []
    data['repo_count'] = 0
    data['languages'] = {}
    data['most_popular'] = (None, -1) # repo name and stargazer count
    
    # iterate through repos and update data
    for r in repos:
        # append new repo
        data['repos'].append(r['full_name'])
        data['repo_count'] += 1
        # update language count
        if r['language'] in data['languages'].keys():
            data['languages'][r['language']] += 1
        else: 
            data['languages'][r['language']] = 1
        # update most popular repos
        if r['stargazers_count'] > data['most_popular'][1]:
            data['most_popular'] = (r['full_name'], r['stargazers_count'])
    return data

get_user_summary('smashwilson')

{'user': 'smashwilson',
 'avatar': 'https://avatars2.githubusercontent.com/u/17565?v=4',
 'repos': ['smashwilson/ansible',
  'smashwilson/ansible-jenkins',
  'smashwilson/ansible-modules-core',
  'smashwilson/ansible-modules-extras',
  'smashwilson/ansibullbot',
  'smashwilson/apm',
  'smashwilson/apm-benchmarking',
  'smashwilson/ascension',
  'smashwilson/atom',
  'smashwilson/atom-achievements',
  'smashwilson/atom-autocomplete-modules',
  'smashwilson/atom-facepalm',
  'smashwilson/atom-giphy',
  'smashwilson/atom-message-panel',
  'smashwilson/auth-store',
  'smashwilson/awscli',
  'smashwilson/az-coordinator',
  'smashwilson/azurefire',
  'smashwilson/azurefire-infra',
  'smashwilson/azurefire-nginx',
  'smashwilson/azurefire-playbook',
  'smashwilson/azurefire-tls',
  'smashwilson/bdtg',
  'smashwilson/benbalter.github.com',
  'smashwilson/bundler',
  'smashwilson/canon-angular',
  'smashwilson/cashmonies',
  'smashwilson/checks-ahoy',
  'smashwilson/cloudpipe',
  'smashwilson/c

In [154]:
commit_url = 'https://api.github.com/repos/%s/commits?author=%s' 
              % ('zangell44/TwitOff', 'zangell44')
commits = requests.get(commit_url,
                      headers=HEADERS)

IndentationError: unexpected indent (<ipython-input-154-35dbf0ffce45>, line 2)

In [155]:
commits.json()[0]

{'sha': 'ab5d7a6a804d0279b25bf69c89fae72ea27d1d39',
 'node_id': 'MDY6Q29tbWl0MTczNzkyMzkwOmFiNWQ3YTZhODA0ZDAyNzliMjViZjY5Yzg5ZmFlNzJlYTI3ZDFkMzk=',
 'commit': {'author': {'name': 'Zach Angell',
   'email': '42625717+zangell44@users.noreply.github.com',
   'date': '2019-03-07T18:08:19Z'},
  'committer': {'name': 'GitHub',
   'email': 'noreply@github.com',
   'date': '2019-03-07T18:08:19Z'},
  'message': 'Delete .env',
  'tree': {'sha': '679580db1f1751b97703250267e8fe6ea5f73a4e',
   'url': 'https://api.github.com/repos/zangell44/TwitOff/git/trees/679580db1f1751b97703250267e8fe6ea5f73a4e'},
  'url': 'https://api.github.com/repos/zangell44/TwitOff/git/commits/ab5d7a6a804d0279b25bf69c89fae72ea27d1d39',
  'comment_count': 0,
  'verification': {'verified': True,
   'reason': 'valid',
   'signature': '-----BEGIN PGP SIGNATURE-----\n\nwsBcBAABCAAQBQJcgV4TCRBK7hj4Ov3rIwAAdHIIAHJX79vUkepJ6gVUIRSGtQDB\ng5Wp/3fW0z98e3F6D2G4x/qS5hVn0JHMB/AcY+yh3D1t9uNymJI7Jm3GTrs2PURX\nuZi2MH5FeCsmPlsYhkl0xowOA3C15c

In [113]:
get_user('zangell44')

{'login': 'zangell44',
 'id': 42625717,
 'node_id': 'MDQ6VXNlcjQyNjI1NzE3',
 'avatar_url': 'https://avatars0.githubusercontent.com/u/42625717?v=4',
 'gravatar_id': '',
 'url': 'https://api.github.com/users/zangell44',
 'html_url': 'https://github.com/zangell44',
 'followers_url': 'https://api.github.com/users/zangell44/followers',
 'following_url': 'https://api.github.com/users/zangell44/following{/other_user}',
 'gists_url': 'https://api.github.com/users/zangell44/gists{/gist_id}',
 'starred_url': 'https://api.github.com/users/zangell44/starred{/owner}{/repo}',
 'subscriptions_url': 'https://api.github.com/users/zangell44/subscriptions',
 'organizations_url': 'https://api.github.com/users/zangell44/orgs',
 'repos_url': 'https://api.github.com/users/zangell44/repos',
 'events_url': 'https://api.github.com/users/zangell44/events{/privacy}',
 'received_events_url': 'https://api.github.com/users/zangell44/received_events',
 'type': 'User',
 'site_admin': False,
 'name': 'Zach Angell',
 'c