In [2]:
# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from urllib.request import urlopen
from bs4 import BeautifulSoup

Load the script: `%load first-git-script.py`

In [8]:
# Basic script to get list of repos for given organization

# %load first-git-script.py
import json
import requests

# Root URL of the GitHub REST API; endpoint paths are appended directly to it.
api_url_base = 'https://api.github.com/'

# Headers passed along with the requests.get calls made in this notebook.
headers = {
    'Content-Type': 'application/json',
    'User-Agent': 'Python Student',
    'Accept': 'application/vnd.github.v3+json',
}

def get_repos(username, timeout=10):
    """Fetch the repository list of a GitHub organization.

    Sends a GET request to ``{api_url_base}orgs/{username}/repos`` using the
    module-level ``headers``.

    Parameters
    ----------
    username : str
        Organization login. Despite the parameter name, the value is
        interpolated into the ``orgs/`` endpoint.
    timeout : float or tuple, optional
        Seconds to wait for the server, forwarded to ``requests.get``.
        Defaults to 10.

    Returns
    -------
    object or None
        The decoded JSON body on HTTP 200; otherwise ``None``, after printing
        the status code and the URL that was called.

    Raises
    ------
    requests.exceptions.Timeout
        If the server does not answer within ``timeout``.
    """
    api_url = '{}orgs/{}/repos'.format(api_url_base, username)

    # Without an explicit timeout requests waits indefinitely, so a stalled
    # connection would hang the kernel.
    response = requests.get(api_url, headers=headers, timeout=timeout)

    if response.status_code != 200:
        print('[!] HTTP {0} calling [{1}]'.format(response.status_code, api_url))
        return None

    return response.json()

In [4]:
# NOTE: get_repos returns None on any non-200 response, so the indexing done
# on `recurse` in later cells fails if this request did not succeed.
recurse = get_repos('recursecenter')

In [46]:
# Display the first repo record to see which fields an entry carries.
recurse[0]

{'id': 7947018,
 'node_id': 'MDEwOlJlcG9zaXRvcnk3OTQ3MDE4',
 'name': 'hs-cli',
 'full_name': 'recursecenter/hs-cli',
 'private': False,
 'owner': {'login': 'recursecenter',
  'id': 1085202,
  'node_id': 'MDEyOk9yZ2FuaXphdGlvbjEwODUyMDI=',
  'avatar_url': 'https://avatars2.githubusercontent.com/u/1085202?v=4',
  'gravatar_id': '',
  'url': 'https://api.github.com/users/recursecenter',
  'html_url': 'https://github.com/recursecenter',
  'followers_url': 'https://api.github.com/users/recursecenter/followers',
  'following_url': 'https://api.github.com/users/recursecenter/following{/other_user}',
  'gists_url': 'https://api.github.com/users/recursecenter/gists{/gist_id}',
  'starred_url': 'https://api.github.com/users/recursecenter/starred{/owner}{/repo}',
  'subscriptions_url': 'https://api.github.com/users/recursecenter/subscriptions',
  'organizations_url': 'https://api.github.com/users/recursecenter/orgs',
  'repos_url': 'https://api.github.com/users/recursecenter/repos',
  'events_url

In [5]:
# Every repo record stores its short name under the 'name' key.
for repo in recurse:
    print(repo['name'])

hs-cli
webstack.jl
blaggregator
community
terminal_snake
sprockets-commonjs
proxy
eventmachine
heroku-buildpack-ruby
sprockets
recurse-lisp-workshop
RSVPBot
ca-tools
coding-bee


In [47]:
# Each repo record also has a 'contributors_url' key. Let's look at blaggregator, index 2.
recurse[2]['contributors_url']

'https://api.github.com/repos/recursecenter/blaggregator/contributors'

Next steps:

- use requests to hit that URL
- find out how much each person contributed (number of commits, lines of code)
- get additional details of each user listed (name, email, etc.)

In [54]:
# %load get_contributors.py
# Headers passed along with the requests.get call in get_contributors.
headers = {
    'Content-Type': 'application/json',
    'User-Agent': 'Python Student',
    'Accept': 'application/vnd.github.v3+json',
}

# URL format:
# https://api.github.com/repos/recursecenter/blaggregator/contributors

def get_contributors(repo, timeout=10):
    """Fetch the contributor list for one repository record.

    Parameters
    ----------
    repo : dict
        A repo record; its ``'name'`` and ``'contributors_url'`` keys are read.
    timeout : float or tuple, optional
        Seconds to wait for the server, forwarded to ``requests.get``.
        Defaults to 10.

    Returns
    -------
    dict or None
        ``{'name': <repo name>, 'contributions': <decoded JSON body>}`` on
        HTTP 200; otherwise ``None``, after printing the status code and the
        URL that was called.

    Raises
    ------
    requests.exceptions.Timeout
        If the server does not answer within ``timeout``.
    """
    name = repo['name']
    contrib_url = repo['contributors_url']

    # Without an explicit timeout requests waits indefinitely, so a stalled
    # connection would hang the kernel.
    response = requests.get(contrib_url, headers=headers, timeout=timeout)

    if response.status_code != 200:
        print('[!] HTTP {0} calling repo [{1}]'.format(response.status_code, contrib_url))
        return None

    return {'name': name, 'contributions': response.json()}

In [65]:
# Index 2 was 'blaggregator' in the repo listing printed earlier.
# The result is None if the request did not return HTTP 200.
blag_contributors = get_contributors(recurse[2])

In [66]:
# Sanity-check the result: repo name, first contributor record, contributor count.
print(blag_contributors['name'])
print(blag_contributors['contributions'][0])
print(len(blag_contributors['contributions']))

blaggregator
{'login': 'sursh', 'id': 719590, 'node_id': 'MDQ6VXNlcjcxOTU5MA==', 'avatar_url': 'https://avatars1.githubusercontent.com/u/719590?v=4', 'gravatar_id': '', 'url': 'https://api.github.com/users/sursh', 'html_url': 'https://github.com/sursh', 'followers_url': 'https://api.github.com/users/sursh/followers', 'following_url': 'https://api.github.com/users/sursh/following{/other_user}', 'gists_url': 'https://api.github.com/users/sursh/gists{/gist_id}', 'starred_url': 'https://api.github.com/users/sursh/starred{/owner}{/repo}', 'subscriptions_url': 'https://api.github.com/users/sursh/subscriptions', 'organizations_url': 'https://api.github.com/users/sursh/orgs', 'repos_url': 'https://api.github.com/users/sursh/repos', 'events_url': 'https://api.github.com/users/sursh/events{/privacy}', 'received_events_url': 'https://api.github.com/users/sursh/received_events', 'type': 'User', 'site_admin': False, 'contributions': 278}
18


In [67]:
# Flatten the contributor records into a list of row dicts,
# one {repo, username, contributions} entry per contributor.
all_repo_contributions = [
    {
        "repo": blag_contributors['name'],  # should come from variable in real thing
        "username": contributor['login'],
        "contributions": contributor['contributions'],
    }
    for contributor in blag_contributors['contributions']
]

print(all_repo_contributions)

[{'repo': 'blaggregator', 'username': 'sursh', 'contributions': 278}, {'repo': 'blaggregator', 'username': 'punchagan', 'contributions': 249}, {'repo': 'blaggregator', 'username': 'davidbalbert', 'contributions': 12}, {'repo': 'blaggregator', 'username': 'kenyavs', 'contributions': 10}, {'repo': 'blaggregator', 'username': 'stanzheng', 'contributions': 5}, {'repo': 'blaggregator', 'username': 'akaptur', 'contributions': 3}, {'repo': 'blaggregator', 'username': 'porterjamesj', 'contributions': 3}, {'repo': 'blaggregator', 'username': 'santialbo', 'contributions': 3}, {'repo': 'blaggregator', 'username': 'strugee', 'contributions': 2}, {'repo': 'blaggregator', 'username': 'danluu', 'contributions': 2}, {'repo': 'blaggregator', 'username': 'PuercoPop', 'contributions': 2}, {'repo': 'blaggregator', 'username': 'pnf', 'contributions': 2}, {'repo': 'blaggregator', 'username': 'alliejones', 'contributions': 1}, {'repo': 'blaggregator', 'username': 'nnja', 'contributions': 1}, {'repo': 'blaggr

In [40]:
# A list of row dicts goes straight into the DataFrame constructor (one row per dict).
pd.DataFrame(all_repo_contributions)

Unnamed: 0,contributions,repo,username
0,278,blaggregator,sursh
1,249,blaggregator,punchagan
2,12,blaggregator,davidbalbert
3,10,blaggregator,kenyavs
4,5,blaggregator,stanzheng
5,3,blaggregator,akaptur
6,3,blaggregator,porterjamesj
7,3,blaggregator,santialbo
8,2,blaggregator,strugee
9,2,blaggregator,danluu


=> Let's build a list with more than one repo to see what we need to change.

In [None]:
def build_contribution_list(repo, contributors):
    """Flatten one repository's contributor records into a list of row dicts.

    Parameters
    ----------
    repo : object
        Value stored under the ``"repo"`` key of every row (presumably the
        repository name).
    contributors : list of dict
        Contributor records; the ``'login'`` and ``'contributions'`` keys of
        each record are read.

    Returns
    -------
    list of dict
        One ``{"repo", "username", "contributions"}`` dict per contributor,
        in input order. An empty input yields an empty list.
    """
    # Build the complete list before returning; a return placed inside the
    # loop body would stop after the first contributor.
    return [
        {
            "repo": repo,
            "username": contributor['login'],
            "contributions": contributor['contributions'],
        }
        for contributor in contributors
    ]

In [44]:
# get_contributors expects the whole repo record (it reads both 'name' and
# 'contributors_url'), not just the URL string.
contributors1 = get_contributors(recurse[2])
contributors2 = get_contributors(recurse[3])



[{'login': 'sursh', 'id': 719590, 'node_id': 'MDQ6VXNlcjcxOTU5MA==', 'avatar_url': 'https://avatars1.githubusercontent.com/u/719590?v=4', 'gravatar_id': '', 'url': 'https://api.github.com/users/sursh', 'html_url': 'https://github.com/sursh', 'followers_url': 'https://api.github.com/users/sursh/followers', 'following_url': 'https://api.github.com/users/sursh/following{/other_user}', 'gists_url': 'https://api.github.com/users/sursh/gists{/gist_id}', 'starred_url': 'https://api.github.com/users/sursh/starred{/owner}{/repo}', 'subscriptions_url': 'https://api.github.com/users/sursh/subscriptions', 'organizations_url': 'https://api.github.com/users/sursh/orgs', 'repos_url': 'https://api.github.com/users/sursh/repos', 'events_url': 'https://api.github.com/users/sursh/events{/privacy}', 'received_events_url': 'https://api.github.com/users/sursh/received_events', 'type': 'User', 'site_admin': False, 'contributions': 278}, {'login': 'punchagan', 'id': 315678, 'node_id': 'MDQ6VXNlcjMxNTY3OA==', 