# Scraper test w/ OAuth token

In [3]:
# %load clear_get_repo_contributions.py
import json
import requests
import config

# One session object shared by every request made below.
session = requests.session()
## Tor proxy routing is switched off for now.
# session.proxies = {}
# session.proxies['http'] = 'socks5h://localhost:9050'
# session.proxies['https'] = 'socks5h://localhost:9050'

# API token read from the local `config` module.
API_TOKEN = config.api_key

# Root of the GitHub REST API; endpoint paths are appended to it.
api_url_base = 'https://api.github.com/'

# Headers passed along with every `session.get` call in this file.
headers = dict()
headers['Content-Type'] = 'application/json'
headers['User-Agent'] = 'python-requests/3.6.1'
headers['Accept'] = 'application/vnd.github.v3+json'
headers['Authorization'] = 'token %s' % API_TOKEN


def get_repos(orgname):
    """Return every repository of a GitHub organisation.

    The listing endpoint is paginated, so this follows the ``next`` link of
    each response until the last page has been read instead of stopping
    after the first page.

    Args:
        orgname: Login name of the organisation.

    Returns:
        A list holding the decoded JSON object of each repository, or
        ``None`` (after printing the HTTP status) as soon as any request
        answers with a status other than 200.
    """
    api_url = '{}orgs/{}/repos'.format(api_url_base, orgname)
    repos = []

    # Ask for the largest page size on the first request only; the "next"
    # URL handed back by the API already carries its own query string.
    params = {'per_page': 100}
    while api_url:
        # use session.get instead of request
        response = session.get(api_url, headers=headers, params=params)

        if response.status_code != 200:
            print('[!] HTTP {0} calling [{1}]'.format(response.status_code, api_url))
            return None

        repos.extend(response.json())
        # `links` is parsed from the Link header; no "next" means last page.
        api_url = response.links.get('next', {}).get('url')
        params = None

    return repos


def get_contributors(repo):
    """Fetch all contributors of one repository.

    Follows the paginated ``contributors_url`` until no ``next`` link is
    left. An HTTP 204 answer (the API's reply for a repository without any
    content) is treated as "no contributors" rather than as an error.

    Args:
        repo: Repository object as returned by ``get_repos``; its ``name``
            and ``contributors_url`` keys are read.

    Returns:
        The `contribution_response` dict ``{'name': <repo name>,
        'data': <list of contributor objects>}``, or ``None`` (after
        printing the HTTP status) if a request fails.
    """
    name = repo['name']
    contrib_url = repo['contributors_url']

    contributors = []
    page_url = contrib_url
    # Largest page size on the first request; later pages use the URL from
    # the Link header, which already includes its query string.
    params = {'per_page': 100}
    while page_url:
        response = session.get(page_url, headers=headers, params=params)

        if response.status_code == 204:
            # Empty repository: there is no body to decode.
            break
        if response.status_code != 200:
            print('[!] HTTP {0} calling repo [{1}]'.format(response.status_code, page_url))
            return None

        contributors.extend(response.json())
        page_url = response.links.get('next', {}).get('url')
        params = None

    # returns `contribution_response`
    return {'name': name,
            'data': contributors}


def build_contribution_list(contribution_response):
    """Flatten one repository's contributor data into plain records.

    Args:
        contribution_response: Dict with the repository ``name`` and a
            ``data`` list of contributor objects (each providing ``login``,
            ``contributions``, ``avatar_url`` and ``url``), or ``None`` when
            the contributor lookup failed.

    Returns:
        A list with one dict per contributor, keyed by ``repo``,
        ``username``, ``contributions``, ``avatar_url`` and ``profile_url``.
        The list is empty when ``contribution_response`` is ``None`` or has
        no contributors.
    """
    # A failed lookup is passed in as None; report it as "no contributions"
    # instead of raising TypeError.
    if contribution_response is None:
        return []

    return [
        {
            "repo": contribution_response['name'],
            "username": contributor['login'],
            "contributions": contributor['contributions'],
            "avatar_url": contributor['avatar_url'],
            "profile_url": contributor['url'],
        }
        for contributor in contribution_response['data']
    ]


def lookup_human_name(profile_url):
    """Return the ``name`` field of the user profile found at `profile_url`.

    Prints the HTTP status and returns ``None`` when the request does not
    answer with status 200.
    """
    profile = session.get(profile_url, headers=headers)
    status = profile.status_code

    # Bail out early on anything other than a successful answer.
    if status != 200:
        print('[!] HTTP {0} looking up user [{1}]'.format(status, profile_url))
        return None

    user = profile.json()
    return user['name']

# define list variable outside of get_all_contributions function
all_org_contributions = list()


def get_all_contributions(org):
    """Collect per-repository contribution records for an organisation.

    For each repository of `org` a list of contribution records is appended
    to the module-level ``all_org_contributions``; every record is then
    given a ``name`` key holding the contributor's profile name (``None``
    when the lookup fails or the profile has no name).

    The module-level list is not cleared first, so calling this repeatedly
    keeps adding to the results of earlier calls.

    Args:
        org: Login name of the organisation.

    Returns:
        None. Results are stored in ``all_org_contributions``.
    """
    print("Retrieving list of all repos for {}".format(org))
    repos = get_repos(org)
    if repos is None:
        # get_repos has already printed the HTTP error; nothing to iterate.
        print("[!] No repos retrieved for {}".format(org))
        return

    for repo in repos:
        # Print the repo's name, not the entire repository object.
        print("Building contributor commit list for {}".format(repo['name']))
        contributors = get_contributors(repo)
        if contributors is None:
            # Lookup failed (already reported); skip this repository.
            continue
        all_org_contributions.append(build_contribution_list(contributors))

    # new API call to add real names to dictionary
    print("Matching real names against contributor usernames...")
    # A contributor usually shows up in several repos; remember each lookup
    # so every profile is requested only once per call.
    known_names = {}
    total = len(all_org_contributions)
    for position, repo_contributions in enumerate(all_org_contributions, start=1):
        print("Searching repo {} of {}".format(position, total))
        for contribution in repo_contributions:
            profile_url = contribution['profile_url']
            if profile_url not in known_names:
                known_names[profile_url] = lookup_human_name(profile_url)
            contribution['name'] = known_names[profile_url]

    print("Contribution list complete!")

In [4]:
# Fill the module-level all_org_contributions list for the 'recursecenter' org.
get_all_contributions('recursecenter')

Retrieving list of all repos for 
Building contributor commit list for 
Building contributor commit list for 
Building contributor commit list for 
Building contributor commit list for 
Building contributor commit list for 
Building contributor commit list for 
Building contributor commit list for 
Building contributor commit list for 
Building contributor commit list for 
Building contributor commit list for 
Building contributor commit list for 
Building contributor commit list for 
Building contributor commit list for 
Building contributor commit list for 
Looking up real names for contributor usernames...
Contribution list complete!


In [7]:
# Show the contribution records stored in the first entry of the list.
all_org_contributions[0]

[{'repo': 'hs-cli',
  'username': 'zachallaun',
  'contributions': 51,
  'avatar_url': 'https://avatars0.githubusercontent.com/u/503938?v=4',
  'profile_url': 'https://api.github.com/users/zachallaun',
  'name': 'Zach Allaun'},
 {'repo': 'hs-cli',
  'username': 'davidbalbert',
  'contributions': 2,
  'avatar_url': 'https://avatars2.githubusercontent.com/u/123350?v=4',
  'profile_url': 'https://api.github.com/users/davidbalbert',
  'name': 'David Albert'}]

In [8]:
import pandas as pd

In [11]:
# The second argument was the undefined name `inde` (NameError), presumably a
# truncated `index_col=0`: use the CSV's first column as the row index.
df = pd.read_csv('golemfactory.csv', index_col=0)

In [12]:
# Preview the first rows of the loaded DataFrame.
df.head()

Unnamed: 0,"{'repo': 'golem', 'username': 'badb', 'contributions': 1381, 'avatar_url': 'https://avatars3.githubusercontent.com/u/483392?v=4', 'profile_url': 'https://api.github.com/users/badb', 'name': None}","{'repo': 'golem', 'username': 'mfranciszkiewicz', 'contributions': 1166, 'avatar_url': 'https://avatars2.githubusercontent.com/u/3238836?v=4', 'profile_url': 'https://api.github.com/users/mfranciszkiewicz', 'name': 'Marek Franciszkiewicz'}","{'repo': 'golem', 'username': 'jiivan', 'contributions': 865, 'avatar_url': 'https://avatars0.githubusercontent.com/u/293058?v=4', 'profile_url': 'https://api.github.com/users/jiivan', 'name': 'Dariusz Rybi'}","{'repo': 'golem', 'username': 'maaktweluit', 'contributions': 508, 'avatar_url': 'https://avatars0.githubusercontent.com/u/10008353?v=4', 'profile_url': 'https://api.github.com/users/maaktweluit', 'name': None}","{'repo': 'golem', 'username': 'chfast', 'contributions': 484, 'avatar_url': 'https://avatars1.githubusercontent.com/u/573380?v=4', 'profile_url': 'https://api.github.com/users/chfast', 'name': 'Paweł Bylica'}","{'repo': 'golem', 'username': 'shadeofblue', 'contributions': 431, 'avatar_url': 'https://avatars1.githubusercontent.com/u/7963044?v=4', 'profile_url': 'https://api.github.com/users/shadeofblue', 'name': None}","{'repo': 'golem', 'username': 'Krigpl', 'contributions': 302, 'avatar_url': 'https://avatars0.githubusercontent.com/u/3904252?v=4', 'profile_url': 'https://api.github.com/users/Krigpl', 'name': None}","{'repo': 'golem', 'username': 'etam', 'contributions': 279, 'avatar_url': 'https://avatars1.githubusercontent.com/u/220752?v=4', 'profile_url': 'https://api.github.com/users/etam', 'name': 'Adam Mizerski'}","{'repo': 'golem', 'username': 'banasiakadam60', 'contributions': 234, 'avatar_url': 'https://avatars2.githubusercontent.com/u/11528913?v=4', 'profile_url': 'https://api.github.com/users/banasiakadam60', 'name': None}","{'repo': 'golem', 'username': 'Wiezzel', 'contributions': 205, 
'avatar_url': 'https://avatars0.githubusercontent.com/u/9394821?v=4', 'profile_url': 'https://api.github.com/users/Wiezzel', 'name': 'Adam Wierzbicki'}",...,"{'repo': 'golem-rd', 'username': 'Elfoniok', 'contributions': 2, 'avatar_url': 'https://avatars2.githubusercontent.com/u/662433?v=4', 'profile_url': 'https://api.github.com/users/Elfoniok', 'name': None}","{'repo': 'golem-rd', 'username': 'pstiasny', 'contributions': 2, 'avatar_url': 'https://avatars2.githubusercontent.com/u/589320?v=4', 'profile_url': 'https://api.github.com/users/pstiasny', 'name': 'Paweł Stiasny'}","{'repo': 'golem-rd', 'username': 'nieznanysprawiciel', 'contributions': 1, 'avatar_url': 'https://avatars2.githubusercontent.com/u/10420306?v=4', 'profile_url': 'https://api.github.com/users/nieznanysprawiciel', 'name': None}","{'repo': 'golem-rd', 'username': 'lukasz-glen', 'contributions': 1, 'avatar_url': 'https://avatars1.githubusercontent.com/u/29129196?v=4', 'profile_url': 'https://api.github.com/users/lukasz-glen', 'name': None}","{'repo': 'golem-unlimited', 'username': 'prekucki', 'contributions': 201, 'avatar_url': 'https://avatars3.githubusercontent.com/u/56750?v=4', 'profile_url': 'https://api.github.com/users/prekucki', 'name': 'Przemysław Rekucki'}","{'repo': 'golem-unlimited', 'username': 'destruktiw', 'contributions': 184, 'avatar_url': 'https://avatars1.githubusercontent.com/u/22892941?v=4', 'profile_url': 'https://api.github.com/users/destruktiw', 'name': 'Hubert Banaszewski'}","{'repo': 'golem-unlimited', 'username': 'tworec', 'contributions': 141, 'avatar_url': 'https://avatars1.githubusercontent.com/u/12720209?v=4', 'profile_url': 'https://api.github.com/users/tworec', 'name': 'Piotr Chromiec'}","{'repo': 'golem-unlimited', 'username': 'filipgolem', 'contributions': 87, 'avatar_url': 'https://avatars3.githubusercontent.com/u/44880692?v=4', 'profile_url': 'https://api.github.com/users/filipgolem', 'name': None}","{'repo': 'golem-unlimited', 'username': 'marmistrz', 
'contributions': 10, 'avatar_url': 'https://avatars1.githubusercontent.com/u/2914938?v=4', 'profile_url': 'https://api.github.com/users/marmistrz', 'name': 'Marcin Mielniczuk'}","{'repo': 'golem-unlimited', 'username': 'badb', 'contributions': 1, 'avatar_url': 'https://avatars3.githubusercontent.com/u/483392?v=4', 'profile_url': 'https://api.github.com/users/badb', 'name': None}"
