In [2]:
# %load clear_get_repo_contributions.py
import json
import requests
import csv
import config

session = requests.session()
## Comment out Tor proxy for now.
# session.proxies = {}
# session.proxies['http'] = 'socks5h://localhost:9050'
# session.proxies['https'] = 'socks5h://localhost:9050'

API_TOKEN = config.api_key

api_url_base = 'https://api.github.com/'
headers = {'Content-Type': 'application/json',
           'User-Agent': 'python-requests/3.6.1',
           'Accept': 'application/vnd.github.v3+json',
           'Authorization': 'token %s' % API_TOKEN}


def get_repos(orgname):
    api_url = '{}orgs/{}/repos'.format(api_url_base, orgname)
    # use session.get instead of request
    response = session.get(api_url, headers=headers)

    if response.status_code == 200:
        return (response.json())
    else:
        print('[!] HTTP {0} calling [{1}]'.format(response.status_code, api_url))
        return None


def get_contributors(repo):
    name = repo['name']
    contrib_url = repo['contributors_url']
    
    response = session.get(contrib_url, headers=headers)

    if response.status_code == 200:
        return (
            # returns `contribution_response`
            {'name': name,
             'data': response.json()}
        )
    else:
        print('[!] HTTP {0} calling repo [{1}]'.format(response.status_code, contrib_url))
        return None


def build_contribution_list(contribution_response):
    all_repo_contributions = list()

    for i in range(0,len(contribution_response['data'])):
        ctr = dict()
        ctr["repo"] = contribution_response['name']
        ctr["username"] = contribution_response['data'][i]['login']
        ctr["contributions"] = contribution_response['data'][i]['contributions']
        ctr["avatar_url"] = contribution_response['data'][i]['avatar_url']
        ctr["profile_url"] = contribution_response['data'][i]['url']
        all_repo_contributions.append(ctr)
    
    return all_repo_contributions


def lookup_human_name(profile_url):    
    response = session.get(profile_url, headers=headers)

    if response.status_code == 200:
        return (response.json()['name'])
    else:
        print('[!] HTTP {0} looking up user [{1}]'.format(response.status_code, profile_url))
        return None


def append_list_to_csv(mylist, output_file):
    with open(output_file, 'a') as myfile:
        wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
        wr.writerow(mylist)

# define list variable outside of get_all_contributions function
all_org_contributions = list()
  
def get_all_contributions(org):
    print("Retrieving list of all repos for {}".format(org))
    repos = get_repos(org)

    for repo in repos:
        print("Building contributor commit list for {}".format(repo['full_name']))
        contributors = get_contributors(repo)
        contribution_list = build_contribution_list(contributors)
        all_org_contributions.append(contribution_list)

    # new API call to add real names to dictionary
    print("Matching real names against contributor usernames...")
    for i in range (0, len(all_org_contributions)):
        print("Searching repo {} of {}".format(i,len(all_org_contributions)))
        for j in range (0, len(all_org_contributions[i])):
            human_name = lookup_human_name(all_org_contributions[i][j]['profile_url'])
            all_org_contributions[i][j]['name'] = human_name

    print("Contribution list complete!")

############

## This part isn't in the right format
    print("Writing to file.")

    file_name = "{}.csv".format(org)
    flat_contributions = [item for sublist in all_org_contributions for item in sublist]
    ## Do this row by row.
    for item in flat_contributions:
        append_list_to_csv(item, file_name)

    print("Contributor list saved as {}".format(file_name))

In [3]:
get_all_contributions('recursecenter')

Retrieving list of all repos for recursecenter
Building contributor commit list for recursecenter/hs-cli
Building contributor commit list for recursecenter/webstack.jl
Building contributor commit list for recursecenter/blaggregator
Building contributor commit list for recursecenter/community
Building contributor commit list for recursecenter/terminal_snake
Building contributor commit list for recursecenter/sprockets-commonjs
Building contributor commit list for recursecenter/proxy
Building contributor commit list for recursecenter/eventmachine
Building contributor commit list for recursecenter/heroku-buildpack-ruby
Building contributor commit list for recursecenter/sprockets
Building contributor commit list for recursecenter/recurse-lisp-workshop
Building contributor commit list for recursecenter/RSVPBot
Building contributor commit list for recursecenter/ca-tools
Building contributor commit list for recursecenter/coding-bee
Matching real names against contributor usernames...
Searching

In [6]:
all_org_contributions

[[{'repo': 'hs-cli',
   'username': 'zachallaun',
   'contributions': 51,
   'avatar_url': 'https://avatars0.githubusercontent.com/u/503938?v=4',
   'profile_url': 'https://api.github.com/users/zachallaun',
   'name': 'Zach Allaun'},
  {'repo': 'hs-cli',
   'username': 'davidbalbert',
   'contributions': 2,
   'avatar_url': 'https://avatars2.githubusercontent.com/u/123350?v=4',
   'profile_url': 'https://api.github.com/users/davidbalbert',
   'name': 'David Albert'}],
 [{'repo': 'webstack.jl',
   'username': 'danielmendel',
   'contributions': 63,
   'avatar_url': 'https://avatars3.githubusercontent.com/u/304202?v=4',
   'profile_url': 'https://api.github.com/users/danielmendel',
   'name': 'Daniel Espeset'},
  {'repo': 'webstack.jl',
   'username': 'astrieanna',
   'contributions': 36,
   'avatar_url': 'https://avatars3.githubusercontent.com/u/1205394?v=4',
   'profile_url': 'https://api.github.com/users/astrieanna',
   'name': 'Leah Hanson'},
  {'repo': 'webstack.jl',
   'username': 

In [4]:
import pandas as pd

In [5]:
pd.read_csv('recursecenter.csv')

Unnamed: 0,repo,username,contributions,avatar_url,profile_url,name
0,repo,username,contributions,avatar_url,profile_url,name
1,repo,username,contributions,avatar_url,profile_url,name
2,repo,username,contributions,avatar_url,profile_url,name
3,repo,username,contributions,avatar_url,profile_url,name
4,repo,username,contributions,avatar_url,profile_url,name
5,repo,username,contributions,avatar_url,profile_url,name
6,repo,username,contributions,avatar_url,profile_url,name
7,repo,username,contributions,avatar_url,profile_url,name
8,repo,username,contributions,avatar_url,profile_url,name
9,repo,username,contributions,avatar_url,profile_url,name


## Now we have the object let's try writing it in a different form

In [8]:
# def append_list_to_csv(mylist, output_file):
#     with open(output_file, 'a') as myfile:
#         wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
#         wr.writerow(mylist)

In [9]:
# ## This part isn't in the right format
#     print("Writing to file.")

#     file_name = "{}.csv".format(org)
#     flat_contributions = [item for sublist in all_org_contributions for item in sublist]
#     ## Do this row by row.
#     for item in flat_contributions:
#         append_list_to_csv(item, file_name)

In [12]:
# practice with test data
test_data = all_org_contributions[1]

In [23]:
type(test_data)

list

In [27]:
for item in test_data:
    print(item.values()

dict_values(['webstack.jl', 'danielmendel', 63, 'https://avatars3.githubusercontent.com/u/304202?v=4', 'https://api.github.com/users/danielmendel', 'Daniel Espeset'])
dict_values(['webstack.jl', 'astrieanna', 36, 'https://avatars3.githubusercontent.com/u/1205394?v=4', 'https://api.github.com/users/astrieanna', 'Leah Hanson'])
dict_values(['webstack.jl', 'zachallaun', 19, 'https://avatars0.githubusercontent.com/u/503938?v=4', 'https://api.github.com/users/zachallaun', 'Zach Allaun'])
dict_values(['webstack.jl', 'chuckha', 8, 'https://avatars0.githubusercontent.com/u/98927?v=4', 'https://api.github.com/users/chuckha', 'Chuck Ha'])
dict_values(['webstack.jl', 'ncollins', 4, 'https://avatars1.githubusercontent.com/u/607298?v=4', 'https://api.github.com/users/ncollins', 'Nick Collins'])
dict_values(['webstack.jl', 'maxlikely', 3, 'https://avatars1.githubusercontent.com/u/1237055?v=4', 'https://api.github.com/users/maxlikely', 'David'])
dict_values(['webstack.jl', 'StefanKarpinski', 2, 'http

In [28]:
def append_list_to_csv(mylist, output_file):
    with open(output_file, 'a') as myfile:
        wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
        wr.writerow(mylist)

In [29]:
file_name = "test.csv"

In [30]:
for item in test_data:
    append_list_to_csv(item.values(), file_name)

In [31]:
pd.read_csv('test.csv')

Unnamed: 0,webstack.jl,danielmendel,63,https://avatars3.githubusercontent.com/u/304202?v=4,https://api.github.com/users/danielmendel,Daniel Espeset
0,webstack.jl,astrieanna,36,https://avatars3.githubusercontent.com/u/12053...,https://api.github.com/users/astrieanna,Leah Hanson
1,webstack.jl,zachallaun,19,https://avatars0.githubusercontent.com/u/50393...,https://api.github.com/users/zachallaun,Zach Allaun
2,webstack.jl,chuckha,8,https://avatars0.githubusercontent.com/u/98927...,https://api.github.com/users/chuckha,Chuck Ha
3,webstack.jl,ncollins,4,https://avatars1.githubusercontent.com/u/60729...,https://api.github.com/users/ncollins,Nick Collins
4,webstack.jl,maxlikely,3,https://avatars1.githubusercontent.com/u/12370...,https://api.github.com/users/maxlikely,David
5,webstack.jl,StefanKarpinski,2,https://avatars2.githubusercontent.com/u/15359...,https://api.github.com/users/StefanKarpinski,Stefan Karpinski
6,webstack.jl,jroes,1,https://avatars0.githubusercontent.com/u/27847...,https://api.github.com/users/jroes,Jonathan Roes


In [32]:
flat_contributions = [item for sublist in all_org_contributions for item in sublist]

In [33]:
file_name = 'test2.csv'

for item in flat_contributions:
    append_list_to_csv(item.values(), file_name)

In [37]:
df = pd.read_csv('test2.csv')

In [39]:
df.columns = ['repo', 'username', 'contributions', 'avatar_url', 'profile_url', 'real_name']

In [40]:
df.head()

Unnamed: 0,repo,username,contributions,avatar_url,profile_url,real_name
0,hs-cli,davidbalbert,2,https://avatars2.githubusercontent.com/u/12335...,https://api.github.com/users/davidbalbert,David Albert
1,webstack.jl,danielmendel,63,https://avatars3.githubusercontent.com/u/30420...,https://api.github.com/users/danielmendel,Daniel Espeset
2,webstack.jl,astrieanna,36,https://avatars3.githubusercontent.com/u/12053...,https://api.github.com/users/astrieanna,Leah Hanson
3,webstack.jl,zachallaun,19,https://avatars0.githubusercontent.com/u/50393...,https://api.github.com/users/zachallaun,Zach Allaun
4,webstack.jl,chuckha,8,https://avatars0.githubusercontent.com/u/98927...,https://api.github.com/users/chuckha,Chuck Ha
