### The Activity API has 6 sub-categories; Events, Starring, and Watching have been covered so far
- __Events__(usable)
- Event Types & Payloads(to be confirmed)
- Feeds(user-center)
- Notifications(user-center)
- __Starring__(usable)
- __Watching__(no timestamp, no use)

In [47]:
import requests
import json
import time
from pymongo import MongoClient
import random

In [2]:
import os

# SECURITY NOTE(review): a literal access token is committed in this notebook.
# Prefer supplying the token through the GITHUB_ACCESS_TOKEN environment
# variable; the literal below is kept only as a fallback so existing runs keep
# working. It should be revoked and removed from source control.
ACCESS_TOKEN = os.environ.get('GITHUB_ACCESS_TOKEN') or '251e9806e8f4838cc49d5536e50bb2923a17ab15'

In [39]:
def github_request(params, headers, per_page=30, page=1, show_head=False, show_body=False, is_write=False):
    """ Description: function to perform a GET request against the GitHub API
        - params: a list of URL path segments, joined with '/' after https://api.github.com/
        - headers: a dict of request headers
        - per_page: default 30 records per page, can go up to 100
        - page: default starts from page 1
        - show_head: default False, not show the response headers
        - show_body: default False, not show the response body
        - is_write: default False, if True write the body to '<segments joined by dots>.json'
                    in the current directory
        - return: a tuple of the status string (e.g. '200 OK') and the decoded JSON body
    """
    url = "https://api.github.com/" + '/'.join(params) + '?per_page={}&page={}'.format(per_page, page)
    # Single-argument print(...) produces the same output under Python 2 and Python 3.
    print('requesting: ' + url)
    response = requests.get(url, headers=headers) # get response
    body = response.json() # decode the JSON body once and reuse it below
    if show_body:
        # body
        print(json.dumps(body, indent=1))
        print('------------------------------------')
    if show_head:
        # header
        for (k, v) in response.headers.items():
            print('{} => {}'.format(k, v))
        print('------------------------------------')
    if is_write:
        # The file name is only needed when the body is actually written out.
        file_name = '.'.join(params) + '.json'
        with open(file_name, 'w') as jsonfile:
            json.dump(body, jsonfile)
    print('total records of this request: {}'.format(len(body)))
    # Do not assume the server sends a 'Status' header: when it is missing,
    # rebuild the same '<code> <reason>' string from the response itself
    # instead of failing with a KeyError.
    status = response.headers.get('Status')
    if status is None:
        status = '{} {}'.format(response.status_code, response.reason)
    return status, body

# Events(usable)
https://developer.github.com/v3/activity/events/

__Events support pagination, however the per_page option is unsupported. The fixed page size is 30 items. Fetching up to ten pages is supported, for a total of 300 events.__

__Only events created within the past 90 days will be included in timelines. Events older than 90 days will not be included (even if the total number of events in the timeline is less than 300).__

### List repository events
GET /repos/:owner/:repo/events

In [4]:
# Path segments for GET /repos/:owner/:repo/events on Jubatus/jubakit,
# authenticated with the notebook's access token.
params = ['repos', 'Jubatus', 'jubakit', 'events']
headers = dict(Authorization='token {}'.format(ACCESS_TOKEN))

In [5]:
# Request page 1 of the repository events with per_page=100; nothing is written to disk.
status, result = github_request(params, headers, per_page=100, page=1, is_write=False)

requesting: https://api.github.com/repos/Jubatus/jubakit/events?per_page=100&page=1
total records of this request: 100


In [6]:
# Display the status string returned by the preceding github_request call.
status

'200 OK'

# Starring(usable)
https://developer.github.com/v3/activity/starring/

__Stargazers also have no query limit; per_page can be increased to 100, and a timestamp option is available__

### List Stargazers
__GET /repos/:owner/:repo/stargazers__

You can also find out when stars were created by passing the following custom media type via the Accept header:

__Accept: application/vnd.github.v3.star+json__

In [7]:
# Path segments for GET /repos/:owner/:repo/stargazers on Jubatus/jubatus.
params = ['repos', 'Jubatus', 'jubatus', 'stargazers']
# The custom star+json media type is passed through the Accept header.
headers = {
    'Authorization': 'token {}'.format(ACCESS_TOKEN),
    'Accept': 'application/vnd.github.v3.star+json',
}

In [8]:
# Request page 2 of the stargazers with per_page=100; nothing is written to disk.
status, result = github_request(params, headers, per_page=100, page=2, is_write=False)

requesting: https://api.github.com/repos/Jubatus/jubatus/stargazers?per_page=100&page=2
total records of this request: 100


In [9]:
# Display the status string returned by the preceding github_request call.
status

'200 OK'

# Watching(no use)
https://developer.github.com/v3/activity/watching/

__No timestamp__

### List watchers
GET /repos/:owner/:repo/subscribers

In [10]:
# Path segments for GET /repos/:owner/:repo/subscribers on Jubatus/jubatus.
params = ['repos', 'Jubatus', 'jubatus', 'subscribers']
# The full+json media type is passed through the Accept header.
headers = {
    'Authorization': 'token {}'.format(ACCESS_TOKEN),
    'Accept': 'application/vnd.github.v3.full+json',
}

In [11]:
# Request page 1 of the subscribers with per_page=100; nothing is written to disk.
status, result = github_request(params, headers, per_page=100, page=1, is_write=False)

requesting: https://api.github.com/repos/Jubatus/jubatus/subscribers?per_page=100&page=1
total records of this request: 98


In [12]:
# Display the status string returned by the preceding github_request call.
status

'200 OK'

# Retrieve events and starring of sampled repositories
https://github-ranking.com/repositories

In [61]:
# Seed the generator with a fixed value so the ten draws below are repeatable,
# then draw ten integers between 1 and 978 inclusive.
random.seed(586)
[random.randint(1, 978) for i in range(10)]

[722, 115, 967, 361, 674, 469, 607, 386, 292, 112]

In [66]:
# Sampled repositories, one [owner, repo] list per entry.
repo_portfolio = list(map(list, (
    ('lebinh', 'ngxtop'),
    ('IanLunn', 'Hover'),
    ('eczarny', 'spectacle'),
    ('JakeWharton', 'ViewPagerIndicator'),
    ('rendrjs', 'rendr'),
    ('inconshreveable', 'ngrok'),
    ('madebymany', 'sir-trevor-js'),
    ('jschr', 'bootstrap-modal'),
    ('carrierwaveuploader', 'carrierwave'),
    ('jquery', 'jquery-ui'),
)))

In [63]:
def dump_mongo(db_url, db_name, params, headers):
    """ Description: page through a GitHub API listing and store each page in MongoDB
        - db_url: address passed to MongoClient
        - db_name: name of the database to write into
        - params: a list of request path segments, forwarded to github_request;
                  joined with '.' it is also used as the collection name
        - headers: a dict of request headers, forwarded to github_request
        - return: None; progress and elapsed time are printed

        Pages are requested with per_page=100 starting at page 1, one second
        apart, until a request does not come back as '200 ...' with a
        non-empty JSON list.
    """
    mongocli = MongoClient(db_url)# init mongodb client
    start_time = time.time()
    try:
        mongodb = mongocli[db_name] # connect to the target database
        collection_name = '.'.join(params)
        page = 1
        while True:
            status, json_body = github_request(params, headers,per_page=100,page=page)
            # Single-argument print(...) produces the same output under Python 2 and 3.
            print('page {} : status: {}\n'.format(page, status))
            # Stop on anything that is not a successful, non-empty page of
            # records. Previously only '422 Unprocessable Entity' and an empty
            # body ended the loop, so any other failure with a non-empty JSON
            # body was inserted as data and the same page was requested forever.
            if not status.startswith('200') or not isinstance(json_body, list) or not json_body:
                break
            # NOTE(review): Collection.insert is the legacy bulk-insert call; newer
            # PyMongo releases provide insert_many - confirm against the installed version.
            mongodb[collection_name].insert(json_body)
            page += 1
            time.sleep(1)
    finally:
        mongocli.close() # close connection even if a request or insert raised
    elapsed_time = time.time() - start_time
    print('------------------------------------')
    print('completed: {}'.format(elapsed_time))
    

### Retrieve events

In [68]:
# One list of path segments per sampled repository: ['repos', owner, repo, 'events'].
events_list = [['repos'] + owner_repo + ['events'] for owner_repo in repo_portfolio]

In [41]:
# MongoDB target for the event dumps, plus the authenticated request headers.
db_url, db_name = 'ec2-54-67-97-244.us-west-1.compute.amazonaws.com:27017', 'activities'
headers = dict(Authorization='token {}'.format(ACCESS_TOKEN))

In [70]:
# Dump the events of each sampled repository, printing its path segments
# before the dump and a blank line after it.
for event in events_list:
    print(event)
    dump_mongo(db_url=db_url, db_name=db_name, params=event, headers=headers)
    print('')

requesting: https://api.github.com/repos/lebinh/ngxtop/events?per_page=100&page=1
total records of this request: 100
page 1 : status: 200 OK

requesting: https://api.github.com/repos/lebinh/ngxtop/events?per_page=100&page=2
total records of this request: 100
page 2 : status: 200 OK

requesting: https://api.github.com/repos/lebinh/ngxtop/events?per_page=100&page=3
total records of this request: 0
page 3 : status: 200 OK

------------------------------------
completed: 6.11205482483
requesting: https://api.github.com/repos/IanLunn/Hover/events?per_page=100&page=1
total records of this request: 100
page 1 : status: 200 OK

requesting: https://api.github.com/repos/IanLunn/Hover/events?per_page=100&page=2
total records of this request: 100
page 2 : status: 200 OK

requesting: https://api.github.com/repos/IanLunn/Hover/events?per_page=100&page=3
total records of this request: 100
page 3 : status: 200 OK

requesting: https://api.github.com/repos/IanLunn/Hover/events?per_page=100&page=4
total 

### Retrieve starring

In [73]:
# One list of path segments per sampled repository: ['repos', owner, repo, 'stargazers'].
starring_list = [['repos'] + owner_repo + ['stargazers'] for owner_repo in repo_portfolio]

In [74]:
# MongoDB target for the stargazer dumps; the star+json media type is passed
# through the Accept header.
db_url, db_name = 'ec2-54-67-97-244.us-west-1.compute.amazonaws.com:27017', 'activities'
headers = {
    'Authorization': 'token {}'.format(ACCESS_TOKEN),
    'Accept': 'application/vnd.github.v3.star+json',
}

In [75]:
# Dump the stargazers of each sampled repository, printing its path segments
# before the dump and a blank line after it.
for starring in starring_list:
    print(starring)
    dump_mongo(db_url=db_url, db_name=db_name, params=starring, headers=headers)
    print('')

['repos', 'lebinh', 'ngxtop', 'stargazers']
requesting: https://api.github.com/repos/lebinh/ngxtop/stargazers?per_page=100&page=1
total records of this request: 100
page 1 : status: 200 OK

requesting: https://api.github.com/repos/lebinh/ngxtop/stargazers?per_page=100&page=2
total records of this request: 100
page 2 : status: 200 OK

requesting: https://api.github.com/repos/lebinh/ngxtop/stargazers?per_page=100&page=3
total records of this request: 100
page 3 : status: 200 OK

requesting: https://api.github.com/repos/lebinh/ngxtop/stargazers?per_page=100&page=4
total records of this request: 100
page 4 : status: 200 OK

requesting: https://api.github.com/repos/lebinh/ngxtop/stargazers?per_page=100&page=5
total records of this request: 100
page 5 : status: 200 OK

requesting: https://api.github.com/repos/lebinh/ngxtop/stargazers?per_page=100&page=6
total records of this request: 100
page 6 : status: 200 OK

requesting: https://api.github.com/repos/lebinh/ngxtop/stargazers?per_page=100&p