Skip to content
This repository
branch: master
Fetching contributors…

Octocat-spinner-32-eaf2f5

Cannot retrieve contributors at this time

executable file 67 lines (53 sloc) 1.995 kb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
#!/usr/bin/env python

"""
In this module you'll find a function wikipedia_updates which takes a callback
function which will be passed wikipedia updates that stream from the socket.io
server at http://wikistream.inkdroid.org

Each update will be passed to your callback function will be a Python dictionary
that looks something like:

{
'anonymous': False,
'comment': '/* Anatomy */ changed statement that orbit was the eye to saying
that the orbit was the eye socket for accuracy',
'delta': 7,
'flag': '',
'namespace': 'article',
'newPage': False,
'page': 'Optic nerve',
'pageUrl': 'http://en.wikipedia.org/wiki/Optic_nerve',
'robot': False,
'unpatrolled': False,
'url': 'http://en.wikipedia.org/w/index.php?diff=449570600&oldid=447889877',
'user': 'Moearly',
'userUrl': 'http://en.wikipedia.org/wiki/User:Moearly',
'wikipedia': '#en.wikipedia',
'wikipediaLong': 'English Wikipedia',
'wikipediaShort': 'en',
'wikipediaUrl': 'http://en.wikipedia.org'
}

You'll need the requests (http://pypi.python.org/pypi/requests) library installed
for the HTTP requests.

More about the protocol that socket.io uses can be found at:
https://github.com/learnboost/socket.io-spec
"""

import re
import json
import time

from requests import post, get

def wikipedia_updates(callback):
    endpoint = "http://wikistream.inkdroid.org/socket.io/1"
    endpoint = "http://localhost:3000/socket.io/1"
    session_id = post(endpoint).content.split(':')[0]
    xhr_endpoint = "/".join((endpoint, "xhr-polling", session_id))

    while True:
        t = time.time() * 1000000
        response = get(xhr_endpoint, params={'t': t}).content.decode('utf-8')

        chunks = re.split(u'\ufffd[0-9]+\ufffd', response)
        for chunk in chunks:
            parts = chunk.split(':', 3)
            if len(parts) == 4:
                callback(json.loads(parts[3]))


if __name__ == "__main__":
    def print_page(update):
        print update

    wikipedia_updates(print_page)
Something went wrong with that request. Please try again.