From 700e180c72cddc605f5348072ce6e25e34f8e3e3 Mon Sep 17 00:00:00 2001 From: Diederik van der Boor Date: Sun, 17 Mar 2013 10:28:32 +0100 Subject: [PATCH 1/3] PEP code cleanups in providers.py --- micawber/providers.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/micawber/providers.py b/micawber/providers.py index 6dd4a7f..c9cdbc2 100644 --- a/micawber/providers.py +++ b/micawber/providers.py @@ -3,7 +3,6 @@ import re import socket import urllib2 -import sys from urllib import urlencode try: import simplejson as json @@ -72,6 +71,7 @@ def handle_response(self, response, url): def make_key(*args, **kwargs): return hashlib.md5(pickle.dumps((args, kwargs))).hexdigest() + def url_cache(fn): def inner(self, url, **params): if self.cache: @@ -84,6 +84,7 @@ def inner(self, url, **params): return fn(self, url, **params) return inner + class ProviderRegistry(object): def __init__(self, cache=None): self._registry = {} @@ -114,9 +115,9 @@ def request(self, url, **params): def bootstrap_basic(cache=None): # complements of oembed.com#section7 pr = ProviderRegistry(cache) - + # b - pr.register('http://blip.tv/\S+', Provider('http://blip.tv/oembed')) + pr.register('http://blip.tv/\S+', Provider('http://blip.tv/oembed')) # c pr.register('http://chirb.it/\S+', Provider('http://chirb.it/oembed.json')) From 874b60e1c8c481f4bd1d07c4421289e921adaa61 Mon Sep 17 00:00:00 2001 From: Diederik van der Boor Date: Sun, 17 Mar 2013 10:41:42 +0100 Subject: [PATCH 2/3] Add new providers discovered via Embedly/WordPress and some good guesses.. 
This adds: * flic.kr * gist.github.com * mobypicture / moby.to * polldaddy * scribd.com * slidesha.re * speakerdeck * yfrog --- micawber/providers.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/micawber/providers.py b/micawber/providers.py index c9cdbc2..d4bc324 100644 --- a/micawber/providers.py +++ b/micawber/providers.py @@ -129,8 +129,12 @@ def bootstrap_basic(cache=None): # f pr.register('http://\S*?flickr.com/\S+', Provider('http://www.flickr.com/services/oembed/')) + pr.register('http://flic\.kr/\S*', Provider('http://www.flickr.com/services/oembed/')) pr.register('https?://(www\.)?funnyordie\.com/videos/\S+', Provider('http://www.funnyordie.com/oembed')) + # g + pr.register(r'https?://gist.github.com/\S*', Provider('https://github.com/api/oembed')) + # h pr.register('http://www.hulu.com/watch/\S+', Provider('http://www.hulu.com/api/oembed.json')) @@ -142,10 +146,15 @@ def bootstrap_basic(cache=None): # j pr.register('http://www.jest.com/(video|embed)/\S+', Provider('http://www.jest.com/oembed.json')) + # m + pr.register('http://www.mobypicture.com/user/\S*?/view/\S*', Provider('http://api.mobypicture.com/oEmbed')) + pr.register('http://moby.to/\S*', Provider('http://api.mobypicture.com/oEmbed')) + # p pr.register('http://i\S*.photobucket.com/albums/\S+', Provider('http://photobucket.com/oembed')) pr.register('http://gi\S*.photobucket.com/groups/\S+', Provider('http://photobucket.com/oembed')) pr.register('http://www.polleverywhere.com/(polls|multiple_choice_polls|free_text_polls)/\S+', Provider('http://www.polleverywhere.com/services/oembed/')) + pr.register('https?://(.+\.)?polldaddy\.com/\S*', Provider('http://polldaddy.com/oembed/')) # q pr.register('http://qik.com/video/\S+', Provider('http://qik.com/api/oembed.json')) @@ -155,8 +164,11 @@ def bootstrap_basic(cache=None): # s pr.register('http://www.slideshare.net/[^\/]+/\S+', Provider('http://www.slideshare.net/api/oembed/2')) + pr.register('http://slidesha\.re/\S*', 
Provider('http://www.slideshare.net/api/oembed/2')) pr.register('http://\S*.smugmug.com/\S*', Provider('http://api.smugmug.com/services/oembed/')) pr.register('https://\S*?soundcloud.com/\S+', Provider('http://soundcloud.com/oembed')) + pr.register('https?://speakerdeck\.com/\S*', Provider('https://speakerdeck.com/oembed.json')), + pr.register('https?://(www\.)?scribd\.com/\S*', Provider('http://www.scribd.com/services/oembed')) # t pr.register('https?://(www\.)?twitter.com/\S+/status(es)?/\S+', Provider('http://api.twitter.com/1/statuses/oembed.json')) @@ -168,6 +180,7 @@ def bootstrap_basic(cache=None): # y pr.register('https?://(\S*.)?youtu(\.be/|be\.com/watch)\S+', Provider('http://www.youtube.com/oembed')) + pr.register('http://(\S*\.)?yfrog\.com/\S*', Provider('http://www.yfrog.com/api/oembed')) # w pr.register('http://\S+.wordpress.com/\S+', Provider('http://public-api.wordpress.com/oembed/')) From a8d4d6a44aa25f27ec448613230c7426457db30d Mon Sep 17 00:00:00 2001 From: Diederik van der Boor Date: Sun, 17 Mar 2013 10:47:23 +0100 Subject: [PATCH 3/3] Add support for Noembed.com, via bootstrap_noembed() function This is a free service which also implements support for non-oembed sites; however, it doesn't support as many sites as Embed.ly does. --- docs/api.rst | 17 +++++++++++++++++ micawber/providers.py | 19 +++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/docs/api.rst b/docs/api.rst index 87db756..a9e4c28 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -105,6 +105,23 @@ Providers pr = bootstrap_embedly(key='my-embedly-key') pr.request('http://www.youtube.com/watch?v=54XHDUOHuzU') +.. py:function:: bootstrap_noembed([cache=None, [**kwargs]]) + + Create a :py:class:`ProviderRegistry` and register as many providers as + are supported by `noembed.com <http://noembed.com>`_. Valid services are + fetched from http://noembed.com/providers and parsed then registered. 
+ + :param cache: an object that implements simple ``get`` and ``set`` + :param kwargs: any default keyword arguments to use with providers, useful for + passing the ``nowrap`` option to noembed. + :rtype: a ProviderRegistry with support for noembed + + .. code-block:: python + + # default provider parameters, such as nowrap, can be specified here + pr = bootstrap_noembed(nowrap=1) + pr.request('http://www.youtube.com/watch?v=54XHDUOHuzU') + Parsers ------- diff --git a/micawber/providers.py b/micawber/providers.py index d4bc324..bb27b18 100644 --- a/micawber/providers.py +++ b/micawber/providers.py @@ -206,3 +206,22 @@ def bootstrap_embedly(cache=None, **params): for regex in provider_meta['regex']: pr.register(regex, Provider(endpoint, **params)) return pr + + +def bootstrap_noembed(cache=None, **params): + endpoint = 'http://noembed.com/embed' + schema_url = 'http://noembed.com/providers' + + pr = ProviderRegistry(cache) + + # fetch the schema + resp = urllib2.urlopen(schema_url) + contents = resp.read() + resp.close() + + json_data = json.loads(contents) + + for provider_meta in json_data: + for regex in provider_meta['patterns']: + pr.register(regex, Provider(endpoint, **params)) + return pr