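# NOTE: the next four lines are web-page residue captured with this listing;
# they are not part of the module and are kept only as comments.
# Skip to content
# Fetching contributors...
# Cannot retrieve contributors at this time
# 132 lines (105 sloc) 3.84 KB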
import functools
import hashlib
import pickle
import re
import socket
import sys
import urllib2
from urllib import urlencode

try:
    import simplejson as json
except ImportError:
    import json

from micawber.exceptions import ProviderException
class Provider(object):
    """Client for a single JSON-returning embed endpoint.

    A provider is bound to one endpoint URL plus a set of base query
    parameters that are sent with every request.
    """

    # Seconds to wait on the network before a fetch is abandoned.
    socket_timeout = 3.0
    # Value sent in the User-Agent header of every request.
    user_agent = 'python-micawber'

    def __init__(self, endpoint, **kwargs):
        """Bind the provider to *endpoint*.

        :param endpoint: URL of the provider's API; may already contain a
            query string.
        :param kwargs: extra query parameters sent with every request.
            They are merged over the default ``format=json``.
        """
        self.endpoint = endpoint
        self.base_params = {'format': 'json'}
        # Previously the keyword arguments were accepted but discarded.
        self.base_params.update(kwargs)

    def fetch(self, url):
        """Return the body served at *url*, or ``False`` on any failure.

        Failure covers URL/HTTP errors raised by ``urllib2``, socket
        timeouts and any status code outside the 2xx range.
        """
        req = urllib2.Request(url, headers={'User-Agent': self.user_agent})
        try:
            # Per-request timeout; avoids touching the process-wide socket
            # default.
            resp = urllib2.urlopen(req, timeout=self.socket_timeout)
        except (urllib2.URLError, socket.timeout):
            return False

        try:
            if resp.code < 200 or resp.code >= 300:
                return False
            return resp.read()
        except socket.timeout:
            # The timeout can also fire while the body is being read.
            return False
        finally:
            # Always release the connection, on success and on failure.
            resp.close()

    def request(self, url, **extra_params):
        """Ask the endpoint about *url* and return the decoded JSON.

        :param url: the URL to look up.
        :param extra_params: per-call query parameters; they override the
            provider's base parameters.
        :returns: the decoded response; a ``'url'`` key holding *url* is
            added when the response lacks one.
        :raises ProviderException: when the endpoint could not be fetched.
        """
        params = dict(self.base_params)
        params.update(extra_params)
        # The looked-up URL always wins over a same-named extra parameter.
        params['url'] = url

        # Sorted so the same parameters always yield the same request URL.
        encoded_params = urlencode(sorted(params.items()))

        endpoint_url = self.endpoint
        if '?' in endpoint_url:
            # Endpoint already carries a query string: append to it.
            endpoint_url = '%s&%s' % (endpoint_url.rstrip('&'), encoded_params)
        else:
            endpoint_url = '%s?%s' % (endpoint_url, encoded_params)

        response = self.fetch(endpoint_url)
        if response:
            json_data = json.loads(response)
            if 'url' not in json_data:
                json_data['url'] = url
            return json_data
        raise ProviderException('Error fetching "%s"' % endpoint_url)
def make_key(*args, **kwargs):
    """Return a hex MD5 digest identifying the given call arguments.

    The digest is computed over the pickled ``(args, kwargs)`` pair, so all
    arguments must be picklable.
    """
    return hashlib.md5(pickle.dumps((args, kwargs))).hexdigest()


def url_cache(fn):
    """Decorate a ``(self, url, **params)`` method with result caching.

    The wrapper reads ``self.cache``. When it is falsy the wrapped method is
    called directly. Otherwise the cache must offer ``get(key)`` and
    ``set(key, value)``; a falsy cached value counts as a miss and is
    recomputed.
    """
    @functools.wraps(fn)  # keep the wrapped method's name and docstring
    def inner(self, url, **params):
        if self.cache:
            key = make_key(url, params)
            data = self.cache.get(key)
            if not data:
                data = fn(self, url, **params)
                self.cache.set(key, data)
            return data
        return fn(self, url, **params)
    return inner


class ProviderRegistry(object):
    """Mapping from URL regular expressions to provider objects."""

    def __init__(self, cache=None):
        """Create an empty registry.

        :param cache: optional object with ``get(key)`` / ``set(key, value)``
            used to cache :meth:`request` results; ``None`` disables caching.
        """
        self._registry = {}
        self.cache = cache

    def register(self, regex, provider):
        """Map *regex* to *provider*, replacing any existing mapping."""
        self._registry[regex] = provider

    def unregister(self, regex):
        """Remove the mapping for *regex*.

        :raises KeyError: if *regex* was never registered.
        """
        del self._registry[regex]

    def __iter__(self):
        """Iterate over ``(regex, provider)`` pairs."""
        return iter(self._registry.items())

    def provider_for_url(self, url):
        """Return a provider whose regex matches the start of *url*.

        Returns ``None`` when nothing matches. The registry is a plain dict,
        so when several patterns match, which one is returned follows dict
        iteration order.
        """
        for regex, provider in self:
            if re.match(regex, url):
                return provider
        return None

    # Wrapped so the ``cache`` given to the constructor is actually used;
    # ``url_cache`` expects exactly this ``(self, url, **params)`` signature.
    @url_cache
    def request(self, url, **params):
        """Fetch embed data for *url* from the matching provider.

        :param params: extra parameters passed on to the provider.
        :raises ProviderException: when no registered pattern matches *url*.
        """
        provider = self.provider_for_url(url)
        if provider:
            return provider.request(url, **params)
        raise ProviderException('Provider not found for "%s"' % url)
def bootstrap_basic(cache=None):
    """Return a ``ProviderRegistry`` pre-loaded with a fixed provider list.

    :param cache: passed through to ``ProviderRegistry``.
    """
    # NOTE(review): every endpoint below is an empty string and some of the
    # patterns look truncated in this copy of the file -- confirm against
    # the intended provider list. The pattern r'\S*' appears twice; the
    # registry is keyed by pattern, so the later entry replaces the earlier.
    #
    # Raw strings keep the regex backslashes literal; the resulting values
    # are unchanged.
    providers = (
        (r'http://\S*?\S*', ''),
        (r'http://\S*.youtu(\.be|be\.com)/watch\S*', ''),
        (r'\S*', ''),
        (r'\S*', ''),
        (r'[^\/]+/\S*', ''),
    )

    pr = ProviderRegistry(cache)
    for pattern, endpoint in providers:
        pr.register(pattern, Provider(endpoint))
    return pr
def bootstrap_embedly(cache=None, **params):
    """Build a ``ProviderRegistry`` from a remotely hosted provider schema.

    The schema is downloaded and decoded as JSON. It is iterated as a
    sequence of entries, each with a ``'regex'`` item holding the patterns
    to register. Every pattern is mapped to its own ``Provider`` for the
    shared endpoint.

    :param cache: passed through to ``ProviderRegistry``.
    :param params: query parameters handed to every ``Provider``.
    :returns: the populated registry.
    """
    # NOTE(review): both URLs are empty strings in this copy of the file;
    # they must be filled in before this function can reach the network.
    endpoint = ''
    schema_url = ''

    pr = ProviderRegistry(cache)

    # fetch the schema
    resp = urllib2.urlopen(schema_url)
    try:
        contents = resp.read()
    finally:
        # Release the connection even if reading fails.
        resp.close()

    json_data = json.loads(contents)

    for provider_meta in json_data:
        for regex in provider_meta['regex']:
            pr.register(regex, Provider(endpoint, **params))
    return pr
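# NOTE: the next two lines are web-page residue captured with this listing;
# they are not part of the module and are kept only as comments.
# Jump to Line
# Something went wrong with that request. Please try again.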