Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 182 lines (166 sloc) 7.222 kb
02f3781 @ericflo Committed initial version of django-oembed.
ericflo authored
1 import re
2 import urllib2
3 import gzip
d069174 @idan Fix broken replacement engine in re_parts
idan authored
4 from heapq import heappush, heappop
02f3781 @ericflo Committed initial version of django-oembed.
ericflo authored
5 try:
6 from cStringIO import StringIO
7 except ImportError:
8 from StringIO import StringIO
9 try:
10 import simplejson
11 except ImportError:
12 from django.utils import simplejson
13 from django.conf import settings
14 from django.utils.safestring import mark_safe
15 from oembed.models import ProviderRule, StoredOEmbed
e428feb @ericflo Added much more robust replacement mechanism using Django templates.
ericflo authored
16 from django.template.loader import render_to_string
d069174 @idan Fix broken replacement engine in re_parts
idan authored
17 import logging
18 logger = logging.getLogger("oembed core")
02f3781 @ericflo Committed initial version of django-oembed.
ericflo authored
19
ea91cc6 @ericflo Rolling back last commit, fixing it another way.
ericflo authored
# Characters treated as trailing punctuation rather than as part of a matched
# URL; ``replace`` peels them off the end of each match before looking it up.
END_OVERRIDES = (')', ',', '.', '>', ']', ';')
# Defaults taken from Django settings when present.  MAX_WIDTH and MAX_HEIGHT
# are the default ``max_width``/``max_height`` arguments of ``replace``, and
# FORMAT is sent as the ``format`` query parameter of each provider request.
MAX_WIDTH = getattr(settings, "OEMBED_MAX_WIDTH", 320)
MAX_HEIGHT = getattr(settings, "OEMBED_MAX_HEIGHT", 240)
FORMAT = getattr(settings, "OEMBED_FORMAT", "json")
24
e5bfaab @ericflo Added docstrings and inline documentation, and some standard open source...
ericflo authored
def fetch(url, user_agent="django-oembed/0.1"):
    """
    Fetches from a URL, respecting GZip encoding, etc.

    ``url`` is the address to request and ``user_agent`` is the value sent in
    the ``User-Agent`` header.  The request advertises gzip support, and a
    response whose ``Content-Encoding`` header is exactly ``gzip`` is
    decompressed before being returned.

    Returns the (decompressed) response body.  Exceptions raised while
    opening or reading the URL are not caught here and propagate to the
    caller.
    """
    request = urllib2.Request(url)
    request.add_header('User-Agent', user_agent)
    request.add_header('Accept-Encoding', 'gzip')
    response = urllib2.build_opener().open(request)
    try:
        body = response.read()
        encoding = response.headers.get('content-encoding', '')
    finally:
        # Always release the connection, even when reading the body fails.
        response.close()
    if encoding == 'gzip':
        body = gzip.GzipFile(fileobj=StringIO(body)).read()
    return body
02f3781 @ericflo Committed initial version of django-oembed.
ericflo authored
39
def re_parts(regex_list, text):
    """
    An iterator that returns the entire text, but split by which regex it
    matched, or none at all.  If it did, the first value of the returned tuple
    is the index into the regex list, otherwise -1.

    Matches are reported in order of their start position.  When two patterns
    match at the same position, the one listed first in ``regex_list`` wins.
    A match that starts inside text already claimed by an earlier match is
    dropped, so no character of ``text`` is ever yielded twice.

    >>> first_re = re.compile('asdf')
    >>> second_re = re.compile('an')
    >>> list(re_parts([first_re, second_re], 'This is an asdf test.'))
    [(-1, 'This is '), (1, 'an'), (-1, ' '), (0, 'asdf'), (-1, ' test.')]

    >>> list(re_parts([first_re, second_re], 'asdfasdfasdf'))
    [(0, 'asdf'), (0, 'asdf'), (0, 'asdf')]

    >>> list(re_parts([], 'This is an asdf test.'))
    [(-1, 'This is an asdf test.')]

    >>> third_re = re.compile('sdf')
    >>> list(re_parts([first_re, second_re, third_re], 'This is an asdf test.'))
    [(-1, 'This is '), (1, 'an'), (-1, ' '), (0, 'asdf'), (-1, ' test.')]
    """
    # Index of the first occurrence of each distinct pattern.  A pattern that
    # is listed more than once is searched only once and reported under its
    # first index.
    first_index = {}
    for position, regex in enumerate(regex_list):
        first_index.setdefault(regex, position)
    iterators = dict(
        (position, regex.finditer(text))
        for regex, position in first_index.items()
    )

    # Heap of (start, pattern index, match).  The pattern index is unique per
    # iterator, so ties on ``start`` are settled by list order and the match
    # objects themselves are never compared.
    pending = []

    def push_next(position):
        # Queue the next hit of one pattern, if it has any left.
        match = next(iterators[position], None)
        if match is not None:
            heappush(pending, (match.start(), position, match))

    # Bootstrap the search with the first hit for each pattern.
    for position in iterators:
        push_next(position)

    prev_end = 0
    while pending:
        # Take the earliest remaining match and refill from the same pattern.
        start, position, match = heappop(pending)
        push_next(position)
        if start < prev_end:
            # Overlaps text that an earlier match already consumed.
            continue
        if start > prev_end:
            # Unmatched text between the previous match and this one.
            yield (-1, text[prev_end:start])
        prev_end = match.end()
        yield (position, text[start:prev_end])

    # Whatever follows the last match.
    last_bit = text[prev_end:]
    if last_bit:
        yield (-1, last_bit)
100
def replace(text, max_width=MAX_WIDTH, max_height=MAX_HEIGHT):
    """
    Scans a block of text, replacing anything matched by a ``ProviderRule``
    pattern with an OEmbed html snippet, if possible.

    Templates should be stored at oembed/{type}.html, where ``type`` is the
    ``type`` value of the provider's response, so for example:

        oembed/video.html

    These templates are passed a context variable, ``response``, which is a
    dictionary representation of the response.

    ``max_width`` and ``max_height`` are sent to the provider as ``maxwidth``
    and ``maxheight`` and are part of the key under which rendered snippets
    are cached in ``StoredOEmbed``.

    A matched URL is left in the text unchanged when the provider cannot be
    reached, answers with an HTTP error, returns something that is not valid
    JSON or has no ``type``, or when the template renders to an empty string.
    The result is returned through ``mark_safe``.
    """
    # Imported here so that this function does not depend on a change to the
    # module-level import block.
    from django.utils.http import urlquote

    rules = list(ProviderRule.objects.all())
    patterns = [re.compile(r.regex) for r in rules]  # Compiled patterns from the rules
    parts = []  # The parts that we will assemble into the final return value.
    indices = []  # Indices of parts that need to be replaced with OEmbed stuff.
    indices_rules = []  # For each entry of ``indices``, the index of the rule that matched.
    urls = set()  # A set of URLs to try to lookup from the database.
    stored = {}  # A mapping of URLs to StoredOEmbed objects.
    trailing_chars = ''.join(END_OVERRIDES)

    # First we pass through the text, populating our data structures.
    for i, part in re_parts(patterns, text):
        if i == -1:
            parts.append(part)
            continue
        # Peel trailing punctuation off the link and keep it, in its original
        # order, as a separate plain-text part.
        link = part.rstrip(trailing_chars)
        to_append = part[len(link):]
        if link:
            indices.append(len(parts))
            indices_rules.append(i)
            urls.add(link)
            parts.append(link)
        if to_append:
            parts.append(to_append)

    # Now we fetch a list of all stored patterns, and put it in a dictionary
    # mapping the URL to the stored model instance.
    for stored_embed in StoredOEmbed.objects.filter(
            match__in=urls, max_width=max_width, max_height=max_height):
        stored[stored_embed.match] = stored_embed

    # Now we're going to do the actual replacement of URL to embed.
    for rule_index, id_to_replace in zip(indices_rules, indices):
        part = parts[id_to_replace]
        if part in stored:
            # Use the stored HTML fragment as a replacement.
            parts[id_to_replace] = stored[part].html
            continue
        rule = rules[rule_index]
        try:
            # Build the URL based on the properties defined in the OEmbed
            # spec.  The target URL is percent-encoded so that characters
            # such as '&' or '#' in it cannot alter the provider request.
            url = u"%s?url=%s&maxwidth=%s&maxheight=%s&format=%s" % (
                rule.endpoint, urlquote(part, safe=''), max_width, max_height, FORMAT
            )
            # Fetch the link and parse the JSON.
            resp = simplejson.loads(fetch(url))
            # Depending on the embed type, grab the associated template and
            # pass it the parsed JSON response as context.
            replacement = render_to_string('oembed/%s.html' % resp['type'], {'response': resp})
        except (ValueError, KeyError, urllib2.URLError):
            # Invalid JSON, a response without a 'type', or a network/HTTP
            # failure (HTTPError is a subclass of URLError): keep the URL.
            continue
        if not replacement:
            # Nothing was rendered; keep the URL as it was.
            continue
        stored_embed = StoredOEmbed.objects.create(
            match=part,
            max_width=max_width,
            max_height=max_height,
            html=replacement,
        )
        # Remember it so a repeat of the same URL in this text is not refetched.
        stored[stored_embed.match] = stored_embed
        parts[id_to_replace] = replacement

    # Combine the list into one string and return it.
    return mark_safe(u''.join(parts))
Something went wrong with that request. Please try again.