Permalink
Browse files

Fixed #13 by parsing through github-flavored markdown

  • Loading branch information...
1 parent ca96f6c commit 86ca1ab5ec70e6c0ff25844abd69c3ad0e161f11 @b1naryth1ef b1naryth1ef committed Jul 19, 2012
Showing with 57 additions and 1 deletion.
  1. +2 −1 gistio.py
  2. +55 −0 github_md.py
View
3 gistio.py
@@ -4,6 +4,7 @@
from redis import StrictRedis
from markdown2 import markdown
+from github_md import gfm
import requests
import bleach
@@ -78,7 +79,7 @@ def fetch_and_render(id):
decoded = r.json.copy()
for f in decoded['files'].values():
if f['language'] in RENDERABLE:
- f['rendered'] = bleach.clean(markdown(f['content']),
+ f['rendered'] = bleach.clean(markdown(gfm(f['content'])),
tags=ALLOWED_TAGS, attributes=ALLOWED_ATTRIBUTES)
encoded = json.dumps(decoded)
cache.setex(id, CACHE_EXPIRATION, encoded)
View
55 github_md.py
@@ -0,0 +1,55 @@
+"""
+Taken from https://gist.github.com/710689
+Credit to mvasilkov
+Modified by @b1naryth1ef to include fenced code blocks.
+"""
+
+import re
+from hashlib import md5
+
# A fenced code block: an opening run of three or more backticks at the start
# of a line, an optional language tag, the code itself, and a closing fence
# identical to the opening one that also sits at the start of a line.
FENCED = re.compile(
    r'(?P<fence>^`{3,})[ ]*(?P<lang>[a-zA-Z0-9_-]*)[ ]*\n'
    r'(?P<code>.*?)(?<=\n)(?P=fence)[ ]*$',
    re.MULTILINE | re.DOTALL)
CODE_WRAP = '<pre><code%s>%s</code></pre>'
LANG_TAG = ' class="%s"'

# Patterns used by gfm(); compiled once at import time instead of per call.
_PRE_BLOCK = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL)
_MULTI_UNDERSCORE = re.compile(
    r'^(?! {4}|\t).*\w+(?<!_)_\w+_\w[\w_]*', re.MULTILINE | re.UNICODE)
_TEXT_LINE = re.compile(r'^[\w\<][^\n]*(\n+)', re.MULTILINE | re.UNICODE)
_PLACEHOLDER = re.compile(r'\{gfm-extraction-([0-9a-f]{32})\}')


def _escape_html(code):
    """Escape the characters that would otherwise be parsed as HTML markup."""
    # '&' must be replaced first so the entities produced below stay intact.
    return (code.replace('&', '&amp;')
                .replace('<', '&lt;')
                .replace('>', '&gt;'))


def gfm(text):
    """Pre-process GitHub-flavored markdown into plain markdown.

    The returned text is meant to be fed to a regular markdown renderer.
    Four transformations are applied, in order:

    1. Every fenced code block (``` ... ```) becomes an HTML
       ``<pre><code>`` block, with the code HTML-escaped and the language
       tag (if any) emitted as the ``class`` attribute.
    2. ``<pre>`` blocks are swapped for placeholders so the following
       steps cannot alter their contents.
    3. Words containing two or more underscores (``foo_bar_baz``) get their
       underscores backslash-escaped so no italics appear mid-word.
    4. A single newline after a line of text becomes a markdown hard line
       break; runs of two or more newlines are left alone.

    Finally the ``<pre>`` blocks are put back, each preceded by a blank line.

    :param text: markdown source.
    :returns: the transformed markdown source.
    """
    # Convert *all* fenced blocks in place; the surrounding text is kept.
    def fence_callback(matchobj):
        lang = matchobj.group('lang')
        # Only emit a class attribute when a language was actually given.
        lang_attr = LANG_TAG % lang if lang else ''
        return CODE_WRAP % (lang_attr, _escape_html(matchobj.group('code')))
    text = FENCED.sub(fence_callback, text)

    # Extract pre blocks.
    extractions = {}

    def pre_extraction_callback(matchobj):
        block = matchobj.group(0)
        digest = md5(block.encode('utf-8')).hexdigest()
        extractions[digest] = block
        return "{gfm-extraction-%s}" % digest
    text = _PRE_BLOCK.sub(pre_extraction_callback, text)

    # Prevent foo_bar_baz from ending up with an italic word in the middle.
    def italic_callback(matchobj):
        s = matchobj.group(0)
        if s.count('_') >= 2:
            return s.replace('_', '\\_')
        return s
    text = _MULTI_UNDERSCORE.sub(italic_callback, text)

    # In very clear cases, let newlines become <br /> tags.
    def newline_callback(matchobj):
        if len(matchobj.group(1)) == 1:
            # Markdown needs at least two trailing spaces for a hard break.
            return matchobj.group(0).rstrip() + '  \n'
        return matchobj.group(0)
    text = _TEXT_LINE.sub(newline_callback, text)

    # Insert pre block extractions.
    def pre_insert_callback(matchobj):
        block = extractions.get(matchobj.group(1))
        if block is None:
            # Placeholder-looking text that came from the input itself:
            # leave it untouched instead of raising KeyError.
            return matchobj.group(0)
        return '\n\n' + block
    text = _PLACEHOLDER.sub(pre_insert_callback, text)

    return text

0 comments on commit 86ca1ab

Please sign in to comment.