Skip to content
Find file
Fetching contributors…
Cannot retrieve contributors at this time
154 lines (111 sloc) 4.45 KB
# encoding: utf-8
"""This script is a pre-processor for Markdown resumes, targeting the pandoc
document processor.
Pandoc extended Markdown supports embedded HTML (like all compliant Markdown
parsers) and a subset of LaTeX, but when outputting LaTeX any unrecognized
LaTeX commands will simply be passed through.
This means you can keep your resume in pure markdown and define pre-processing
functions that do different things with different parts of the input depending
on the target output format.
Currently, the main feature is extraction of contact details. They are
expected to begin on the fourth line, following the header and a blank line,
and extend until the next blank line. Lines with bullets (•) will be split
into separate lines.
Michael White
72 Bower St. #1 • Medford, MA, 02155
You can then define a function for an output format like this:
def tex(lines, contact_lines, *args):
Returns the pre-processed Markdown output suitable for tex processing,
as a string.
lines -- a list of lines, without the contact lines
contact_lines -- the extracted contact lines
args -- any extra command-line arguments
And finally run it like this (substitute the real script and input names):
python THIS_SCRIPT.py tex < INPUT.md
"""
import sys
import re
class Processor(object):
    """Registry that maps output-format names to handler functions."""

    # Maps handler name -> handler function. This dict lives on the class, so
    # it is shared by every Processor instance.
    handlers = {}

    def register(self, fn):
        """Register fn under its own ``__name__`` and return it unchanged.

        Because fn is returned as-is, this method can also be applied as a
        decorator.
        """
        self.handlers[fn.__name__] = fn
        return fn

    def process(self, format, lines, contact_lines, *args):
        """Run the handler registered for format and return its result.

        format -- name of a previously registered handler
        lines -- list of document lines, passed through to the handler
        contact_lines -- extracted contact lines, passed through
        args -- any extra arguments, passed through

        Raises Exception("Unknown format: ...") when no handler is registered
        under that name.
        """
        # Keep the try body to the lookup only, so a KeyError raised inside
        # the handler itself is not misreported as an unknown format.
        try:
            handler = self.handlers[format]
        except KeyError:
            raise Exception("Unknown format: %s" % format)

        return handler(lines, contact_lines, *args)
# Module-level handler registry; main() dispatches through it.
processor = Processor()
def tex(lines, contact_lines, *args):
    """Return the pre-processed Markdown for LaTeX output, as one string.

    The contact lines are converted to LaTeX by hand and inserted at the top
    of the document, wrapped in ``nospace`` and ``flushright`` environments
    (``nospace`` is presumably defined by the LaTeX template -- it is not
    defined in this file).

    lines -- list of document lines, without the contact lines; the contact
             block is inserted at index 0, so the list is modified in place
    contact_lines -- the extracted contact lines
    args -- any extra command-line arguments (unused)
    """

    def sub(pattern, repl, string, **kwargs):
        """Replacement for re.sub that doesn't replace pattern if it's inside
        the first latex command argument brackets. Kind of a hack.

        NOTE: the wrapper groups around the pattern consume the rest of the
        brace-free segment, so at most one occurrence per segment (line, or
        second command argument) is replaced per call.
        """
        flags = kwargs.pop('flags', 0) | re.X | re.M
        num_groups = re.compile(pattern, flags).groups

        # Wrap the caller's pattern: two groups in front of it and one
        # behind it. The pattern is compiled with re.X, so the whitespace
        # and the inline comments below are ignored by the regex engine.
        wrapped = r"""
            (^|}{)             # beginning of line or second argument
            ([^{}\n\r]*)       # disallow { and }
            %s
            ([^{}\n\r]*)
        """ % pattern

        # The caller's groups now start at 3, so shift every \N
        # back-reference in the replacement by two.
        shifted = re.sub(r"\\(\d)",
                         lambda m: r"\%d" % (int(m.group(1)) + 2), repl)

        return re.sub(wrapped, r"\1\2%s\%d" % (shifted, num_groups + 3),
                      string, flags=flags, **kwargs)

    # pandoc doesn't seem to support markdown inside latex blocks, so we're
    # just going to hardcode the two most common link formats for now so people
    # can put links in their contact info
    def replace_links(line):
        # The backslash before href/url is doubled on purpose: in a
        # replacement template "\h" and "\u" are unknown escapes, which
        # Python 3.7+ rejects with re.error.
        line = re.sub(r"<([^:]+@[^:]+?)>", r"\\href{mailto:\1}{\1}", line)
        line = re.sub(r"<(http.+?)>", r"\\url{\1}", line)
        return re.sub(r"\[([^\]]+)\]\(([^\)]+)\)", r"\\href{\2}{\1}", line)

    contact_lines = "\n\n".join(map(replace_links, contact_lines))

    # replacements to apply to the text in contact_lines, because it won't be
    # processed by pandoc
    replace = {
        '~': r"\\textasciitilde{}",
    }

    escape = ['#']

    for search in replace:
        contact_lines = sub(search, replace[search], contact_lines)

    for c in escape:
        # Only escape the character when it is not already preceded by a
        # backslash.
        contact_lines = sub(r'([^\\])\%s' % c, r'\1\%s' % c, contact_lines)

    # Close the two environments opened at the start of the block.
    lines.insert(0, "\\begin{nospace}\\begin{flushright}\n" +
                 contact_lines +
                 "\n\\end{flushright}\\end{nospace}\n")

    return "".join(lines)
def html(lines, contact_lines, *args):
    """Return the pre-processed Markdown for HTML output, as one string.

    LaTeX-only commands listed in ``untex`` are replaced by their plain-text
    names, and the contact lines are emitted as paragraphs inside a
    ``contact`` div at the top of the document.

    lines -- list of document lines, without the contact lines (not modified)
    contact_lines -- the extracted contact lines
    args -- any extra command-line arguments (unused)

    NOTE(review): the ``container`` div and the bare ``<div>`` opened here
    are not closed by this function -- confirm they are closed elsewhere
    (e.g. in the HTML template).
    """
    untex = ['LaTeX']

    # Work on copies. List comprehensions are used instead of map() because
    # map() returns an iterator on Python 3, which has no insert().
    lines = list(lines)
    contact_lines = list(contact_lines)

    for word in untex:
        command = r"\%s" % word
        lines = [l.replace(command, word) for l in lines]
        contact_lines = [l.replace(command, word) for l in contact_lines]

    lines.insert(0, "<div id='container'><div id='contact'>%s</div>\n" %
                 ("<p>" + "</p><p>".join(contact_lines) + "</p>"))
    lines.insert(1, "<div>")

    return "".join(lines)
def _split_contact_lines(lines):
    """Return (remaining_lines, contact_lines) for the raw input lines.

    Contact details are expected to begin on the fourth line and extend
    until the next blank line. Each contact line is split on bullets and the
    parts are stripped. The blank line that ends the contact block is kept
    in remaining_lines.
    """
    header, rest = lines[:3], lines[3:]

    contact_lines = []
    consumed = 0
    for line in rest:
        parts = [x.strip() for x in line.split("•")]
        if parts == ['']:
            # Blank line: end of the contact block.
            break
        consumed += 1
        contact_lines.extend(parts)

    return header + rest[consumed:], contact_lines


def main():
    """Read a Markdown resume from stdin and print the pre-processed result.

    The output format is taken from the first command-line argument.

    Raises Exception("No format specified") when no format argument is
    given; an unknown format is reported by processor.process.
    """
    try:
        fmt = sys.argv[1]
    except IndexError:
        raise Exception("No format specified")

    lines, contact_lines = _split_contact_lines(sys.stdin.readlines())

    # NOTE(review): sys.argv[1:] also passes the format name itself as the
    # first extra argument; kept as-is so handlers see the same arguments.
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(processor.process(fmt, lines, contact_lines, *sys.argv[1:]))
# Handlers are looked up by function name. Nothing else in this file
# registers the built-in formats, so do it here; registering the same
# function again is harmless because it just reassigns the same dict key.
processor.register(tex)
processor.register(html)

if __name__ == '__main__':
    main()