Permalink
Browse files

wordpress cleaning improvements for hyde

  • Loading branch information...
davefowler committed Mar 30, 2013
1 parent 7dd5c92 commit 95584798feb2a2ef09e86817584754c9059d6616
Showing with 26 additions and 17 deletions.
  1. +12 −5 config.yaml
  2. +14 −12 exitwp.py
@@ -28,15 +28,22 @@ taxonomies:
# Filter taxonomies entries.
entry_filter: {category: Uncategorized}
# Rename taxonomies when writing jekyll output format.
name_mapping: {category: categories, post_tag: tags}
name_mapping: {category: tags, }

# Replace certain patterns in body
# Simply replace the key with its value
body_replace: {
# '<pre.*?lang="(.*?)".*?>': '\n{% codeblock \1 lang:\1 %}\n',
# '<pre.*?>': '\n{% codeblock %}\n',
# '</pre>': '\n{% endcodeblock %}\n',

# '<pre.*?lang="(.*?)".*?>': '\n{% codeblock \1 lang:\1 %}\n',
# '<pre.*?>': '\n{% codeblock %}\n',
# '</pre>': '\n{% endcodeblock %}\n',
' ': ' ',
' ': ' ',
'': '-',
'': '-',
'{{': '&#38;&#35;123;&#38;&#35;123;',
'}}': '&#38;&#35;125;&#38;&#35;125;',
'\[\/sourcecode\]': '</pre>', #'{% endcodeblock %}',
'\[sourcecode language=(.*?)\]': '<pre>', # '{% codeblock lang:\1 %}',
# '[python]': '{% codeblock lang:python %}',
# '[/python]': '{% endcodeblock %}',
}
@@ -16,7 +16,7 @@
'''
exitwp - Wordpress xml exports to Jekyll blog format conversion
Tested with Wordpress 3.3.1 and jekyll 0.11.2
Tested with Wordpress 3.3.1 and hyde 0.11.2
'''
######################################################
@@ -98,7 +98,7 @@ def gi(q, unicode_wrap=True):
tag = q
try:
result = i.find(ns[namespace] + tag).text
print result
#print result
except AttributeError:
result = "No Content Found"
if unicode_wrap:
@@ -107,7 +107,6 @@ def gi(q, unicode_wrap=True):

body = gi('content:encoded')
for key in body_replace:
# body = body.replace(key, body_replace[key])
body = re.sub(key, body_replace[key], body)

img_srcs = []
@@ -145,13 +144,13 @@ def gi(q, unicode_wrap=True):
}


def write_jekyll(data, target_format):
def write_hyde(data, target_format):

sys.stdout.write("writing")
item_uids = {}
attachments = {}

def get_blog_path(data, path_infix='jekyll'):
def get_blog_path(data, path_infix='hyde'):
name = data['header']['link']
name = re.sub('^https?', '', name)
name = re.sub('[^A-Za-z0-9_.-]', '', name)
@@ -166,6 +165,9 @@ def get_full_dir(dir):
return full_dir

def open_file(file):
    '''Open ``file`` for UTF-8 text writing, creating parent directories.

    ``file`` is the path of the output file. Any missing directories in
    its parent path are created first. Returns the writable file object
    produced by ``codecs.open``; the caller is responsible for closing it.

    Raises OSError if the parent directory cannot be created, and
    whatever ``codecs.open`` raises if the file itself cannot be opened.
    '''
    directory = os.path.dirname(file)
    # dirname is '' for a bare filename (file in the current directory);
    # os.makedirs('') raises, so only create when there is a directory part.
    if directory and not os.path.isdir(directory):
        try:
            os.makedirs(directory)
        except OSError:
            # The directory may have been created between the check and
            # the makedirs call; only propagate genuine failures.
            if not os.path.isdir(directory):
                raise
    f = codecs.open(file, 'w', encoding='utf-8')
    return f

@@ -180,8 +182,7 @@ def get_item_uid(item, date_prefix=False, namespace=''):
uid = []
if (date_prefix):
dt = datetime.strptime(item['date'], date_fmt)
uid.append(dt.strftime('%Y-%m-%d'))
uid.append('-')
uid.append(dt.strftime('%Y/%m/%d/'))
s_title = item['slug']
if s_title is None or s_title == '':
s_title = item['title']
@@ -190,6 +191,7 @@ def get_item_uid(item, date_prefix=False, namespace=''):
s_title = s_title.replace(' ', '_')
s_title = re.sub('[^a-zA-Z0-9_-]', '', s_title)
uid.append(s_title)
uid.append('/index')
fn = ''.join(uid)
n = 1
while fn in item_uids[namespace]:
@@ -257,7 +259,7 @@ def get_attachment_path(src, dir, dir_prefix='a'):
out = None
yaml_header = {
'title': i['title'],
'date': datetime.strptime(i['date'], '%Y-%m-%d %H:%M:%S'),
'date': i['date'][0:10],
'slug': i['slug'],
'wordpress_id': int(i['wp_id']),
'comments': i['comments'],
@@ -267,9 +269,9 @@ def get_attachment_path(src, dir, dir_prefix='a'):

if i['type'] == 'post':
i['uid'] = get_item_uid(i, date_prefix=True)
fn = get_item_path(i, dir='_posts')
fn = get_item_path(i, dir='content')
out = open_file(fn)
yaml_header['layout'] = 'post'
#yaml_header['layout'] = 'post'
elif i['type'] == 'page':
i['uid'] = get_item_uid(i)
# Chase down parent path, if any
@@ -283,7 +285,7 @@ def get_attachment_path(src, dir, dir_prefix='a'):
break
fn = get_item_path(i, parentpath)
out = open_file(fn)
yaml_header['layout'] = 'page'
#yaml_header['layout'] = 'page'
elif i['type'] in item_type_filter:
pass
else:
@@ -330,6 +332,6 @@ def toyaml(data):
wp_exports = glob(wp_exports + '/*.xml')
for wpe in wp_exports:
data = parse_wp_xml(wpe)
write_jekyll(data, target_format)
write_hyde(data, target_format)

print 'done'

0 comments on commit 9558479

Please sign in to comment.