Skip to content

Commit

Permalink
Merge pull request #3141 from mart-e/wp-caption-to-figure
Browse files Browse the repository at this point in the history
  • Loading branch information
justinmayer committed Oct 24, 2023
2 parents 0282c1d + 48166bd commit 620139c
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 1 deletion.
47 changes: 46 additions & 1 deletion pelican/tests/content/wordpressexport.xml
Original file line number Diff line number Diff line change
Expand Up @@ -685,7 +685,52 @@ proident, sunt in culpa qui officia deserunt mollit anim id est laborum.]]></con
<wp:meta_key>_edit_last</wp:meta_key>
<wp:meta_value><![CDATA[3]]></wp:meta_value>
</wp:postmeta>
</item>
</item>
<item>
<title>Caption on image</title>
<link>http://thisisa.test/?p=176</link>
<pubDate>Thu, 01 Jan 1970 00:00:00 +0000</pubDate>
<dc:creator>bob</dc:creator>
<guid isPermaLink="false">http://thisisa.test/?p=176</guid>
<description></description>
<content:encoded><![CDATA[Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
[caption attachment_id="42" align="aligncenter" width="300" caption="This is a pelican"]<img src="/theme/img/xpelican.png.pagespeed.ic.Rjep0025-y.png"/>[/caption]
[caption attachment_id="43" align="aligncenter" width="300"]<img src="/theme/img/xpelican-3.png.pagespeed.ic.m-NAIdRCOM.png" width="300" height="216" class="size-medium wp-image-1055" /> This also a pelican[/caption]
[caption attachment_id="44" align="aligncenter" width="300"]<a href="https://getpelican.com/"><img src="/theme/img/xpelican.png.pagespeed.ic.Rjep0025-y.png" alt=""/> Yet another pelican[/caption]
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.]]></content:encoded>
<excerpt:encoded><![CDATA[]]></excerpt:encoded>
<wp:post_id>176</wp:post_id>
<wp:post_date>2012-02-16 15:52:55</wp:post_date>
<wp:post_date_gmt>0000-00-00 00:00:00</wp:post_date_gmt>
<wp:comment_status>open</wp:comment_status>
<wp:ping_status>open</wp:ping_status>
<wp:post_name>caption-on-image</wp:post_name>
<wp:status>publish</wp:status>
<wp:post_parent>0</wp:post_parent>
<wp:menu_order>0</wp:menu_order>
<wp:post_type>post</wp:post_type>
<wp:post_password></wp:post_password>
<wp:is_sticky>0</wp:is_sticky>
<category domain="category" nicename="category-2"><![CDATA[Category 2]]></category>
<wp:postmeta>
<wp:meta_key>_edit_last</wp:meta_key>
<wp:meta_value><![CDATA[3]]></wp:meta_value>
</wp:postmeta>
</item>
<item>
<title>A custom post in category 4</title>
<link>http://thisisa.test/?p=175</link>
Expand Down
26 changes: 26 additions & 0 deletions pelican/tests/test_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,32 @@ def r(f):
escaped_quotes = re.search(r'\\[\'"“”‘’]', md)
self.assertFalse(escaped_quotes)

def test_convert_caption_to_figure(self):
    """Check that WordPress ``[caption]`` shortcodes do not survive conversion.

    Converts the "Caption on image" fixture post to Markdown and verifies
    that no ``[caption`` marker is left in the output, while every image
    source and every caption text from the fixture is still present.
    """
    def read_file(path):
        with open(path, encoding='utf-8') as handle:
            return handle.read()

    quiet_convert = mute(True)(fields2pelican)
    # Lazily select only the post(s) whose title marks the caption fixture.
    caption_posts = (
        post for post in self.posts
        if post[0].startswith("Caption on image")
    )
    expected_fragments = [
        '/theme/img/xpelican.png.pagespeed.ic.Rjep0025-y.png',
        '/theme/img/xpelican-3.png.pagespeed.ic.m-NAIdRCOM.png',
        '/theme/img/xpelican.png.pagespeed.ic.Rjep0025-y.png',
        'This is a pelican',
        'This also a pelican',
        'Yet another pelican',
    ]

    with temporary_folder() as out_dir:
        generated = [
            read_file(path)
            for path in quiet_convert(caption_posts, 'markdown', out_dir)
        ]
        markdown = generated[0]

        # The raw shortcode opener must be gone from the converted text.
        leftover = re.search(r'\[caption', markdown)
        self.assertIsNone(leftover)

        # The exact markup depends on the pandoc version:
        #   pandoc 2.x emits ![text](src)
        #   pandoc 3.x emits
        #       <figure>src<figcaption>text</figcaption></figure>
        # so only the presence of each source and caption text is checked.
        for fragment in expected_fragments:
            self.assertIn(fragment, markdown)


class TestBuildHeader(unittest.TestCase):
def test_build_header(self):
Expand Down
7 changes: 7 additions & 0 deletions pelican/tools/pelican_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,13 @@ def _multi_replace(dic, string):
return re.sub(pattern, lambda m: dic[m.group()], string)
content = _multi_replace(pre_tags, content)

# convert [caption] tags into <figure>
content = re.sub(
r'\[caption(?:.*?)(?:caption=\"(.*?)\")?\]'
r'((?:\<a(?:.*?)\>)?(?:\<img.*?\>)(?:\<\/a\>)?)\s?(.*?)\[\/caption\]',
r'<figure>\n\2\n<figcaption>\1\3</figcaption>\n</figure>',
content)

return content


Expand Down

0 comments on commit 620139c

Please sign in to comment.