Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

ADDED: Better paragraph parsing

  • Loading branch information...
commit dfd224d2db8387096ff6b163a621490b34d479da 1 parent ce442f2
@tractorcow tractorcow authored
View
24 code/WpParser.php
@@ -81,7 +81,25 @@ public function ParseTags($cats) {
public function ParseBlogContent($content) {
// Convert wordpress-style image links to silverstripe asset filepaths
- $content = preg_replace('/(http:\/\/[\w\.\/]+)?\/wp-content\/uploads\//', '/assets/Uploads/', $content);
+ $content = preg_replace('/(http:\/\/[\w\.\/]+)?\/wp-content\/uploads\//i', '/assets/Uploads/', $content);
+
+ // Split multi-line blocks into paragraphs
+ $split = preg_split('/\s*\n\s*\n\s*/im', $content);
+ $content = '';
+ foreach ($split as $paragraph)
+ {
+ $paragraph = trim($paragraph);
+ if (empty($paragraph))
+ continue;
+
+ if(preg_match('/^<p>.*/i', $paragraph))
+ $content .= $paragraph;
+ else
+ $content .= "<p>$paragraph</p>";
+ }
+
+ // Split single-line blocks with line-breaks
+ $content = nl2br($content);
return $content;
}
@@ -128,9 +146,9 @@ protected function parsePost($item, $namespaces) {
$wp_ns = $item->children($namespaces['wp']);
$content_ns = $item->children($namespaces['content']);
$dc_ns = $item->children($namespaces['dc']);
-
+
// Filter out non-post types (attachments, pages, etc)
- if(!in_array($wp_ns->post_type, self::$allowed_page_types))
+ if (!in_array($wp_ns->post_type, self::$allowed_page_types))
return null;
return array(
View
7 tests/WordpressExport.xml
@@ -55,8 +55,11 @@
<guid isPermaLink="false">http://localhost/?p=79</guid>
<description></description>
<content:encoded><![CDATA[
- <p>Here is a test paragraph</p>
- <p><img src="http://localhost/wp-content/uploads/2012/08/test-image-300x219.jpg" alt="Test Image" /></p>
+ Here is a test paragraph
+
+ <img src="http://localhost/wp-content/uploads/2012/08/test-image-300x219.jpg" alt="Test Image" />
+
+ Another paragraph
]]>
</content:encoded>
<excerpt:encoded><![CDATA[]]>
View
68 tests/WpParserTests.php
@@ -3,42 +3,50 @@
/**
* @package comments
*/
-class WpParserTests extends FunctionalTest {
-
+class WpParserTests extends FunctionalTest
+{
static $testImportFile = 'WordpressExport.xml';
-
- protected function buildTestParser()
- {
+
+ protected function buildTestParser() {
$path = dirname(__FILE__) . '/' . self::$testImportFile;
return new WpParser($path);
}
-
+
public function testRewriteImageURLs() {
$parser = $this->buildTestParser();
-
+
// Test parsing urls with hostname
- $imageIn = 'Here is an image <img src="http://localhost/wp-content/uploads/2012/11/image.jpg" /> that I uploaded';
- $imageOutExpected = 'Here is an image <img src="/assets/Uploads/2012/11/image.jpg" /> that I uploaded';
+ $imageIn = '<p>Here is an image <img src="http://localhost/wp-content/uploads/2012/11/image.jpg" /> that I uploaded</p>';
+ $imageOutExpected = '<p>Here is an image <img src="/assets/Uploads/2012/11/image.jpg" /> that I uploaded</p>';
$imageOut = $parser->ParseBlogContent($imageIn);
$this->assertEquals($imageOutExpected, $imageOut);
-
+
// Test parsing urls without hostname
- $imageIn = 'Here is an image <img src="/wp-content/uploads/2012/11/image.jpg" /> that I uploaded';
- $imageOutExpected = 'Here is an image <img src="/assets/Uploads/2012/11/image.jpg" /> that I uploaded';
+ $imageIn = '<p>Here is an image <img src="/wp-content/uploads/2012/11/image.jpg" /> that I uploaded</p>';
+ $imageOutExpected = '<p>Here is an image <img src="/assets/Uploads/2012/11/image.jpg" /> that I uploaded</p>';
$imageOut = $parser->ParseBlogContent($imageIn);
$this->assertEquals($imageOutExpected, $imageOut);
}
-
- public function testCanParsePosts()
- {
+
+ public function testBuildParagraphs() {
$parser = $this->buildTestParser();
- $posts = $parser->parse();
+ $expected = "<p>Here is a test paragraph</p><p><img src=\"/assets/Uploads/2012/08/test-image-300x219.jpg\" alt=\"Test Image\" /></p><p>Another paragraph</p>";
+ $input = "Here is a test paragraph\t\n\r\n\t\t\t\t<img src=\"/assets/Uploads/2012/08/test-image-300x219.jpg\" alt=\"Test Image\" />\n\n\t\t\t\t\n\n\t\t\t\tAnother paragraph";
+ $output = $parser->ParseBlogContent($input);
+
+ $this->assertEquals($expected, $output, "Failed parsing paragraphs. Expected \"" . addcslashes($expected, "\r\n\t") . "\", returned \"" . addcslashes($output, "\r\n\t") . "\"");
+ }
+
+ public function testCanParsePosts() {
+ $parser = $this->buildTestParser();
+ $posts = $parser->parse();
+
// Have we got a post? The page (non post) should not be parsed
$this->assertEquals(1, count($posts), 'Assert single post parsed');
- if(empty($posts))
+ if (empty($posts))
return;
-
+
// Did it parse correctly?
$firstPost = $posts[0];
$expectedPost = array(
@@ -46,36 +54,35 @@ public function testCanParsePosts()
'Link' => 'http://localhost/2011/08/test-post/',
'Author' => 'Test User',
'Tags' => 'bar, buzz',
- 'Content' => "<p>Here is a test paragraph</p>\n\t\t\t\t<p><img src=\"/assets/Uploads/2012/08/test-image-300x219.jpg\" alt=\"Test Image\" /></p>",
+ 'Content' => "<p>Here is a test paragraph</p><p><img src=\"/assets/Uploads/2012/08/test-image-300x219.jpg\" alt=\"Test Image\" /></p><p>Another paragraph</p>",
'URLSegment' => 'test-post',
'Date' => '2011-08-18 10:52:37',
'WordpressID' => 79,
'ProvideComments' => true,
'IsPublished' => true
);
- foreach($expectedPost as $key => $value)
+ foreach ($expectedPost as $key => $value)
{
$actual = trim($firstPost[$key]);
$expected = trim($value);
- $this->assertEquals($expected, $actual, "Parsing field $key expected \"".addcslashes($expected, "\r\n\t")."\", returned \"".addcslashes($actual, "\r\n\t")."\"");
+ $this->assertEquals($expected, $actual, "Parsing field $key expected \"" . addcslashes($expected, "\r\n\t") . "\", returned \"" . addcslashes($actual, "\r\n\t") . "\"");
}
}
-
- public function testCanParseComments()
- {
+
+ public function testCanParseComments() {
// Attempt setup
$parser = $this->buildTestParser();
$posts = $parser->parse();
- if(empty($posts))
+ if (empty($posts))
$this->fail('Could not setup test testCanParseComments');
$firstPost = $posts[0];
-
+
// Have we got a comment?
$comments = $firstPost['Comments'];
$this->assertEquals(1, count($comments), 'Assert single comment parsed');
- if(empty($comments))
+ if (empty($comments))
return;
-
+
// Did it parse correctly?
$firstComment = $comments[0];
$expectedComment = array(
@@ -87,11 +94,12 @@ public function testCanParseComments()
'Moderated' => true,
'WordpressID' => 9
);
- foreach($expectedComment as $key => $value)
+ foreach ($expectedComment as $key => $value)
{
$actual = trim($firstComment[$key]);
$expected = trim($value);
- $this->assertEquals($expected, $actual, "Parsing field $key expected \"".addcslashes($expected, "\r\n\t")."\", returned \"".addcslashes($actual, "\r\n\t")."\"");
+ $this->assertEquals($expected, $actual, "Parsing field $key expected \"" . addcslashes($expected, "\r\n\t") . "\", returned \"" . addcslashes($actual, "\r\n\t") . "\"");
}
}
+
}
Please sign in to comment.
Something went wrong with that request. Please try again.