Skip to content
This repository has been archived by the owner on Apr 20, 2023. It is now read-only.

Introduce transformString() function to encapsulate the transformation process in multibyte environments. #161

Merged
merged 1 commit into from
Aug 23, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/Facebook/InstantArticles/Elements/GeoTag.php
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,10 @@ public function toDOMElement($document = null)

// Required script field
if ($this->script) {
$element->appendChild($document->createTextNode($this->script));
// script may contain html entities so import it as CDATA
$element->appendChild(
$element->ownerDocument->importNode(new \DOMCdataSection($this->script), true)
);
}

return $element;
Expand Down
30 changes: 30 additions & 0 deletions src/Facebook/InstantArticles/Transformer/Transformer.php
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,36 @@ public function getInstantArticle()
return $this->instantArticle;
}

/**
 * Parses an HTML string into a DOMDocument and hands it to transform().
 *
 * libxml's internal error handling is switched on while the markup is
 * loaded and the previous setting is restored afterwards, even when loading
 * throws, so this call never leaks a changed global libxml state.
 *
 * When mbstring is available the content is converted from $encoding to
 * HTML entities before parsing. Otherwise the content is wrapped in a
 * minimal document whose charset meta tag announces $encoding.
 *
 * NOTE(review): the 'HTML-ENTITIES' target of mb_convert_encoding() is
 * deprecated as of PHP 8.2 - revisit before relying on newer PHP versions.
 *
 * @param InstantArticle $context  Context object forwarded unchanged to transform().
 * @param string         $content  HTML markup to be transformed.
 * @param string         $encoding Character encoding of $content; defaults to "utf-8".
 *
 * @return mixed The value returned by transform() for the parsed document.
 */
public function transformString($context, $content, $encoding = "utf-8")
{
    $libxml_previous_state = libxml_use_internal_errors(true);
    try {
        $document = new \DOMDocument('1.0');
        if (function_exists('mb_convert_encoding')) {
            $document->loadHTML(mb_convert_encoding($content, 'HTML-ENTITIES', $encoding));
        } else {
            $log = \Logger::getLogger('facebook-instantarticles-transformer');
            $log->debug(
                'Your content encoding is "' . $encoding . '" ' .
                'but your PHP environment does not have mbstring. Trying to load your content with using meta tags.'
            );
            // wrap the content with charset meta tags
            $document->loadHTML(
                '<html><head>' .
                '<meta http-equiv="Content-Type" content="text/html; charset=' . $encoding . '">' .
                '</head><body>' . $content . '</body></html>'
            );
        }
    } finally {
        // Always drop the collected parse errors and restore the caller's
        // libxml setting, whether or not loading succeeded.
        libxml_clear_errors();
        libxml_use_internal_errors($libxml_previous_state);
    }
    return $this->transform($context, $document);
}

/**
* @param InstantArticle $context
* @param \DOMNode $node
Expand Down
93 changes: 93 additions & 0 deletions tests/Facebook/InstantArticles/Transformer/TransformerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,51 @@ protected function setUp()
);
}

/**
 * Checks that transformString() turns a plain ASCII <h1> string into the
 * title of the Header passed as context.
 */
public function testTransformString()
{
    $json_file = file_get_contents('src/Facebook/InstantArticles/Parser/instant-articles-rules.json');

    $transformer = new Transformer();
    $transformer->loadRules($json_file);

    $title_html_string = '<h1>Title String</h1>';
    $header = Header::create();
    $transformer->transformString($header, $title_html_string);

    $this->assertEquals('<h1>Title String</h1>', $header->getTitle()->render());
}

/**
 * Checks that multibyte UTF-8 characters in an <h1> string survive
 * transformString() when the default encoding is used.
 */
public function testTransformStringWithMultibyteUTF8Content()
{
    $json_file = file_get_contents('src/Facebook/InstantArticles/Parser/instant-articles-rules.json');

    $transformer = new Transformer();
    $transformer->loadRules($json_file);

    $title_html_string = '<h1>Test:あÖÄÜöäü</h1>';
    $header = Header::create();
    $transformer->transformString($header, $title_html_string);

    $this->assertEquals('<h1>Test:あÖÄÜöäü</h1>', $header->getTitle()->render());
}

/**
 * Checks that an EUC-JP encoded <h1> string is rendered as the expected
 * UTF-8 title when the source encoding is passed to transformString().
 */
public function testTransformStringWithMultibyteNonUTF8Content()
{
    // The EUC-JP input is produced with mbstring; without the extension the
    // call below would be a fatal error rather than a test failure.
    if (!function_exists('mb_convert_encoding')) {
        $this->markTestSkipped('The mbstring extension is required to build the EUC-JP input.');
    }

    $json_file = file_get_contents('src/Facebook/InstantArticles/Parser/instant-articles-rules.json');

    $transformer = new Transformer();
    $transformer->loadRules($json_file);

    $title_html_string = mb_convert_encoding('<h1>Test:あÖÄÜöäü</h1>', 'euc-jp', 'utf-8');
    $header = Header::create();
    $transformer->transformString($header, $title_html_string, 'euc-jp');

    $this->assertEquals('<h1>Test:あÖÄÜöäü</h1>', $header->getTitle()->render());
}

public function testSelfTransformerContent()
{
$json_file = file_get_contents('src/Facebook/InstantArticles/Parser/instant-articles-rules.json');
Expand All @@ -64,6 +109,54 @@ public function testSelfTransformerContent()
$this->assertEquals($html_file, $result);
}

/**
 * Round-trips the multibyte UTF-8 example article through
 * transformString() and compares the rendered result with the input file.
 */
public function testSelfTransformerMultibyteContent()
{
    // The comparison below normalizes both sides with mbstring; without the
    // extension the call would be a fatal error rather than a test failure.
    if (!function_exists('mb_convert_encoding')) {
        $this->markTestSkipped('The mbstring extension is required to normalize the compared markup.');
    }

    $json_file = file_get_contents('src/Facebook/InstantArticles/Parser/instant-articles-rules.json');

    $instant_article = InstantArticle::create();
    $transformer = new Transformer();
    $transformer->loadRules($json_file);

    $html_file = file_get_contents(__DIR__ . '/instant-article-example-multibyte.html');

    $transformer->transformString($instant_article, $html_file, 'utf-8');
    $instant_article->withCanonicalURL('http://foo.com/article.html');
    $instant_article->addMetaProperty('op:generator:version', '1.0.0');
    $instant_article->addMetaProperty('op:generator:transformer:version', '1.0.0');
    $result = $instant_article->render('', true)."\n";

    // Some fragments are still written as HTML entities after the
    // transformation, so normalize both strings to HTML entities before
    // comparing them.
    $this->assertEquals(
        mb_convert_encoding($html_file, 'HTML-ENTITIES', 'utf-8'),
        mb_convert_encoding($result, 'HTML-ENTITIES', 'utf-8')
    );
}

/**
 * Round-trips the EUC-JP example article through transformString() and
 * compares the rendered result with the input file.
 */
public function testSelfTransformerNonUTF8Content()
{
    // The comparison below normalizes both sides with mbstring; without the
    // extension the call would be a fatal error rather than a test failure.
    if (!function_exists('mb_convert_encoding')) {
        $this->markTestSkipped('The mbstring extension is required to normalize the compared markup.');
    }

    $json_file = file_get_contents('src/Facebook/InstantArticles/Parser/instant-articles-rules.json');

    $instant_article = InstantArticle::create();
    $transformer = new Transformer();
    $transformer->loadRules($json_file);

    $html_file = file_get_contents(__DIR__ . '/instant-article-example-nonutf8.html');

    $transformer->transformString($instant_article, $html_file, 'euc-jp');
    $instant_article->withCanonicalURL('http://foo.com/article.html');
    $instant_article->addMetaProperty('op:generator:version', '1.0.0');
    $instant_article->addMetaProperty('op:generator:transformer:version', '1.0.0');
    $result = $instant_article->render('', true)."\n";

    // Some fragments are still written as HTML entities after the
    // transformation, so normalize both strings to HTML entities before
    // comparing them. The input file is read as EUC-JP and the rendered
    // result as UTF-8.
    $this->assertEquals(
        mb_convert_encoding($html_file, 'HTML-ENTITIES', 'euc-jp'),
        mb_convert_encoding($result, 'HTML-ENTITIES', 'utf-8')
    );
}

public function testTransformerAddAndGetRules()
{
$transformer = new Transformer();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
<html>
<head>
<link rel="canonical" href="http://foo.com/article.html"/>
<meta charset="utf-8"/>
<meta property="op:generator" content="facebook-instant-articles-sdk-php"/>
<meta property="op:generator:version" content="1.0.0"/>
<meta property="op:generator:transformer" content="facebook-instant-articles-sdk-php"/>
<meta property="op:generator:transformer:version" content="1.0.0"/>
<meta property="op:markup_version" content="v1.0"/>
</head>
<body>
<article>
<header>
<figure>
<img src="https://jpeg.org/images/jpegls-home.jpg"/>
<figcaption><h1>イメージ名</h1>テキストノード<cite>イメージキャプション</cite></figcaption>
</figure>
<h1>ビッグトップ <b>タイトル</b></h1>
<h2>スモール <b>サブタイトル</b></h2>
<time class="op-published" datetime="1984-08-14T19:30:00+00:00">August 14th, 7:30pm</time>
<time class="op-modified" datetime="2016-02-10T10:00:00+00:00">February 10th, 10:00am</time>
<address><a href="#" title="Title of author">著者名</a>
著者に関する詳しい情報
さらなる詳細
</address>
<address><a href="http://facebook.com/author" rel="facebook">FB上の著者</a>
facebook内の著者情報
</address>
<address><a title="PHP Programmer">開発者</a>
</address>
<h3 class="op-kicker">記事のキッカー</h3>
<ul class="op-sponsors">
<li>
<a href="http://facebook.com/my-sponsor" rel="facebook"></a>
</li>
</ul>
</header>
<p>パラグラフ内のテキストのテストです。</p>
<figure data-feedback="fb:likes">
<img src="http://mydomain.com/path/to/img.jpg"/>
<audio title="&#x30AA;&#x30FC;&#x30C7;&#x30A3;&#x30AA;&#x30BF;&#x30A4;&#x30C8;&#x30EB;" autoplay="autoplay" muted="muted">
<source src="http://foo.com/mp3"/>
</audio>
</figure>
<figure data-feedback="fb:comments">
<img src="http://mydomain.com/path/to/img.jpg"/>
<script type="application/json" class="op-geotag">
{
"type": "Feature",
"geometry": {
"type": "Point",
"coordinates": [23.166667, 89.216667]
},
"properties": {
"title": "バングラデシュ ジョソール県",
"radius": 750000,
"pivot": true,
"style": "satellite",
}
}
</script>
<audio title="&#x30AA;&#x30FC;&#x30C7;&#x30A3;&#x30AA;&#x30BF;&#x30A4;&#x30C8;&#x30EB;" autoplay="autoplay" muted="muted">
<source src="http://foo.com/mp3"/>
</audio>
</figure>
<figure data-feedback="fb:likes,fb:comments">
<img src="https://jpeg.org/images/jpegls-home.jpg"/>
<figcaption><h1>イメージ名</h1>テキストノード<cite>イメージキャプション</cite></figcaption>
</figure>
<p>第2段落内のテキストのテストです。</p>
<figure class="op-slideshow">
<figure>
<img src="https://jpeg.org/images/jpegls-home.jpg"/>
</figure>
<figure>
<img src="https://jpeg.org/images/jpegls-home2.jpg"/>
</figure>
<figure>
<img src="https://jpeg.org/images/jpegls-home3.jpg"/>
</figure>
<figcaption><h1>イメージ名</h1>テキストノード<cite>イメージキャプション</cite></figcaption>
<audio title="&#x30AA;&#x30FC;&#x30C7;&#x30A3;&#x30AA;&#x30BF;&#x30A4;&#x30C8;&#x30EB;" autoplay="autoplay" muted="muted">
<source src="http://foo.com/mp3"/>
</audio>
</figure>
<ol>
<li>最初のリスト項目</li>
<li>パラグラフ</li>
<li>spanタグ</li>
<li>div内のテキスト?</li>
<li>li上のその他の <a href="#">段落</a></li>
<li>最後のリスト項目</li>
</ol>
<p>段落内のテキストのテストです。</p>
<figure class="op-interactive">
<iframe src="http://example.com/custom-interactive" class="column-width" height="60">
<h1>カスタムコード</h1>
<script>alert("テスト");</script></iframe>
<figcaption>このグラフィックは素晴らしい。</figcaption>
</figure>
<figure class="op-ad">
<iframe src="http://foo.com"></iframe>
</figure>
<blockquote>blockquoteは記事の中で<b>magic</b>を作ります。</blockquote>
<figure class="op-map">
<script type="application/json" class="op-geotag">
{
"type": "Feature",
"geometry":
{
"type": "Point",
"coordinates": [23.166667, 89.216667]
},
"properties":
{
"title": "バングラデシュ ジョソール県",
"radius": 750000,
"pivot": true,
"style": "satellite",
}
}
</script>
<figcaption class="op-vertical-above"><h1 class="op-vertical-above op-center">キャプション用タイトル</h1><h2 class="op-vertical-below op-right">キャプション用サブタイトル</h2>


<cite class="op-vertical-center op-left">キャプション内のクレジット</cite></figcaption>
<audio title="audio title" autoplay="autoplay" muted="muted">
<source src="http://foo.com/mp3"/>
</audio>
</figure>
<aside>
私たちはどこで成長させるか、何を成長させるか、どうやって成長させるか、について、もっと効率的になれるはずです。
<cite>フルーツストカンパニー</cite></aside>
<p>第2段落内のテキストのテストです。</p>
<figure class="op-tracker">
<iframe>
<h1>カスタムコード</h1>
<script>alert("テスト");</script></iframe>
</figure>
<figure class="op-tracker">
<iframe>
<h1>トラッカー用スクリプト</h1>
<div><script>alert("テスト");</script></div>
</iframe>
</figure>
<figure class="op-interactive">
<iframe class="no-margin">
<h1>ソーシャル埋め込み用カスタムコード</h1>
<script>alert("テスト");</script></iframe>
</figure>
<figure data-mode="fullscreen" data-feedback="fb:likes,fb:comments">
<video data-fb-disable-autoplay="data-fb-disable-autoplay" controls="controls">
<source src="http://mydomain.com/path/to/video.mp4" type="video/mp4"/>
</video>
<figcaption class="op-vertical-below"><h1>ビデオタイトル</h1>

<cite>属性ソース</cite></figcaption>
<script type="application/json" class="op-geotag">
{
"type": "Feature",
"geometry": {
"type": "Point",
"coordinates": [ [23.166667, 89.216667], [23.166667, 89.216667] ]
},
"properties": {
"title": "バングラデシュ ジョソール県",
"radius": 750000,
"pivot": true,
"style": "satellite",
}
}
</script>
</figure>
<ul class="op-related-articles" title="The related ones in the middle">
<li>
<a href="http://example.com/article.html"></a>
</li>
<li data-sponsored="true">
<a href="http://example.com/sponsored-article.html"></a>
</li>
<li>
<a href="http://example.com/another-article.html"></a>
</li>
</ul>
<footer>
<aside>
<p><a href="http://facebook.com/author" rel="facebook">著者</a>へのクレジット情報</p>
<p>クレジットとしてのパラグラフ</p>
</aside>
<ul class="op-related-articles" title="The related ones in the footer">
<li>
<a href="http://example.com/article.html"></a>
</li>
<li data-sponsored="true">
<a href="http://example.com/sponsored-article.html"></a>
</li>
<li>
<a href="http://example.com/another-article.html"></a>
</li>
</ul>
</footer>
</article>
</body>
</html>
Loading