Skip to content

Commit

Permalink
Merge pull request #43 from bwnek/fix/improve-character-encoding
Browse files Browse the repository at this point in the history
fix: improve char encoding
  • Loading branch information
rskrzypczak committed Mar 14, 2024
2 parents b561502 + 2bdeb9e commit 30932ed
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 10 deletions.
17 changes: 12 additions & 5 deletions lib/Document.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

namespace YetiForcePDF;

use Exception;
use YetiForcePDF\Html\Parser;
use YetiForcePDF\Layout\FooterBox;
use YetiForcePDF\Layout\HeaderBox;
use YetiForcePDF\Layout\WatermarkBox;
Expand Down Expand Up @@ -95,7 +97,7 @@ class Document
protected $objects = [];

/**
* @var \YetiForcePDF\Html\Parser
* @var Parser
*/
protected $htmlParser;

Expand Down Expand Up @@ -786,14 +788,19 @@ public function removeObject(PdfObject $object): self
* Load html string.
*
* @param string $html
* @param string $inputEncoding
* @param string $fromEncoding
*
* @return $this
* @throws Exception
*/
public function loadHtml(string $html, string $inputEncoding = 'UTF-8')
public function loadHtml(string $html, string $fromEncoding = 'UTF-8'): self
{
$this->htmlParser = (new \YetiForcePDF\Html\Parser())->setDocument($this)->init();
$this->htmlParser->loadHtml($html, $inputEncoding);
if ($fromEncoding === '') {
throw new Exception('Encoding can not be empty');
}

$this->htmlParser = (new Parser())->setDocument($this)->init();
$this->htmlParser->loadHtml($html, $fromEncoding);

return $this;
}
Expand Down
16 changes: 11 additions & 5 deletions lib/Html/Parser.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,13 @@

namespace YetiForcePDF\Html;

use YetiForcePDF\Base;
use YetiForcePDF\Layout\PageGroupBox;

/**
* Class Parser.
*/
class Parser extends \YetiForcePDF\Base
class Parser extends Base
{
/**
* @var \DOMDocument
Expand Down Expand Up @@ -61,13 +62,18 @@ protected function cleanUpHtml(string $html)
* @param string $html
* @param string $fromEncoding
*
* @return \YetiForcePDF\Html\Parser
* @return Parser
*/
public function loadHtml(string $html, string $fromEncoding = ''): self
public function loadHtml(string $html, string $fromEncoding): self
{
$html = htmlspecialchars_decode($html, ENT_HTML5);
$this->html = htmlspecialchars_decode($html, ENT_HTML5);
$this->html = $this->cleanUpHtml($html);
$this->html = html_entity_decode($this->html, ENT_COMPAT, $fromEncoding);

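// Convert map layout: [start code point, end code point, offset, mask]
// 0x80 - first non-ASCII code point (plain ASCII is left untouched)
// 0x10FFFF - last valid Unicode code point
// 0 - offset added to each code point (none)
// ~0 - bit mask with every bit set, so each code point is emitted unchanged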
$this->html = mb_encode_numericentity($this->html, [0x80, 0x10FFFF, 0, ~0], $fromEncoding);

return $this;
}
Expand Down

0 comments on commit 30932ed

Please sign in to comment.