Skip to content
This repository has been archived by the owner on May 21, 2024. It is now read-only.

Commit

Permalink
Parsing Turtle made 400 times faster at the expense of slightly highe…
Browse files Browse the repository at this point in the history
…r memory usage
  • Loading branch information
zozlak committed Jul 19, 2018
1 parent 53004d9 commit 545cb24
Showing 1 changed file with 14 additions and 14 deletions.
28 changes: 14 additions & 14 deletions lib/Parser/Turtle.php
Expand Up @@ -59,6 +59,7 @@
class Turtle extends Ntriples
{
protected $data;
protected $index;
protected $namespaces;
protected $subject;
protected $predicate;
Expand Down Expand Up @@ -95,7 +96,8 @@ public function parse($graph, $data, $format, $baseUri)
);
}

$this->data = $data;
$this->data = preg_split('//u', $data, null, PREG_SPLIT_NO_EMPTY);
$this->index = 0;
$this->namespaces = array();
$this->subject = null;
$this->predicate = null;
Expand Down Expand Up @@ -1168,22 +1170,16 @@ protected function processComment()
*/
protected function read()
{
if (!empty($this->data)) {
$c = mb_substr($this->data, 0, 1, "UTF-8");
if ($this->index < count($this->data)) {
$c = $this->data[$this->index];
// Keep tracks of which line we are on (0A = Line Feed)
if ($c == "\x0A") {
$this->line += 1;
$this->column = 1;
} else {
$this->column += 1;
}

if (version_compare(PHP_VERSION, '5.4.8', '<')) {
// versions of PHP prior to 5.4.8 treat "NULL" length parameter as 0
$this->data = mb_substr($this->data, 1, mb_strlen($this->data), "UTF-8");
} else {
$this->data = mb_substr($this->data, 1, null, "UTF-8");
}
$this->index += 1;
return $c;
} else {
return -1;
Expand All @@ -1197,8 +1193,8 @@ protected function read()
*/
protected function peek()
{
if (!empty($this->data)) {
return mb_substr($this->data, 0, 1, "UTF-8");
if ($this->index < count($this->data)) {
return $this->data[$this->index];
} else {
return -1;
}
Expand All @@ -1212,8 +1208,12 @@ protected function peek()
protected function unread($c)
{
# FIXME: deal with unreading new lines
// NOTE(review): the next two statements and the block that follows them both
// move the column back and restore $c. This looks like the pre-change and
// post-change bodies shown together without +/- diff markers -- confirm
// against the commit before treating this as a single runnable body.
$this->column -= mb_strlen($c, "UTF-8");
$this->data = $c . $this->data;
// Number of UTF-8 characters in $c that are being pushed back.
$len = mb_strlen($c, "UTF-8");
$this->column -= $len;
// Rewind the read position by the same number of characters.
$this->index -= $len;
// Split $c into individual UTF-8 characters and write each one back into
// the buffer, starting at the rewound index.
foreach (preg_split('//u', $c, null, PREG_SPLIT_NO_EMPTY) as $h => $i) {
$this->data[$this->index + $h] = $i;
}
}

/** @ignore */
Expand Down

0 comments on commit 545cb24

Please sign in to comment.