Skip to content
This repository has been archived by the owner on Sep 20, 2021. It is now read-only.

Commit

Permalink
Extend Unicode support.
Browse files: browse the repository at this point in the history.
  • Loading branch information
Hywan committed Jun 26, 2013
1 parent 71b32cb commit 8276904
Showing 1 changed file with 10 additions and 10 deletions.
20 changes: 10 additions & 10 deletions Llk/Lexer.php
Original file line number | Diff line number | Diff line change
Expand Up @@ -114,7 +114,7 @@ public function lexMe ( $text, Array $tokens ) {
throw new \Hoa\Compiler\Exception\UnrecognizedToken(
'Unrecognized token "%s" at line 1 and column %d:' .
"\n" . '%s' . "\n" . str_repeat(' ', $offset) . '↑',
0, array($this->_text[0], $offset + 1, $text),
0, array(mb_substr($this->_text, 0, 1), $offset + 1, $text),
1, $offset
);

Expand All @@ -125,7 +125,7 @@ public function lexMe ( $text, Array $tokens ) {
}

$offset += $nextToken['length'];
$this->_text = substr($this->_text, $nextToken['length']);
$this->_text = mb_substr($this->_text, $nextToken['length']);
}

$tokenized[] = array(
Expand All @@ -150,7 +150,7 @@ protected function nextToken ( ) {

$tokenArray = &$this->_tokens[$this->_lexerState];

foreach($tokenArray as $fullLexeme => $regexp) {
foreach($tokenArray as $fullLexeme => $regex) {

if(false !== strpos($fullLexeme, ':'))
list($lexeme, $nextState) = explode(':', $fullLexeme, 2);
Expand All @@ -160,7 +160,7 @@ protected function nextToken ( ) {
$nextState = $this->_lexerState;
}

$out = $this->matchLexeme($lexeme, $regexp);
$out = $this->matchLexeme($lexeme, $regex);

if(null !== $out) {

Expand All @@ -180,25 +180,25 @@ protected function nextToken ( ) {
*
* @access protected
* @param string $lexeme Name of the lexeme.
* @param string $regexp Regular expression describing the lexeme.
* @param string $regex Regular expression describing the lexeme.
* @return array
* @throw \Hoa\Compiler\Exception\Lexer
*/
protected function matchLexeme ( $lexeme, $regexp ) {
protected function matchLexeme ( $lexeme, $regex ) {

$_regexp = str_replace('#', '\#', $regexp);
$_regex = str_replace('#', '\#', $regex);

if(0 !== preg_match('#^(?:' . $_regexp . ')#u', $this->_text, $matches)) {
if(0 !== preg_match('#^(?:' . $_regex . ')#u', $this->_text, $matches)) {

if('' === $matches[0])
throw new \Hoa\Compiler\Exception\Lexer(
'A lexeme must not match an empty value, which is the ' .
'case of "%s" (%s).', 1, array($lexeme, $regexp));
'case of "%s" (%s).', 1, array($lexeme, $regex));

return array(
'token' => $lexeme,
'value' => $matches[0],
'length' => strlen($matches[0])
'length' => mb_strlen($matches[0])
);
}

Expand Down

0 comments on commit 8276904

Please sign in to comment.