Skip to content

Commit

Permalink
Use inject-once approach for lexer
Browse files Browse the repository at this point in the history
Now the lexer is injected only once when creating the parser. Instead of

    $parser = new PHPParser_Parser;
    $parser->parse(new PHPParser_Lexer($code));
    $parser->parse(new PHPParser_Lexer($code2));

you write:

    $parser = new PHPParser_Parser(new PHPParser_Lexer);
    $parser->parse($code);
    $parser->parse($code2);
  • Loading branch information
nikic committed May 4, 2012
1 parent 271156f commit 3701e02
Show file tree
Hide file tree
Showing 12 changed files with 164 additions and 113 deletions.
17 changes: 13 additions & 4 deletions grammar/kmyacc.php.parser
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,15 @@ class #(-p)
protected $yyastk;
protected $yysp;
protected $lexer;

/**
* Creates a parser instance.
*
* The lexer is injected once here and kept in $this->lexer; parse() then
* restarts that same lexer for every piece of source code it is given.
*
* @param PHPParser_Lexer $lexer A lexer
*/
public function __construct(PHPParser_Lexer $lexer) {
$this->lexer = $lexer;
}
#endif
#if -t
protected static $yyproduction = array(
Expand Down Expand Up @@ -145,12 +154,12 @@ class #(-p)
* Parses PHP code into a node tree and prints out debugging information.
#endif
*
* @param PHPParser_Lexer $lexer A lexer
* @param string $code The source code to parse
*
* @return array Array of statements
*/
public function parse(PHPParser_Lexer $lexer) {
$this->lexer = $lexer;
public function parse($code) {
$this->lexer->startLexing($code);

$this->yysp = 0; // Stack pos
$yysstk = array($yystate = 0); // State stack
Expand All @@ -168,7 +177,7 @@ class #(-p)
$yyn = self::$yydefault[$yystate];
} else {
if ($yychar < 0) {
if (($yychar = $lexer->lex($yylval, $yyline, $yyDC)) < 0)
if (($yychar = $this->lexer->getNextToken($yylval, $yyline, $yyDC)) < 0)
$yychar = 0;
$yychar = $yychar < self::YYMAXLEX ?
self::$yytranslate[$yychar] : self::YYBADCH;
Expand Down
78 changes: 44 additions & 34 deletions lib/PHPParser/Lexer.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,29 @@ class PHPParser_Lexer
protected $pos;
protected $line;

protected static $tokenMap;
protected static $dropTokens = array(
T_WHITESPACE => 1, T_COMMENT => 1, T_OPEN_TAG => 1
);
protected $tokenMap;
protected $dropTokens;

/**
 * Sets up a lexer instance.
 *
 * No source code is required at this point: only the lookup tables are built
 * here. The code to lex is handed over later through startLexing().
 */
public function __construct() {
    // translation table from PHP's internal token ids to PHPParser token ids
    $this->tokenMap = $this->createTokenMap();

    // tokens that are skipped while lexing; the map is only ever queried with
    // isset(), so the value 1 is a mere placeholder
    $this->dropTokens = array(
        T_WHITESPACE => 1,
        T_COMMENT    => 1,
        T_OPEN_TAG   => 1,
    );
}

/**
* Initializes the lexer for lexing the provided source code.
*
* @param string $code
* @param string $code The source code to lex
*
* @throws PHPParser_Error on lexing errors (unterminated comment or unexpected character)
*/
public function __construct($code) {
self::initTokenMap();

public function startLexing($code) {
$this->resetErrors();
$this->tokens = @token_get_all($code);
$this->handleErrors();
Expand Down Expand Up @@ -71,7 +79,7 @@ protected function handleErrors() {
*
* @return int Token id
*/
public function lex(&$value = null, &$line = null, &$docComment = null) {
public function getNextToken(&$value = null, &$line = null, &$docComment = null) {
$docComment = null;

while (isset($this->tokens[++$this->pos])) {
Expand All @@ -93,10 +101,10 @@ public function lex(&$value = null, &$line = null, &$docComment = null) {

if (T_DOC_COMMENT === $token[0]) {
$docComment = $token[1];
} elseif (!isset(self::$dropTokens[$token[0]])) {
} elseif (!isset($this->dropTokens[$token[0]])) {
$value = $token[1];
$line = $token[2];
return self::$tokenMap[$token[0]];
return $this->tokenMap[$token[0]];
}
}
}
Expand Down Expand Up @@ -138,35 +146,37 @@ public function handleHaltCompiler() {
}

/**
* Initializes the token map.
* Creates the token map.
*
* The token map maps the PHP internal token identifiers
* to the identifiers used by the Parser. Additionally it
* maps T_OPEN_TAG_WITH_ECHO to T_ECHO and T_CLOSE_TAG to ';'.
*
* @return array The token map
*/
protected static function initTokenMap() {
if (!self::$tokenMap) {
self::$tokenMap = array();

// 256 is the minimum possible token number, as everything below
// it is an ASCII value
for ($i = 256; $i < 1000; ++$i) {
// T_DOUBLE_COLON is equivalent to T_PAAMAYIM_NEKUDOTAYIM
if (T_DOUBLE_COLON === $i) {
self::$tokenMap[$i] = PHPParser_Parser::T_PAAMAYIM_NEKUDOTAYIM;
// T_OPEN_TAG_WITH_ECHO with dropped T_OPEN_TAG results in T_ECHO
} elseif(T_OPEN_TAG_WITH_ECHO === $i) {
self::$tokenMap[$i] = PHPParser_Parser::T_ECHO;
// T_CLOSE_TAG is equivalent to ';'
} elseif(T_CLOSE_TAG === $i) {
self::$tokenMap[$i] = ord(';');
// and the others can be mapped directly
} elseif ('UNKNOWN' !== ($name = token_name($i))
&& defined($name = 'PHPParser_Parser::' . $name)
) {
self::$tokenMap[$i] = constant($name);
}
protected function createTokenMap() {
    $map = array();

    // token ids start at 256; every lower number is the ASCII value of a
    // single-character token
    for ($id = 256; $id < 1000; ++$id) {
        if (T_DOUBLE_COLON === $id) {
            // T_DOUBLE_COLON is just another name for T_PAAMAYIM_NEKUDOTAYIM
            $map[$id] = PHPParser_Parser::T_PAAMAYIM_NEKUDOTAYIM;
            continue;
        }

        if (T_OPEN_TAG_WITH_ECHO === $id) {
            // the T_OPEN_TAG part is dropped, which leaves a plain T_ECHO
            $map[$id] = PHPParser_Parser::T_ECHO;
            continue;
        }

        if (T_CLOSE_TAG === $id) {
            // a closing tag acts like a ';'
            $map[$id] = ord(';');
            continue;
        }

        // every other token maps onto the parser constant of the same name,
        // provided the parser defines one
        $tokenName = token_name($id);
        if ('UNKNOWN' === $tokenName) {
            continue;
        }

        $constantName = 'PHPParser_Parser::' . $tokenName;
        if (defined($constantName)) {
            $map[$id] = constant($constantName);
        }
    }

    return $map;
}
}
62 changes: 40 additions & 22 deletions lib/PHPParser/Lexer/Emulative.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,30 +5,48 @@
*/
class PHPParser_Lexer_Emulative extends PHPParser_Lexer
{
protected static $keywords = array(
// PHP 5.4
'callable' => PHPParser_Parser::T_CALLABLE,
'insteadof' => PHPParser_Parser::T_INSTEADOF,
'trait' => PHPParser_Parser::T_TRAIT,
'__trait__' => PHPParser_Parser::T_TRAIT_C,
// PHP 5.3
'__dir__' => PHPParser_Parser::T_DIR,
'goto' => PHPParser_Parser::T_GOTO,
'namespace' => PHPParser_Parser::T_NAMESPACE,
'__namespace__' => PHPParser_Parser::T_NS_C,
);

protected $newKeywords;
protected $inObjectAccess;

public function __construct($code) {
public function __construct() {
    parent::__construct();

    // keywords the running PHP version does not know yet, keyed by their
    // lowercase spelling and mapped to the parser token they stand for
    $emulated = array();

    if (version_compare(PHP_VERSION, '5.4.0RC1', '<')) {
        // keywords introduced by PHP 5.4
        $emulated += array(
            'callable'  => PHPParser_Parser::T_CALLABLE,
            'insteadof' => PHPParser_Parser::T_INSTEADOF,
            'trait'     => PHPParser_Parser::T_TRAIT,
            '__trait__' => PHPParser_Parser::T_TRAIT_C,
        );

        if (version_compare(PHP_VERSION, '5.3.0', '<')) {
            // keywords introduced by PHP 5.3
            $emulated += array(
                '__dir__'       => PHPParser_Parser::T_DIR,
                'goto'          => PHPParser_Parser::T_GOTO,
                'namespace'     => PHPParser_Parser::T_NAMESPACE,
                '__namespace__' => PHPParser_Parser::T_NS_C,
            );
        }
    }

    $this->newKeywords = $emulated;
}

public function startLexing($code) {
$this->inObjectAccess = false;

// on PHP 5.4 don't do anything
if (version_compare(PHP_VERSION, '5.4.0RC1', '>=')) {
parent::__construct($code);
parent::startLexing($code);
} else {
$code = $this->preprocessCode($code);
parent::__construct($code);
parent::startLexing($code);
$this->postprocessTokens();
}
}
Expand Down Expand Up @@ -103,8 +121,8 @@ protected function postprocessTokens() {
);
} elseif ('NS' === $matches[1]) {
// a \ single char token is returned here and replaced by a
// PHPParser_Parser::T_NS_SEPARATOR token in ->lex(). This hacks around the
// limitations arising from T_NS_SEPARATOR not being defined on 5.3
// PHPParser_Parser::T_NS_SEPARATOR token in ->getNextToken(). This hacks around
// the limitations arising from T_NS_SEPARATOR not being defined on 5.3
$replace = array('\\');
} elseif ('NOWDOC' === $matches[1]) {
// decode the encoded nowdoc payload; pack('H*', ...) stands in for hex2bin(), which PHP 5.3 lacks
Expand Down Expand Up @@ -155,15 +173,15 @@ public function restoreContentCallback(array $matches) {
}
}

public function lex(&$value = null, &$line = null, &$docComment = null) {
$token = parent::lex($value, $line, $docComment);
public function getNextToken(&$value = null, &$line = null, &$docComment = null) {
$token = parent::getNextToken($value, $line, $docComment);

// replace new keywords by their respective tokens. This is not done
// if we currently are in an object access (e.g. in $obj->namespace
// "namespace" stays a T_STRING token and isn't converted to T_NAMESPACE)
if (PHPParser_Parser::T_STRING === $token && !$this->inObjectAccess) {
if (isset(self::$keywords[strtolower($value)])) {
return self::$keywords[strtolower($value)];
if (isset($this->newKeywords[strtolower($value)])) {
return $this->newKeywords[strtolower($value)];
}
// backslashes are replaced by T_NS_SEPARATOR tokens
} elseif (92 === $token) { // ord('\\')
Expand Down
17 changes: 13 additions & 4 deletions lib/PHPParser/Parser.php
Original file line number Diff line number Diff line change
Expand Up @@ -894,14 +894,23 @@ class PHPParser_Parser
protected $lexer;

/**
* Parses PHP code into a node tree.
* Creates a parser instance.
*
* @param PHPParser_Lexer $lexer A lexer
*/
public function __construct(PHPParser_Lexer $lexer) {
// keep the injected lexer; parse() restarts it for each piece of source code
$this->lexer = $lexer;
}

/**
* Parses PHP code into a node tree.
*
* @param string $code The source code to parse
*
* @return array Array of statements
*/
public function parse(PHPParser_Lexer $lexer) {
$this->lexer = $lexer;
public function parse($code) {
$this->lexer->startLexing($code);

$this->yysp = 0; // Stack pos
$yysstk = array($yystate = 0); // State stack
Expand All @@ -916,7 +925,7 @@ public function parse(PHPParser_Lexer $lexer) {
$yyn = self::$yydefault[$yystate];
} else {
if ($yychar < 0) {
if (($yychar = $lexer->lex($yylval, $yyline, $yyDC)) < 0)
if (($yychar = $this->lexer->getNextToken($yylval, $yyline, $yyDC)) < 0)
$yychar = 0;
$yychar = $yychar < self::YYMAXLEX ?
self::$yytranslate[$yychar] : self::YYBADCH;
Expand Down
14 changes: 1 addition & 13 deletions lib/PHPParser/Template.php
Original file line number Diff line number Diff line change
Expand Up @@ -25,20 +25,8 @@ public function __construct(PHPParser_Parser $parser, $template) {
* @return PHPParser_Node[] Statements
*/
public function getStmts(array $placeholders) {
/*
* TODO This is evil.
* The lexer shouldn't be created in here, instead it should be a dependency, which
* basically means that we'd need to have a LexerFactory (which seems strange).
* An alternative solution would be to make the lexer work similar to how the parser
* works. I.e. one would instantiate the Lexer only once and then pass the results
* of ->lex() to the parser (which would then be the full tokens array). This design
* seems cleaner, but comes at the expense of higher memory consumption, as the token
* array can be quite large.
*/
return $this->parser->parse(
new PHPParser_Lexer_Emulative(
$this->getTemplateWithPlaceholdersReplaced($placeholders)
)
$this->getTemplateWithPlaceholdersReplaced($placeholders)
);
}

Expand Down
33 changes: 20 additions & 13 deletions test/PHPParser/Tests/Lexer/EmulativeTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,32 @@

class PHPParser_Tests_Lexer_EmulativeTest extends PHPUnit_Framework_TestCase
{
/** @var PHPParser_Lexer_Emulative */
protected $lexer;

/**
 * Creates a fresh emulative lexer before every test.
 */
protected function setUp() {
    $this->lexer = new PHPParser_Lexer_Emulative();
}

/**
* @dataProvider provideTestReplaceKeywords
*/
public function testReplaceKeywords($keyword, $expectedToken) {
$lexer = new PHPParser_Lexer_Emulative('<?php ' . $keyword);
$this->lexer->startLexing('<?php ' . $keyword);

$this->assertEquals($expectedToken, $lexer->lex());
$this->assertEquals(0, $lexer->lex());
$this->assertEquals($expectedToken, $this->lexer->getNextToken());
$this->assertEquals(0, $this->lexer->getNextToken());
}

/**
* @dataProvider provideTestReplaceKeywords
*/
public function testNoReplaceKeywordsAfterObjectOperator($keyword) {
$lexer = new PHPParser_Lexer_Emulative('<?php ->' . $keyword);
$this->lexer->startLexing('<?php ->' . $keyword);

$this->assertEquals(PHPParser_Parser::T_OBJECT_OPERATOR, $lexer->lex());
$this->assertEquals(PHPParser_Parser::T_STRING, $lexer->lex());
$this->assertEquals(0, $lexer->lex());
$this->assertEquals(PHPParser_Parser::T_OBJECT_OPERATOR, $this->lexer->getNextToken());
$this->assertEquals(PHPParser_Parser::T_STRING, $this->lexer->getNextToken());
$this->assertEquals(0, $this->lexer->getNextToken());
}

public function provideTestReplaceKeywords() {
Expand All @@ -40,26 +47,26 @@ public function provideTestReplaceKeywords() {
* @dataProvider provideTestLexNewFeatures
*/
public function testLexNewFeatures($code, array $expectedTokens) {
$lexer = new PHPParser_Lexer_Emulative('<?php ' . $code);
$this->lexer->startLexing('<?php ' . $code);

foreach ($expectedTokens as $expectedToken) {
list($expectedTokenType, $expectedTokenText) = $expectedToken;
$this->assertEquals($expectedTokenType, $lexer->lex($text));
$this->assertEquals($expectedTokenType, $this->lexer->getNextToken($text));
$this->assertEquals($expectedTokenText, $text);
}
$this->assertEquals(0, $lexer->lex());
$this->assertEquals(0, $this->lexer->getNextToken());
}

/**
* @dataProvider provideTestLexNewFeatures
*/
public function testLeaveStuffAloneInStrings($code) {
$stringifiedToken = '"' . addcslashes($code, '"\\') . '"';
$lexer = new PHPParser_Lexer_Emulative('<?php ' . $stringifiedToken);
$this->lexer->startLexing('<?php ' . $stringifiedToken);

$this->assertEquals(PHPParser_Parser::T_CONSTANT_ENCAPSED_STRING, $lexer->lex($text));
$this->assertEquals(PHPParser_Parser::T_CONSTANT_ENCAPSED_STRING, $this->lexer->getNextToken($text));
$this->assertEquals($stringifiedToken, $text);
$this->assertEquals(0, $lexer->lex());
$this->assertEquals(0, $this->lexer->getNextToken());
}

public function provideTestLexNewFeatures() {
Expand Down
Loading

0 comments on commit 3701e02

Please sign in to comment.