Use inject-once approach for lexer

The lexer is now injected only once, when the parser is created. Instead of

    $parser = new PHPParser_Parser;
    $parser->parse(new PHPParser_Lexer($code));
    $parser->parse(new PHPParser_Lexer($code2));

you now write:

    $parser = new PHPParser_Parser(new PHPParser_Lexer);
    $parser->parse($code);
    $parser->parse($code2);
haileys · May 4, 2012 · 3701e02 · 3701e02
1 parent 271156f
commit 3701e02
Show file tree

Hide file tree

Showing 12 changed files with 164 additions and 113 deletions.
diff --git a/grammar/kmyacc.php.parser b/grammar/kmyacc.php.parser
@@ -95,6 +95,15 @@ class #(-p)
     protected $yyastk;
     protected $yysp;
     protected $lexer;
+
+    /**
+     * Creates a parser instance.
+     *
+     * @param PHPParser_Lexer $lexer A lexer
+     */
+    public function __construct(PHPParser_Lexer $lexer) {
+        $this->lexer = $lexer;
+    }
 #endif
 #if -t
     protected static $yyproduction = array(
@@ -145,12 +154,12 @@ class #(-p)
      * Parses PHP code into a node tree and prints out debugging information.
 #endif
      *
-     * @param PHPParser_Lexer $lexer A lexer
+     * @param string $code The source code to parse
      *
      * @return array Array of statements
      */
-    public function parse(PHPParser_Lexer $lexer) {
-        $this->lexer  = $lexer;
+    public function parse($code) {
+        $this->lexer->startLexing($code);
 
         $this->yysp   = 0;                   // Stack pos
         $yysstk       = array($yystate = 0); // State stack
@@ -168,7 +177,7 @@ class #(-p)
                 $yyn = self::$yydefault[$yystate];
             } else {
                 if ($yychar < 0) {
-                    if (($yychar = $lexer->lex($yylval, $yyline, $yyDC)) < 0)
+                    if (($yychar = $this->lexer->getNextToken($yylval, $yyline, $yyDC)) < 0)
                         $yychar = 0;
                     $yychar = $yychar < self::YYMAXLEX ?
                         self::$yytranslate[$yychar] : self::YYBADCH;

diff --git a/lib/PHPParser/Lexer.php b/lib/PHPParser/Lexer.php
@@ -7,21 +7,29 @@ class PHPParser_Lexer
     protected $pos;
     protected $line;
 
-    protected static $tokenMap;
-    protected static $dropTokens = array(
-        T_WHITESPACE => 1, T_COMMENT => 1, T_OPEN_TAG => 1
-    );
+    protected $tokenMap;
+    protected $dropTokens;
 
     /**
      * Creates a Lexer.
+     */
+    public function __construct() {
+        // map from internal tokens to PHPParser tokens
+        $this->tokenMap = $this->createTokenMap();
+
+        // map of tokens to drop while lexing (the map is only used for isset lookup,
+        // that's why the value is simply set to 1; the value is never actually used.)
+        $this->dropTokens = array_fill_keys(array(T_WHITESPACE, T_COMMENT, T_OPEN_TAG), 1);
+    }
+
+    /**
+     * Initializes the lexer for lexing the provided source code.
      *
-     * @param string $code
+     * @param string $code The source code to lex
      *
      * @throws PHPParser_Error on lexing errors (unterminated comment or unexpected character)
      */
-    public function __construct($code) {
-        self::initTokenMap();
-
+    public function startLexing($code) {
         $this->resetErrors();
         $this->tokens = @token_get_all($code);
         $this->handleErrors();
@@ -71,7 +79,7 @@ protected function handleErrors() {
      *
      * @return int Token id
      */
-    public function lex(&$value = null, &$line = null, &$docComment = null) {
+    public function getNextToken(&$value = null, &$line = null, &$docComment = null) {
         $docComment = null;
 
         while (isset($this->tokens[++$this->pos])) {
@@ -93,10 +101,10 @@ public function lex(&$value = null, &$line = null, &$docComment = null) {
 
                 if (T_DOC_COMMENT === $token[0]) {
                     $docComment = $token[1];
-                } elseif (!isset(self::$dropTokens[$token[0]])) {
+                } elseif (!isset($this->dropTokens[$token[0]])) {
                     $value = $token[1];
                     $line  = $token[2];
-                    return self::$tokenMap[$token[0]];
+                    return $this->tokenMap[$token[0]];
                 }
             }
         }
@@ -138,35 +146,37 @@ public function handleHaltCompiler() {
     }
 
     /**
-     * Initializes the token map.
+     * Creates the token map.
      *
      * The token map maps the PHP internal token identifiers
      * to the identifiers used by the Parser. Additionally it
      * maps T_OPEN_TAG_WITH_ECHO to T_ECHO and T_CLOSE_TAG to ';'.
+     *
+     * @return array The token map
      */
-    protected static function initTokenMap() {
-        if (!self::$tokenMap) {
-            self::$tokenMap = array();
-
-            // 256 is the minimum possible token number, as everything below
-            // it is an ASCII value
-            for ($i = 256; $i < 1000; ++$i) {
-                // T_DOUBLE_COLON is equivalent to T_PAAMAYIM_NEKUDOTAYIM
-                if (T_DOUBLE_COLON === $i) {
-                    self::$tokenMap[$i] = PHPParser_Parser::T_PAAMAYIM_NEKUDOTAYIM;
-                // T_OPEN_TAG_WITH_ECHO with dropped T_OPEN_TAG results in T_ECHO
-                } elseif(T_OPEN_TAG_WITH_ECHO === $i) {
-                    self::$tokenMap[$i] = PHPParser_Parser::T_ECHO;
-                // T_CLOSE_TAG is equivalent to ';'
-                } elseif(T_CLOSE_TAG === $i) {
-                    self::$tokenMap[$i] = ord(';');
-                // and the others can be mapped directly
-                } elseif ('UNKNOWN' !== ($name = token_name($i))
-                          && defined($name = 'PHPParser_Parser::' . $name)
-                ) {
-                    self::$tokenMap[$i] = constant($name);
-                }
+    protected function createTokenMap() {
+        $tokenMap = array();
+
+        // 256 is the minimum possible token number, as everything below
+        // it is an ASCII value
+        for ($i = 256; $i < 1000; ++$i) {
+            // T_DOUBLE_COLON is equivalent to T_PAAMAYIM_NEKUDOTAYIM
+            if (T_DOUBLE_COLON === $i) {
+                $tokenMap[$i] = PHPParser_Parser::T_PAAMAYIM_NEKUDOTAYIM;
+            // T_OPEN_TAG_WITH_ECHO with dropped T_OPEN_TAG results in T_ECHO
+            } elseif(T_OPEN_TAG_WITH_ECHO === $i) {
+                $tokenMap[$i] = PHPParser_Parser::T_ECHO;
+            // T_CLOSE_TAG is equivalent to ';'
+            } elseif(T_CLOSE_TAG === $i) {
+                $tokenMap[$i] = ord(';');
+            // and the others can be mapped directly
+            } elseif ('UNKNOWN' !== ($name = token_name($i))
+                      && defined($name = 'PHPParser_Parser::' . $name)
+            ) {
+                $tokenMap[$i] = constant($name);
             }
         }
+
+        return $tokenMap;
     }
 }
diff --git a/lib/PHPParser/Lexer/Emulative.php b/lib/PHPParser/Lexer/Emulative.php
@@ -5,30 +5,48 @@
  */
 class PHPParser_Lexer_Emulative extends PHPParser_Lexer
 {
-    protected static $keywords = array(
-        // PHP 5.4
-        'callable'      => PHPParser_Parser::T_CALLABLE,
-        'insteadof'     => PHPParser_Parser::T_INSTEADOF,
-        'trait'         => PHPParser_Parser::T_TRAIT,
-        '__trait__'     => PHPParser_Parser::T_TRAIT_C,
-        // PHP 5.3
-        '__dir__'       => PHPParser_Parser::T_DIR,
-        'goto'          => PHPParser_Parser::T_GOTO,
-        'namespace'     => PHPParser_Parser::T_NAMESPACE,
-        '__namespace__' => PHPParser_Parser::T_NS_C,
-    );
-
+    protected $newKeywords;
     protected $inObjectAccess;
 
-    public function __construct($code) {
+    public function __construct() {
+        parent::__construct();
+
+        $this->newKeywords = array();
+
+        if (version_compare(PHP_VERSION, '5.4.0RC1', '>=')) {
+            return;
+        }
+
+        // new PHP 5.4 keywords
+        $this->newKeywords += array(
+            'callable'  => PHPParser_Parser::T_CALLABLE,
+            'insteadof' => PHPParser_Parser::T_INSTEADOF,
+            'trait'     => PHPParser_Parser::T_TRAIT,
+            '__trait__' => PHPParser_Parser::T_TRAIT_C,
+        );
+
+        if (version_compare(PHP_VERSION, '5.3.0', '>=')) {
+            return;
+        }
+
+        // new PHP 5.3 keywords
+        $this->newKeywords += array(
+            '__dir__'       => PHPParser_Parser::T_DIR,
+            'goto'          => PHPParser_Parser::T_GOTO,
+            'namespace'     => PHPParser_Parser::T_NAMESPACE,
+            '__namespace__' => PHPParser_Parser::T_NS_C,
+        );
+    }
+
+    public function startLexing($code) {
         $this->inObjectAccess = false;
 
         // on PHP 5.4 don't do anything
         if (version_compare(PHP_VERSION, '5.4.0RC1', '>=')) {
-            parent::__construct($code);
+            parent::startLexing($code);
         } else {
             $code = $this->preprocessCode($code);
-            parent::__construct($code);
+            parent::startLexing($code);
             $this->postprocessTokens();
         }
     }
@@ -103,8 +121,8 @@ protected function postprocessTokens() {
                     );
                 } elseif ('NS' === $matches[1]) {
                     // a \ single char token is returned here and replaced by a
-                    // PHPParser_Parser::T_NS_SEPARATOR token in ->lex(). This hacks around the
-                    // limitations arising from T_NS_SEPARATOR not being defined on 5.3
+                    // PHPParser_Parser::T_NS_SEPARATOR token in ->getNextToken(). This hacks around
+                    // the limitations arising from T_NS_SEPARATOR not being defined on 5.3
                     $replace = array('\\');
                 } elseif ('NOWDOC' === $matches[1]) {
                     // decode the encoded nowdoc payload; pack('H*' is bin2hex( for 5.3
@@ -155,15 +173,15 @@ public function restoreContentCallback(array $matches) {
         }
     }
 
-    public function lex(&$value = null, &$line = null, &$docComment = null) {
-        $token = parent::lex($value, $line, $docComment);
+    public function getNextToken(&$value = null, &$line = null, &$docComment = null) {
+        $token = parent::getNextToken($value, $line, $docComment);
 
         // replace new keywords by their respective tokens. This is not done
         // if we currently are in an object access (e.g. in $obj->namespace
         // "namespace" stays a T_STRING tokens and isn't converted to T_NAMESPACE)
         if (PHPParser_Parser::T_STRING === $token && !$this->inObjectAccess) {
-            if (isset(self::$keywords[strtolower($value)])) {
-                return self::$keywords[strtolower($value)];
+            if (isset($this->newKeywords[strtolower($value)])) {
+                return $this->newKeywords[strtolower($value)];
             }
         // backslashes are replaced by T_NS_SEPARATOR tokens
         } elseif (92 === $token) { // ord('\\')

diff --git a/lib/PHPParser/Parser.php b/lib/PHPParser/Parser.php
@@ -894,14 +894,23 @@ class PHPParser_Parser
     protected $lexer;
 
     /**
-     * Parses PHP code into a node tree.
+     * Creates a parser instance.
      *
      * @param PHPParser_Lexer $lexer A lexer
+     */
+    public function __construct(PHPParser_Lexer $lexer) {
+        $this->lexer = $lexer;
+    }
+
+    /**
+     * Parses PHP code into a node tree.
+     *
+     * @param string $code The source code to parse
      *
      * @return array Array of statements
      */
-    public function parse(PHPParser_Lexer $lexer) {
-        $this->lexer  = $lexer;
+    public function parse($code) {
+        $this->lexer->startLexing($code);
 
         $this->yysp   = 0;                   // Stack pos
         $yysstk       = array($yystate = 0); // State stack
@@ -916,7 +925,7 @@ public function parse(PHPParser_Lexer $lexer) {
                 $yyn = self::$yydefault[$yystate];
             } else {
                 if ($yychar < 0) {
-                    if (($yychar = $lexer->lex($yylval, $yyline, $yyDC)) < 0)
+                    if (($yychar = $this->lexer->getNextToken($yylval, $yyline, $yyDC)) < 0)
                         $yychar = 0;
                     $yychar = $yychar < self::YYMAXLEX ?
                         self::$yytranslate[$yychar] : self::YYBADCH;

diff --git a/lib/PHPParser/Template.php b/lib/PHPParser/Template.php
@@ -25,20 +25,8 @@ public function __construct(PHPParser_Parser $parser, $template) {
      * @return PHPParser_Node[] Statements
      */
     public function getStmts(array $placeholders) {
-        /*
-         * TODO This is evil.
-         * The lexer shouldn't be created in here, instead it should be a dependency, which
-         * basically means that we'd need to have a LexerFactory (which seems strange).
-         * An alternative solution would be to make the lexer work similar to how the parser
-         * works. I.e. one would instantiate the Lexer only once and then pass the results
-         * of ->lex() to the parser (which would then be the full tokens array). This design
-         * seems cleaner, but comes at the expense of higher memory consumption, as the token
-         * array can be quite large.
-         */
         return $this->parser->parse(
-            new PHPParser_Lexer_Emulative(
-                $this->getTemplateWithPlaceholdersReplaced($placeholders)
-            )
+            $this->getTemplateWithPlaceholdersReplaced($placeholders)
         );
     }
 

diff --git a/test/PHPParser/Tests/Lexer/EmulativeTest.php b/test/PHPParser/Tests/Lexer/EmulativeTest.php
@@ -2,25 +2,32 @@
 
 class PHPParser_Tests_Lexer_EmulativeTest extends PHPUnit_Framework_TestCase
 {
+    /** @var PHPParser_Lexer_Emulative */
+    protected $lexer;
+
+    protected function setUp() {
+        $this->lexer = new PHPParser_Lexer_Emulative;
+    }
+
     /**
      * @dataProvider provideTestReplaceKeywords
      */
     public function testReplaceKeywords($keyword, $expectedToken) {
-        $lexer = new PHPParser_Lexer_Emulative('<?php ' . $keyword);
+        $this->lexer->startLexing('<?php ' . $keyword);
 
-        $this->assertEquals($expectedToken, $lexer->lex());
-        $this->assertEquals(0, $lexer->lex());
+        $this->assertEquals($expectedToken, $this->lexer->getNextToken());
+        $this->assertEquals(0, $this->lexer->getNextToken());
     }
 
     /**
      * @dataProvider provideTestReplaceKeywords
      */
     public function testNoReplaceKeywordsAfterObjectOperator($keyword) {
-        $lexer = new PHPParser_Lexer_Emulative('<?php ->' . $keyword);
+        $this->lexer->startLexing('<?php ->' . $keyword);
 
-        $this->assertEquals(PHPParser_Parser::T_OBJECT_OPERATOR, $lexer->lex());
-        $this->assertEquals(PHPParser_Parser::T_STRING, $lexer->lex());
-        $this->assertEquals(0, $lexer->lex());
+        $this->assertEquals(PHPParser_Parser::T_OBJECT_OPERATOR, $this->lexer->getNextToken());
+        $this->assertEquals(PHPParser_Parser::T_STRING, $this->lexer->getNextToken());
+        $this->assertEquals(0, $this->lexer->getNextToken());
     }
 
     public function provideTestReplaceKeywords() {
@@ -40,26 +47,26 @@ public function provideTestReplaceKeywords() {
      * @dataProvider provideTestLexNewFeatures
      */
     public function testLexNewFeatures($code, array $expectedTokens) {
-        $lexer = new PHPParser_Lexer_Emulative('<?php ' . $code);
+        $this->lexer->startLexing('<?php ' . $code);
 
         foreach ($expectedTokens as $expectedToken) {
             list($expectedTokenType, $expectedTokenText) = $expectedToken;
-            $this->assertEquals($expectedTokenType, $lexer->lex($text));
+            $this->assertEquals($expectedTokenType, $this->lexer->getNextToken($text));
             $this->assertEquals($expectedTokenText, $text);
         }
-        $this->assertEquals(0, $lexer->lex());
+        $this->assertEquals(0, $this->lexer->getNextToken());
     }
 
     /**
      * @dataProvider provideTestLexNewFeatures
      */
     public function testLeaveStuffAloneInStrings($code) {
         $stringifiedToken = '"' . addcslashes($code, '"\\') . '"';
-        $lexer = new PHPParser_Lexer_Emulative('<?php ' . $stringifiedToken);
+        $this->lexer->startLexing('<?php ' . $stringifiedToken);
 
-        $this->assertEquals(PHPParser_Parser::T_CONSTANT_ENCAPSED_STRING, $lexer->lex($text));
+        $this->assertEquals(PHPParser_Parser::T_CONSTANT_ENCAPSED_STRING, $this->lexer->getNextToken($text));
         $this->assertEquals($stringifiedToken, $text);
-        $this->assertEquals(0, $lexer->lex());
+        $this->assertEquals(0, $this->lexer->getNextToken());
     }
 
     public function provideTestLexNewFeatures() {