Skip to content
This repository has been archived by the owner on Apr 1, 2024. It is now read-only.

Commit

Permalink
More parser glitches
Browse files Browse the repository at this point in the history
Summary:
Interesting programs and their outputs:

<?php
//?????>hello
---
hello

<?php
$foo->if();
---
<works>

<?php
$foo-> if()
---
<syntax error>

Anyway, PHP's parser is a huge mess.

Reviewed By: no one

Test Plan: Disabled "maybe_xhp" check, then `find . | grep php$ | xargs -n1 php -l`. No errors!

Revert Plan: ok
  • Loading branch information
Marcel Laverdet committed Jun 26, 2009
1 parent 04879fc commit d3b262a
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 15 deletions.
4 changes: 1 addition & 3 deletions ext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,7 @@ static zend_op_array* xhp_compile_file(zend_file_handle* f, int type TSRMLS_DC)
maybe_xhp = 1;
break;
}
} else if (*ii == 'e' && ii[1] == 'l' && ii[2] == 'e' && ii[3] == 'm' && ii[4] == 'e' && ii[5] == 'n' && ii[6] == 't') {
// } else if (memcmp(ii, "element", 7)) {
// why is this faster than memcmp? i'm bad at computers and i don't know.
} else if (!memcmp(ii, "element", 7)) {
maybe_xhp = 1;
break;
}
Expand Down
37 changes: 32 additions & 5 deletions xhp_parser.y
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
%parse-param { code_rope* root }
%lex-param { void* yyscanner }
%error-verbose
%expect 2 // i hate php's if syntax
// Keywords
%token BOGUS
Expand Down Expand Up @@ -154,7 +155,7 @@ statement:
$$ = "{" + $2 + cr("}");
}
| function_declaration
| if_statement // TODO
| if_statement
| while_statement
| for_statement
| foreach_statement
Expand Down Expand Up @@ -187,11 +188,32 @@ if_statement:
t_IF t_LPAREN expression t_RPAREN statement {
$$ = "if (" + $3 + ") " + $5;
}
| statement t_ELSE statement {
| if_statement t_ELSE statement {
$$ = $1 + " else " + $3;
}
| statement t_ELSEIF t_LPAREN expression t_RPAREN statement {
$$ = $1 + "elseif (" + $4 + ") " + $6;
| if_statement t_ELSEIF t_LPAREN expression t_RPAREN statement {
$$ = $1 + " elseif (" + $4 + ") " + $6;
}
| t_IF t_LPAREN expression t_RPAREN t_COLON statement_list elseif_list else_single t_ENDIF semicolon {
$$ = "if (" + $3 + "): " + $6 + $7 + $8 + " endif" + $10;
}
;
// Zero or more `elseif (...):` clauses, used by the colon/endif form of
// if_statement. The value is the concatenated text of all clauses seen so far.
elseif_list:
/* empty */ {
// No clause contributes no text.
$$ = "";
}
| elseif_list t_ELSEIF t_LPAREN expression t_RPAREN t_COLON statement_list {
// Left-recursive: append this clause ($4 = condition, $7 = body) to the
// clauses already accumulated in $1.
$$ = $1 + " elseif(" + $4 + "):" + $7;
}
;
// Optional trailing `else:` clause of the colon/endif form of if_statement.
// The value is the clause's output text, or "" when there is no else branch.
else_single:
  /* empty */ {
    $$ = "";
  }
| t_ELSE t_COLON statement_list {
    // The colon must be consumed as a token, matching the `if (...):` and
    // `elseif (...):` productions; it is re-emitted as part of " else: ".
    $$ = " else: " + $3;
  }
;
Expand Down Expand Up @@ -430,6 +452,10 @@ expression:
| t_ARRAY identifier {
$$ = "array " + $2;
}
| t_ARRAY t_BIT_AND identifier {
// handles function foo(array &bar){}
$$ = "array &" + $3;
}
| t_ARRAY t_LPAREN array_pair_list t_RPAREN {
$$ = "array(" + $3 + ")";
}
Expand Down Expand Up @@ -566,7 +592,8 @@ expression:
$$ = $1 + "::" + $3;
}
| expression t_ARROW expression {
$$ = $1 + "->" + $3;
// Need -> in the rope because "$foo-> if()" is a syntax error, but if you remove the space it's not.
$$ = $1 + cr("->") + $3;
}
| t_INCR expression {
$$ = "++" + $2;
Expand Down
53 changes: 46 additions & 7 deletions xhp_scanner.l
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,39 @@ B b?
}
<PHP,PHP_NO_RESERVED_WORDS,XHP_ATTR>{
[ \t\x0b\x0c\xa0]+ /* eat it up */
("//"|"#").*{NEWLINE} |
{NEWLINE} {
  ++yylloc->internal_line;
}
"//"|"#" {
  /*
   * Single-line comment. The rest of the line is consumed by hand, because
   * a "?>" inside the comment ends it early: those two characters are pushed
   * back so the scanner sees them next.
   */
  int c; /* int rather than char so that EOF is distinguishable from a data byte */
  for (;;) {
    /* Skip ahead to the next character that might end the comment. */
    while ((c = yyinput(yyscanner)) != '?' && c != '\r' && c != '\n' && c != EOF);
  honk:
    if (c == '\r') {
      ++yylloc->internal_line;
      /* Treat "\r\n" as a single line ending; a lone "\r" also ends the comment. */
      c = yyinput(yyscanner);
      if (c == EOF) {
        return tok(0);
      }
      if (c != '\n') {
        unput(c);
      }
      break;
    }
    if (c == '\n') {
      ++yylloc->internal_line;
      break;
    }
    if (c == '?') {
      c = yyinput(yyscanner);
      if (c == '>') {
        /* Push back in reverse order so '?' is read first. */
        unput('>');
        unput('?');
        break;
      }
      /* Not a closing tag: re-examine the character that followed the '?'. */
      goto honk;
    }
    if (c == EOF) {
      return tok(0);
    }
  }
}
"/*" {
char c;
for (;;) {
Expand Down Expand Up @@ -133,7 +162,13 @@ B b?
(?i:default) return tok(t_DEFAULT);
(?i:break) return tok(t_BREAK);
(?i:continue) return tok(t_CONTINUE);
(?i:function) return tok(t_FUNCTION);
(?i:function) {
// Case-insensitive `function` keyword.
// The token is built BEFORE the state push. NOTE(review): presumably tok()
// goes through flex_tok(), which pops PHP_NO_RESERVED_WORDS when that is the
// current start state, so pushing first would be undone immediately -- confirm.
int tt = tok(t_FUNCTION);
// needed so you can say function element(){}
// this only works on objects, though.
flex_push_state(PHP_NO_RESERVED_WORDS, yyg);
return tt;
}
(?i:const) return tok(t_CONST);
(?i:instanceof) return tok(t_INSTANCEOF);
(?i:return) return tok(t_RETURN);
Expand Down Expand Up @@ -195,7 +230,11 @@ B b?
return tt;
}
"=>" return tok(t_DOUBLE_ARROW);
"::" return tok(t_HEBREW_THING);
"::" {
int tt = tok(t_HEBREW_THING);
flex_push_state(PHP_NO_RESERVED_WORDS, yyg);
return tt;
}
"." return tok(t_CONCAT);
"," return tok(t_COMMA);
";" return tok(t_SEMICOLON);
Expand Down Expand Up @@ -262,11 +301,11 @@ B b?
*yylval = yytext;
return tok(t_NUMBER);
}
[0-9]*\.?[0-9]+[eE]-?[0-9]{1,3} {
[0-9]*\.?[0-9]+[eE][\-+]?[0-9]{1,3} {
*yylval = yytext;
return tok(t_NUMBER);
}
[0-9]+ |
[0-9]+\.? |
[0-9]*\.[0-9]+ {
*yylval = yytext;
return tok(t_NUMBER);
Expand All @@ -281,7 +320,7 @@ B b?
eom += 3;
while (*eom == '\t' || *eom == ' ') ++eom;
size_t len = yyleng - (eom - yytext) - 1;
if (eom[len - 2] == '\r') { // windows [\r\n] encoding
if (eom[len - 1] == '\r') { // windows [\r\n] encoding
--len;
}
yyextra->heredoc_eom = eom;
Expand Down Expand Up @@ -509,6 +548,6 @@ static int flex_tok(int t, void* y) {
if (YY_START == PHP_NO_RESERVED_WORDS) {
flex_pop_state(yyg);
}
// printf("\ntok:%s\n", yytokname(t));
// fprintf(stderr, "\ntok:%s\n", yytokname(t));
return t;
}

0 comments on commit d3b262a

Please sign in to comment.