Skip to content
Browse files

implement heredoc

  • Loading branch information...
1 parent bc46c37 commit 1af4110f7d0802d4055f7dbe36fcb55386602b02 @FUKUZAWA-Tadashi FUKUZAWA-Tadashi committed Mar 3, 2013
Showing with 347 additions and 12 deletions.
  1. +20 −2 include/mruby/compile.h
  2. +5 −0 src/codegen.c
  3. +1 −0 src/node.h
  4. +246 −9 src/parse.y
  5. +68 −1 test/t/literals.rb
  6. +7 −0 tools/mirb/mirb.c
View
22 include/mruby/compile.h
@@ -59,6 +59,21 @@ struct mrb_parser_message {
char* message;
};
+/* heredoc parse type: selects how the body lines of a here document are lexed.
+ * The lexer only distinguishes "quote" from everything else: for the quote
+ * type it copies characters verbatim, otherwise it also processes backslash
+ * escapes and #{...} interpolation. */
+enum heredoc_type {
+ heredoc_type_norm, /* <<EOH : escapes and #{...} are processed in the body */
+ heredoc_type_quote, /* <<'EOH' : body text is taken literally */
+};
+/* heredoc structure: per-document state shared by the lexer and the parser.
+ * One instance is allocated for every here document opener that is parsed;
+ * it is carried as the cdr of a NODE_HEREDOC cons cell.
+ */
+struct mrb_parser_heredoc_info {
+  enum heredoc_type type;       /* how body lines are lexed (see enum heredoc_type) */
+  /* The two flags are declared unsigned: a plain `int` bit-field of width 1
+   * may be signed, in which case it cannot represent the value 1 (TRUE). */
+  unsigned int allow_indent:1;  /* opener was <<- : leading whitespace before the terminator is skipped */
+  unsigned int line_head:1;     /* lexer is at the start of a body line, so the finished line may be the terminator */
+  const char *term;             /* terminator text the closing line is compared against */
+  int term_len;                 /* length of term, in bytes */
+  mrb_ast_node *doc;            /* parsed body; NULL until the closing tHEREDOC_END has been reduced */
+};
+
/* parser structure */
struct mrb_parser_state {
mrb_state *mrb;
@@ -71,7 +86,7 @@ struct mrb_parser_state {
int column;
enum mrb_lex_state_enum lstate;
- int sterm;
+ int sterm; /* string terminator : ' ' means heredoc */
int regexp;
unsigned int cond_stack;
@@ -85,7 +100,10 @@ struct mrb_parser_state {
char buf[1024];
int bidx;
- mrb_ast_node *heredoc;
+ mrb_ast_node *heredocs; /* list of mrb_parser_heredoc_info* */
+ mrb_ast_node *parsing_heredoc;
+ int heredoc_starts_nextline:1;
+ int heredoc_end_now:1; /* for mirb */
void *ylval;
View
5 src/codegen.c
@@ -1902,6 +1902,11 @@ codegen(codegen_scope *s, node *tree, int val)
}
break;
+ case NODE_HEREDOC:
+ /*if(tree == NULL){printf("heredoc error 1\n");exit(11);}*/
+ tree = ((struct mrb_parser_heredoc_info *)tree)->doc;
+ /*if(tree == NULL){printf("heredoc error 2\n");exit(12);}*/
+ /* fall through */
case NODE_DSTR:
if (val) {
node *n = tree;
View
1 src/node.h
@@ -102,5 +102,6 @@ enum node_type {
NODE_IFUNC,
NODE_DSYM,
NODE_ATTRASGN,
+ NODE_HEREDOC,
NODE_LAST
};
View
255 src/parse.y
@@ -31,6 +31,7 @@
typedef mrb_ast_node node;
typedef struct mrb_parser_state parser_state;
+typedef struct mrb_parser_heredoc_info parser_heredoc_info;
static int yylex(void *lval, parser_state *p);
static void yyerror(parser_state *p, const char *s);
@@ -736,6 +737,14 @@ new_nth_ref(parser_state *p, int n)
return cons((node*)NODE_NTH_REF, (node*)(intptr_t)n);
}
+/* (:heredoc . info)
+ * Build a NODE_HEREDOC cell whose cdr is a freshly allocated
+ * parser_heredoc_info.  The info record is returned uninitialized;
+ * the caller is expected to fill in every field.
+ */
+static node*
+new_heredoc(parser_state *p)
+{
+  parser_heredoc_info *info;
+
+  info = parser_palloc(p, sizeof(parser_heredoc_info));
+  return cons((node*)NODE_HEREDOC, (node*)info);
+}
+
static void
new_bv(parser_state *p, mrb_sym id)
{
@@ -844,10 +853,80 @@ var_reference(parser_state *p, node *lhs)
return lhs;
}
+
+/* Register a here document whose terminator is `term` (`term_len` bytes).
+ *
+ * Creates the NODE_HEREDOC cell, initializes its info record, pushes it onto
+ * p->heredocs and, when no here document is currently being read, makes the
+ * last cell of that list the one whose body the lexer will read next.
+ * Also flags that the body starts on the next line and leaves the lexer in
+ * EXPR_END.  Returns the new NODE_HEREDOC cell.
+ */
+static node*
+heredoc_start_sb(parser_state *p, const char* term, size_t term_len, enum heredoc_type type, int allow_indent)
+{
+  node *hd = new_heredoc(p);
+  parser_heredoc_info *info = (parser_heredoc_info*)hd->cdr;
+
+  info->type = type;
+  info->term = term;
+  info->term_len = term_len;
+  info->allow_indent = allow_indent;
+  info->line_head = TRUE;
+  info->doc = NULL;
+
+  p->heredocs = push(p->heredocs, hd);
+  if (!p->parsing_heredoc) {
+    /* nothing pending: start reading from the last cell of the list */
+    node *tail;
+
+    for (tail = p->heredocs; tail->cdr; tail = tail->cdr)
+      ;
+    p->parsing_heredoc = tail;
+  }
+
+  p->lstate = EXPR_END;
+  p->heredoc_starts_nextline = TRUE;
+  return hd;
+}
+
+/* Start a here document whose terminator is given by a string node.
+ *
+ * `beg` is the node produced for the opener token; its text is "<<" or
+ * "<<-", so the third character tells whether an indented terminator is
+ * allowed.  The terminator text and its length are taken from `str`.
+ */
+static node*
+heredoc_start(parser_state *p, node *beg, node *str, enum heredoc_type type)
+{
+  const char *opener = (char*)beg->cdr->car;
+  const char *term = (char*)str->cdr->car;
+  size_t term_len = (intptr_t)str->cdr->cdr;
+
+  return heredoc_start_sb(p, term, term_len, type, opener[2] == '-');
+}
+
+/* Start a here document whose terminator is the name of symbol `sym`
+ * (used for the bare identifier / constant opener forms).
+ *
+ * `beg` is the node produced for the opener token; a '-' as its third
+ * character means the terminator line may be indented.
+ */
+static node*
+heredoc_start_sym(parser_state *p, node *beg, mrb_sym sym, enum heredoc_type type)
+{
+  const char *opener = (char*)beg->cdr->car;
+  int indent_ok = (opener[2] == '-');
+  int name_len;
+  const char *name = mrb_sym2name_len(p->mrb, sym, &name_len);
+
+  return heredoc_start_sb(p, name, name_len, type, indent_ok);
+}
+
+/* Return the info record of the here document whose body is currently
+ * being read, or NULL when none is pending.
+ * NOTE(review): defined without `static` although no header in view
+ * declares it -- confirm whether external linkage is intended.
+ */
+parser_heredoc_info *
+parsing_heredoc_inf(parser_state *p)
+{
+  node *cell = p->parsing_heredoc;
+
+  /* assert(cell == NULL || cell->car->car == NODE_HEREDOC); */
+  return cell ? (parser_heredoc_info*)cell->car->cdr : NULL;
+}
+
+/* Finish the here document currently being read and advance to the next
+ * pending one.  When another document is waiting, the lexer stays in
+ * heredoc mode (sterm == ' ').  Otherwise normal lexing resumes at the
+ * start of a command and heredoc_end_now is raised (consumed by mirb).
+ * Must only be called while p->parsing_heredoc is non-NULL.
+ */
+static void
+heredoc_end(parser_state *p)
+{
+  node *next = p->parsing_heredoc->cdr;
+
+  p->parsing_heredoc = next;
+  if (next != NULL) {
+    p->sterm = ' ';  /* next heredoc */
+    return;
+  }
+
+  p->lstate = EXPR_BEG;
+  p->cmd_start = TRUE;
+  p->sterm = 0;
+  p->heredoc_end_now = TRUE;
+}
+
+
// xxx -----------------------------
%}
+%expect 2
%pure_parser
%parse-param {parser_state *p}
%lex-param {parser_state *p}
@@ -935,6 +1014,7 @@ var_reference(parser_state *p, node *lhs)
%type <nd> mlhs mlhs_list mlhs_post mlhs_basic mlhs_item mlhs_node mlhs_inner
%type <id> fsym sym basic_symbol operation operation2 operation3
%type <id> cname fname op f_rest_arg f_block_arg opt_f_block_arg f_norm_arg
+%type <nd> heredoc heredoc_rep heredoc_interp
%token tUPLUS /* unary+ */
%token tUMINUS /* unary- */
@@ -965,6 +1045,8 @@ var_reference(parser_state *p, node *lhs)
%token tLAMBDA /* -> */
%token tSYMBEG tREGEXP_BEG tWORDS_BEG tQWORDS_BEG
%token tSTRING_BEG tSTRING_DVAR tLAMBEG
+%token <nd> tHEREDOC_BEG /* <<, <<- */
+%token tHEREDOC_END
/*
* precedence table
@@ -1871,6 +1953,7 @@ mrhs : args ',' arg_value
primary : literal
| string
| regexp
+ | heredoc
| var_ref
| backref
| tFID
@@ -2523,6 +2606,71 @@ regexp : tREGEXP_BEG tREGEXP
}
;
+heredoc : tHEREDOC_BEG tSTRING_BEG tSTRING /* opener + string introduced by tSTRING_BEG (presumably the <<"EOH" form -- confirm against the lexer); terminator text comes from the tSTRING node */
+ {
+ $$ = heredoc_start(p, $1, $3, heredoc_type_norm);
+ }
+ | tHEREDOC_BEG tSTRING /* opener + bare tSTRING token: registered as the literal (quote) type */
+ {
+ $$ = heredoc_start(p, $1, $2, heredoc_type_quote);
+ }
+ | tHEREDOC_BEG tIDENTIFIER /* opener + identifier: the symbol's name is the terminator */
+ {
+ $$ = heredoc_start_sym(p, $1, $2, heredoc_type_norm);
+ }
+ | tHEREDOC_BEG tCONSTANT /* opener + constant: the symbol's name is the terminator */
+ {
+ $$ = heredoc_start_sym(p, $1, $2, heredoc_type_norm);
+ }
+ ;
+
+
+opt_heredoc_bodies : none /* zero or more here document bodies */
+ | heredoc_bodies
+ ;
+
+heredoc_bodies : heredoc_body /* one body per pending here document, in sequence */
+ | heredoc_bodies heredoc_body
+ ;
+
+heredoc_body : tHEREDOC_END /* terminator line with no body lines before it: store a one-element list holding an empty string */
+ {
+ /* assert(parsing_heredoc_inf(p) != NULL); */
+ parsing_heredoc_inf(p)->doc = list1(new_str(p, "", 0));
+ heredoc_end(p);
+ }
+ | heredoc_rep tHEREDOC_END /* body parts followed by the terminator: attach the collected list to the current heredoc, then advance to the next pending one */
+ {
+ /* assert(parsing_heredoc_inf(p) != NULL); */
+ parsing_heredoc_inf(p)->doc = $1;
+ heredoc_end(p);
+ }
+ ;
+
+heredoc_rep : heredoc_interp /* a single part: its list is the result (default action) */
+ | heredoc_rep heredoc_interp /* further parts are appended to the list built so far */
+ {
+ $$ = append($1, $2);
+ }
+ ;
+
+heredoc_interp : tSTRING /* a complete body line: one-element list */
+ {
+ $$ = list1($1);
+ }
+ | tSTRING_PART /* text up to a "#{"; an embedded compstmt follows */
+ {
+ $<num>$ = p->sterm; /* mid-rule action: remember the string-terminator state ... */
+ p->sterm = 0; /* ... and clear it so the embedded code is lexed as ordinary source */
+ }
+ compstmt
+ '}'
+ {
+ p->sterm = $<num>2; /* restore the state saved by the mid-rule action ($2) so heredoc lexing resumes */
+ $$ = list2($1, $3); /* leading text followed by the embedded statement */
+ }
+ ;
+
symbol : basic_symbol
{
$$ = new_sym(p, $1);
@@ -2873,6 +3021,7 @@ singleton : var_ref
case NODE_MATCH:
case NODE_FLOAT:
case NODE_ARRAY:
+ case NODE_HEREDOC:
yyerror(p, "can't define singleton method for literals");
default:
break;
@@ -2944,7 +3093,7 @@ rbracket : opt_nl ']'
;
trailer : /* none */
- | nl
+ | nl
| ','
;
@@ -2957,6 +3106,7 @@ nl : '\n'
p->lineno++;
p->column = 0;
}
+ opt_heredoc_bodies
terms : term
| terms ';' {yyerrok;}
@@ -3092,9 +3242,7 @@ nextc(parser_state *p)
else {
c = (unsigned char)*p->s++;
}
- if (c == '\n') {
- // must understand heredoc
- }
+ /* if (c == '\n') { } */ /* heredoc treated in parser_yylex() */
}
p->column++;
return c;
@@ -3548,6 +3696,72 @@ parse_qstring(parser_state *p, int beg, int end)
}
static int
+parse_heredoc_line(parser_state *p) /* Lex the body of the here document currently being read, up to the end of
+ * the current line or up to the next "#{".
+ *
+ * Returns:
+ *   tSTRING_PART  - a "#{" was found; yylval.nd holds the text read so far
+ *   tHEREDOC_END  - the finished line is the terminator
+ *   tSTRING       - an ordinary body line; yylval.nd holds it, newline included
+ *   0             - end of input reached before the terminator (error reported)
+ *
+ * The caller must ensure a here document is pending (inf is dereferenced
+ * without a NULL check).
+ */
+{
+ parser_heredoc_info *inf = parsing_heredoc_inf(p);
+ /* assert(inf != NULL); */
+ int c;
+
+ newtok(p);
+ while ((c = nextc(p)) != '\n') {
+ if (c == -1) /* end of input */
+ break;
+ if (inf->type != heredoc_type_quote) { /* escapes and interpolation apply only to non-literal heredocs */
+ if (c == '\\') {
+ tokadd(p, read_escape(p));
+ inf->line_head = FALSE; /* a line containing an escape is never tested as the terminator */
+ continue;
+ }
+ if (c == '#') {
+ c = nextc(p);
+ if (c == '{') {
+ tokfix(p);
+ p->lstate = EXPR_BEG;
+ p->sterm = ' '; /* ' ' marks heredoc mode; the grammar saves and restores it around the embedded code */
+ p->cmd_start = TRUE;
+ yylval.nd = new_str(p, tok(p), toklen(p));
+ inf->line_head = FALSE; /* the rest of this physical line is a continuation, not a fresh line */
+ return tSTRING_PART;
+ }
+ tokadd(p, '#'); /* a lone '#': keep it and re-read the following character normally */
+ pushback(p, c);
+ continue;
+ }
+ }
+ tokadd(p, c);
+ }
+ tokadd(p, '\n'); /* appended unconditionally, including when the loop ended at end of input */
+ tokfix(p);
+ p->lineno++;
+ p->column = 0;
+ int line_head = inf->line_head; /* was this token a whole line, with no escape or interpolation in it? */
+ inf->line_head = TRUE; /* the next call starts a fresh line */
+ if (line_head) {
+ /* check whether end of heredoc */
+ const char *s = tok(p);
+ int len = toklen(p);
+ if (inf->allow_indent) { /* <<- form: ignore leading whitespace before comparing */
+ while (ISSPACE(*s) && len > 0) {
+ ++s;
+ --len;
+ }
+ }
+ if ((len-1 == inf->term_len) && (strncmp(s, inf->term, len-1) == 0)) { /* len-1 excludes the trailing newline added above */
+ return tHEREDOC_END;
+ }
+ }
+ if (c == -1) { /* checked after the terminator test, so a terminator on an unterminated last line still matches */
+ char buf[256];
+ snprintf(buf, sizeof(buf), "can't find string \"%s\" anywhere before EOF", inf->term);
+ yyerror(p, buf);
+ return 0;
+ }
+
+ yylval.nd = new_str(p, tok(p), toklen(p));
+ return tSTRING;
+}
+
+static int
arg_ambiguous(parser_state *p)
{
yywarning(p, "ambiguous first argument; put parentheses or even spaces");
@@ -3565,6 +3779,9 @@ parser_yylex(parser_state *p)
enum mrb_lex_state_enum last_state;
int token_column;
+ if ((p->sterm == ' ') && (p->parsing_heredoc != NULL) && (! p->heredoc_starts_nextline)) {
+ return parse_heredoc_line(p);
+ }
if (p->sterm) {
return parse_string(p, p->sterm);
}
@@ -3589,6 +3806,11 @@ parser_yylex(parser_state *p)
skip(p, '\n');
/* fall through */
case '\n':
+ p->heredoc_starts_nextline = FALSE;
+ if (p->parsing_heredoc != NULL) {
+ p->sterm = ' ';
+ goto normal_newline;
+ }
switch (p->lstate) {
case EXPR_BEG:
case EXPR_FNAME:
@@ -3711,17 +3933,25 @@ parser_yylex(parser_state *p)
case '<':
last_state = p->lstate;
c = nextc(p);
-#if 0
- // no heredoc supported yet
if (c == '<' &&
p->lstate != EXPR_DOT &&
p->lstate != EXPR_CLASS &&
!IS_END() &&
(!IS_ARG() || space_seen)) {
- int token = heredoc_identifier();
- if (token) return token;
+ /* heredocument check */
+ newtok(p); tokadd(p, '<'); tokadd(p, '<');
+ int c2 = nextc(p);
+ if (c2 == '-') {
+ tokadd(p, c2);
+ c2 = nextc(p);
+ }
+ pushback(p, c2);
+ if (!ISSPACE(c2)) {
+ tokfix(p);
+ yylval.nd = new_str(p, tok(p), toklen(p));
+ return tHEREDOC_BEG;
+ }
}
-#endif
if (p->lstate == EXPR_FNAME || p->lstate == EXPR_DOT) {
p->lstate = EXPR_ARG;
} else {
@@ -4844,6 +5074,8 @@ mrb_parser_new(mrb_state *mrb)
yydebug = 1;
#endif
+ p->heredocs = p->parsing_heredoc = NULL;
+
return p;
}
@@ -5735,6 +5967,11 @@ parser_dump(mrb_state *mrb, node *tree, int offset)
parser_dump(mrb, tree, offset+1);
break;
+ case NODE_HEREDOC:
+ printf("NODE_HEREDOC:\n");
+ parser_dump(mrb, ((parser_heredoc_info*)tree)->doc, offset+1);
+ break;
+
default:
printf("node type: %d (0x%x)\n", (int)n, (int)n);
break;
View
69 test/t/literals.rb
@@ -59,7 +59,74 @@
e == 'abc' and f == 'ab/c' and g == 'abc'
end
-# Not Implemented ATM assert('Literals Strings Here documents', '8.7.6.3.6') do
+assert('Literals Strings Here documents', '8.7.6.3.6') do # exercises each heredoc opener form; the final boolean chain is the test result
+ a = <<AAA
+aaa
+AAA
+ b = <<b_b
+bbb
+b_b
+ c = [<<CCC1, <<"CCC2", <<'CCC3'] # three heredocs opened on one line; their bodies follow in the same order
+c1
+CCC1
+c 2
+CCC2
+c 3
+CCC3
+
+ d = <<DDD
+d#{1+2}DDD
+d\t
+DDD\n
+DDD
+ e = <<'EEE'
+e#{1+2}EEE
+e\t
+EEE\n
+EEE
+ f = <<"FFF"
+F
+FF#{"f"}FFF
+F
+FFF
+
+ g = <<-GGG
+ ggg
+ GGG
+ h = <<-"HHH"
+ hhh
+ HHH
+ i = <<-'III'
+ iii
+ III
+ j = [<<-JJJ1 , <<-"JJJ2" , <<-'JJJ3' ]
+ j#{1}j
+ JJJ1
+ j#{2}j
+ JJJ2
+ j#{3}j
+ JJJ3
+
+ k = <<'KKK'.to_i # method call chained directly on the heredoc opener
+123
+KKK
+
+ z = <<'ZZZ' # empty body
+ZZZ
+
+ a == "aaa\n" and
+ b == "bbb\n" and
+ c == ["c1\n", "c 2\n", "c 3\n"] and
+ d == "d3DDD\nd\t\nDDD\n\n" and
+ e == "e\#{1+2}EEE\ne\\t\nEEE\\n\n" and
+ f == "F\nFFfFFF\nF\n" and
+ g == " ggg\n" and
+ h == " hhh\n" and
+ i == " iii\n" and
+ j == [" j1j\n", " j2j\n", " j\#{3}j\n"] and
+ k == 123 and
+ z == ""
+end
# Not Implemented ATM assert('Literals Array', '8.7.6.4') do
View
7 tools/mirb/mirb.c
@@ -40,6 +40,13 @@ is_code_block_open(struct mrb_parser_state *parser)
/* check for unterminated string */
if (parser->sterm) return TRUE;
+ /* check for heredoc */
+ if (parser->heredoc_starts_nextline) return TRUE;
+ if (parser->heredoc_end_now) {
+ parser->heredoc_end_now = FALSE;
+ return FALSE;
+ }
+
/* check if parser error are available */
if (0 < parser->nerr) {
const char *unexpected_end = "syntax error, unexpected $end";

0 comments on commit 1af4110

Please sign in to comment.
Something went wrong with that request. Please try again.