From a5318f92fd291fe0d40023a5df92201b7a10434c Mon Sep 17 00:00:00 2001 From: Jeffrey Kegler Date: Sun, 13 Nov 2011 20:00:26 -0800 Subject: [PATCH] Convert null parses into special case. --- r2/libmarpa/dev/marpa.w | 301 +++++++++++++++++----------------------- 1 file changed, 124 insertions(+), 177 deletions(-) diff --git a/r2/libmarpa/dev/marpa.w b/r2/libmarpa/dev/marpa.w index 7b68160e6..7b82c8547 100644 --- a/r2/libmarpa/dev/marpa.w +++ b/r2/libmarpa/dev/marpa.w @@ -922,25 +922,32 @@ g->t_max_rule_length = 0; @*0 Grammar Boolean: Precomputed. @ @ = -gboolean marpa_is_precomputed(const struct marpa_g* const g); +gboolean marpa_is_precomputed(struct marpa_g* g); @ @d G_is_Precomputed(g) ((g)->t_is_precomputed) @ = guint t_is_precomputed:1; @ @ = g->t_is_precomputed = FALSE; @ @ = -gboolean marpa_is_precomputed(const struct marpa_g* const g) -{ return G_is_Precomputed(g); } +gboolean marpa_is_precomputed(struct marpa_g* g) +{ + @@/ + @@; +return G_is_Precomputed(g); +} -@*0 Grammar Boolean: Has Loop. +@*0 Grammar boolean: has loop?. @ = guint t_has_loop:1; @ @ = g->t_has_loop = FALSE; -@ The internal accessor would be trivial, so there is none. -@ = -gboolean marpa_has_loop(struct marpa_g* g) -{ return g->t_has_loop; } @ @ = gboolean marpa_has_loop(struct marpa_g* g); +@ @ = +gboolean marpa_has_loop(struct marpa_g* g) +{ + @@/ + @@; +return g->t_has_loop; +} @*0 Grammar Boolean: LHS Terminal OK. Traditionally, a BNF grammar did {\bf not} allow a symbol @@ -2151,6 +2158,7 @@ rule_is_nulling (GRAMMAR g, RULE rule) static inline gint rule_is_nulling(GRAMMAR g, RULE rule); @*0 Is Rule Used?. +Is the rule used in computing the AHFA sets? @d RULE_is_Used(rule) ((rule)->t_is_used) @ = guint t_is_used:1; @ @ = @@ -3220,7 +3228,8 @@ Marpa_Symbol_ID alias_by_id(struct marpa_g* g, Marpa_Symbol_ID proper_id); @** Adding a New Start Symbol. This is such a common rewrite that it has a special name in the literature --- it is called ``augmenting the grammar". - +@ = +static inline struct marpa_g* g_augment(struct marpa_g* g); @ @ = static inline struct marpa_g* g_augment(struct marpa_g* g) { @@ -3231,11 +3240,9 @@ struct marpa_g* g_augment(struct marpa_g* g) { SYM old_start = SYM_by_ID(g->t_start_symid); @@; if (proper_old_start) { @ } - if (nulling_old_start) { @ } + if (nulling_old_start) { @@; } return g; } -@ @ = -static inline struct marpa_g* g_augment(struct marpa_g* g); @ @ = if (SYM_is_Nulling(old_start)) { @@ -3299,7 +3306,7 @@ if there is one. Otherwise it is a new, nulling, symbol. new_start_rule->t_is_start = 1; RULE_is_Virtual_LHS(new_start_rule) = 1; Real_SYM_Count_of_RULE(new_start_rule) = 1; - RULE_is_Used(new_start_rule) = TRUE; + RULE_is_Used(new_start_rule) = FALSE; g->t_null_start_rule = new_start_rule; rule_callback (g, new_start_rule->t_id); } @@ -4459,75 +4466,38 @@ into the |TRANS| structure, for memoization. g_free(singleton_duplicates); g_tree_destroy(duplicates); -@ @ = { - AHFA p_initial_state = DQUEUE_PUSH(states, AHFA_Object);@/ - Marpa_Rule_ID start_rule_id; - AIM start_item; - SYM start_symbol = SYM_by_ID(g->t_start_symid); - SYM start_alias - = symbol_null_alias(start_symbol); - gint no_of_items_in_new_state = start_alias ? 2 : 1; - AIM* item_list - = obstack_alloc(&g->t_obs, no_of_items_in_new_state*sizeof(AIM)); - start_rule_id = g_array_index(start_symbol->t_lhs, Marpa_Rule_ID, 0); /* The start rule - is the unique rule that has the start symbol as its LHS */ - start_item = g->t_AHFA_items_by_rule[start_rule_id]; /* The start item is the - initial item for the start rule */ - item_list[0] = start_item; - if (start_alias) { - Marpa_Rule_ID alias_rule_id - = g_array_index(start_alias->t_lhs, Marpa_Rule_ID, 0); /* Start alias - rule is the unique rule that has - the start alias as its LHS */ - item_list[1] = g->t_AHFA_items_by_rule[alias_rule_id]; - } - p_initial_state->t_items = item_list; - p_initial_state->t_item_count = no_of_items_in_new_state; - p_initial_state->t_key.t_id = 0; - LV_AHFA_is_Predicted(p_initial_state) = 0; - LV_Leo_LHS_ID_of_AHFA(p_initial_state) = -1; - LV_TRANSs_of_AHFA(p_initial_state) = transitions_new(g); - p_initial_state->t_empty_transition = NULL; - if (SYM_is_Nulling(start_symbol)) - { // Special case the null parse - SYMID* complete_symids = obstack_alloc (&g->t_obs, sizeof (SYMID)); - SYMID completed_symbol_id = ID_of_SYM(start_symbol); - *complete_symids = completed_symbol_id; - completion_count_inc (&ahfa_work_obs, p_initial_state, completed_symbol_id); - LV_Complete_SYMIDs_of_AHFA(p_initial_state) = complete_symids; - LV_Complete_SYM_Count_of_AHFA(p_initial_state) = 1; - p_initial_state->t_has_completed_start_rule = 1; - LV_Postdot_SYM_Count_of_AHFA(p_initial_state) = 0; - } - else - { - SYMID* postdot_symbol_ids; - LV_Postdot_SYM_Count_of_AHFA(p_initial_state) = 1; - postdot_symbol_ids = LV_Postdot_SYMID_Ary_of_AHFA(p_initial_state) = - obstack_alloc (&g->t_obs, sizeof (SYMID)); - *postdot_symbol_ids = Postdot_SYMID_of_AIM(start_item); - if (start_alias) - { - SYMID* complete_symids = obstack_alloc (&g->t_obs, sizeof (SYMID)); - SYMID completed_symbol_id = ID_of_SYM(start_alias); - *complete_symids = completed_symbol_id; - completion_count_inc(&ahfa_work_obs, p_initial_state, completed_symbol_id); - LV_Complete_SYMIDs_of_AHFA(p_initial_state) = complete_symids; - LV_Complete_SYM_Count_of_AHFA(p_initial_state) = 1; - p_initial_state->t_has_completed_start_rule = 1; - } - else - { - LV_Complete_SYM_Count_of_AHFA(p_initial_state) = 0; - p_initial_state->t_has_completed_start_rule = 0; - } - p_initial_state->t_empty_transition = - create_predicted_AHFA_state (g, - matrix_row (prediction_matrix, - (guint) - Postdot_SYMID_of_AIM (start_item)), - rule_by_sort_key, &states, duplicates); - } +@ @ = +{ + AHFA p_initial_state = DQUEUE_PUSH (states, AHFA_Object); + Marpa_Rule_ID start_rule_id; + SYMID *postdot_symbol_ids; + AIM start_item; + SYM start_symbol = SYM_by_ID (g->t_start_symid); + AIM *item_list = obstack_alloc (&g->t_obs, sizeof (AIM)); + /* The start rule is the unique rule that has the start symbol as its LHS */ + start_rule_id = g_array_index (start_symbol->t_lhs, Marpa_Rule_ID, 0); + /* The start item is the initial item for the start rule */ + start_item = g->t_AHFA_items_by_rule[start_rule_id]; + item_list[0] = start_item; + p_initial_state->t_items = item_list; + p_initial_state->t_item_count = 1; + p_initial_state->t_key.t_id = 0; + LV_AHFA_is_Predicted (p_initial_state) = 0; + LV_Leo_LHS_ID_of_AHFA (p_initial_state) = -1; + LV_TRANSs_of_AHFA (p_initial_state) = transitions_new (g); + LV_Postdot_SYM_Count_of_AHFA (p_initial_state) = 1; + postdot_symbol_ids = LV_Postdot_SYMID_Ary_of_AHFA (p_initial_state) = + obstack_alloc (&g->t_obs, sizeof (SYMID)); + *postdot_symbol_ids = Postdot_SYMID_of_AIM (start_item); + LV_Complete_SYM_Count_of_AHFA (p_initial_state) = 0; + p_initial_state->t_has_completed_start_rule = 0; + p_initial_state->t_empty_transition = + create_predicted_AHFA_state (g, + matrix_row (prediction_matrix, + (guint) + Postdot_SYMID_of_AIM + (start_item)), rule_by_sort_key, + &states, duplicates); } @* Discovered AHFA States. @@ -9102,46 +9072,6 @@ never on the stack. @@; @@; } - @@; -} - -@ The start rule prediction is a special case --- -it is the one AHFA prediction item not in an -predicted AHFA state. -It's dealt with by letting its entry in the -PSIA be set spuriously, then unsetting it. -Not very elegant, but this deals with it at a constant -cost per parse. -@ = { - const ES first_earley_set = ES_of_R_by_Ord (r, 0); - OR** const nodes_by_item = per_es_data[0].t_aexes_by_item; - const EIM* const eims_of_es = EIMs_of_ES(first_earley_set); - const gint item_count = EIM_Count_of_ES (first_earley_set); - gint item_ordinal; - for (item_ordinal = 0; item_ordinal < item_count; item_ordinal++) - { - OR* const nodes_by_aex = nodes_by_item[item_ordinal]; - if (nodes_by_aex) { - const EIM earley_item = eims_of_es[item_ordinal]; - const Marpa_AHFA_State_ID ahfa_id = AHFAID_of_EIM(earley_item); - /* The prediction start rule will be in AHFA state 0 */ - if (ahfa_id) continue; - { - const gint aim_count_of_item = AIM_Count_of_EIM(earley_item); - AEX aex; - for (aex = 0; aex < aim_count_of_item; aex++) { - AIM ahfa_item = AIM_of_EIM_by_AEX(earley_item, aex); - if (Position_of_AIM(ahfa_item) == 0) { - /* Don't bother with the null count --- - there are no nulling symbols in the start rule */ - nodes_by_aex[aex] = NULL; - goto FINISHED_UNSET; - } - } - } - } - } - FINISHED_UNSET: ; } @ @ = { @@ -10563,13 +10493,13 @@ gint marpa_bocage_new(struct marpa_r* r, Marpa_Rule_ID rule_id, Marpa_Earley_Set ORID top_or_node_id = failure_indicator; const gint no_parse = -1; @@; + @@; r_update_earley_sets(r); - @@; b = B_of_R(r) = g_slice_new(BOC_Object); -MARPA_DEBUG3("%s new bocage B_of_R=%p", G_STRLOC, B_of_R(r)); @@; @@; + @@; +MARPA_DEBUG3("%s new bocage B_of_R=%p", G_STRLOC, B_of_R(r)); @@; if (!start_eim) goto SOFT_ERROR; Phase_of_R(r) = evaluation_phase; @@ -10588,7 +10518,7 @@ MARPA_DEBUG3("%s new bocage B_of_R=%p", G_STRLOC, B_of_R(r)); } @ @ = -const GRAMMAR_Const g = G_of_R(r); +const GRAMMAR g = G_of_R(r); const gint rule_count_of_g = RULE_Count_of_G(g); const gint symbol_count_of_g = SYM_Count_of_G(g); BOC b; @@ -10613,50 +10543,55 @@ struct s_bocage_setup_per_es { @ @ = struct s_bocage_setup_per_es* per_es_data = NULL; -@ @ = +@ @ = { - EARLEME end_of_parse_earleme; - @@; - if (B_of_R(r)) { - R_ERROR ("bocage in use"); - return failure_indicator; + @@; + if (B_of_R (r)) + { + R_ERROR ("bocage in use"); + return failure_indicator; } - switch (Phase_of_R (r)) - { - default: - R_ERROR ("recce not evaluation-ready"); - return failure_indicator; - case input_phase: - case evaluation_phase: - break; - } + switch (Phase_of_R (r)) + { + default: + R_ERROR ("recce not evaluation-ready"); + return failure_indicator; + case input_phase: + case evaluation_phase: + break; + } +} -MARPA_OFF_DEBUG2("ordinal=%d", ordinal); - if (ordinal == -1) - { - end_of_parse_es = Current_ES_of_R (r); - } - else - { // ordinal != -1 - if (!ES_Ord_is_Valid (r, ordinal)) - { - R_ERROR ("invalid es ordinal"); - return failure_indicator; - } - end_of_parse_es = ES_of_R_by_Ord (r, ordinal); - } +@ @ = +{ + EARLEME end_of_parse_earleme; + MARPA_OFF_DEBUG2 ("ordinal=%d", ordinal); + if (ordinal == -1) + { + end_of_parse_es = Current_ES_of_R (r); + } + else + { // ordinal != -1 + if (!ES_Ord_is_Valid (r, ordinal)) + { + R_ERROR ("invalid es ordinal"); + return failure_indicator; + } + end_of_parse_es = ES_of_R_by_Ord (r, ordinal); + } - if (!end_of_parse_es) - return no_parse; - ordinal = Ord_of_ES(end_of_parse_es); - end_of_parse_earleme = Earleme_of_ES (end_of_parse_es); - if (rule_id == -1) { - completed_start_rule = - end_of_parse_earleme ? g->t_proper_start_rule : g->t_null_start_rule; - if (!completed_start_rule) - return no_parse; - } else { + if (!end_of_parse_es) + return no_parse; + ordinal = Ord_of_ES (end_of_parse_es); + end_of_parse_earleme = Earleme_of_ES (end_of_parse_es); + if (rule_id == -1) + { + completed_start_rule = g->t_proper_start_rule; + if (!completed_start_rule) + return no_parse; + } + else + { if (!RULEID_of_G_is_Valid (g, rule_id)) { R_ERROR ("invalid rule id"); @@ -10664,13 +10599,14 @@ MARPA_OFF_DEBUG2("ordinal=%d", ordinal); } completed_start_rule = RULE_by_ID (g, rule_id); } -MARPA_OFF_DEBUG2("ordinal=%d", ordinal); + MARPA_OFF_DEBUG2 ("ordinal=%d", ordinal); } @ @ = { - if (ordinal == 0) { // If this is a null parse - gint rule_length = Length_of_RULE(completed_start_rule); + if (ordinal == 0 && g->t_null_start_rule) { // If this is a null parse + const RULE null_start_rule = g->t_null_start_rule; + gint rule_length = Length_of_RULE(g->t_null_start_rule); OR* or_nodes = ORs_of_B (b) = g_new (OR, 1); AND and_nodes = ANDs_of_B (b) = g_new (AND_Object, 1); OR or_node = or_nodes[0] = (OR)obstack_alloc (&OBS_of_B(b), sizeof(OR_Object)); @@ -10680,7 +10616,7 @@ MARPA_OFF_DEBUG2("ordinal=%d", ordinal); OR_Count_of_B(b) = 1; AND_Count_of_B(b) = 1; - RULE_of_OR(or_node) = completed_start_rule; + RULE_of_OR(or_node) = null_start_rule; Position_of_OR(or_node) = rule_length; Origin_Ord_of_OR(or_node) = 0; ID_of_OR(or_node) = null_or_node_id; @@ -10691,7 +10627,7 @@ MARPA_OFF_DEBUG2("ordinal=%d", ordinal); OR_of_AND(and_nodes) = or_node; Predecessor_OR_of_AND(and_nodes) = NULL; Cause_OR_of_AND (and_nodes) = - (OR)TOK_by_ID_of_R (r, RHS_ID_of_RULE (completed_start_rule, rule_length - 1)); + (OR)TOK_by_ID_of_R (r, RHS_ID_of_RULE (null_start_rule, rule_length - 1)); return null_or_node_id; } @@ -13297,7 +13233,7 @@ switch (Phase_of_R (r)) @ @ = if (g->t_fatal_error) { - R_ERROR(g->t_fatal_error); + MARPA_ERROR(g->t_fatal_error); return failure_indicator; } @@ -13349,11 +13285,13 @@ than specifying the flags. Not being error-prone is important since there are many calls to |r_error| in the code. +@d MARPA_ERROR(message) (marpa_error(g, (message), 0u)) @d R_ERROR(message) (r_error(r, (message), 0u)) @d R_ERROR_CXT(message) (r_error(r, (message), CONTEXT_FLAG)) @d R_FATAL(message) (r_error(r, (message), FATAL_FLAG)) @d R_FATAL_CXT(message) (r_error(r, (message), CONTEXT_FLAG|FATAL_FLAG)) @ = +static void marpa_error( struct marpa_g* g, Marpa_Message_ID message, guint flags ); static void r_error( struct marpa_r* r, Marpa_Message_ID message, guint flags ); @ Not inlined. |r_error| occurs in the code quite often, @@ -13361,11 +13299,20 @@ but |r_error| should actually be invoked only in exceptional circumstances. In this case space clearly is much more important than speed. @ = -static void r_error( struct marpa_r* r, Marpa_Message_ID message, guint flags ) { - GRAMMAR g = G_of_R(r); - if (!(flags & CONTEXT_FLAG)) g_context_clear(g); - g->t_error = message; - if (flags & FATAL_FLAG) g->t_fatal_error = g->t_error; +static void +marpa_error (struct marpa_g *g, Marpa_Message_ID message, guint flags) +{ + if (!(flags & CONTEXT_FLAG)) + g_context_clear (g); + g->t_error = message; + if (flags & FATAL_FLAG) + g->t_fatal_error = g->t_error; +} + +static void +r_error (struct marpa_r *r, Marpa_Message_ID message, guint flags) +{ + marpa_error (G_of_R (r), message, flags); } @** Messages and Logging.