From 9b3ec79cb00848f7a69bc13035f117b2a86d7bd8 Mon Sep 17 00:00:00 2001 From: Heberti Almeida Date: Thu, 19 Nov 2020 23:08:11 -0500 Subject: [PATCH] Updating from GFM upstream --- Sources/ext_scanners.c | 1000 ++++++++++++++------------------------- Sources/ext_scanners.re | 53 ++- Sources/table.c | 123 +++-- 3 files changed, 461 insertions(+), 715 deletions(-) diff --git a/Sources/ext_scanners.c b/Sources/ext_scanners.c index c3de227a..0d3ba288 100644 --- a/Sources/ext_scanners.c +++ b/Sources/ext_scanners.c @@ -1,4 +1,5 @@ -/* Generated by re2c 1.1.1 */ +/* Generated by re2c 1.3 */ + #include "ext_scanners.h" #include @@ -39,265 +40,180 @@ bufsize_t _scan_table_start(const unsigned char *p) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; - yych = *(marker = p); - if (yych <= '{') { - if (yych <= 0x1F) { - if (yych <= '\t') { - if (yych <= 0x08) - goto yy3; + yych = *p; + if (yych <= ' ') { + if (yych <= '\n') { + if (yych == '\t') goto yy4; - } else { - if (yych <= '\n') - goto yy2; - if (yych <= '\f') - goto yy4; - goto yy3; - } } else { - if (yych <= '-') { - if (yych <= ' ') - goto yy4; - if (yych <= ',') - goto yy3; - goto yy5; - } else { - if (yych == ':') - goto yy6; - goto yy3; - } + if (yych <= '\f') + goto yy4; + if (yych >= ' ') + goto yy4; } } else { - if (yych <= 0xEC) { - if (yych <= 0xC1) { - if (yych <= '|') - goto yy4; - if (yych <= 0x7F) - goto yy3; - } else { - if (yych <= 0xDF) - goto yy7; - if (yych <= 0xE0) - goto yy9; - goto yy10; - } + if (yych <= '9') { + if (yych == '-') + goto yy5; } else { - if (yych <= 0xF0) { - if (yych <= 0xED) - goto yy11; - if (yych <= 0xEF) - goto yy10; - goto yy12; - } else { - if (yych <= 0xF3) - goto yy13; - if (yych <= 0xF4) - goto yy14; - } + if (yych <= ':') + goto yy6; + if (yych == '|') + goto yy4; } } - yy2 : { return 0; } - yy3: ++p; - goto yy2; + yy3 : { return 0; } yy4: yych = *(marker = ++p); if (yybm[0 + yych] & 64) { - goto yy15; + goto yy7; } if (yych == '-') - goto yy17; + goto yy10; if (yych == ':') - goto yy19; - goto yy2; + goto yy12; + goto yy3; yy5: yych = *(marker = ++p); if (yybm[0 + yych] & 128) { - goto yy17; + goto yy10; } if (yych <= ' ') { if (yych <= 0x08) - goto yy2; + goto yy3; if (yych <= '\r') - goto yy21; + goto yy14; if (yych <= 0x1F) - goto yy2; - goto yy21; + goto yy3; + goto yy14; } else { if (yych <= ':') { if (yych <= '9') - goto yy2; - goto yy20; + goto yy3; + goto yy13; } else { if (yych == '|') - goto yy21; - goto yy2; + goto yy14; + goto yy3; } } yy6: yych = *(marker = ++p); if (yybm[0 + yych] & 128) { - goto yy17; + goto yy10; } - goto yy2; + goto yy3; yy7: - yych = *++p; - if (yych <= 0x7F) - goto yy8; - if (yych <= 0xBF) - goto yy3; - yy8: - p = marker; - goto yy2; - yy9: - yych = *++p; - if (yych <= 0x9F) - goto yy8; - if (yych <= 0xBF) - goto yy7; - goto yy8; - yy10: - yych = *++p; - if (yych <= 0x7F) - goto yy8; - if (yych <= 0xBF) - goto yy7; - goto yy8; - yy11: - yych = *++p; - if (yych <= 0x7F) - goto yy8; - if (yych <= 0x9F) - goto yy7; - goto yy8; - yy12: - yych = *++p; - if (yych <= 0x8F) - goto yy8; - if (yych <= 0xBF) - goto yy10; - goto yy8; - yy13: - yych = *++p; - if (yych <= 0x7F) - goto yy8; - if (yych <= 0xBF) - goto yy10; - goto yy8; - yy14: - yych = *++p; - if (yych <= 0x7F) - goto yy8; - if (yych <= 0x8F) - goto yy10; - goto yy8; - yy15: yych = *++p; if (yybm[0 + yych] & 64) { - goto yy15; + goto yy7; } if (yych == '-') - goto yy17; + goto yy10; if (yych == ':') - goto yy19; - goto yy8; - yy17: + goto yy12; + yy9: + p = marker; + goto yy3; + yy10: yych = *++p; if (yybm[0 + yych] & 128) { - goto yy17; + goto yy10; } if (yych <= 0x1F) { if (yych <= '\n') { if (yych <= 0x08) - goto yy8; + goto yy9; if (yych <= '\t') - goto yy20; - goto yy22; + goto yy13; + goto yy15; } else { if (yych <= '\f') - goto yy20; + goto yy13; if (yych <= '\r') - goto yy24; - goto yy8; + goto yy17; + goto yy9; } } else { if (yych <= ':') { if (yych <= ' ') - goto yy20; + goto yy13; if (yych <= '9') - goto yy8; - goto yy20; + goto yy9; + goto yy13; } else { if (yych == '|') - goto yy25; - goto yy8; + goto yy18; + goto yy9; } } - yy19: + yy12: yych = *++p; if (yybm[0 + yych] & 128) { - goto yy17; + goto yy10; } - goto yy8; - yy20: + goto yy9; + yy13: yych = *++p; - yy21: + yy14: if (yych <= '\r') { if (yych <= '\t') { if (yych <= 0x08) - goto yy8; - goto yy20; + goto yy9; + goto yy13; } else { if (yych <= '\n') - goto yy22; + goto yy15; if (yych <= '\f') - goto yy20; - goto yy24; + goto yy13; + goto yy17; } } else { if (yych <= ' ') { if (yych <= 0x1F) - goto yy8; - goto yy20; + goto yy9; + goto yy13; } else { if (yych == '|') - goto yy25; - goto yy8; + goto yy18; + goto yy9; } } - yy22: + yy15: ++p; { return (bufsize_t)(p - start); } - yy24: + yy17: yych = *++p; if (yych == '\n') - goto yy22; - goto yy8; - yy25: + goto yy15; + goto yy9; + yy18: yych = *++p; if (yybm[0 + yych] & 128) { - goto yy17; + goto yy10; } if (yych <= '\r') { if (yych <= '\t') { if (yych <= 0x08) - goto yy8; - goto yy25; + goto yy9; + goto yy18; } else { if (yych <= '\n') - goto yy22; + goto yy15; if (yych <= '\f') - goto yy25; - goto yy24; + goto yy18; + goto yy17; } } else { if (yych <= ' ') { if (yych <= 0x1F) - goto yy8; - goto yy25; + goto yy9; + goto yy18; } else { if (yych == ':') - goto yy19; - goto yy8; + goto yy12; + goto yy9; } } } @@ -309,6 +225,7 @@ bufsize_t _scan_table_cell(const unsigned char *p) { { unsigned char yych; + unsigned int yyaccept = 0; static const unsigned char yybm[] = { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 64, 64, 0, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, @@ -326,53 +243,51 @@ bufsize_t _scan_table_cell(const unsigned char *p) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; - yych = *(marker = p); + yych = *p; if (yybm[0 + yych] & 64) { - goto yy30; + goto yy22; } - if (yych <= 0xE0) { - if (yych <= '\\') { - if (yych <= '\n') - goto yy29; + if (yych <= 0xEC) { + if (yych <= 0xC1) { if (yych <= '\r') - goto yy32; - goto yy34; + goto yy25; + if (yych <= '\\') + goto yy27; + goto yy25; } else { - if (yych <= '|') - goto yy32; - if (yych <= 0xC1) - goto yy29; if (yych <= 0xDF) - goto yy36; - goto yy38; + goto yy29; + if (yych <= 0xE0) + goto yy30; + goto yy31; } } else { - if (yych <= 0xEF) { - if (yych == 0xED) - goto yy40; - goto yy39; + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy32; + if (yych <= 0xEF) + goto yy31; + goto yy33; } else { - if (yych <= 0xF0) - goto yy41; if (yych <= 0xF3) - goto yy42; + goto yy34; if (yych <= 0xF4) - goto yy43; + goto yy35; + goto yy25; } } - yy29 : { return (bufsize_t)(p - start); } - yy30: + yy22: + yyaccept = 0; yych = *(marker = ++p); if (yybm[0 + yych] & 64) { - goto yy30; + goto yy22; } if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= '\r') - goto yy29; + goto yy24; if (yych <= '\\') - goto yy34; - goto yy29; + goto yy27; } else { if (yych <= 0xDF) goto yy36; @@ -392,29 +307,31 @@ bufsize_t _scan_table_cell(const unsigned char *p) { goto yy42; if (yych <= 0xF4) goto yy43; - goto yy29; } } - yy32: + yy24 : { return (bufsize_t)(p - start); } + yy25: ++p; - { return 0; } - yy34: + yy26 : { return 0; } + yy27: + yyaccept = 0; yych = *(marker = ++p); if (yybm[0 + yych] & 128) { - goto yy34; + goto yy27; } if (yych <= 0xDF) { if (yych <= '\f') { if (yych == '\n') - goto yy29; - goto yy30; + goto yy24; + goto yy22; } else { if (yych <= '\r') - goto yy29; + goto yy24; if (yych <= 0x7F) - goto yy30; + goto yy22; if (yych <= 0xC1) - goto yy29; + goto yy24; + goto yy36; } } else { if (yych <= 0xEF) { @@ -430,18 +347,77 @@ bufsize_t _scan_table_cell(const unsigned char *p) { goto yy42; if (yych <= 0xF4) goto yy43; - goto yy29; + goto yy24; } } + yy29: + yych = *++p; + if (yych <= 0x7F) + goto yy26; + if (yych <= 0xBF) + goto yy22; + goto yy26; + yy30: + yyaccept = 1; + yych = *(marker = ++p); + if (yych <= 0x9F) + goto yy26; + if (yych <= 0xBF) + goto yy36; + goto yy26; + yy31: + yyaccept = 1; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy26; + if (yych <= 0xBF) + goto yy36; + goto yy26; + yy32: + yyaccept = 1; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy26; + if (yych <= 0x9F) + goto yy36; + goto yy26; + yy33: + yyaccept = 1; + yych = *(marker = ++p); + if (yych <= 0x8F) + goto yy26; + if (yych <= 0xBF) + goto yy39; + goto yy26; + yy34: + yyaccept = 1; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy26; + if (yych <= 0xBF) + goto yy39; + goto yy26; + yy35: + yyaccept = 1; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy26; + if (yych <= 0x8F) + goto yy39; + goto yy26; yy36: yych = *++p; if (yych <= 0x7F) goto yy37; if (yych <= 0xBF) - goto yy30; + goto yy22; yy37: p = marker; - goto yy29; + if (yyaccept == 0) { + goto yy24; + } else { + goto yy26; + } yy38: yych = *++p; if (yych <= 0x9F) @@ -488,12 +464,10 @@ bufsize_t _scan_table_cell(const unsigned char *p) { } bufsize_t _scan_table_cell_end(const unsigned char *p) { - const unsigned char *marker = NULL; const unsigned char *start = p; { unsigned char yych; - unsigned int yyaccept = 0; static const unsigned char yybm[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, @@ -509,115 +483,17 @@ bufsize_t _scan_table_cell_end(const unsigned char *p) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; - yych = *(marker = p); - if (yych <= 0xDF) { - if (yych <= '{') { - if (yych != '\n') - goto yy47; - } else { - if (yych <= '|') - goto yy48; - if (yych <= 0x7F) - goto yy47; - if (yych >= 0xC2) - goto yy51; - } - } else { - if (yych <= 0xEF) { - if (yych <= 0xE0) - goto yy53; - if (yych == 0xED) - goto yy55; - goto yy54; - } else { - if (yych <= 0xF0) - goto yy56; - if (yych <= 0xF3) - goto yy57; - if (yych <= 0xF4) - goto yy58; - } - } - yy46 : { return 0; } - yy47: + yych = *p; + if (yych == '|') + goto yy48; ++p; - goto yy46; + { return 0; } yy48: - yyaccept = 1; - yych = *(marker = ++p); + yych = *++p; if (yybm[0 + yych] & 128) { goto yy48; } - if (yych <= 0x08) - goto yy50; - if (yych <= '\n') - goto yy59; - if (yych <= '\r') - goto yy60; - yy50 : { return (bufsize_t)(p - start); } - yy51: - yych = *++p; - if (yych <= 0x7F) - goto yy52; - if (yych <= 0xBF) - goto yy47; - yy52: - p = marker; - if (yyaccept == 0) { - goto yy46; - } else { - goto yy50; - } - yy53: - yych = *++p; - if (yych <= 0x9F) - goto yy52; - if (yych <= 0xBF) - goto yy51; - goto yy52; - yy54: - yych = *++p; - if (yych <= 0x7F) - goto yy52; - if (yych <= 0xBF) - goto yy51; - goto yy52; - yy55: - yych = *++p; - if (yych <= 0x7F) - goto yy52; - if (yych <= 0x9F) - goto yy51; - goto yy52; - yy56: - yych = *++p; - if (yych <= 0x8F) - goto yy52; - if (yych <= 0xBF) - goto yy54; - goto yy52; - yy57: - yych = *++p; - if (yych <= 0x7F) - goto yy52; - if (yych <= 0xBF) - goto yy54; - goto yy52; - yy58: - yych = *++p; - if (yych <= 0x7F) - goto yy52; - if (yych <= 0x8F) - goto yy54; - goto yy52; - yy59: - ++p; - goto yy50; - yy60: - yych = *++p; - if (yych == '\n') - goto yy59; - goto yy52; + { return (bufsize_t)(p - start); } } } @@ -642,138 +518,62 @@ bufsize_t _scan_table_row_end(const unsigned char *p) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; - yych = *(marker = p); - if (yych <= 0xC1) { - if (yych <= '\f') { - if (yych <= 0x08) - goto yy64; - if (yych == '\n') - goto yy66; - goto yy65; - } else { - if (yych <= 0x1F) { - if (yych <= '\r') - goto yy68; - goto yy64; - } else { - if (yych <= ' ') - goto yy65; - if (yych <= 0x7F) - goto yy64; - } - } + yych = *p; + if (yych <= '\f') { + if (yych <= 0x08) + goto yy53; + if (yych == '\n') + goto yy56; + goto yy55; } else { - if (yych <= 0xED) { - if (yych <= 0xDF) - goto yy69; - if (yych <= 0xE0) - goto yy71; - if (yych <= 0xEC) - goto yy72; - goto yy73; - } else { - if (yych <= 0xF0) { - if (yych <= 0xEF) - goto yy72; - goto yy74; - } else { - if (yych <= 0xF3) - goto yy75; - if (yych <= 0xF4) - goto yy76; - } - } + if (yych <= '\r') + goto yy58; + if (yych == ' ') + goto yy55; } - yy63 : { return 0; } - yy64: + yy53: ++p; - goto yy63; - yy65: + yy54 : { return 0; } + yy55: yych = *(marker = ++p); if (yych <= 0x08) - goto yy63; + goto yy54; if (yych <= '\r') - goto yy78; + goto yy60; if (yych == ' ') - goto yy78; - goto yy63; - yy66: + goto yy60; + goto yy54; + yy56: ++p; { return (bufsize_t)(p - start); } - yy68: + yy58: yych = *++p; if (yych == '\n') - goto yy66; - goto yy63; - yy69: - yych = *++p; - if (yych <= 0x7F) - goto yy70; - if (yych <= 0xBF) - goto yy64; - yy70: - p = marker; - goto yy63; - yy71: - yych = *++p; - if (yych <= 0x9F) - goto yy70; - if (yych <= 0xBF) - goto yy69; - goto yy70; - yy72: - yych = *++p; - if (yych <= 0x7F) - goto yy70; - if (yych <= 0xBF) - goto yy69; - goto yy70; - yy73: - yych = *++p; - if (yych <= 0x7F) - goto yy70; - if (yych <= 0x9F) - goto yy69; - goto yy70; - yy74: - yych = *++p; - if (yych <= 0x8F) - goto yy70; - if (yych <= 0xBF) - goto yy72; - goto yy70; - yy75: - yych = *++p; - if (yych <= 0x7F) - goto yy70; - if (yych <= 0xBF) - goto yy72; - goto yy70; - yy76: - yych = *++p; - if (yych <= 0x7F) - goto yy70; - if (yych <= 0x8F) - goto yy72; - goto yy70; - yy77: + goto yy56; + goto yy54; + yy59: yych = *++p; - yy78: + yy60: if (yybm[0 + yych] & 128) { - goto yy77; + goto yy59; } if (yych <= 0x08) - goto yy70; + goto yy61; if (yych <= '\n') - goto yy66; - if (yych >= 0x0E) - goto yy70; + goto yy56; + if (yych <= '\r') + goto yy62; + yy61: + p = marker; + goto yy54; + yy62: yych = *++p; if (yych == '\n') - goto yy66; - goto yy70; + goto yy56; + goto yy61; } } + bufsize_t _scan_tasklist(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -798,361 +598,281 @@ bufsize_t _scan_tasklist(const unsigned char *p) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; - yych = *(marker = p); - if (yych <= '/') { - if (yych <= 0x1F) { - if (yych <= '\t') { - if (yych <= 0x08) - goto yy83; - goto yy84; - } else { - if (yych <= '\n') - goto yy82; - if (yych <= '\f') - goto yy84; - goto yy83; - } + yych = *p; + if (yych <= ' ') { + if (yych <= '\n') { + if (yych == '\t') + goto yy67; } else { - if (yych <= '+') { - if (yych <= ' ') - goto yy84; - if (yych <= ')') - goto yy83; - goto yy85; - } else { - if (yych == '-') - goto yy85; - goto yy83; - } + if (yych <= '\f') + goto yy67; + if (yych >= ' ') + goto yy67; } } else { - if (yych <= 0xEC) { - if (yych <= 0xC1) { - if (yych <= '9') - goto yy86; - if (yych <= 0x7F) - goto yy83; - } else { - if (yych <= 0xDF) - goto yy87; - if (yych <= 0xE0) - goto yy89; - goto yy90; - } + if (yych <= ',') { + if (yych <= ')') + goto yy65; + if (yych <= '+') + goto yy68; } else { - if (yych <= 0xF0) { - if (yych <= 0xED) - goto yy91; - if (yych <= 0xEF) - goto yy90; - goto yy92; - } else { - if (yych <= 0xF3) - goto yy93; - if (yych <= 0xF4) - goto yy94; - } + if (yych <= '-') + goto yy68; + if (yych <= '/') + goto yy65; + if (yych <= '9') + goto yy69; } } - yy82 : { return 0; } - yy83: + yy65: ++p; - goto yy82; - yy84: + yy66 : { return 0; } + yy67: yych = *(marker = ++p); if (yybm[0 + yych] & 64) { - goto yy95; + goto yy70; } if (yych <= ',') { if (yych <= ')') - goto yy82; + goto yy66; if (yych <= '+') - goto yy97; - goto yy82; + goto yy73; + goto yy66; } else { if (yych <= '-') - goto yy97; + goto yy73; if (yych <= '/') - goto yy82; + goto yy66; if (yych <= '9') - goto yy98; - goto yy82; + goto yy74; + goto yy66; } - yy85: + yy68: yych = *(marker = ++p); if (yych <= '\n') { if (yych == '\t') - goto yy99; - goto yy82; + goto yy75; + goto yy66; } else { if (yych <= '\f') - goto yy99; + goto yy75; if (yych == ' ') - goto yy99; - goto yy82; + goto yy75; + goto yy66; } - yy86: + yy69: yych = *(marker = ++p); if (yych <= 0x1F) { if (yych <= '\t') { if (yych <= 0x08) - goto yy102; - goto yy97; + goto yy78; + goto yy73; } else { if (yych <= '\n') - goto yy82; + goto yy66; if (yych <= '\f') - goto yy97; - goto yy102; + goto yy73; + goto yy78; } } else { if (yych <= 0x7F) { if (yych <= ' ') - goto yy97; - goto yy102; + goto yy73; + goto yy78; } else { if (yych <= 0xC1) - goto yy82; + goto yy66; if (yych <= 0xF4) - goto yy102; - goto yy82; + goto yy78; + goto yy66; } } - yy87: - yych = *++p; - if (yych <= 0x7F) - goto yy88; - if (yych <= 0xBF) - goto yy83; - yy88: - p = marker; - goto yy82; - yy89: - yych = *++p; - if (yych <= 0x9F) - goto yy88; - if (yych <= 0xBF) - goto yy87; - goto yy88; - yy90: - yych = *++p; - if (yych <= 0x7F) - goto yy88; - if (yych <= 0xBF) - goto yy87; - goto yy88; - yy91: - yych = *++p; - if (yych <= 0x7F) - goto yy88; - if (yych <= 0x9F) - goto yy87; - goto yy88; - yy92: - yych = *++p; - if (yych <= 0x8F) - goto yy88; - if (yych <= 0xBF) - goto yy90; - goto yy88; - yy93: - yych = *++p; - if (yych <= 0x7F) - goto yy88; - if (yych <= 0xBF) - goto yy90; - goto yy88; - yy94: - yych = *++p; - if (yych <= 0x7F) - goto yy88; - if (yych <= 0x8F) - goto yy90; - goto yy88; - yy95: + yy70: yych = *++p; if (yybm[0 + yych] & 64) { - goto yy95; + goto yy70; } if (yych <= ',') { if (yych <= ')') - goto yy88; - if (yych >= ',') - goto yy88; + goto yy72; + if (yych <= '+') + goto yy73; } else { if (yych <= '-') - goto yy97; + goto yy73; if (yych <= '/') - goto yy88; + goto yy72; if (yych <= '9') - goto yy98; - goto yy88; + goto yy74; } - yy97: + yy72: + p = marker; + goto yy66; + yy73: yych = *++p; if (yych == '[') - goto yy88; - goto yy100; - yy98: + goto yy72; + goto yy76; + yy74: yych = *++p; if (yych <= '\n') { if (yych == '\t') - goto yy97; - goto yy102; + goto yy73; + goto yy78; } else { if (yych <= '\f') - goto yy97; + goto yy73; if (yych == ' ') - goto yy97; - goto yy102; + goto yy73; + goto yy78; } - yy99: + yy75: yych = *++p; - yy100: + yy76: if (yych <= '\f') { if (yych == '\t') - goto yy99; + goto yy75; if (yych <= '\n') - goto yy88; - goto yy99; + goto yy72; + goto yy75; } else { if (yych <= ' ') { if (yych <= 0x1F) - goto yy88; - goto yy99; + goto yy72; + goto yy75; } else { if (yych == '[') - goto yy110; - goto yy88; + goto yy86; + goto yy72; } } - yy101: + yy77: yych = *++p; - yy102: + yy78: if (yybm[0 + yych] & 128) { - goto yy101; + goto yy77; } if (yych <= 0xC1) { if (yych <= '\f') { if (yych <= 0x08) - goto yy97; + goto yy73; if (yych == '\n') - goto yy88; - goto yy99; + goto yy72; + goto yy75; } else { if (yych == ' ') - goto yy99; + goto yy75; if (yych <= 0x7F) - goto yy97; - goto yy88; + goto yy73; + goto yy72; } } else { if (yych <= 0xED) { if (yych <= 0xDF) - goto yy103; + goto yy79; if (yych <= 0xE0) - goto yy104; + goto yy80; if (yych <= 0xEC) - goto yy105; - goto yy106; + goto yy81; + goto yy82; } else { if (yych <= 0xF0) { if (yych <= 0xEF) - goto yy105; - goto yy107; + goto yy81; + goto yy83; } else { if (yych <= 0xF3) - goto yy108; + goto yy84; if (yych <= 0xF4) - goto yy109; - goto yy88; + goto yy85; + goto yy72; } } } - yy103: + yy79: yych = *++p; if (yych <= 0x7F) - goto yy88; + goto yy72; if (yych <= 0xBF) - goto yy97; - goto yy88; - yy104: + goto yy73; + goto yy72; + yy80: yych = *++p; if (yych <= 0x9F) - goto yy88; + goto yy72; if (yych <= 0xBF) - goto yy103; - goto yy88; - yy105: + goto yy79; + goto yy72; + yy81: yych = *++p; if (yych <= 0x7F) - goto yy88; + goto yy72; if (yych <= 0xBF) - goto yy103; - goto yy88; - yy106: + goto yy79; + goto yy72; + yy82: yych = *++p; if (yych <= 0x7F) - goto yy88; + goto yy72; if (yych <= 0x9F) - goto yy103; - goto yy88; - yy107: + goto yy79; + goto yy72; + yy83: yych = *++p; if (yych <= 0x8F) - goto yy88; + goto yy72; if (yych <= 0xBF) - goto yy105; - goto yy88; - yy108: + goto yy81; + goto yy72; + yy84: yych = *++p; if (yych <= 0x7F) - goto yy88; + goto yy72; if (yych <= 0xBF) - goto yy105; - goto yy88; - yy109: + goto yy81; + goto yy72; + yy85: yych = *++p; if (yych <= 0x7F) - goto yy88; + goto yy72; if (yych <= 0x8F) - goto yy105; - goto yy88; - yy110: + goto yy81; + goto yy72; + yy86: yych = *++p; if (yych <= 'W') { if (yych != ' ') - goto yy88; + goto yy72; } else { if (yych <= 'X') - goto yy111; + goto yy87; if (yych != 'x') - goto yy88; + goto yy72; } - yy111: + yy87: yych = *++p; if (yych != ']') - goto yy88; + goto yy72; yych = *++p; if (yych <= '\n') { if (yych != '\t') - goto yy88; + goto yy72; } else { if (yych <= '\f') - goto yy113; + goto yy89; if (yych != ' ') - goto yy88; + goto yy72; } - yy113: + yy89: yych = *++p; if (yych <= '\n') { if (yych == '\t') - goto yy113; + goto yy89; } else { if (yych <= '\f') - goto yy113; + goto yy89; if (yych == ' ') - goto yy113; + goto yy89; } { return (bufsize_t)(p - start); } } diff --git a/Sources/ext_scanners.re b/Sources/ext_scanners.re index 94a4c673..9144e5b4 100644 --- a/Sources/ext_scanners.re +++ b/Sources/ext_scanners.re @@ -1,3 +1,6 @@ +/*!re2c re2c:flags:no-debug-info = 1; */ +/*!re2c re2c:indent:string = ' '; */ + #include #include "ext_scanners.h" @@ -22,7 +25,6 @@ bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *), unsigned cha re2c:define:YYCTYPE = "unsigned char"; re2c:define:YYCURSOR = p; re2c:define:YYMARKER = marker; - re2c:define:YYCTXMARKER = marker; re2c:yyfill:enable = 0; spacechar = [ \t\v\f]; @@ -30,7 +32,7 @@ bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *), unsigned cha escaped_char = [\\][|!"#$%&'()*+,./:;<=>?@[\\\]^_`{}~-]; table_marker = (spacechar*[:]?[-]+[:]?spacechar*); - table_cell = (escaped_char|[^|\r\n])*; + table_cell = (escaped_char|[^|\r\n])+; tasklist = spacechar*("-"|"+"|"*"|[0-9]+.)spacechar+("[ ]"|"[x]")spacechar+; */ @@ -39,47 +41,52 @@ bufsize_t _scan_table_start(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; -/*!re2c - [|]? table_marker ([|] table_marker)* [|]? spacechar* newline { return (bufsize_t)(p - start); } - .? { return 0; } -*/ + /*!re2c + [|]? table_marker ([|] table_marker)* [|]? spacechar* newline { + return (bufsize_t)(p - start); + } + * { return 0; } + */ } bufsize_t _scan_table_cell(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; -/*!re2c - table_cell { return (bufsize_t)(p - start); } - .? { return 0; } -*/ + /*!re2c + // In fact, `table_cell` matches non-empty table cells only. The empty + // string is also a valid table cell, but is handled by the default rule. + // This approach prevents re2c's match-empty-string warning. + table_cell { return (bufsize_t)(p - start); } + * { return 0; } + */ } bufsize_t _scan_table_cell_end(const unsigned char *p) { - const unsigned char *marker = NULL; const unsigned char *start = p; -/*!re2c - [|] spacechar* newline? { return (bufsize_t)(p - start); } - .? { return 0; } -*/ + /*!re2c + [|] spacechar* { return (bufsize_t)(p - start); } + * { return 0; } + */ } bufsize_t _scan_table_row_end(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; -/*!re2c - spacechar* newline { return (bufsize_t)(p - start); } - .? { return 0; } -*/ + /*!re2c + spacechar* newline { return (bufsize_t)(p - start); } + * { return 0; } + */ } + bufsize_t _scan_tasklist(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; -/*!re2c - tasklist { return (bufsize_t)(p - start); } - .? { return 0; } -*/ + /*!re2c + tasklist { return (bufsize_t)(p - start); } + * { return 0; } + */ } diff --git a/Sources/table.c b/Sources/table.c index 0ea31cbc..a5bb4406 100644 --- a/Sources/table.c +++ b/Sources/table.c @@ -114,60 +114,87 @@ static cmark_strbuf *unescape_pipes(cmark_mem *mem, unsigned char *string, bufsi static table_row *row_from_string(cmark_syntax_extension *self, cmark_parser *parser, unsigned char *string, int len) { + // Parses a single table row. It has the following form: + // `delim? table_cell (delim table_cell)* delim? newline` + // Note that cells are allowed to be empty. + // + // From the GitHub-flavored Markdown specification: + // + // > Each row consists of cells containing arbitrary text, in which inlines + // > are parsed, separated by pipes (|). A leading and trailing pipe is also + // > recommended for clarity of reading, and if there’s otherwise parsing + // > ambiguity. + table_row *row = NULL; bufsize_t cell_matched = 1, pipe_matched = 1, offset; - int cell_end_offset; + int expect_more_cells = 1; + int row_end_offset = 0; row = (table_row *)parser->mem->calloc(1, sizeof(table_row)); row->n_columns = 0; row->cells = NULL; + // Scan past the (optional) leading pipe. offset = scan_table_cell_end(string, len, 0); // Parse the cells of the row. Stop if we reach the end of the input, or if we // cannot detect any more cells. - while (offset < len && (cell_matched || pipe_matched)) { + while (offset < len && expect_more_cells) { cell_matched = scan_table_cell(string, len, offset); pipe_matched = scan_table_cell_end(string, len, offset + cell_matched); if (cell_matched || pipe_matched) { - cell_end_offset = offset + cell_matched - 1; + // We are guaranteed to have a cell, since (1) either we found some + // content and cell_matched, or (2) we found an empty cell followed by a + // pipe. + cmark_strbuf *cell_buf = unescape_pipes(parser->mem, string + offset, + cell_matched); + cmark_strbuf_trim(cell_buf); + + node_cell *cell = (node_cell *)parser->mem->calloc(1, sizeof(*cell)); + cell->buf = cell_buf; + cell->start_offset = offset; + cell->end_offset = offset + cell_matched - 1; + + while (cell->start_offset > 0 && string[cell->start_offset - 1] != '|') { + --cell->start_offset; + ++cell->internal_offset; + } + + row->n_columns += 1; + row->cells = cmark_llist_append(parser->mem, row->cells, cell); + } + + offset += cell_matched + pipe_matched; + + if (pipe_matched) { + expect_more_cells = 1; + } else { + // We've scanned the last cell. Check if we have reached the end of the row + row_end_offset = scan_table_row_end(string, len, offset); + offset += row_end_offset; - if (string[cell_end_offset] == '\n' || string[cell_end_offset] == '\r') { - row->paragraph_offset = cell_end_offset; + // If the end of the row is not the end of the input, + // the row is not a real row but potentially part of the paragraph + // preceding the table. + if (row_end_offset && offset != len) { + row->paragraph_offset = offset; cmark_llist_free_full(parser->mem, row->cells, (cmark_free_func)free_table_cell); row->cells = NULL; row->n_columns = 0; - } else { - cmark_strbuf *cell_buf = unescape_pipes(parser->mem, string + offset, - cell_matched); - cmark_strbuf_trim(cell_buf); - - node_cell *cell = (node_cell *)parser->mem->calloc(1, sizeof(*cell)); - cell->buf = cell_buf; - cell->start_offset = offset; - cell->end_offset = cell_end_offset; - - while (cell->start_offset > 0 && string[cell->start_offset - 1] != '|') { - --cell->start_offset; - ++cell->internal_offset; - } - row->n_columns += 1; - row->cells = cmark_llist_append(parser->mem, row->cells, cell); - } - } + // Scan past the (optional) leading pipe. + offset += scan_table_cell_end(string, len, offset); - offset += cell_matched + pipe_matched; - - if (!pipe_matched) { - pipe_matched = scan_table_row_end(string, len, offset); - offset += pipe_matched; + expect_more_cells = 1; + } else { + expect_more_cells = 0; + } } } - if (offset != len || !row->n_columns) { + if (offset != len || row->n_columns == 0) { free_table_row(parser->mem, row); row = NULL; } @@ -199,8 +226,6 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, cmark_parser *parser, cmark_node *parent_container, unsigned char *input, int len) { - bufsize_t matched = - scan_table_start(input, len, cmark_parser_get_first_nonspace(parser)); cmark_node *table_header; table_row *header_row = NULL; table_row *marker_row = NULL; @@ -208,41 +233,37 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, const char *parent_string; uint16_t i; - if (!matched) - return parent_container; - - parent_string = cmark_node_get_string_content(parent_container); - - cmark_arena_push(); - - header_row = row_from_string(self, parser, (unsigned char *)parent_string, - (int)strlen(parent_string)); - - if (!header_row) { - free_table_row(parser->mem, header_row); - cmark_arena_pop(); + if (!scan_table_start(input, len, cmark_parser_get_first_nonspace(parser))) { return parent_container; } + // Since scan_table_start was successful, we must have a marker row. marker_row = row_from_string(self, parser, input + cmark_parser_get_first_nonspace(parser), len - cmark_parser_get_first_nonspace(parser)); - assert(marker_row); - if (header_row->n_columns != marker_row->n_columns) { - free_table_row(parser->mem, header_row); + cmark_arena_push(); + + // Check for a matching header row. We call `row_from_string` with the entire + // (potentially long) parent container as input, but this should be safe since + // `row_from_string` bails out early if it does not find a row. + parent_string = cmark_node_get_string_content(parent_container); + header_row = row_from_string(self, parser, (unsigned char *)parent_string, + (int)strlen(parent_string)); + if (!header_row || header_row->n_columns != marker_row->n_columns) { free_table_row(parser->mem, marker_row); + free_table_row(parser->mem, header_row); cmark_arena_pop(); return parent_container; } if (cmark_arena_pop()) { + marker_row = row_from_string( + self, parser, input + cmark_parser_get_first_nonspace(parser), + len - cmark_parser_get_first_nonspace(parser)); header_row = row_from_string(self, parser, (unsigned char *)parent_string, (int)strlen(parent_string)); - marker_row = row_from_string(self, parser, - input + cmark_parser_get_first_nonspace(parser), - len - cmark_parser_get_first_nonspace(parser)); } if (!cmark_node_set_type(parent_container, CMARK_NODE_TABLE)) { @@ -257,9 +278,7 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, } cmark_node_set_syntax_extension(parent_container, self); - parent_container->as.opaque = parser->mem->calloc(1, sizeof(node_table)); - set_n_table_columns(parent_container, header_row->n_columns); uint8_t *alignments =