From c5bc07b25d04f6ada65f0654c96a41a9ea649ebd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Rasmusson?= Date: Sat, 15 Apr 2017 18:17:50 +0200 Subject: [PATCH 1/4] gherkin: (C) Use UTF-16 when wchar_t is of 2 bytes size. On Windows wchar_t is 2 bytes large, and uses UTF-16. This means that for the case of code points > 0xFFFF (and wchar_t is only 2 bytes large), the code point read from the UTF-8 source needs to be converted to two UTF-16 surrogates (wchar_t wide characters). --- gherkin/c/src/Makefile | 4 +- gherkin/c/src/compiler.c | 5 +- gherkin/c/src/file_reader.c | 34 ++++++++--- gherkin/c/src/file_token_scanner.c | 38 ++++++++---- gherkin/c/src/gherkin_line.c | 4 +- gherkin/c/src/string_utilities.c | 39 ++++++++++-- gherkin/c/src/string_utilities.h | 4 ++ gherkin/c/src/token_queue.h | 1 + gherkin/c/src/unicode_utilities.c | 98 ++++++++++++++++++++++++++++++ gherkin/c/src/unicode_utilities.h | 31 ++++++++++ gherkin/c/src/utf8_utilities.c | 34 ----------- gherkin/c/src/utf8_utilities.h | 18 ------ 12 files changed, 225 insertions(+), 85 deletions(-) create mode 100644 gherkin/c/src/unicode_utilities.c create mode 100644 gherkin/c/src/unicode_utilities.h delete mode 100644 gherkin/c/src/utf8_utilities.c delete mode 100644 gherkin/c/src/utf8_utilities.h diff --git a/gherkin/c/src/Makefile b/gherkin/c/src/Makefile index a3b1e1baa47..0fa61f35102 100644 --- a/gherkin/c/src/Makefile +++ b/gherkin/c/src/Makefile @@ -42,8 +42,8 @@ UTILITIES_OBJS= \ ../objs/file_utf8_source.o \ ../objs/print_utilities.o \ ../objs/string_utilities.o \ - ../objs/utf8_source.o \ - ../objs/utf8_utilities.o + ../objs/unicode_utilities.o \ + ../objs/utf8_source.o -include $(UTILITIES_OBJS:.o=.d) PARSER_OBJS= \ diff --git a/gherkin/c/src/compiler.c b/gherkin/c/src/compiler.c index dbd9330bc66..f05630e04f9 100644 --- a/gherkin/c/src/compiler.c +++ b/gherkin/c/src/compiler.c @@ -9,6 +9,7 @@ #include "pickle_table.h" #include "pickle_tag.h" #include "pickle_string.h" +#include 
"string_utilities.h" #include typedef struct Compiler { @@ -103,7 +104,7 @@ int Compiler_compile(Compiler* compiler, const GherkinDocument* gherkin_document } int j; for (j = 0; j < scenario_outline->steps->step_count; ++j) { - int column_offset = scenario_outline->steps->steps[j].keyword ? wcslen(scenario_outline->steps->steps[j].keyword) : 0; + int column_offset = scenario_outline->steps->steps[j].keyword ? StringUtilities_code_point_length(scenario_outline->steps->steps[j].keyword) : 0; const PickleLocations* step_locations = PickleLocations_new_double(table_row->location.line, table_row->location.column, scenario_outline->steps->steps[j].location.line, scenario_outline->steps->steps[j].location.column + column_offset); const PickleStep* step = expand_outline_step(&scenario_outline->steps->steps[j], example_table->table_header, table_row, step_locations); PickleStep_transfer(&steps->steps[background_step_count + j], (PickleStep*)step); @@ -225,7 +226,7 @@ static void copy_tags(PickleTag* destination_array, const Tags* source) { static void copy_steps(PickleStep* destination_array, const Steps* source) { int i; for (i = 0; i < source->step_count; ++i) { - int column_offset = source->steps[i].keyword ? wcslen(source->steps[i].keyword) : 0; + int column_offset = source->steps[i].keyword ? 
StringUtilities_code_point_length(source->steps[i].keyword) : 0; const PickleLocations* step_locations = PickleLocations_new_single(source->steps[i].location.line, source->steps[i].location.column + column_offset); const PickleArgument* argument = create_pickle_argument(source->steps[i].argument, 0, 0); const PickleStep* step = PickleStep_new(step_locations, source->steps[i].text, argument); diff --git a/gherkin/c/src/file_reader.c b/gherkin/c/src/file_reader.c index ee40f496be4..7143128b671 100644 --- a/gherkin/c/src/file_reader.c +++ b/gherkin/c/src/file_reader.c @@ -1,12 +1,14 @@ #include "file_reader.h" #include "file_utf8_source.h" -#include "utf8_utilities.h" +#include "unicode_utilities.h" #include typedef struct FileReader { const char* file_name; } FileReader; +static void extend_buffer_if_needed(wchar_t** buffer, int* buffer_size, int pos); + FileReader* FileReader_new(const char* const file_name) { FileReader* file_reader = (FileReader*)malloc(sizeof(FileReader)); file_reader->file_name = file_name; @@ -17,19 +19,24 @@ const wchar_t* FileReader_read(FileReader* file_reader) { int buffer_size = 256; wchar_t* buffer = (wchar_t*)malloc(buffer_size * sizeof(wchar_t)); int pos = 0; - wchar_t c; - FILE* file = fopen(file_reader->file_name, "r"); + long code_point; + FILE* file = fopen(file_reader->file_name, "rb"); Utf8Source* utf8_source = FileUtf8Source_new(file); do { - c = Utf8Utilities_read_wchar_from_utf8_source(utf8_source); - if (c != WEOF) { - buffer[pos++] = c; - if (pos >= buffer_size - 1) { - buffer_size *= 2; - buffer = (wchar_t*)realloc(buffer, buffer_size * sizeof(wchar_t)); + code_point = UnicodeUtilities_read_code_point_from_utf8_source(utf8_source); + if (code_point != WEOF) { + if (code_point <= 0xFFFF || sizeof(wchar_t) > 2) { + buffer[pos++] = (wchar_t)code_point; + extend_buffer_if_needed(&buffer, &buffer_size, pos); + } else { + Utf16Surrogates surrogates = UnicodeUtilities_get_utf16_surrogates(code_point); + buffer[pos++] = 
surrogates.leading; + extend_buffer_if_needed(&buffer, &buffer_size, pos); + buffer[pos++] = surrogates.trailing; + extend_buffer_if_needed(&buffer, &buffer_size, pos); } } - } while (c != WEOF); + } while (code_point != WEOF); buffer[pos] = L'\0'; Utf8Source_delete(utf8_source); fclose(file); @@ -42,3 +49,10 @@ void FileReader_delete(FileReader* file_reader) { } free((void*)file_reader); } + +static void extend_buffer_if_needed(wchar_t** buffer, int* buffer_size, int pos) { + if (pos >= *buffer_size - 1) { + *buffer_size *= 2; + *buffer = (wchar_t*)realloc(*buffer, *buffer_size * sizeof(wchar_t)); + } +} diff --git a/gherkin/c/src/file_token_scanner.c b/gherkin/c/src/file_token_scanner.c index 98d2a1d841b..d0648c4d538 100644 --- a/gherkin/c/src/file_token_scanner.c +++ b/gherkin/c/src/file_token_scanner.c @@ -2,7 +2,7 @@ #include "file_utf8_source.h" #include "gherkin_line.h" #include "string_utilities.h" -#include "utf8_utilities.h" +#include "unicode_utilities.h" #include typedef struct FileTokenScanner { @@ -16,6 +16,8 @@ typedef struct FileTokenScanner { static Token* FileTokenScanner_read(TokenScanner* token_scanner); +static void extend_buffer_if_needed(FileTokenScanner* token_scanner, int pos); + static void FileTokenScanner_delete(TokenScanner* token_scanner); TokenScanner* FileTokenScanner_new(const char* const file_name) { @@ -24,7 +26,7 @@ TokenScanner* FileTokenScanner_new(const char* const file_name) { token_scanner->token_scanner.delete = &FileTokenScanner_delete; token_scanner->line = 0; token_scanner->file = 0; - token_scanner->file = fopen(file_name, "r"); + token_scanner->file = fopen(file_name, "rb"); token_scanner->utf8_source = FileUtf8Source_new(token_scanner->file); token_scanner->buffer_size = 128; token_scanner->buffer = (wchar_t*)malloc(token_scanner->buffer_size * sizeof(wchar_t)); @@ -51,18 +53,23 @@ static Token* FileTokenScanner_read(TokenScanner* token_scanner) { if (feof(file_token_scanner->file)) return Token_new(0, 
file_token_scanner->line); int pos = 0; - wchar_t c; + long code_point; do { - c = Utf8Utilities_read_wchar_from_utf8_source(file_token_scanner->utf8_source); - if (c != WEOF && c != L'\r' && c != L'\n') { - file_token_scanner->buffer[pos++] = c; - if (pos >= file_token_scanner->buffer_size - 1) { - file_token_scanner->buffer_size *= 2; - file_token_scanner->buffer = (wchar_t*)realloc(file_token_scanner->buffer, file_token_scanner->buffer_size * sizeof(wchar_t)); + code_point = UnicodeUtilities_read_code_point_from_utf8_source(file_token_scanner->utf8_source); + if (code_point != WEOF && code_point != L'\r' && code_point != L'\n') { + if (code_point <= 0xFFFF || sizeof(wchar_t) > 2) { + file_token_scanner->buffer[pos++] = (wchar_t)code_point; + extend_buffer_if_needed(file_token_scanner, pos); + } else { + Utf16Surrogates surrogates = UnicodeUtilities_get_utf16_surrogates(code_point); + file_token_scanner->buffer[pos++] = surrogates.leading; + extend_buffer_if_needed(file_token_scanner, pos); + file_token_scanner->buffer[pos++] = surrogates.trailing; + extend_buffer_if_needed(file_token_scanner, pos); } } - } while (c != WEOF && c != L'\r' && c != L'\n'); - if (c == L'\r') { + } while (code_point != WEOF && code_point != L'\r' && code_point != L'\n'); + if (code_point == L'\r') { unsigned char next_char = fgetc(file_token_scanner->file); if (next_char != L'\n') { ungetc(next_char, file_token_scanner->file); @@ -70,7 +77,7 @@ static Token* FileTokenScanner_read(TokenScanner* token_scanner) { } file_token_scanner->buffer[pos] = L'\0'; const GherkinLine* line; - if (c != WEOF || pos != 0) { + if (code_point != WEOF || pos != 0) { wchar_t* text = StringUtilities_copy_string_part(file_token_scanner->buffer, pos); line = GherkinLine_new(text, file_token_scanner->line); } @@ -78,3 +85,10 @@ static Token* FileTokenScanner_read(TokenScanner* token_scanner) { line = (GherkinLine*)0; return Token_new(line, file_token_scanner->line); } + +static void 
extend_buffer_if_needed(FileTokenScanner* file_token_scanner, int pos){ + if (pos >= file_token_scanner->buffer_size - 1) { + file_token_scanner->buffer_size *= 2; + file_token_scanner->buffer = (wchar_t*)realloc(file_token_scanner->buffer, file_token_scanner->buffer_size * sizeof(wchar_t)); + } +} diff --git a/gherkin/c/src/gherkin_line.c b/gherkin/c/src/gherkin_line.c index 096cefad201..f35d1c2a273 100644 --- a/gherkin/c/src/gherkin_line.c +++ b/gherkin/c/src/gherkin_line.c @@ -215,7 +215,7 @@ static const wchar_t* populate_cell_data(Span* item, const wchar_t* start_pos, i ++current_pos; while (end_text > current_pos && *(end_text - 1) == L' ') --end_text; - item->column = start_indent + (current_pos - start_pos) + 1; + item->column = start_indent + StringUtilities_code_point_length_for_part(start_pos, current_pos - start_pos) + 1; int text_length = end_text - current_pos; wchar_t* text = StringUtilities_copy_string_part(current_pos, text_length); const wchar_t* from = text; @@ -251,7 +251,7 @@ static const wchar_t* populate_tag_data(Span* item, const wchar_t* start_pos, in const wchar_t* end_text = end_pos; while (end_text > current_pos && *(end_text - 1) == L' ') --end_text; - item->column = start_indent + (current_pos - start_pos) + 1; + item->column = start_indent + StringUtilities_code_point_length_for_part(start_pos, current_pos - start_pos) + 1; int text_length = end_text - current_pos; if (text_length > 0) { item->text = StringUtilities_copy_string_part(current_pos, text_length); diff --git a/gherkin/c/src/string_utilities.c b/gherkin/c/src/string_utilities.c index 338b19de581..db6a4997042 100644 --- a/gherkin/c/src/string_utilities.c +++ b/gherkin/c/src/string_utilities.c @@ -1,5 +1,5 @@ #include "string_utilities.h" -#include "utf8_utilities.h" +#include "unicode_utilities.h" #include #include #include @@ -32,19 +32,48 @@ wchar_t* StringUtilities_copy_to_wide_string(const char* string) { int length = strlen(string); wchar_t* copy = 
(wchar_t*)malloc((length + 1) * sizeof(wchar_t)); Utf8Source* utf8_source = StringUtf8Source_new(string); + int to_index = 0; int i; for (i = 0; i < length; ++i) { - wchar_t c = Utf8Utilities_read_wchar_from_utf8_source(utf8_source); - if (c == WEOF) { + long code_point = UnicodeUtilities_read_code_point_from_utf8_source(utf8_source); + if (code_point == WEOF) { break; } - copy[i] = c; + if (code_point <= 0xFFFF || sizeof(wchar_t) > 2) { + copy[to_index++] = (wchar_t)code_point; + } else { + Utf16Surrogates surrogates = UnicodeUtilities_get_utf16_surrogates(code_point); + copy[to_index++] = surrogates.leading; + copy[to_index++] = surrogates.trailing; + } } - copy[i] = L'\0'; + copy[to_index] = L'\0'; Utf8Source_delete(utf8_source); return copy; } +size_t StringUtilities_code_point_length(const wchar_t* string) { + if (sizeof(wchar_t) > 2) { + return wcslen(string); + } else { + return StringUtilities_code_point_length_for_part(string, wcslen(string)); + } +} + +size_t StringUtilities_code_point_length_for_part(const wchar_t* string, const int length) { + int code_points = 0; + int i; + for (i = 0; i < length; ++i) { + ++code_points; + if (UnicodeUtilities_is_utf16_surrogate(string[i])) { + ++i; + } + + } + return code_points; +} + + Utf8Source* StringUtf8Source_new(const char* string) { StringUtf8Source* string_utf8_source = (StringUtf8Source*)malloc(sizeof(StringUtf8Source)); string_utf8_source->utf8_source.read = &StringUtf8Source_read; diff --git a/gherkin/c/src/string_utilities.h b/gherkin/c/src/string_utilities.h index 17e330dffd0..30472a5ea97 100644 --- a/gherkin/c/src/string_utilities.h +++ b/gherkin/c/src/string_utilities.h @@ -13,6 +13,10 @@ wchar_t* StringUtilities_copy_string_part(const wchar_t* string, const int lengt wchar_t* StringUtilities_copy_to_wide_string(const char* string); +size_t StringUtilities_code_point_length(const wchar_t* string); + +size_t StringUtilities_code_point_length_for_part(const wchar_t* string, const int length); + #ifdef 
__cplusplus } #endif diff --git a/gherkin/c/src/token_queue.h b/gherkin/c/src/token_queue.h index eaf1513c5c1..68a0cba2cf0 100644 --- a/gherkin/c/src/token_queue.h +++ b/gherkin/c/src/token_queue.h @@ -1,6 +1,7 @@ #ifndef GHERKIN_TOKEN_QUEUE_H_ #define GHERKIN_TOKEN_QUEUE_H_ +#include #include "token.h" #ifdef __cplusplus diff --git a/gherkin/c/src/unicode_utilities.c b/gherkin/c/src/unicode_utilities.c new file mode 100644 index 00000000000..c83bbdd9ae4 --- /dev/null +++ b/gherkin/c/src/unicode_utilities.c @@ -0,0 +1,98 @@ +#include "unicode_utilities.h" + +long UnicodeUtilities_read_code_point_from_utf8_source(Utf8Source* utf8_source) { + unsigned char c = Utf8Source_read(utf8_source); + if (c < 0x80) { + return (long)c; + } + unsigned char c2 = Utf8Source_read(utf8_source); + long lower_part = (long)(c2 & 0x3F); + if ((c & 0xE0) == 0xC0) { + return (((long)(c & 0x1F)) << 6) | lower_part; + } + c2 = Utf8Source_read(utf8_source); + lower_part = (lower_part << 6) | (long)(c2 & 0x3F); + if ((c & 0xF0) == 0xE0) { + return (((long)(c & 0x0F)) << 12) | lower_part; + } + c2 = Utf8Source_read(utf8_source); + lower_part = (lower_part << 6) | (long)(c2 & 0x3F); + if ((c & 0xF8) == 0xF0) { + return (((long)(c & 0x07)) << 18) | lower_part; + } + c2 = Utf8Source_read(utf8_source); + lower_part = (lower_part << 6) | (long)(c2 & 0x3F); + if ((c & 0xFC) == 0xF8) { + return (((long)(c & 0x03)) << 24) | lower_part; + } + c2 = Utf8Source_read(utf8_source); + lower_part = (lower_part << 6) | (long)(c2 & 0x3F); + if ((c & 0xFE) == 0xFC) { + return (((long)(c & 0x01)) << 30) | lower_part; + } + return WEOF; +} + +Utf16Surrogates UnicodeUtilities_get_utf16_surrogates(long code_point){ + Utf16Surrogates surrogates; + long surrogates_base = code_point - 0x10000; + surrogates.leading = 0xD800 + (surrogates_base >> 10); + surrogates.trailing = 0xDC00 + (surrogates_base & 0x3FF); + return surrogates; +} + +int UnicodeUtilities_print_wide_character_to_utf8_file(FILE* file, const wchar_t* 
text, int pos) { + long code_point; + if (!UnicodeUtilities_is_utf16_surrogate(text[pos]) || sizeof(wchar_t) > 2) { + code_point = (long)text[pos]; + } else { + long leading_surrogate = (long)text[pos++]; + long trailing_surrogate = (long)text[pos]; + code_point = 0x10000 + ((leading_surrogate - 0xD800) << 10) + (trailing_surrogate - 0xDC00); + } + print_code_point_to_utf8_file(file, code_point); + return pos; +} + +bool UnicodeUtilities_is_utf16_surrogate(const wchar_t wide_char) { + return wide_char >= 0xD800 && wide_char < 0xE000; +} + +void print_code_point_to_utf8_file(FILE* file, long code_point) { + int trailing_bytes; + if (code_point < 0x80) { + fputc((char)code_point, file); + return; + } else if (code_point < 0x800) { + fputc((char)(0xC0 | ((code_point & 0x7C0) >> 6)), file); + trailing_bytes = 1; + } else if (code_point < 0x10000) { + fputc((char)(0xE0 | ((code_point & 0xF000) >> 12)), file); + trailing_bytes = 2; + } else if (code_point < 0x200000) { + fputc((char)(0xF0 | ((code_point & 0x1C0000) >> 18)), file); + trailing_bytes = 3; + } else if (code_point < 0x4000000) { + fputc((char)(0xF8 | ((code_point & 0x3000000) >> 24)), file); + trailing_bytes = 4; + } else { + fputc((char)(0xFC | ((code_point & 0x40000000) >> 30)), file); + trailing_bytes = 5; + } + switch (trailing_bytes) { + case 5: + fputc((char)(0x80 | ((code_point & 0x3F000000) >> 24)), file); + /* fall through */ + case 4: + fputc((char)(0x80 | ((code_point & 0xFC0000) >> 18)), file); + /* fall through */ + case 3: + fputc((char)(0x80 | ((code_point & 0x3F000) >> 12)), file); + /* fall through */ + case 2: + fputc((char)(0x80 | ((code_point & 0xFC0) >> 6)), file); + /* fall through */ + case 1: + fputc((char)(0x80 | (code_point & 0x3F)), file); + } +} diff --git a/gherkin/c/src/unicode_utilities.h b/gherkin/c/src/unicode_utilities.h new file mode 100644 index 00000000000..006b69588cb --- /dev/null +++ b/gherkin/c/src/unicode_utilities.h @@ -0,0 +1,31 @@ +#ifndef 
GHERKIN_UNICODE_UTILITIES_H_ +#define GHERKIN_UNICODE_UTILITIES_H_ + +#include +#include +#include + +#include "utf8_source.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct Utf16Surrogates { + wchar_t leading; + wchar_t trailing; +} Utf16Surrogates; + +long UnicodeUtilities_read_code_point_from_utf8_source(Utf8Source* utf8_source); + +Utf16Surrogates UnicodeUtilities_get_utf16_surrogates(long code_point); + +int UnicodeUtilities_print_wide_character_to_utf8_file(FILE* file, const wchar_t* text, int pos); + +bool UnicodeUtilities_is_utf16_surrogate(const wchar_t wide_char); + +#ifdef __cplusplus +} +#endif + +#endif /* GHERKIN_UNICODE_UTILITIES_H_ */ diff --git a/gherkin/c/src/utf8_utilities.c b/gherkin/c/src/utf8_utilities.c deleted file mode 100644 index 1fc1eee30db..00000000000 --- a/gherkin/c/src/utf8_utilities.c +++ /dev/null @@ -1,34 +0,0 @@ -#include "utf8_utilities.h" - -wchar_t Utf8Utilities_read_wchar_from_utf8_source(Utf8Source* utf8_source) { - unsigned char c = Utf8Source_read(utf8_source); - if (c < 0x80) { - return (wchar_t)c; - } - unsigned char c2 = Utf8Source_read(utf8_source); - wchar_t lower_part = (wchar_t)(c2 & 0x3F); - if ((c & 0xE0) == 0xC0) { - return (((wchar_t)(c & 0x1F)) << 6) | lower_part; - } - c2 = Utf8Source_read(utf8_source); - lower_part = (lower_part << 6) | (wchar_t)(c2 & 0x3F); - if ((c & 0xF0) == 0xE0) { - return (((wchar_t)(c & 0x0F)) << 12) | lower_part; - } - c2 = Utf8Source_read(utf8_source); - lower_part = (lower_part << 6) | (wchar_t)(c2 & 0x3F); - if ((c & 0xF8) == 0xF0) { - return (((wchar_t)(c & 0x07)) << 18) | lower_part; - } - c2 = Utf8Source_read(utf8_source); - lower_part = (lower_part << 6) | (wchar_t)(c2 & 0x3F); - if ((c & 0xFC) == 0xF8) { - return (((wchar_t)(c & 0x03)) << 24) | lower_part; - } - c2 = Utf8Source_read(utf8_source); - lower_part = (lower_part << 6) | (wchar_t)(c2 & 0x3F); - if ((c & 0xFE) == 0xFC) { - return (((wchar_t)(c & 0x01)) << 30) | lower_part; - } - return WEOF; -} diff 
--git a/gherkin/c/src/utf8_utilities.h b/gherkin/c/src/utf8_utilities.h deleted file mode 100644 index 5a2db1b4a4e..00000000000 --- a/gherkin/c/src/utf8_utilities.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef GHERKIN_UFT8_UTILITIES_H_ -#define GHERKIN_UFT8_UTILITIES_H_ - -#include - -#include "utf8_source.h" - -#ifdef __cplusplus -extern "C" { -#endif - -wchar_t Utf8Utilities_read_wchar_from_utf8_source(Utf8Source* utf8_source); - -#ifdef __cplusplus -} -#endif - -#endif /* GHERKIN_UFT8_UTILITIES_H_ */ From c1528c63536abc53c76844c4388bb037af2c564b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Rasmusson?= Date: Mon, 17 Apr 2017 17:00:07 +0200 Subject: [PATCH 2/4] gherkin: (C) Always print feature file data using UTF-8. Windows does not use UTF-8 by default. To make sure that UTF-8 is used in the output, do manual conversion from Unicode/UTF-16 to a UTF-8 byte sequence for output. This seems safer than trying to add Windows-specific code to set UTF-8 output on that platform. --- gherkin/c/src/ast_printer.c | 41 ++++++++++++++++++------- gherkin/c/src/attachment_event.c | 11 ++++--- gherkin/c/src/gherkin_document_event.c | 6 ++-- gherkin/c/src/pickle_event.c | 6 ++-- gherkin/c/src/pickle_printer.c | 9 ++++-- gherkin/c/src/print_utilities.c | 22 ++++++++----- gherkin/c/src/print_utilities.h | 2 ++ gherkin/c/src/source_event.c | 5 +-- gherkin/c/src/token_formatter_builder.c | 35 +++++++++++++-------- gherkin/c/src/unicode_utilities.c | 2 ++ 10 files changed, 96 insertions(+), 43 deletions(-) diff --git a/gherkin/c/src/ast_printer.c b/gherkin/c/src/ast_printer.c index e255af56bd7..e7bf99831a7 100644 --- a/gherkin/c/src/ast_printer.c +++ b/gherkin/c/src/ast_printer.c @@ -93,7 +93,9 @@ static void print_doc_string(FILE* file, const DocString* doc_string) { fprintf(file, "{\"type\":\"%ls\",", ast_item_type_to_string(doc_string->type)); print_location(file, &doc_string->location); if (doc_string->content_type) { - fprintf(file, "\"contentType\":\"%ls\",", 
doc_string->content_type); + fprintf(file, "\"contentType\":\""); + PrintUtilities_print_json_string(file, doc_string->content_type); + fprintf(file, "\","); } fprintf(file, "\"content\":\""); if (doc_string->content) { @@ -102,11 +104,23 @@ static void print_doc_string(FILE* file, const DocString* doc_string) { fprintf(file, "\"}"); } +static void print_keyword(FILE* file, const wchar_t* keyword) { + fprintf(file, "\"keyword\":\""); + PrintUtilities_print_json_string(file, keyword); + fprintf(file, "\","); +} + +static void print_text(FILE* file, const wchar_t* text) { + fprintf(file, "\"text\":\""); + PrintUtilities_print_json_string(file, text); + fprintf(file, "\""); +} + static void print_step(FILE* file, const Step* step) { fprintf(file, "{\"type\":\"%ls\",", ast_item_type_to_string(step->type)); print_location(file, &step->location); - fprintf(file, "\"keyword\":\"%ls\",", step->keyword); - fprintf(file, "\"text\":\"%ls\"", step->text); + print_keyword(file, step->keyword); + print_text(file, step->text); if (step->argument) { fprintf(file, ",\"argument\":"); if (step->argument->type == Gherkin_DataTable) { @@ -136,7 +150,7 @@ static void print_description(FILE* file, const wchar_t* description) { static void print_background(FILE* file, const Background* background) { fprintf(file, "{\"type\":\"%ls\",", ast_item_type_to_string(background->type)); print_location(file, &background->location); - fprintf(file, "\"keyword\":\"%ls\",", background->keyword); + print_keyword(file, background->keyword); print_name(file, background->name); print_description(file, background->description); fprintf(file, "\"steps\":["); @@ -153,7 +167,9 @@ static void print_background(FILE* file, const Background* background) { static void print_tag(FILE* file, const Tag* tag) { fprintf(file, "{\"type\":\"%ls\",", ast_item_type_to_string(tag->type)); print_location(file, &tag->location); - fprintf(file, "\"name\":\"%ls\"}", tag->name); + fprintf(file, "\"name\":\""); + 
PrintUtilities_print_json_string(file, tag->name); + fprintf(file, "\"}"); } static void print_scenario(FILE* file, const Scenario* scenario) { @@ -168,7 +184,7 @@ static void print_scenario(FILE* file, const Scenario* scenario) { } fprintf(file, "],"); print_location(file, &scenario->location); - fprintf(file, "\"keyword\":\"%ls\",", scenario->keyword); + print_keyword(file, scenario->keyword); print_name(file, scenario->name); print_description(file, scenario->description); fprintf(file, "\"steps\":["); @@ -185,7 +201,7 @@ static void print_example_table(FILE* file, const ExampleTable* example_table) { fprintf(file, "{\"type\":\"%ls\",", ast_item_type_to_string(example_table->type)); print_location(file, &example_table->location); print_description(file, example_table->description); - fprintf(file, "\"keyword\":\"%ls\",", example_table->keyword); + print_keyword(file, example_table->keyword); print_name(file, example_table->name); fprintf(file, "\"tags\":["); int i; @@ -227,7 +243,7 @@ static void print_scenario_outline(FILE* file, const ScenarioOutline* scenario_o } fprintf(file, "],"); print_location(file, &scenario_outline->location); - fprintf(file, "\"keyword\":\"%ls\",", scenario_outline->keyword); + print_keyword(file, scenario_outline->keyword); print_name(file, scenario_outline->name); print_description(file, scenario_outline->description); fprintf(file, "\"steps\":["); @@ -251,7 +267,8 @@ static void print_scenario_outline(FILE* file, const ScenarioOutline* scenario_o static void print_comment(FILE* file, const Comment* comment) { fprintf(file, "{\"type\":\"%ls\",", ast_item_type_to_string(comment->type)); print_location(file, &comment->location); - fprintf(file, "\"text\":\"%ls\"}", comment->text); + print_text(file, comment->text); + fprintf(file, "}"); } void print_feature(FILE* file, const Feature* feature) { @@ -267,8 +284,10 @@ void print_feature(FILE* file, const Feature* feature) { } fprintf(file, "],"); print_location(file, &feature->location); 
- fprintf(file, "\"language\":\"%ls\",", feature->language); - fprintf(file, "\"keyword\":\"%ls\",", feature->keyword); + fprintf(file, "\"language\":\""); + PrintUtilities_print_json_string(file, feature->language); + fprintf(file, "\","); + print_keyword(file, feature->keyword); print_name(file, feature->name); print_description(file, feature->description); fprintf(file, "\"children\":["); diff --git a/gherkin/c/src/attachment_event.c b/gherkin/c/src/attachment_event.c index 2476dac0334..4eff8f91534 100644 --- a/gherkin/c/src/attachment_event.c +++ b/gherkin/c/src/attachment_event.c @@ -1,4 +1,5 @@ #include "attachment_event.h" +#include "print_utilities.h" #include "string_utilities.h" #include #include @@ -46,12 +47,14 @@ static void AttachmentEvent_print(const Event* event, FILE* file) { } const AttachmentEvent* attachment_event = (const AttachmentEvent*)event; fprintf(file, "{"); - fprintf(file, "\"data\":\"%ls\",", attachment_event->data); - fprintf(file, "\"media\":{\"encoding\":\"utf-8\",\"type\":\"text/vnd.cucumber.stacktrace+plain\"},"); + fprintf(file, "\"data\":\""); + PrintUtilities_print_json_string(file, attachment_event->data); + fprintf(file, "\",\"media\":{\"encoding\":\"utf-8\",\"type\":\"text/vnd.cucumber.stacktrace+plain\"},"); fprintf(file, "\"source\":{\"start\":"); fprintf(file, "{\"line\":%d,", attachment_event->location.line); fprintf(file, "\"column\":%d},", attachment_event->location.column); - fprintf(file, "\"uri\":\"%ls\"},", attachment_event->uri); - fprintf(file, "\"type\":\"attachment\""); + fprintf(file, "\"uri\":\""); + PrintUtilities_print_json_string(file, attachment_event->uri); + fprintf(file, "\"},\"type\":\"attachment\""); fprintf(file, "}\n"); } diff --git a/gherkin/c/src/gherkin_document_event.c b/gherkin/c/src/gherkin_document_event.c index 3099c9946bc..6f39611332b 100644 --- a/gherkin/c/src/gherkin_document_event.c +++ b/gherkin/c/src/gherkin_document_event.c @@ -1,5 +1,6 @@ #include "gherkin_document_event.h" #include 
"ast_printer.h" +#include "print_utilities.h" #include "string_utilities.h" #include #include @@ -42,8 +43,9 @@ static void GherkinDocumentEvent_print(const Event* event, FILE* file) { const GherkinDocumentEvent* gherkin_document_event = (const GherkinDocumentEvent*)event; fprintf(file, "{"); fprintf(file, "\"type\":\"gherkin-document\","); - fprintf(file, "\"uri\":\"%ls\",", gherkin_document_event->uri); - fprintf(file, "\"document\":"); + fprintf(file, "\"uri\":\""); + PrintUtilities_print_json_string(file, gherkin_document_event->uri); + fprintf(file, "\",\"document\":"); AstPrinter_print_gherkin_document(file, gherkin_document_event->gherkin_document); fprintf(file, "}\n"); } diff --git a/gherkin/c/src/pickle_event.c b/gherkin/c/src/pickle_event.c index e24ea8d7f9c..bf21ceb13ec 100644 --- a/gherkin/c/src/pickle_event.c +++ b/gherkin/c/src/pickle_event.c @@ -1,5 +1,6 @@ #include "pickle_event.h" #include "pickle_printer.h" +#include "print_utilities.h" #include "string_utilities.h" #include #include @@ -43,8 +44,9 @@ static void PickleEvent_print(const Event* event, FILE* file) { if (pickle_event) { fprintf(file, "{"); fprintf(file, "\"type\":\"pickle\","); - fprintf(file, "\"uri\":\"%ls\",", pickle_event->uri); - fprintf(file, "\"pickle\":"); + fprintf(file, "\"uri\":\""); + PrintUtilities_print_json_string(file, pickle_event->uri); + fprintf(file, "\",\"pickle\":"); PicklePrinter_print_pickle(file, pickle_event->pickle); fprintf(file, "}\n"); } diff --git a/gherkin/c/src/pickle_printer.c b/gherkin/c/src/pickle_printer.c index 9cd1f43afe0..9d7e90fc5b9 100644 --- a/gherkin/c/src/pickle_printer.c +++ b/gherkin/c/src/pickle_printer.c @@ -70,7 +70,9 @@ static void print_pickle_string(FILE* file, const PickleString* pickle_string) { static void print_tag(FILE* file, const PickleTag* tag) { fprintf(file, "{\"location\":"); print_location(file, &tag->location); - fprintf(file, ",\"name\":\"%ls\"}", tag->name); + fprintf(file, ",\"name\":\""); + 
PrintUtilities_print_json_string(file, tag->name); + fprintf(file, "\"}"); } static void print_pickle_step(FILE* file, const PickleStep* step) { @@ -86,8 +88,9 @@ static void print_pickle_step(FILE* file, const PickleStep* step) { } } fprintf(file, "],"); - fprintf(file, "\"text\":\"%ls\"", step->text); - fprintf(file, "}"); + fprintf(file, "\"text\":\""); + PrintUtilities_print_json_string(file, step->text); + fprintf(file, "\"}"); } void PicklePrinter_print_pickle(FILE* file, const Pickle* pickle) { diff --git a/gherkin/c/src/print_utilities.c b/gherkin/c/src/print_utilities.c index 0b4f2181da0..18b6f84365b 100644 --- a/gherkin/c/src/print_utilities.c +++ b/gherkin/c/src/print_utilities.c @@ -1,22 +1,30 @@ #include "print_utilities.h" +#include "unicode_utilities.h" void PrintUtilities_print_json_string(FILE* file, const wchar_t* text) { int i; for (i = 0; i < wcslen(text); ++i) { if (text[i] == L'\\' || text[i] == L'"') { - fprintf(file, "%lc", (wint_t)L'\\'); - fprintf(file, "%lc", (wint_t)text[i]); + fputc((char)'\\', file); + fputc((char)text[i], file); } else if (text[i] == L'\n') { - fprintf(file, "%lc", (wint_t)L'\\'); - fprintf(file, "%lc", (wint_t)L'n'); + fputc((char)'\\', file); + fputc((char)'n', file); } else if (text[i] == L'\r') { - fprintf(file, "%lc", (wint_t)L'\\'); - fprintf(file, "%lc", (wint_t)L'r'); + fputc((char)'\\', file); + fputc((char)'r', file); } else { - fprintf(file, "%lc", (wint_t)text[i]); + i = UnicodeUtilities_print_wide_character_to_utf8_file(file, text, i); } } } + +void PrintUtilities_print_wide_string(FILE* file, const wchar_t* text) { + int i; + for (i = 0; i < wcslen(text); ++i) { + i = UnicodeUtilities_print_wide_character_to_utf8_file(file, text, i); + } +} diff --git a/gherkin/c/src/print_utilities.h b/gherkin/c/src/print_utilities.h index 91d25db6fd4..61bad1ba755 100644 --- a/gherkin/c/src/print_utilities.h +++ b/gherkin/c/src/print_utilities.h @@ -10,6 +10,8 @@ extern "C" { void PrintUtilities_print_json_string(FILE* 
file, const wchar_t* text); +void PrintUtilities_print_wide_string(FILE* file, const wchar_t* text); + #ifdef __cplusplus } #endif diff --git a/gherkin/c/src/source_event.c b/gherkin/c/src/source_event.c index 8ef1cf54e33..6153db2da7c 100644 --- a/gherkin/c/src/source_event.c +++ b/gherkin/c/src/source_event.c @@ -43,8 +43,9 @@ static void SourceEvent_print(const Event* event, FILE* file) { fprintf(file, "{"); fprintf(file, "\"type\":\"source\","); fprintf(file, "\"media\":{\"encoding\":\"utf-8\",\"type\":\"text/vnd.cucumber.gherkin+plain\"},"); - fprintf(file, "\"uri\":\"%ls\",", source_event->uri); - fprintf(file, "\"data\":\""); + fprintf(file, "\"uri\":\""); + PrintUtilities_print_json_string(file, source_event->uri); + fprintf(file, "\",\"data\":\""); PrintUtilities_print_json_string(file, source_event->source); fprintf(file, "\"}\n"); } diff --git a/gherkin/c/src/token_formatter_builder.c b/gherkin/c/src/token_formatter_builder.c index 7324054fce2..335e2cdc938 100644 --- a/gherkin/c/src/token_formatter_builder.c +++ b/gherkin/c/src/token_formatter_builder.c @@ -1,4 +1,5 @@ #include "token_formatter_builder.h" +#include "print_utilities.h" #include #include @@ -40,31 +41,41 @@ void TokenFormatterBuilder_build(Builder* builder, Token* token) { fprintf(((TokenFormatterBuilder*)builder)->file, "%s\n", token_type_to_string(token->matched_type)); else if (token->matched_type == Token_TableRow || token->matched_type == Token_TagLine) { fprintf(((TokenFormatterBuilder*)builder)->file, - "(%d:%d)%s:%ls/%ls/", + "(%d:%d)%s:", token->location.line, token->location.column, - token_type_to_string(token->matched_type), - token->matched_keyword ? token->matched_keyword : L"", - token->matched_text ? token->matched_text : L""); + token_type_to_string(token->matched_type)); + PrintUtilities_print_wide_string(((TokenFormatterBuilder*)builder)->file, + token->matched_keyword ? 
token->matched_keyword : L""); + fprintf(((TokenFormatterBuilder*)builder)->file, "/"); + PrintUtilities_print_wide_string(((TokenFormatterBuilder*)builder)->file, + token->matched_text ? token->matched_text : L""); + fprintf(((TokenFormatterBuilder*)builder)->file, "/"); int i; for (i = 0; i < token->matched_items->count; ++i) { if (i != 0) fprintf(((TokenFormatterBuilder*)builder)->file, ","); fprintf(((TokenFormatterBuilder*)builder)->file, - "%d:%ls", - token->matched_items->items[i].column, - token->matched_items->items[i].text); + "%d:", + token->matched_items->items[i].column); + PrintUtilities_print_wide_string(((TokenFormatterBuilder*)builder)->file, + token->matched_items->items[i].text); } fprintf(((TokenFormatterBuilder*)builder)->file, "\n"); } - else + else { fprintf(((TokenFormatterBuilder*)builder)->file, - "(%d:%d)%s:%ls/%ls/\n", + "(%d:%d)%s:", token->location.line, token->location.column, - token_type_to_string(token->matched_type), - token->matched_keyword ? token->matched_keyword : L"", - token->matched_text ? token->matched_text : L""); + token_type_to_string(token->matched_type)); + PrintUtilities_print_wide_string(((TokenFormatterBuilder*)builder)->file, + token->matched_keyword ? token->matched_keyword : L""); + fprintf(((TokenFormatterBuilder*)builder)->file, "/"); + PrintUtilities_print_wide_string(((TokenFormatterBuilder*)builder)->file, + token->matched_text ? 
token->matched_text : L""); + fprintf(((TokenFormatterBuilder*)builder)->file, "/\n"); + } Token_delete(token); } diff --git a/gherkin/c/src/unicode_utilities.c b/gherkin/c/src/unicode_utilities.c index c83bbdd9ae4..8e3575b1ab4 100644 --- a/gherkin/c/src/unicode_utilities.c +++ b/gherkin/c/src/unicode_utilities.c @@ -1,5 +1,7 @@ #include "unicode_utilities.h" +static void print_code_point_to_utf8_file(FILE* file, long code_point); + long UnicodeUtilities_read_code_point_from_utf8_source(Utf8Source* utf8_source) { unsigned char c = Utf8Source_read(utf8_source); if (c < 0x80) { From 1183c9620bf9124dad424abda72824758b743e69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Rasmusson?= Date: Sun, 23 Apr 2017 15:27:38 +0200 Subject: [PATCH 3/4] gherkin: (C) Conform to C90 (mostly) To support a wider range of compilers, remove the C90 breaches so that the code also compiles with -ansi and/or -std=c90 (using gcc). Another reason not to use swprintf is that on many Windows compilers swprintf has a different signature than the ISO standard specifies. When cleaning up commented-out code in gherkin_generate_tokens.c (to remove the usage of // for comments), switch to using the FileTokenScanner to get coverage of it in the acceptance tests. The code does not compile with -std=c90 -pedantic (using gcc); the conformance to C90 is not taken that far. 
--- gherkin/c/src/Makefile | 2 +- gherkin/c/src/error_list.c | 151 ++++++++++++++++++------ gherkin/c/src/gherkin_cli.c | 2 - gherkin/c/src/gherkin_generate_tokens.c | 10 +- 4 files changed, 116 insertions(+), 49 deletions(-) diff --git a/gherkin/c/src/Makefile b/gherkin/c/src/Makefile index 0fa61f35102..eb03b7f4bd5 100644 --- a/gherkin/c/src/Makefile +++ b/gherkin/c/src/Makefile @@ -21,7 +21,7 @@ endif GENERATE_DEPS_FLAGS=-MMD -MP -MF $(basename $@).d AR_FLAGS=cr LD_FLAGS= -LD_LIBS= +LD_LIBS=-lm RM_CMD=rm -rf MKDIR_CMD=mkdir -p diff --git a/gherkin/c/src/error_list.c b/gherkin/c/src/error_list.c index 92badb41f3d..2bd00634622 100644 --- a/gherkin/c/src/error_list.c +++ b/gherkin/c/src/error_list.c @@ -2,6 +2,7 @@ #include "error.h" #include "item_queue.h" #include "token.h" +#include #include #include @@ -12,6 +13,14 @@ typedef struct ErrorList { jmp_buf* local_env; } ErrorList; +static int calculate_string_length_for_location(int line_width, int column_width); + +static int print_location_to_string(wchar_t* string, int pos, int line, int line_width, int column, int column_width); + +static int calculate_string_length_for_number(int number); + +static int print_number_to_string(wchar_t* string, int pos, int number, int number_width); + ErrorList* ErrorList_new() { ErrorList* error_list = (ErrorList*)malloc(sizeof(ErrorList)); error_list->errors = ItemQueue_new(); @@ -63,60 +72,87 @@ void ErrorList_add(ErrorList* error_list, const wchar_t* error_text, const Locat } void ErrorList_add_unexpected_eof_error(ErrorList* error_list, Token* received_token, const wchar_t* expected_tokens) { - const wchar_t* const message = L"unexpected end of file, expected: %ls"; - const int location_text_width = 11; // enough space for "(xxx:yyy): " to fit - const int message_length = wcslen(message) - 3 + wcslen(expected_tokens); - wchar_t* text = (wchar_t*)malloc((location_text_width + message_length + 1) * sizeof(wchar_t*)); - int actual_location_width = swprintf(text, 
location_text_width + message_length + 1, L"(%d:%d): ", received_token->location.line, received_token->location.column); - if (actual_location_width > location_text_width) { - text = (wchar_t*)realloc(text, (actual_location_width + message_length + 1) * sizeof(wchar_t*)); - } - swprintf(text + actual_location_width, message_length + 1, message, expected_tokens); + const wchar_t* const message = L"unexpected end of file, expected: "; + const int message_length = wcslen(message); + const int line_width = calculate_string_length_for_number(received_token->location.line); + const int column_width = calculate_string_length_for_number(received_token->location.column); + const int total_length = calculate_string_length_for_location(line_width, column_width) + message_length + wcslen(expected_tokens); + wchar_t* text = (wchar_t*)malloc((total_length + 1) * sizeof(wchar_t*)); + int pos = 0; + pos = print_location_to_string(text, pos, received_token->location.line, line_width, received_token->location.column, column_width); + wcscpy(text + pos, message); + pos += message_length; + wcscpy(text + pos, expected_tokens); + text[total_length] = L'\0'; ErrorList_add(error_list, text, received_token->location); } void ErrorList_add_unexpected_token_error(ErrorList* error_list, Token* received_token, const wchar_t* expected_tokens) { - const wchar_t* const message = L"expected: %ls, got '%ls'"; - const int location_text_width = 11; // enough space for "(xxx:yyy): " to fit - const int message_length = wcslen(message) - 6 + wcslen(expected_tokens) + wcslen(received_token->line->trimmed_line); - wchar_t* text = (wchar_t*)malloc((location_text_width + message_length + 1) * sizeof(wchar_t*)); + const wchar_t* const expected = L"expected: "; + const int expected_length = wcslen(expected); + const wchar_t* const got = L", got "; + const int got_length = wcslen(got); + const int expected_tokens_length = wcslen(expected_tokens); + const int received_tokens_length = 
wcslen(received_token->line->trimmed_line); + const int line_width = calculate_string_length_for_number(received_token->location.line); + const int column_width = calculate_string_length_for_number(received_token->location.column); + const int total_length = calculate_string_length_for_location(line_width, column_width) + expected_length + expected_tokens_length + got_length + received_tokens_length + 2; + wchar_t* text = (wchar_t*)malloc((total_length + 1) * sizeof(wchar_t*)); int column = received_token->location.column; if (column == 0) { column = received_token->line->indent + 1; } - int actual_location_width = swprintf(text, location_text_width + message_length + 1, L"(%d:%d): ", received_token->location.line, column); - if (actual_location_width > location_text_width) { - text = (wchar_t*)realloc(text, (actual_location_width + message_length + 1) * sizeof(wchar_t*)); - } - swprintf(text + actual_location_width, message_length + 1, message, expected_tokens, received_token->line->trimmed_line); + int pos = 0; + pos = print_location_to_string(text, pos, received_token->location.line, line_width, column, column_width); + wcscpy(text + pos, expected); + pos += expected_length; + wcscpy(text + pos, expected_tokens); + pos += expected_tokens_length; + wcscpy(text + pos, got); + pos += got_length; + text[pos++] = L'\''; + wcscpy(text + pos, received_token->line->trimmed_line); + pos += received_tokens_length; + text[pos++] = L'\''; + text[total_length] = L'\0'; Location location = {received_token->location.line, column}; ErrorList_add(error_list, text, location); } void ErrorList_add_no_such_language_error(ErrorList* error_list, Location* location, const wchar_t* language) { - const wchar_t* const message = L"Language not supported: %ls"; - const int location_text_width = 11; // enough space for "(xxx:yyy): " to fit - const int message_length = wcslen(message) - 3 + wcslen(language); - wchar_t* text = (wchar_t*)malloc((location_text_width + message_length + 1) * 
sizeof(wchar_t*)); - int actual_location_width = swprintf(text, location_text_width + message_length + 1, L"(%d:%d): ", location->line, location->column); - if (actual_location_width > location_text_width) { - text = (wchar_t*)realloc(text, (actual_location_width + message_length + 1) * sizeof(wchar_t*)); + const wchar_t* const message = L"Language not supported: "; + const int message_length = wcslen(message); + const int language_length = wcslen(language); + Location used_location = {-1, -1}; + if (location) { + used_location.line = location->line; + used_location.column = location->column; } - swprintf(text + actual_location_width, message_length + 1, message, language); - ErrorList_add(error_list, text, *location); + const int line_width = calculate_string_length_for_number(used_location.line); + const int column_width = calculate_string_length_for_number(used_location.column); + const int total_length = calculate_string_length_for_location(line_width, column_width) + message_length + language_length; + wchar_t* text = (wchar_t*)malloc((total_length + 1) * sizeof(wchar_t*)); + int pos = 0; + pos = print_location_to_string(text, pos, used_location.line, line_width, used_location.column, column_width); + wcscpy(text + pos, message); + pos += message_length; + wcscpy(text + pos, language); + text[total_length] = L'\0'; + ErrorList_add(error_list, text, used_location); ErrorList_jump_to_local_rescue_env(error_list); } void ErrorList_add_inconsisten_cell_count_error(ErrorList* error_list, Location location) { const wchar_t* const message = L"inconsistent cell count within the table"; - const int location_text_width = 11; // enough space for "(xxx:yyy): " to fit const int message_length = wcslen(message); - wchar_t* text = (wchar_t*)malloc((location_text_width + message_length + 1) * sizeof(wchar_t*)); - int actual_location_width = swprintf(text, location_text_width + message_length + 1, L"(%d:%d): ", location.line, location.column); - if (actual_location_width > 
location_text_width) { - text = (wchar_t*)realloc(text, (actual_location_width + message_length + 1) * sizeof(wchar_t*)); - } - wcscpy(text + actual_location_width, message); + const int line_width = calculate_string_length_for_number(location.line); + const int column_width = calculate_string_length_for_number(location.column); + const int total_length = calculate_string_length_for_location(line_width, column_width) + message_length; + wchar_t* text = (wchar_t*)malloc((total_length + 1) * sizeof(wchar_t*)); + int pos = 0; + pos = print_location_to_string(text, pos, location.line, line_width, location.column, column_width); + wcscpy(text + pos, message); + text[total_length] = L'\0'; ErrorList_add(error_list, text, location); ErrorList_jump_to_local_rescue_env(error_list); } @@ -132,10 +168,16 @@ void ErrorList_internal_grammar_error(ErrorList* error_list) { } void ErrorList_add_invalid_operation_error(ErrorList* error_list, int state) { - const wchar_t* const message = L"Unknown state: %d"; - const int message_length = wcslen(message) + 10; // some extra space for the state number - wchar_t* text = (wchar_t*)malloc((message_length + 1) * sizeof(wchar_t*)); - swprintf(text, message_length + 1, message, state); + const wchar_t* const message = L"Unknown state: "; + const int message_length = wcslen(message); + const int state_width = calculate_string_length_for_number(state); + const int total_length = message_length + state_width; + wchar_t* text = (wchar_t*)malloc((total_length + 1) * sizeof(wchar_t*)); + int pos = 0; + wcscpy(text + pos, message); + pos += message_length; + print_number_to_string(text, pos, state, state_width); + text[total_length] = L'\0'; Location location = {-1, -1}; ErrorList_add(error_list, text, location); } @@ -150,3 +192,36 @@ Error* ErrorList_next_error(ErrorList* error_list) { } return ErrorList_remove(error_list); } + +int calculate_string_length_for_location(int line_width, int column_width) { + return line_width + column_width + 5; 
/* "(:= 0; --i) { + string[pos + i] = L'0' + ((number / divisor) % 10); + divisor *= 10; + } + return pos + number_width; +} diff --git a/gherkin/c/src/gherkin_cli.c b/gherkin/c/src/gherkin_cli.c index cd210cbce7b..53fa733b250 100644 --- a/gherkin/c/src/gherkin_cli.c +++ b/gherkin/c/src/gherkin_cli.c @@ -7,7 +7,6 @@ #include "file_reader.h" #include "string_token_scanner.h" -//#include "file_token_scanner.h" #include "token_matcher.h" #include "parser.h" #include "ast_builder.h" @@ -75,7 +74,6 @@ int main(int argc, char** argv) { Event_print((const Event*)source_event, stdout); } TokenScanner* token_scanner = StringTokenScanner_new(source_event->source); - //TokenScanner* token_scanner = FileTokenScanner_new(argv[i]); result_code = Parser_parse(parser, token_matcher, token_scanner); Event_delete((const Event*)source_event); if (result_code == 0) { diff --git a/gherkin/c/src/gherkin_generate_tokens.c b/gherkin/c/src/gherkin_generate_tokens.c index 374165340ba..d528219aadd 100644 --- a/gherkin/c/src/gherkin_generate_tokens.c +++ b/gherkin/c/src/gherkin_generate_tokens.c @@ -1,8 +1,7 @@ #include #include #include "file_reader.h" -#include "string_token_scanner.h" -//#include "file_token_scanner.h" +#include "file_token_scanner.h" #include "token_matcher.h" #include "parser.h" #include "token_formatter_builder.h" @@ -11,16 +10,11 @@ int main(int argc, char** argv) { setlocale(LC_ALL, "en_US.UTF-8"); int i; for (i = 1; i < argc; ++i) { - FileReader* file_reader = FileReader_new(argv[i]); - const wchar_t* source = FileReader_read(file_reader); - FileReader_delete(file_reader); - TokenScanner* token_scanner = StringTokenScanner_new(source); - //TokenScanner* token_scanner = FileTokenScanner_new(argv[i]); + TokenScanner* token_scanner = FileTokenScanner_new(argv[i]); TokenMatcher* token_matcher = TokenMatcher_new(L"en"); Builder* builder = TokenFormatterBuilder_new(); Parser* parser = Parser_new(builder); Parser_parse(parser, token_matcher, token_scanner); - 
free((void*)source); Parser_delete(parser); TokenFormatterBuilder_delete(builder); TokenMatcher_delete(token_matcher); From 6cdd13ed513bfd8f6745916018b6603643867730 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Rasmusson?= Date: Sun, 23 Apr 2017 19:41:22 +0200 Subject: [PATCH 4/4] gherkin: (C) Avoid using repeated typedefs (support gcc #include @@ -46,11 +45,11 @@ typedef struct ParserContext { ErrorList* errors; } ParserContext; -typedef struct @Model.ParserClassName { +struct @Model.ParserClassName { ParserContext* parser_context; Builder* builder; ErrorList* errors; -} @Model.ParserClassName; +}; static Token* read_token(ParserContext* context); diff --git a/gherkin/c/include/builder.h b/gherkin/c/include/builder.h index 85f26c9f1a7..7457ea650cb 100644 --- a/gherkin/c/include/builder.h +++ b/gherkin/c/include/builder.h @@ -1,11 +1,10 @@ #ifndef GHERKIN_BUILDER_H_ #define GHERKIN_BUILDER_H_ +#include "error_list.h" #include "rule_type.h" #include "token.h" -typedef struct ErrorList ErrorList; - typedef struct Builder Builder; typedef void (*builder_reset_function) (Builder*); @@ -16,14 +15,12 @@ typedef void (*build_function) (Builder*, Token*); typedef void (*rule_function) (Builder*, RuleType); -typedef void (*rule_function) (Builder*, RuleType); - -typedef struct Builder { +struct Builder { builder_reset_function reset; builder_error_context_function set_error_context; build_function build; rule_function start_rule; rule_function end_rule; -} Builder; +}; #endif /* GHERKIN_BUILDER_H_ */ diff --git a/gherkin/c/include/compiler.h b/gherkin/c/include/compiler.h index ac55f6dfe91..b90fb32267a 100644 --- a/gherkin/c/include/compiler.h +++ b/gherkin/c/include/compiler.h @@ -3,6 +3,8 @@ #include #include +#include "gherkin_document.h" +#include "pickle.h" #ifdef __cplusplus extern "C" { @@ -10,10 +12,6 @@ extern "C" { typedef struct Compiler Compiler; -typedef struct GherkinDocument GherkinDocument; - -typedef struct Pickle Pickle; - Compiler* Compiler_new(); 
void Compiler_delete(Compiler* compiler); diff --git a/gherkin/c/include/event.h b/gherkin/c/include/event.h index 1dbea76e9f2..ef793e236bc 100644 --- a/gherkin/c/include/event.h +++ b/gherkin/c/include/event.h @@ -20,11 +20,11 @@ typedef enum EventType { Gherkin_PickleEvent } EventType; -typedef struct Event { +struct Event { event_delete_function event_delete; event_print_function event_print; EventType event_type; -} Event; +}; void Event_delete(const Event* event); diff --git a/gherkin/c/include/item.h b/gherkin/c/include/item.h index 36aa984ad1f..9e19bfe5e69 100644 --- a/gherkin/c/include/item.h +++ b/gherkin/c/include/item.h @@ -5,8 +5,8 @@ typedef struct Item Item; typedef void (*item_delete_function) (Item*); -typedef struct Item { +struct Item { item_delete_function item_delete; -} Item; +}; #endif /* GHERKIN_ITEM_H_ */ diff --git a/gherkin/c/include/parser.h b/gherkin/c/include/parser.h index 7b455655501..26ee333990e 100644 --- a/gherkin/c/include/parser.h +++ b/gherkin/c/include/parser.h @@ -1,7 +1,10 @@ #ifndef GHERKIN_PARSER_H_ #define GHERKIN_PARSER_H_ +#include "builder.h" #include "error.h" +#include "token_matcher.h" +#include "token_scanner.h" #include #include @@ -11,14 +14,6 @@ extern "C" { typedef struct Parser Parser; -typedef struct Builder Builder; - -typedef struct TokenMatcher TokenMatcher; - -typedef struct TokenScanner TokenScanner; - -typedef struct Feature Feature; - Parser* Parser_new(Builder* builder); void Parser_delete(Parser* parser); diff --git a/gherkin/c/include/token_matcher.h b/gherkin/c/include/token_matcher.h index e91a9f7610e..ce8409e34f7 100644 --- a/gherkin/c/include/token_matcher.h +++ b/gherkin/c/include/token_matcher.h @@ -2,23 +2,21 @@ #define GHERKIN_TOKEN_MATCHER_H_ #include +#include "dialect.h" +#include "error_list.h" #include "token.h" #ifdef __cplusplus extern "C" { #endif -typedef struct Dialect Dialect; - -typedef struct ErrorList ErrorList; - typedef struct TokenMatcher TokenMatcher; typedef void 
(*matcher_reset_function) (TokenMatcher*); typedef bool (*match_function) (TokenMatcher*, Token*); -typedef struct TokenMatcher { +struct TokenMatcher { const wchar_t* default_language; const wchar_t* language; const Dialect* dialect; @@ -40,7 +38,7 @@ typedef struct TokenMatcher { match_function match_Language; match_function match_Other; match_function match_EOF; -} TokenMatcher; +}; TokenMatcher* TokenMatcher_new(const wchar_t* default_language); diff --git a/gherkin/c/include/token_scanner.h b/gherkin/c/include/token_scanner.h index dc6e21e8a4c..8f1783bad5e 100644 --- a/gherkin/c/include/token_scanner.h +++ b/gherkin/c/include/token_scanner.h @@ -15,10 +15,10 @@ typedef Token* (*read_function) (TokenScanner*); typedef void (*delete_function) (TokenScanner*); -typedef struct TokenScanner { +struct TokenScanner { read_function read; delete_function delete; -} TokenScanner; +}; void TokenScanner_delete(TokenScanner* token_scanner); diff --git a/gherkin/c/src/Makefile b/gherkin/c/src/Makefile index eb03b7f4bd5..3fccc6e98e9 100644 --- a/gherkin/c/src/Makefile +++ b/gherkin/c/src/Makefile @@ -1,5 +1,5 @@ GCC_FLAGS=-c -Wall -Werror -g -CLANG_FLAGS=-c -Wall -Wno-typedef-redefinition -Werror -g +CLANG_FLAGS=-c -Wall -Werror -g ifeq ($(CC),i686-w64-mingw32-gcc) CC=i686-w64-mingw32-gcc diff --git a/gherkin/c/src/ast_builder.c b/gherkin/c/src/ast_builder.c index baa8819a341..a12cf03c27a 100644 --- a/gherkin/c/src/ast_builder.c +++ b/gherkin/c/src/ast_builder.c @@ -6,7 +6,6 @@ #include "scenario_outline.h" #include "data_table.h" #include "doc_string.h" -#include "error_list.h" #include #include diff --git a/gherkin/c/src/compiler.c b/gherkin/c/src/compiler.c index f05630e04f9..7c702781214 100644 --- a/gherkin/c/src/compiler.c +++ b/gherkin/c/src/compiler.c @@ -12,9 +12,9 @@ #include "string_utilities.h" #include -typedef struct Compiler { +struct Compiler { ItemQueue* pickle_list; -} Compiler; +}; typedef struct ReplacementItem { item_delete_function item_delete; diff 
--git a/gherkin/c/src/error_list.c b/gherkin/c/src/error_list.c index 2bd00634622..52438f9e305 100644 --- a/gherkin/c/src/error_list.c +++ b/gherkin/c/src/error_list.c @@ -6,12 +6,12 @@ #include #include -typedef struct ErrorList { +struct ErrorList { ItemQueue* errors; QueueItem* current_error; jmp_buf* global_env; jmp_buf* local_env; -} ErrorList; +}; static int calculate_string_length_for_location(int line_width, int column_width); diff --git a/gherkin/c/src/file_reader.c b/gherkin/c/src/file_reader.c index 7143128b671..401126d29f4 100644 --- a/gherkin/c/src/file_reader.c +++ b/gherkin/c/src/file_reader.c @@ -3,9 +3,9 @@ #include "unicode_utilities.h" #include -typedef struct FileReader { +struct FileReader { const char* file_name; -} FileReader; +}; static void extend_buffer_if_needed(wchar_t** buffer, int* buffer_size, int pos); diff --git a/gherkin/c/src/item_queue.h b/gherkin/c/src/item_queue.h index 8d665923cee..e8e3711c6ea 100644 --- a/gherkin/c/src/item_queue.h +++ b/gherkin/c/src/item_queue.h @@ -10,10 +10,10 @@ extern "C" { typedef struct QueueItem QueueItem; -typedef struct QueueItem { +struct QueueItem { Item* item; QueueItem* next; -} QueueItem; +}; typedef struct ItemQueue { QueueItem* first; diff --git a/gherkin/c/src/parser.c b/gherkin/c/src/parser.c index c85c9c28d29..c92696066f5 100644 --- a/gherkin/c/src/parser.c +++ b/gherkin/c/src/parser.c @@ -4,7 +4,6 @@ #include "token_scanner.h" #include "token_matcher.h" #include "token_queue.h" -#include "builder.h" #include "error_list.h" #include #include @@ -18,11 +17,11 @@ typedef struct ParserContext { ErrorList* errors; } ParserContext; -typedef struct Parser { +struct Parser { ParserContext* parser_context; Builder* builder; ErrorList* errors; -} Parser; +}; static Token* read_token(ParserContext* context); diff --git a/gherkin/c/src/token_queue.c b/gherkin/c/src/token_queue.c index d564a15eaa6..2677f843bb8 100644 --- a/gherkin/c/src/token_queue.c +++ b/gherkin/c/src/token_queue.c @@ -2,8 +2,6 @@ 
#include "item_queue.h" #include -typedef struct QueueItem QueueItem; - TokenQueue* TokenQueue_new() { return (TokenQueue*)ItemQueue_new(); } diff --git a/gherkin/c/src/utf8_source.h b/gherkin/c/src/utf8_source.h index ae1f12aca9d..333871a3f10 100644 --- a/gherkin/c/src/utf8_source.h +++ b/gherkin/c/src/utf8_source.h @@ -15,10 +15,10 @@ typedef unsigned char (*utf8_source_read_function) (Utf8Source*); typedef void (*utf8_source_delete_function) (Utf8Source*); -typedef struct Utf8Source { +struct Utf8Source { utf8_source_read_function read; utf8_source_delete_function delete; -} Utf8Source; +}; unsigned char Utf8Source_read(Utf8Source* utf8_source);