From 25654416e029e45997fdceabddfa11664dd5fa2b Mon Sep 17 00:00:00 2001 From: Quinton Miller Date: Fri, 21 Jun 2024 01:16:40 +0800 Subject: [PATCH] Implement `ECR.process_string` as a macro --- spec/std/ecr/ecr_lexer_spec.cr | 237 --------------------------------- spec/wasm32_std_spec.cr | 3 +- src/ecr/lexer.cr | 201 ---------------------------- src/ecr/macros.cr | 6 +- src/ecr/process.cr | 11 -- src/ecr/processor.cr | 213 +++++++++++++++++------------ 6 files changed, 134 insertions(+), 537 deletions(-) delete mode 100644 spec/std/ecr/ecr_lexer_spec.cr delete mode 100644 src/ecr/lexer.cr delete mode 100644 src/ecr/process.cr diff --git a/spec/std/ecr/ecr_lexer_spec.cr b/spec/std/ecr/ecr_lexer_spec.cr deleted file mode 100644 index 05e3f5436b93..000000000000 --- a/spec/std/ecr/ecr_lexer_spec.cr +++ /dev/null @@ -1,237 +0,0 @@ -require "spec" -require "ecr/lexer" - -private def t(type : ECR::Lexer::Token::Type) - type -end - -describe "ECR::Lexer" do - it "lexes without interpolation" do - lexer = ECR::Lexer.new("hello") - - token = lexer.next_token - token.type.should eq(t :string) - token.value.should eq("hello") - token.line_number.should eq(1) - token.column_number.should eq(1) - - token = lexer.next_token - token.type.should eq(t :eof) - end - - it "lexes with <% %>" do - lexer = ECR::Lexer.new("hello <% foo %> bar") - - token = lexer.next_token - token.type.should eq(t :string) - token.value.should eq("hello ") - token.column_number.should eq(1) - token.line_number.should eq(1) - - token = lexer.next_token - token.type.should eq(t :control) - token.value.should eq(" foo ") - token.line_number.should eq(1) - token.column_number.should eq(9) - token.suppress_leading?.should be_false - token.suppress_trailing?.should be_false - - token = lexer.next_token - token.type.should eq(t :string) - token.value.should eq(" bar") - token.line_number.should eq(1) - token.column_number.should eq(16) - - token = lexer.next_token - token.type.should eq(t :eof) - end - - it "lexes with <%- %>" do - lexer = ECR::Lexer.new("<%- foo %>") - - token = lexer.next_token - token.type.should eq(t :control) - token.value.should eq(" foo ") - token.line_number.should eq(1) - token.column_number.should eq(4) - token.suppress_leading?.should be_true - token.suppress_trailing?.should be_false - end - - it "lexes with <% -%>" do - lexer = ECR::Lexer.new("<% foo -%>") - - token = lexer.next_token - token.type.should eq(t :control) - token.value.should eq(" foo ") - token.line_number.should eq(1) - token.column_number.should eq(3) - token.suppress_leading?.should be_false - token.suppress_trailing?.should be_true - end - - it "lexes with -% inside string" do - lexer = ECR::Lexer.new("<% \"-%\" %>") - - token = lexer.next_token - token.type.should eq(t :control) - token.value.should eq(" \"-%\" ") - end - - it "lexes with <%= %>" do - lexer = ECR::Lexer.new("hello <%= foo %> bar") - - token = lexer.next_token - token.type.should eq(t :string) - token.value.should eq("hello ") - token.column_number.should eq(1) - token.line_number.should eq(1) - - token = lexer.next_token - token.type.should eq(ECR::Lexer::Token::Type::Output) - token.value.should eq(" foo ") - token.line_number.should eq(1) - token.column_number.should eq(10) - token.suppress_leading?.should be_false - token.suppress_trailing?.should be_false - - token = lexer.next_token - token.type.should eq(t :string) - token.value.should eq(" bar") - token.line_number.should eq(1) - token.column_number.should eq(17) - - token = lexer.next_token - token.type.should 
eq(t :eof) - end - - it "lexes with <%= -%>" do - lexer = ECR::Lexer.new("<%= foo -%>") - - token = lexer.next_token - token.type.should eq(ECR::Lexer::Token::Type::Output) - token.value.should eq(" foo ") - token.line_number.should eq(1) - token.column_number.should eq(4) - token.suppress_leading?.should be_false - token.suppress_trailing?.should be_true - end - - it "lexes with <%# %>" do - lexer = ECR::Lexer.new("hello <%# foo %> bar") - - token = lexer.next_token - token.type.should eq(t :string) - token.value.should eq("hello ") - token.column_number.should eq(1) - token.line_number.should eq(1) - - token = lexer.next_token - token.type.should eq(t :control) - token.value.should eq("# foo ") - token.line_number.should eq(1) - token.column_number.should eq(9) - token.suppress_leading?.should be_false - token.suppress_trailing?.should be_false - - token = lexer.next_token - token.type.should eq(t :string) - token.value.should eq(" bar") - token.line_number.should eq(1) - token.column_number.should eq(17) - - token = lexer.next_token - token.type.should eq(t :eof) - end - - it "lexes with <%# -%>" do - lexer = ECR::Lexer.new("<%# foo -%>") - - token = lexer.next_token - token.type.should eq(t :control) - token.value.should eq("# foo ") - token.line_number.should eq(1) - token.column_number.should eq(3) - token.suppress_leading?.should be_false - token.suppress_trailing?.should be_true - end - - it "lexes with <%% %>" do - lexer = ECR::Lexer.new("hello <%% foo %> bar") - - token = lexer.next_token - token.type.should eq(t :string) - token.value.should eq("hello ") - token.column_number.should eq(1) - token.line_number.should eq(1) - - token = lexer.next_token - token.type.should eq(t :string) - token.value.should eq("<% foo %>") - token.line_number.should eq(1) - token.column_number.should eq(10) - token.suppress_leading?.should be_false - token.suppress_trailing?.should be_false - - token = lexer.next_token - token.type.should eq(t :string) - token.value.should eq(" bar") - token.line_number.should eq(1) - token.column_number.should eq(17) - - token = lexer.next_token - token.type.should eq(t :eof) - end - - it "lexes with <%%= %>" do - lexer = ECR::Lexer.new("hello <%%= foo %> bar") - - token = lexer.next_token - token.type.should eq(t :string) - token.value.should eq("hello ") - token.column_number.should eq(1) - token.line_number.should eq(1) - - token = lexer.next_token - token.type.should eq(t :string) - token.value.should eq("<%= foo %>") - token.line_number.should eq(1) - token.column_number.should eq(10) - token.suppress_leading?.should be_false - token.suppress_trailing?.should be_false - - token = lexer.next_token - token.type.should eq(t :string) - token.value.should eq(" bar") - token.line_number.should eq(1) - token.column_number.should eq(18) - - token = lexer.next_token - token.type.should eq(t :eof) - end - - it "lexes with <% %> and correct location info" do - lexer = ECR::Lexer.new("hi\nthere <% foo\nbar %> baz") - - token = lexer.next_token - token.type.should eq(t :string) - token.value.should eq("hi\nthere ") - token.line_number.should eq(1) - token.column_number.should eq(1) - - token = lexer.next_token - token.type.should eq(t :control) - token.value.should eq(" foo\nbar ") - token.line_number.should eq(2) - token.column_number.should eq(9) - - token = lexer.next_token - token.type.should eq(t :string) - token.value.should eq(" baz") - token.line_number.should eq(3) - token.column_number.should eq(7) - - token = lexer.next_token - token.type.should eq(t :eof) - end 
-end diff --git a/spec/wasm32_std_spec.cr b/spec/wasm32_std_spec.cr index 7d762e479b91..efdfe49a960f 100644 --- a/spec/wasm32_std_spec.cr +++ b/spec/wasm32_std_spec.cr @@ -66,8 +66,7 @@ require "./std/deque_spec.cr" # require "./std/digest/sha512_spec.cr" (failed linking) # require "./std/dir_spec.cr" (failed to run) require "./std/double_spec.cr" -require "./std/ecr/ecr_lexer_spec.cr" -# require "./std/ecr/ecr_spec.cr" (failed linking) +require "./std/ecr/ecr_spec.cr" require "./std/enum_spec.cr" require "./std/enumerable_spec.cr" # require "./std/env_spec.cr" (failed to run) diff --git a/src/ecr/lexer.cr b/src/ecr/lexer.cr deleted file mode 100644 index b5a30cae8e84..000000000000 --- a/src/ecr/lexer.cr +++ /dev/null @@ -1,201 +0,0 @@ -# :nodoc: -class ECR::Lexer - class Token - enum Type - String - Output - Control - EOF - end - - property type : Type - property value : String - property line_number : Int32 - property column_number : Int32 - property? suppress_leading : Bool - property? suppress_trailing : Bool - - def initialize - @type = :EOF - @value = "" - @line_number = 0 - @column_number = 0 - @suppress_leading = false - @suppress_trailing = false - end - end - - def initialize(string) - @reader = Char::Reader.new(string) - @token = Token.new - @line_number = 1 - @column_number = 1 - end - - def next_token : Token - copy_location_info_to_token - - case current_char - when '\0' - @token.type = :EOF - return @token - when '<' - if peek_next_char == '%' - next_char - next_char - - if current_char == '-' - @token.suppress_leading = true - next_char - else - @token.suppress_leading = false - end - - case current_char - when '=' - next_char - copy_location_info_to_token - is_output = true - when '%' - next_char - copy_location_info_to_token - is_escape = true - else - copy_location_info_to_token - end - - return consume_control(is_output, is_escape) - end - else - # consume string - end - - consume_string - end - - private def consume_string - start_pos = current_pos - while true - case current_char - when '\0' - break - when '\n' - @line_number += 1 - @column_number = 0 - when '<' - if peek_next_char == '%' - break - end - else - # keep going - end - next_char - end - - @token.type = :string - @token.value = string_range(start_pos) - @token - end - - private def consume_control(is_output, is_escape) - start_pos = current_pos - while true - case current_char - when '\0' - if is_output - raise "Unexpected end of file inside <%= ..." - elsif is_escape - raise "Unexpected end of file inside <%% ..." - else - raise "Unexpected end of file inside <% ..." 
- end - when '\n' - @line_number += 1 - @column_number = 0 - when '-' - if peek_next_char == '%' - # We need to peek another char, so we remember - # where we are, check that, and then go back - pos = @reader.pos - column_number = @column_number - - next_char - - is_end = peek_next_char == '>' - @reader.pos = pos - @column_number = column_number - - if is_end - @token.suppress_trailing = true - setup_control_token(start_pos, is_escape) - raise "Expecting '>' after '-%'" if current_char != '>' - next_char - break - end - end - when '%' - if peek_next_char == '>' - @token.suppress_trailing = false - setup_control_token(start_pos, is_escape) - break - end - else - # keep going - end - next_char - end - - if is_escape - @token.type = :string - elsif is_output - @token.type = :output - else - @token.type = :control - end - @token - end - - private def setup_control_token(start_pos, is_escape) - @token.value = if is_escape - "<%#{string_range(start_pos, current_pos + 2)}" - else - string_range(start_pos) - end - next_char - next_char - end - - private def copy_location_info_to_token - @token.line_number = @line_number - @token.column_number = @column_number - end - - private def current_char - @reader.current_char - end - - private def next_char - @column_number += 1 - next_char_no_column_increment - end - - private def next_char_no_column_increment - @reader.next_char - end - - private def peek_next_char - @reader.peek_next_char - end - - private def current_pos - @reader.pos - end - - private def string_range(start_pos) - string_range(start_pos, current_pos) - end - - private def string_range(start_pos, end_pos) - @reader.string.byte_slice(start_pos, end_pos - start_pos) - end -end diff --git a/src/ecr/macros.cr b/src/ecr/macros.cr index 92c02cc4284a..3cbaf9bd0cf1 100644 --- a/src/ecr/macros.cr +++ b/src/ecr/macros.cr @@ -1,3 +1,5 @@ +require "./processor" + module ECR # Defines a `to_s(io)` method whose body is the ECR contained # in *filename*, translated to Crystal code. @@ -34,7 +36,7 @@ module ECR # ``` macro def_to_s(filename) def to_s(__io__ : IO) : Nil - ECR.embed {{filename}}, "__io__" + ::ECR.embed {{filename}}, "__io__" end end @@ -67,7 +69,7 @@ module ECR # io << '!' # ``` macro embed(filename, io_name) - \{{ run("ecr/process", {{filename}}, {{io_name.id.stringify}}) }} + ::ECR.process_string({{ read_file(filename) }}, {{ filename }}, {{ io_name.id.stringify }}, false) end # Embeds an ECR file *filename* into the program and renders it to a string. 
diff --git a/src/ecr/process.cr b/src/ecr/process.cr
deleted file mode 100644
index 0e394bc5db1b..000000000000
--- a/src/ecr/process.cr
+++ /dev/null
@@ -1,11 +0,0 @@
-require "ecr/processor"
-
-filename = ARGV[0]
-buffer_name = ARGV[1]
-
-begin
-  puts ECR.process_file(filename, buffer_name)
-rescue ex : File::Error
-  STDERR.puts ex.message
-  exit 1
-end
diff --git a/src/ecr/processor.cr b/src/ecr/processor.cr
index 786879e94273..0489a9fc23b5 100644
--- a/src/ecr/processor.cr
+++ b/src/ecr/processor.cr
@@ -1,104 +1,149 @@
-require "./lexer"
-
 module ECR
   extend self
 
   DefaultBufferName = "__str__"
 
   # :nodoc:
-  def process_file(filename, buffer_name = DefaultBufferName) : String
-    process_string File.read(filename), filename, buffer_name
-  end
+  macro process_string(string, filename, buffer_name = nil, quote = true)
+    {%
+      buffer_name = (buffer_name || DefaultBufferName).id
 
-  # :nodoc:
-  def process_string(string, filename, buffer_name = DefaultBufferName) : String
-    lexer = Lexer.new string
-    token = lexer.next_token
+      tokens = [] of _
+      chars = string.chars
+      pos = 0
+      line_number = 1
+      column_number = 1
+
+      looper = [nil]
+      looper.each do
+        looper << nil # while true
+
+        start_line_number = line_number
+        start_column_number = column_number
+
+        if chars[pos].nil?
+          looper.clear # break
+        elsif chars[pos] == '<' && chars[pos + 1] == '%'
+          column_number += 2
+          pos += 2
+
+          suppress_leading = chars[pos] == '-'
+          if suppress_leading
+            column_number += 1
+            pos += 1
+          end
 
-    String.build do |str|
-      while true
-        case token.type
-        when .string?
-          string = token.value
-          token = lexer.next_token
+          if chars[pos] == '='
+            type = :output
+            column_number += 1
+            pos += 1
+          elsif chars[pos] == '%'
+            type = :string
+            column_number += 1
+            pos += 1
+          else
+            type = :control
+          end
 
-          string = suppress_leading_indentation(token, string)
+          start_line_number = line_number
+          start_column_number = column_number
+          start_pos = pos
 
-          str << buffer_name
-          str << " << "
-          string.inspect(str)
-          str << '\n'
-        when .output?
-          string = token.value
-          line_number = token.line_number
-          column_number = token.column_number
-          suppress_trailing = token.suppress_trailing?
-          token = lexer.next_token
+          looper2 = [nil]
+          looper2.each do
+            looper2 << nil # while true
 
-          suppress_trailing_whitespace(token, suppress_trailing)
+            if chars[pos].nil?
+              if type == :output
+                raise "Unexpected end of file inside <%= ..."
+              elsif type == :string
+                raise "Unexpected end of file inside <%% ..."
+              else
+                raise "Unexpected end of file inside <% ..."
+              end
+            elsif (chars[pos] == '-' && chars[pos + 1] == '%' && chars[pos + 2] == '>') ||
+                  (chars[pos] == '%' && chars[pos + 1] == '>')
+              suppress_trailing = chars[pos] == '-'
+              value = type == :string ? "<%" + string[start_pos...pos + 2] : string[start_pos...pos]
+              column_number += suppress_trailing ? 3 : 2
+              pos += suppress_trailing ? 3 : 2
+              tokens << {
+                type:              type,
+                value:             value,
+                line_number:       start_line_number,
+                column_number:     start_column_number,
+                suppress_leading:  suppress_leading && type != :string,
+                suppress_trailing: suppress_trailing && type != :string,
+              }
+              looper2.clear # break
+            elsif chars[pos] == '\n'
+              line_number += 1
+              column_number = 1
+              pos += 1
+            else
+              column_number += 1
+              pos += 1
+            end
+          end
+        else
+          start_pos = pos
 
-          str << "#<loc:push>("
-          append_loc(str, filename, line_number, column_number)
-          str << string
-          str << ")#<loc:pop>.to_s "
-          str << buffer_name
-          str << '\n'
-        when .control?
-          string = token.value
-          line_number = token.line_number
-          column_number = token.column_number
-          suppress_trailing = token.suppress_trailing?
-          token = lexer.next_token
+          looper3 = [nil]
+          looper3.each do
+            looper3 << nil # while true
 
-          suppress_trailing_whitespace(token, suppress_trailing)
+            if chars[pos].nil? || (chars[pos] == '<' && chars[pos + 1] == '%')
+              looper3.clear # break
+            elsif chars[pos] == '\n'
+              line_number += 1
+              column_number = 1
+              pos += 1
+            else
+              column_number += 1
+              pos += 1
+            end
+          end
 
-          str << "#<loc:push>"
-          append_loc(str, filename, line_number, column_number)
-          str << ' ' unless string.starts_with?(' ')
-          str << string
-          str << "#<loc:pop>"
-          str << '\n'
-        when .eof?
-          break
+          tokens << {
+            type:          :string,
+            value:         string[start_pos...pos],
+            line_number:   start_line_number,
+            column_number: start_column_number,
+          }
         end
       end
-    end
-  end
 
-  private def suppress_leading_indentation(token, string)
-    # To suppress leading indentation we find the last index of a newline and
-    # then check if all chars after that are whitespace.
-    # We use a Char::Reader for this for maximum efficiency.
-    if (token.type.output? || token.type.control?) && token.suppress_leading?
-      char_index = string.rindex('\n')
-      char_index = char_index ? char_index + 1 : 0
-      byte_index = string.char_index_to_byte_index(char_index).not_nil!
-      reader = Char::Reader.new(string)
-      reader.pos = byte_index
-      while reader.current_char.ascii_whitespace? && reader.has_next?
-        reader.next_char
-      end
-      if reader.pos == string.bytesize
-        string = string.byte_slice(0, byte_index)
+      pieces = [] of String
+      tokens.each_with_index do |token, i|
+        if token[:type] == :string
+          value = token[:value]
+          if i > 0 && tokens[i - 1][:suppress_trailing]
+            value = value.gsub(/\A.*\n/, "")
+          end
+          if i < tokens.size - 1 && tokens[i + 1][:suppress_leading]
+            value = value.gsub(/ +\z/, "")
+          end
+          pieces << buffer_name
+          pieces << " << "
+          pieces << value.stringify
+          pieces << '\n'
+        elsif token[:type] == :output
+          pieces << "#<loc:push>("
+          pieces << "#<loc:#{filename},#{token[:line_number]},#{token[:column_number]}>"
+          pieces << token[:value]
+          pieces << ")#<loc:pop>.to_s "
+          pieces << buffer_name
+          pieces << '\n'
+        else
+          pieces << "#<loc:push>"
+          pieces << "#<loc:#{filename},#{token[:line_number]},#{token[:column_number]}>"
+          pieces << ' ' unless token[:value].starts_with?(' ')
+          pieces << token[:value]
+          pieces << "#<loc:pop>"
+          pieces << '\n'
+        end
      end
-    end
-    string
-  end
-
-  private def suppress_trailing_whitespace(token, suppress_trailing)
-    if suppress_trailing && token.type.string?
-      newline_index = token.value.index('\n')
-      token.value = token.value[newline_index + 1..-1] if newline_index
-    end
-  end
-
-  private def append_loc(str, filename, line_number, column_number)
-    str << %(#<loc:")
-    str << filename
-    str << %(",)
-    str << line_number
-    str << ','
-    str << column_number
-    str << '>'
+      program = pieces.map(&.id).join("")
+    %}{{ quote ? program : program.id }}
   end
 end
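As a rough sketch of the user-facing effect (illustrative only; `greeting.ecr` and the `Greeting` class below are hypothetical and not part of this patch): templates keep producing the same generated code, but the expansion now happens entirely inside the `ECR.process_string` macro rather than by shelling out to the removed `ecr/process` helper. Assuming a template file `greeting.ecr` containing `Hello, <%= @name %>!`:

    require "ecr"

    class Greeting
      def initialize(@name : String)
      end

      # At compile time this now expands (roughly) to:
      #   __io__ << "Hello, "
      #   #<loc:push>(#<loc:"greeting.ecr",1,11> @name )#<loc:pop>.to_s __io__
      #   __io__ << "!"
      # with no external `ecr/process` invocation during the build.
      ECR.def_to_s "greeting.ecr"
    end

    Greeting.new("Crystal").to_s # => "Hello, Crystal!"

`ECR.embed` passes `quote: false`, so the macro splices the generated program directly into the call site; with the default `quote: true`, `ECR.process_string` instead yields the generated program as a string literal.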