Skip to content
Find file
Fetching contributors…
Cannot retrieve contributors at this time
1314 lines (1142 sloc) 37.1 KB
class RubyLexer
attr_accessor :command_start
attr_accessor :cmdarg
attr_accessor :cond
attr_accessor :nest
ESC_RE = /\\([0-7]{1,3}|x[0-9a-fA-F]{1,2}|M-[^\\]|(C-|c)[^\\]|[^0-7xMCc])/
# Additional context surrounding tokens that both the lexer and
# grammar use.
attr_reader :lex_state
attr_accessor :lex_strterm
attr_accessor :parser # HACK for very end of lexer... *sigh*
# Stream of data that yylex examines.
attr_reader :src
# Last token read via yylex.
attr_accessor :token
attr_accessor :string_buffer
# Value of last token which had a value associated with it.
attr_accessor :yacc_value
# What handles warnings
attr_accessor :warnings
EOF = :eof_haha!
# ruby constants for strings (should this be moved somewhere else?)
STR_FUNC_BORING = 0x00
STR_FUNC_ESCAPE = 0x01 # TODO: remove and replace with REGEXP
STR_FUNC_EXPAND = 0x02
STR_FUNC_REGEXP = 0x04
STR_FUNC_AWORDS = 0x08
STR_FUNC_SYMBOL = 0x10
STR_FUNC_INDENT = 0x20 # <<-HEREDOC
STR_SQUOTE = STR_FUNC_BORING
STR_DQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
STR_XQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
STR_SSYM = STR_FUNC_SYMBOL
STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
TOKENS = {
"!" => :tBANG,
"!=" => :tNEQ,
"!~" => :tNMATCH,
"," => :tCOMMA,
".." => :tDOT2,
"..." => :tDOT3,
"=" => :tEQL,
"==" => :tEQ,
"===" => :tEQQ,
"=>" => :tASSOC,
"=~" => :tMATCH,
}
# How the parser advances to the next token.
#
# @return true if not at end of file (EOF).
def advance
r = yylex
self.token = r
raise "yylex returned nil" unless r
return RubyLexer::EOF != r
end
def arg_ambiguous
self.warning("Ambiguous first argument. make sure.")
end
def comments
c = @comments.join
@comments.clear
c
end
def expr_beg_push val
cond.push false
cmdarg.push false
self.lex_state = :expr_beg
self.yacc_value = val
end
def fix_arg_lex_state
self.lex_state = if lex_state == :expr_fname || lex_state == :expr_dot
:expr_arg
else
:expr_beg
end
end
def heredoc here # 63 lines
_, eos, func, last_line = here
indent = (func & STR_FUNC_INDENT) != 0
expand = (func & STR_FUNC_EXPAND) != 0
eos_re = indent ? /[ \t]*#{eos}(\r?\n|\z)/ : /#{eos}(\r?\n|\z)/
err_msg = "can't match #{eos_re.inspect} anywhere in "
rb_compile_error err_msg if
src.eos?
if src.beginning_of_line? && src.scan(eos_re) then
src.unread_many last_line # TODO: figure out how to remove this
self.yacc_value = eos
return :tSTRING_END
end
self.string_buffer = []
if expand then
case
when src.scan(/#[$@]/) then
src.pos -= 1 # FIX omg stupid
self.yacc_value = src.matched
return :tSTRING_DVAR
when src.scan(/#[{]/) then
self.yacc_value = src.matched
return :tSTRING_DBEG
when src.scan(/#/) then
string_buffer << '#'
end
until src.scan(eos_re) do
c = tokadd_string func, "\n", nil
rb_compile_error err_msg if
c == RubyLexer::EOF
if c != "\n" then
self.yacc_value = string_buffer.join.delete("\r")
return :tSTRING_CONTENT
else
string_buffer << src.scan(/\n/)
end
rb_compile_error err_msg if
src.eos?
end
# tack on a NL after the heredoc token - FIX NL should not be needed
src.unread_many(eos + "\n") # TODO: remove this... stupid stupid stupid
else
until src.check(eos_re) do
string_buffer << src.scan(/.*(\n|\z)/)
rb_compile_error err_msg if
src.eos?
end
end
self.lex_strterm = [:heredoc, eos, func, last_line]
self.yacc_value = string_buffer.join.delete("\r")
return :tSTRING_CONTENT
end
def heredoc_identifier # 51 lines
term, func = nil, STR_FUNC_BORING
self.string_buffer = []
case
when src.scan(/(-?)(['"`])(.*?)\2/) then
term = src[2]
unless src[1].empty? then
func |= STR_FUNC_INDENT
end
func |= case term
when "\'" then
STR_SQUOTE
when '"' then
STR_DQUOTE
else
STR_XQUOTE
end
string_buffer << src[3]
when src.scan(/-?(['"`])(?!\1*\Z)/) then
rb_compile_error "unterminated here document identifier"
when src.scan(/(-?)(\w+)/) then
term = '"'
func |= STR_DQUOTE
unless src[1].empty? then
func |= STR_FUNC_INDENT
end
string_buffer << src[2]
else
return nil
end
if src.check(/.*\n/) then
# TODO: think about storing off the char range instead
line = src.string[src.pos, src.matched_size]
src.string[src.pos, src.matched_size] = "\n"
src.extra_lines_added += 1
src.pos += 1
else
line = nil
end
self.lex_strterm = [:heredoc, string_buffer.join, func, line]
if term == '`' then
self.yacc_value = "`"
return :tXSTRING_BEG
else
self.yacc_value = "\""
return :tSTRING_BEG
end
end
def initialize
self.cond = RubyParser::StackState.new(:cond)
self.cmdarg = RubyParser::StackState.new(:cmdarg)
self.nest = 0
@comments = []
reset
end
def int_with_base base
rb_compile_error "Invalid numeric format" if src.matched =~ /__/
self.yacc_value = src.matched.to_i(base)
return :tINTEGER
end
def lex_state= o
raise "wtf\?" unless Symbol === o
@lex_state = o
end
attr_writer :lineno
def lineno
@lineno ||= src.lineno
end
##
# Parse a number from the input stream.
#
# @param c The first character of the number.
# @return A int constant wich represents a token.
def parse_number
self.lex_state = :expr_end
case
when src.scan(/[+-]?0[xbd]\b/) then
rb_compile_error "Invalid numeric format"
when src.scan(/[+-]?0x[a-f0-9_]+/i) then
int_with_base(16)
when src.scan(/[+-]?0b[01_]+/) then
int_with_base(2)
when src.scan(/[+-]?0d[0-9_]+/) then
int_with_base(10)
when src.scan(/[+-]?0[Oo]?[0-7_]*[89]/) then
rb_compile_error "Illegal octal digit."
when src.scan(/[+-]?0[Oo]?[0-7_]+|0[Oo]/) then
int_with_base(8)
when src.scan(/[+-]?[\d_]+_(e|\.)/) then
rb_compile_error "Trailing '_' in number."
when src.scan(/[+-]?[\d_]+\.[\d_]+(e[+-]?[\d_]+)?\b|[+-]?[\d_]+e[+-]?[\d_]+\b/i) then
number = src.matched
if number =~ /__/ then
rb_compile_error "Invalid numeric format"
end
self.yacc_value = number.to_f
:tFLOAT
when src.scan(/[+-]?0\b/) then
int_with_base(10)
when src.scan(/[+-]?[\d_]+\b/) then
int_with_base(10)
else
rb_compile_error "Bad number format"
end
end
def parse_quote # 58 lines
beg, nnd, short_hand, c = nil, nil, false, nil
if src.scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
rb_compile_error "unknown type of %string" if src.matched_size == 2
c, beg, short_hand = src.matched, src.getch, false
else # Short-hand (e.g. %{, %., %!, etc)
c, beg, short_hand = 'Q', src.getch, true
end
if src.eos? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
rb_compile_error "unterminated quoted string meets end of file"
end
# Figure nnd-char. "\0" is special to indicate beg=nnd and that no nesting?
nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
nnd, beg = beg, "\0" if nnd.nil?
token_type, self.yacc_value = nil, "%#{c}#{beg}"
token_type, string_type = case c
when 'Q' then
ch = short_hand ? nnd : c + beg
self.yacc_value = "%#{ch}"
[:tSTRING_BEG, STR_DQUOTE]
when 'q' then
[:tSTRING_BEG, STR_SQUOTE]
when 'W' then
src.scan(/\s*/)
[:tWORDS_BEG, STR_DQUOTE | STR_FUNC_AWORDS]
when 'w' then
src.scan(/\s*/)
[:tAWORDS_BEG, STR_SQUOTE | STR_FUNC_AWORDS]
when 'x' then
[:tXSTRING_BEG, STR_XQUOTE]
when 'r' then
[:tREGEXP_BEG, STR_REGEXP]
when 's' then
self.lex_state = :expr_fname
[:tSYMBEG, STR_SSYM]
end
rb_compile_error "Bad %string type. Expected [Qqwxr\W], found '#{c}'." if
token_type.nil?
self.lex_strterm = [:strterm, string_type, nnd, beg]
return token_type
end
def parse_string(quote) # 65 lines
_, string_type, term, open = quote
space = false # FIX: remove these
func = string_type
paren = open
term_re = Regexp.escape term
awords = (func & STR_FUNC_AWORDS) != 0
regexp = (func & STR_FUNC_REGEXP) != 0
expand = (func & STR_FUNC_EXPAND) != 0
unless func then # FIX: impossible, prolly needs == 0
self.lineno = nil
return :tSTRING_END
end
space = true if awords and src.scan(/\s+/)
if self.nest == 0 && src.scan(/#{term_re}/) then
if awords then
quote[1] = nil
return :tSPACE
elsif regexp then
self.yacc_value = self.regx_options
self.lineno = nil
return :tREGEXP_END
else
self.yacc_value = term
self.lineno = nil
return :tSTRING_END
end
end
if space then
return :tSPACE
end
self.string_buffer = []
if expand
case
when src.scan(/#(?=[$@])/) then
return :tSTRING_DVAR
when src.scan(/#[{]/) then
return :tSTRING_DBEG
when src.scan(/#/) then
string_buffer << '#'
end
end
if tokadd_string(func, term, paren) == RubyLexer::EOF then
rb_compile_error "unterminated string meets end of file"
end
self.yacc_value = string_buffer.join
return :tSTRING_CONTENT
end
def rb_compile_error msg
msg += ". near line #{self.lineno}: #{src.rest[/^.*/].inspect}"
raise SyntaxError, msg
end
def read_escape # 51 lines
case
when src.scan(/\\/) then # Backslash
'\\'
when src.scan(/n/) then # newline
"\n"
when src.scan(/t/) then # horizontal tab
"\t"
when src.scan(/r/) then # carriage-return
"\r"
when src.scan(/f/) then # form-feed
"\f"
when src.scan(/v/) then # vertical tab
"\13"
when src.scan(/a/) then # alarm(bell)
"\007"
when src.scan(/e/) then # escape
"\033"
when src.scan(/b/) then # backspace
"\010"
when src.scan(/s/) then # space
" "
when src.scan(/[0-7]{1,3}/) then # octal constant
src.matched.to_i(8).chr
when src.scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
src[1].to_i(16).chr
when src.check(/M-\\[\\MCc]/) then
src.scan(/M-\\/) # eat it
c = self.read_escape
c[0] = (c[0].ord | 0x80).chr
c
when src.scan(/M-(.)/) then
c = src[1]
c[0] = (c[0].ord | 0x80).chr
c
when src.check(/(C-|c)\\[\\MCc]/) then
src.scan(/(C-|c)\\/) # eat it
c = self.read_escape
c[0] = (c[0].ord & 0x9f).chr
c
when src.scan(/C-\?|c\?/) then
127.chr
when src.scan(/(C-|c)(.)/) then
c = src[2]
c[0] = (c[0].ord & 0x9f).chr
c
when src.scan(/[McCx0-9]/) || src.eos? then
rb_compile_error("Invalid escape character syntax")
else
src.getch
end
end
def regx_options # 15 lines
good, bad = [], []
if src.scan(/[a-z]+/) then
good, bad = src.matched.split(//).partition { |s| s =~ /^[ixmonesu]$/ }
end
unless bad.empty? then
rb_compile_error("unknown regexp option%s - %s" %
[(bad.size > 1 ? "s" : ""), bad.join.inspect])
end
return good.join
end
def reset
self.command_start = true
self.lex_strterm = nil
self.token = nil
self.yacc_value = nil
@src = nil
@lex_state = nil
end
def src= src
raise "bad src: #{src.inspect}" unless String === src
@src = RPStringScanner.new(src)
end
def tokadd_escape term # 20 lines
case
when src.scan(/\\\n/) then
# just ignore
when src.scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/) then
self.string_buffer << src.matched
when src.scan(/\\([MC]-|c)(?=\\)/) then
self.string_buffer << src.matched
self.tokadd_escape term
when src.scan(/\\([MC]-|c)(.)/) then
self.string_buffer << src.matched
when src.scan(/\\[McCx]/) then
rb_compile_error "Invalid escape character syntax"
when src.scan(/\\(.)/m) then
self.string_buffer << src.matched
else
rb_compile_error "Invalid escape character syntax"
end
end
def tokadd_string(func, term, paren) # 105 lines
awords = (func & STR_FUNC_AWORDS) != 0
escape = (func & STR_FUNC_ESCAPE) != 0
expand = (func & STR_FUNC_EXPAND) != 0
regexp = (func & STR_FUNC_REGEXP) != 0
symbol = (func & STR_FUNC_SYMBOL) != 0
paren_re = paren.nil? ? nil : Regexp.new(Regexp.escape(paren))
term_re = Regexp.new(Regexp.escape(term))
until src.eos? do
c = nil
handled = true
case
when self.nest == 0 && src.scan(term_re) then
src.pos -= 1
break
when paren_re && src.scan(paren_re) then
self.nest += 1
when src.scan(term_re) then
self.nest -= 1
when awords && src.scan(/\s/) then
src.pos -= 1
break
when expand && src.scan(/#(?=[\$\@\{])/) then
src.pos -= 1
break
when expand && src.scan(/#(?!\n)/) then
# do nothing
when src.check(/\\/) then
case
when awords && src.scan(/\\\n/) then
string_buffer << "\n"
next
when awords && src.scan(/\\\s/) then
c = ' '
when expand && src.scan(/\\\n/) then
next
when regexp && src.check(/\\/) then
self.tokadd_escape term
next
when expand && src.scan(/\\/) then
c = self.read_escape
when src.scan(/\\\n/) then
# do nothing
when src.scan(/\\\\/) then
string_buffer << '\\' if escape
c = '\\'
when src.scan(/\\/) then
unless src.scan(term_re) || paren.nil? || src.scan(paren_re) then
string_buffer << "\\"
end
else
handled = false
end
else
handled = false
end # case
unless handled then
t = Regexp.escape term
x = Regexp.escape(paren) if paren && paren != "\000"
re = if awords then
/[^#{t}#{x}\#\0\\\n\ ]+|./ # |. to pick up whatever
else
/[^#{t}#{x}\#\0\\]+|./
end
src.scan re
c = src.matched
rb_compile_error "symbol cannot contain '\\0'" if symbol && c =~ /\0/
end # unless handled
c ||= src.matched
string_buffer << c
end # until
c ||= src.matched
c = RubyLexer::EOF if src.eos?
return c
end
def unescape s
r = {
"a" => "\007",
"b" => "\010",
"e" => "\033",
"f" => "\f",
"n" => "\n",
"r" => "\r",
"s" => " ",
"t" => "\t",
"v" => "\13",
"\\" => '\\',
"\n" => "",
"C-\?" => 127.chr,
"c\?" => 127.chr,
}[s]
return r if r
case s
when /^[0-7]{1,3}/ then
$&.to_i(8).chr
when /^x([0-9a-fA-F]{1,2})/ then
$1.to_i(16).chr
when /^M-(.)/ then
($1[0].ord | 0x80).chr
when /^(C-|c)(.)/ then
($2[0].ord & 0x9f).chr
when /^[McCx0-9]/ then
rb_compile_error("Invalid escape character syntax")
else
s
end
end
def warning s
# do nothing for now
end
##
# Returns the next token. Also sets yy_val is needed.
#
# @return Description of the Returned Value
def yylex # 826 lines
c = ''
space_seen = false
command_state = false
src = self.src
self.token = nil
self.yacc_value = nil
return yylex_string if lex_strterm
command_state = self.command_start
self.command_start = false
last_state = lex_state
loop do # START OF CASE
if src.scan(/[\ \t\r\f\v]/) then # \s - \n + \v
space_seen = true
next
elsif src.check(/[^a-zA-Z]/) then
if src.scan(/\n|#/) then
self.lineno = nil
c = src.matched
if c == '#' then
src.pos -= 1
while src.scan(/\s*#.*(\n+|\z)/) do
@comments << src.matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
end
if src.eos? then
return RubyLexer::EOF
end
end
# Replace a string of newlines with a single one
src.scan(/\n+/)
if [:expr_beg, :expr_fname,
:expr_dot, :expr_class].include? lex_state then
next
end
self.command_start = true
self.lex_state = :expr_beg
return :tNL
elsif src.scan(/[\]\)\}]/) then
cond.lexpop
cmdarg.lexpop
self.lex_state = :expr_end
self.yacc_value = src.matched
result = {
")" => :tRPAREN,
"]" => :tRBRACK,
"}" => :tRCURLY
}[src.matched]
return result
elsif src.scan(/\.\.\.?|,|![=~]?/) then
self.lex_state = :expr_beg
tok = self.yacc_value = src.matched
return TOKENS[tok]
elsif src.check(/\./) then
if src.scan(/\.\d/) then
rb_compile_error "no .<digit> floating literal anymore put 0 before dot"
elsif src.scan(/\./) then
self.lex_state = :expr_dot
self.yacc_value = "."
return :tDOT
end
elsif src.scan(/\(/) then
result = :tLPAREN2
self.command_start = true
if lex_state == :expr_beg || lex_state == :expr_mid then
result = :tLPAREN
elsif space_seen then
if lex_state == :expr_cmdarg then
result = :tLPAREN_ARG
elsif lex_state == :expr_arg then
warning("don't put space before argument parentheses")
result = :tLPAREN2
end
end
self.expr_beg_push "("
return result
elsif src.check(/\=/) then
if src.scan(/\=\=\=|\=\=|\=~|\=>|\=(?!begin\b)/) then
self.fix_arg_lex_state
tok = self.yacc_value = src.matched
return TOKENS[tok]
elsif src.scan(/\=begin(?=\s)/) then
# @comments << '=' << src.matched
@comments << src.matched
unless src.scan(/.*?\n=end( |\t|\f)*[^\n]*(\n|\z)/m) then
@comments.clear
rb_compile_error("embedded document meets end of file")
end
@comments << src.matched
next
else
raise "you shouldn't be able to get here"
end
elsif src.scan(/\"(#{ESC_RE}|#(#{ESC_RE}|[^\{\#\@\$\"\\])|[^\"\\\#])*\"/o) then
self.yacc_value = src.matched[1..-2].gsub(ESC_RE) { unescape $1 }
self.lex_state = :expr_end
return :tSTRING
elsif src.scan(/\"/) then # FALLBACK
self.lex_strterm = [:strterm, STR_DQUOTE, '"', "\0"] # TODO: question this
self.yacc_value = "\""
return :tSTRING_BEG
elsif src.scan(/\@\@?\w*/) then
self.token = src.matched
rb_compile_error "`#{token}` is not allowed as a variable name" if
token =~ /\@\d/
return process_token(command_state)
elsif src.scan(/\:\:/) then
if (lex_state == :expr_beg ||
lex_state == :expr_mid ||
lex_state == :expr_class ||
(lex_state.is_argument && space_seen)) then
self.lex_state = :expr_beg
self.yacc_value = "::"
return :tCOLON3
end
self.lex_state = :expr_dot
self.yacc_value = "::"
return :tCOLON2
elsif lex_state != :expr_end && lex_state != :expr_endarg && src.scan(/:([a-zA-Z_]\w*(?:[?!]|=(?!>))?)/) then
self.yacc_value = src[1]
self.lex_state = :expr_end
return :tSYMBOL
elsif src.scan(/\:/) then
# ?: / then / when
if (lex_state == :expr_end || lex_state == :expr_endarg||
src.check(/\s/)) then
self.lex_state = :expr_beg
self.yacc_value = ":"
return :tCOLON
end
case
when src.scan(/\'/) then
self.lex_strterm = [:strterm, STR_SSYM, src.matched, "\0"]
when src.scan(/\"/) then
self.lex_strterm = [:strterm, STR_DSYM, src.matched, "\0"]
end
self.lex_state = :expr_fname
self.yacc_value = ":"
return :tSYMBEG
elsif src.check(/[0-9]/) then
return parse_number
elsif src.scan(/\[/) then
result = src.matched
if lex_state == :expr_fname || lex_state == :expr_dot then
self.lex_state = :expr_arg
case
when src.scan(/\]\=/) then
self.yacc_value = "[]="
return :tASET
when src.scan(/\]/) then
self.yacc_value = "[]"
return :tAREF
else
rb_compile_error "unexpected '['"
end
elsif lex_state == :expr_beg || lex_state == :expr_mid then
result = :tLBRACK
elsif lex_state.is_argument && space_seen then
result = :tLBRACK
end
self.expr_beg_push "["
return result
elsif src.scan(/\'(\\.|[^\'])*\'/) then
self.yacc_value = src.matched[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'")
self.lex_state = :expr_end
return :tSTRING
elsif src.check(/\|/) then
if src.scan(/\|\|\=/) then
self.lex_state = :expr_beg
self.yacc_value = "||"
return :tOP_ASGN
elsif src.scan(/\|\|/) then
self.lex_state = :expr_beg
self.yacc_value = "||"
return :tOROP
elsif src.scan(/\|\=/) then
self.lex_state = :expr_beg
self.yacc_value = "|"
return :tOP_ASGN
elsif src.scan(/\|/) then
self.fix_arg_lex_state
self.yacc_value = "|"
return :tPIPE
end
elsif src.scan(/\{/) then
result = if lex_state.is_argument || lex_state == :expr_end then
:tLCURLY # block (primary)
elsif lex_state == :expr_endarg then
:tLBRACE_ARG # block (expr)
else
:tLBRACE # hash
end
self.expr_beg_push "{"
self.command_start = true unless result == :tLBRACE
return result
elsif src.scan(/[+-]/) then
sign = src.matched
utype, type = if sign == "+" then
[:tUPLUS, :tPLUS]
else
[:tUMINUS, :tMINUS]
end
if lex_state == :expr_fname || lex_state == :expr_dot then
self.lex_state = :expr_arg
if src.scan(/@/) then
self.yacc_value = "#{sign}@"
return utype
else
self.yacc_value = sign
return type
end
end
if src.scan(/\=/) then
self.lex_state = :expr_beg
self.yacc_value = sign
return :tOP_ASGN
end
if (lex_state == :expr_beg || lex_state == :expr_mid ||
(lex_state.is_argument && space_seen && !src.check(/\s/))) then
if lex_state.is_argument then
arg_ambiguous
end
self.lex_state = :expr_beg
self.yacc_value = sign
if src.check(/\d/) then
if utype == :tUPLUS then
return self.parse_number
else
return :tUMINUS_NUM
end
end
return utype
end
self.lex_state = :expr_beg
self.yacc_value = sign
return type
elsif src.check(/\*/) then
if src.scan(/\*\*=/) then
self.lex_state = :expr_beg
self.yacc_value = "**"
return :tOP_ASGN
elsif src.scan(/\*\*/) then
self.yacc_value = "**"
self.fix_arg_lex_state
return :tPOW
elsif src.scan(/\*\=/) then
self.lex_state = :expr_beg
self.yacc_value = "*"
return :tOP_ASGN
elsif src.scan(/\*/) then
result = if lex_state.is_argument && space_seen && src.check(/\S/) then
warning("`*' interpreted as argument prefix")
:tSTAR
elsif lex_state == :expr_beg || lex_state == :expr_mid then
:tSTAR
else
:tSTAR2
end
self.yacc_value = "*"
self.fix_arg_lex_state
return result
end
elsif src.check(/\</) then
if src.scan(/\<\=\>/) then
self.fix_arg_lex_state
self.yacc_value = "<=>"
return :tCMP
elsif src.scan(/\<\=/) then
self.fix_arg_lex_state
self.yacc_value = "<="
return :tLEQ
elsif src.scan(/\<\<\=/) then
self.fix_arg_lex_state
self.lex_state = :expr_beg
self.yacc_value = "\<\<"
return :tOP_ASGN
elsif src.scan(/\<\</) then
if (! [:expr_end, :expr_dot,
:expr_endarg, :expr_class].include?(lex_state) &&
(!lex_state.is_argument || space_seen)) then
tok = self.heredoc_identifier
if tok then
return tok
end
end
self.fix_arg_lex_state
self.yacc_value = "\<\<"
return :tLSHFT
elsif src.scan(/\</) then
self.fix_arg_lex_state
self.yacc_value = "<"
return :tLT
end
elsif src.check(/\>/) then
if src.scan(/\>\=/) then
self.fix_arg_lex_state
self.yacc_value = ">="
return :tGEQ
elsif src.scan(/\>\>=/) then
self.fix_arg_lex_state
self.lex_state = :expr_beg
self.yacc_value = ">>"
return :tOP_ASGN
elsif src.scan(/\>\>/) then
self.fix_arg_lex_state
self.yacc_value = ">>"
return :tRSHFT
elsif src.scan(/\>/) then
self.fix_arg_lex_state
self.yacc_value = ">"
return :tGT
end
elsif src.scan(/\`/) then
self.yacc_value = "`"
case lex_state
when :expr_fname then
self.lex_state = :expr_end
return :tBACK_REF2
when :expr_dot then
self.lex_state = if command_state then
:expr_cmdarg
else
:expr_arg
end
return :tBACK_REF2
end
self.lex_strterm = [:strterm, STR_XQUOTE, '`', "\0"]
return :tXSTRING_BEG
elsif src.scan(/\?/) then
if lex_state == :expr_end || lex_state == :expr_endarg then
self.lex_state = :expr_beg
self.yacc_value = "?"
return :tEH
end
if src.eos? then
rb_compile_error "incomplete character syntax"
end
if src.check(/\s|\v/) then
unless lex_state.is_argument then
c2 = { " " => 's',
"\n" => 'n',
"\t" => 't',
"\v" => 'v',
"\r" => 'r',
"\f" => 'f' }[src.matched]
if c2 then
warning("invalid character syntax; use ?\\" + c2)
end
end
# ternary
self.lex_state = :expr_beg
self.yacc_value = "?"
return :tEH
elsif src.check(/\w(?=\w)/) then # ternary, also
self.lex_state = :expr_beg
self.yacc_value = "?"
return :tEH
end
c = if src.scan(/\\/) then
self.read_escape
else
src.getch
end
self.lex_state = :expr_end
self.yacc_value = c[0].ord & 0xff
return :tINTEGER
elsif src.check(/\&/) then
if src.scan(/\&\&\=/) then
self.yacc_value = "&&"
self.lex_state = :expr_beg
return :tOP_ASGN
elsif src.scan(/\&\&/) then
self.lex_state = :expr_beg
self.yacc_value = "&&"
return :tANDOP
elsif src.scan(/\&\=/) then
self.yacc_value = "&"
self.lex_state = :expr_beg
return :tOP_ASGN
elsif src.scan(/&/) then
result = if lex_state.is_argument && space_seen &&
!src.check(/\s/) then
warning("`&' interpreted as argument prefix")
:tAMPER
elsif lex_state == :expr_beg || lex_state == :expr_mid then
:tAMPER
else
:tAMPER2
end
self.fix_arg_lex_state
self.yacc_value = "&"
return result
end
elsif src.scan(/\//) then
if lex_state == :expr_beg || lex_state == :expr_mid then
self.lex_strterm = [:strterm, STR_REGEXP, '/', "\0"]
self.yacc_value = "/"
return :tREGEXP_BEG
end
if src.scan(/\=/) then
self.yacc_value = "/"
self.lex_state = :expr_beg
return :tOP_ASGN
end
if lex_state.is_argument && space_seen then
unless src.scan(/\s/) then
arg_ambiguous
self.lex_strterm = [:strterm, STR_REGEXP, '/', "\0"]
self.yacc_value = "/"
return :tREGEXP_BEG
end
end
self.fix_arg_lex_state
self.yacc_value = "/"
return :tDIVIDE
elsif src.scan(/\^=/) then
self.lex_state = :expr_beg
self.yacc_value = "^"
return :tOP_ASGN
elsif src.scan(/\^/) then
self.fix_arg_lex_state
self.yacc_value = "^"
return :tCARET
elsif src.scan(/\;/) then
self.command_start = true
self.lex_state = :expr_beg
self.yacc_value = ";"
return :tSEMI
elsif src.scan(/\~/) then
if lex_state == :expr_fname || lex_state == :expr_dot then
src.scan(/@/)
end
self.fix_arg_lex_state
self.yacc_value = "~"
return :tTILDE
elsif src.scan(/\\/) then
if src.scan(/\n/) then
self.lineno = nil
space_seen = true
next
end
rb_compile_error "bare backslash only allowed before newline"
elsif src.scan(/\%/) then
if lex_state == :expr_beg || lex_state == :expr_mid then
return parse_quote
end
if src.scan(/\=/) then
self.lex_state = :expr_beg
self.yacc_value = "%"
return :tOP_ASGN
end
if lex_state.is_argument && space_seen && ! src.check(/\s/) then
return parse_quote
end
self.fix_arg_lex_state
self.yacc_value = "%"
return :tPERCENT
elsif src.check(/\$/) then
if src.scan(/(\$_)(\w+)/) then
self.lex_state = :expr_end
self.token = src.matched
return process_token(command_state)
elsif src.scan(/\$_/) then
self.lex_state = :expr_end
self.token = src.matched
self.yacc_value = src.matched
return :tGVAR
elsif src.scan(/\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/) then
self.lex_state = :expr_end
self.yacc_value = src.matched
return :tGVAR
elsif src.scan(/\$([\&\`\'\+])/) then
self.lex_state = :expr_end
# Explicit reference to these vars as symbols...
if last_state == :expr_fname then
self.yacc_value = src.matched
return :tGVAR
else
self.yacc_value = src[1].to_sym
return :tBACK_REF
end
elsif src.scan(/\$([1-9]\d*)/) then
self.lex_state = :expr_end
if last_state == :expr_fname then
self.yacc_value = src.matched
return :tGVAR
else
self.yacc_value = src[1].to_i
return :tNTH_REF
end
elsif src.scan(/\$0/) then
self.lex_state = :expr_end
self.token = src.matched
return process_token(command_state)
elsif src.scan(/\$\W|\$\z/) then # TODO: remove?
self.lex_state = :expr_end
self.yacc_value = "$"
return "$"
elsif src.scan(/\$\w+/)
self.lex_state = :expr_end
self.token = src.matched
return process_token(command_state)
end
elsif src.check(/\_/) then
if src.beginning_of_line? && src.scan(/\__END__(\n|\Z)/) then
self.lineno = nil
return RubyLexer::EOF
elsif src.scan(/\_\w*/) then
self.token = src.matched
return process_token(command_state)
end
end
end # END OF CASE
if src.scan(/\004|\032|\000/) || src.eos? then # ^D, ^Z, EOF
return RubyLexer::EOF
else # alpha check
if src.scan(/\W/) then
rb_compile_error "Invalid char #{src.matched.inspect} in expression"
end
end
self.token = src.matched if self.src.scan(/\w+/)
return process_token(command_state)
end
end
def process_token(command_state)
token << src.matched if token =~ /^\w/ && src.scan(/[\!\?](?!=)/)
result = nil
last_state = lex_state
case token
when /^\$/ then
self.lex_state, result = :expr_end, :tGVAR
when /^@@/ then
self.lex_state, result = :expr_end, :tCVAR
when /^@/ then
self.lex_state, result = :expr_end, :tIVAR
else
if token =~ /[!?]$/ then
result = :tFID
else
if lex_state == :expr_fname then
# ident=, not =~ => == or followed by =>
# TODO test lexing of a=>b vs a==>b
if src.scan(/=(?:(?![~>=])|(?==>))/) then
result = :tIDENTIFIER
token << src.matched
end
end
result ||= if token =~ /^[A-Z]/ then
:tCONSTANT
else
:tIDENTIFIER
end
end
unless lex_state == :expr_dot then
# See if it is a reserved word.
keyword = RubyParser::Keyword.keyword token
if keyword then
state = lex_state
self.lex_state = keyword.state
self.yacc_value = [token, src.lineno]
if state == :expr_fname then
self.yacc_value = keyword.name
return keyword.id0
end
if keyword.id0 == :kDO then
self.command_start = true
return :kDO_COND if cond.is_in_state
return :kDO_BLOCK if cmdarg.is_in_state && state != :expr_cmdarg
return :kDO_BLOCK if state == :expr_endarg
return :kDO
end
return keyword.id0 if state == :expr_beg or state == :expr_value
self.lex_state = :expr_beg if keyword.id0 != keyword.id1
return keyword.id1
end
end
if (lex_state == :expr_beg || lex_state == :expr_mid ||
lex_state == :expr_dot || lex_state == :expr_arg ||
lex_state == :expr_cmdarg) then
if command_state then
self.lex_state = :expr_cmdarg
else
self.lex_state = :expr_arg
end
else
self.lex_state = :expr_end
end
end
self.yacc_value = token
self.lex_state = :expr_end if
last_state != :expr_dot && self.parser.env[token.to_sym] == :lvar
return result
end
def yylex_string # 23 lines
token = if lex_strterm[0] == :heredoc then
self.heredoc lex_strterm
else
self.parse_string lex_strterm
end
if token == :tSTRING_END || token == :tREGEXP_END then
self.lineno = nil
self.lex_strterm = nil
self.lex_state = :expr_end
end
return token
end
end
Something went wrong with that request. Please try again.