From 516529e98c260771f8cf39b7988e21847a02854d Mon Sep 17 00:00:00 2001 From: azjezz Date: Thu, 1 Dec 2022 03:32:50 +0100 Subject: [PATCH] feat: make parser, and lexer immutable Signed-off-by: azjezz --- bin/snapshot.rs | 13 +- build.rs | 32 +- src/lexer/mod.rs | 657 ++++++++--------- src/lexer/state.rs | 80 +++ src/main.rs | 4 +- src/parser/block.rs | 15 +- src/parser/classish.rs | 176 ++--- src/parser/classish_statement.rs | 225 +++--- src/parser/comments.rs | 30 - src/parser/flags.rs | 61 +- src/parser/functions.rs | 62 +- src/parser/ident.rs | 55 +- src/parser/macros.rs | 68 +- src/parser/mod.rs | 1132 +++++++++++++++--------------- src/parser/params.rs | 63 +- src/parser/punc.rs | 35 +- src/parser/state.rs | 59 ++ src/parser/vars.rs | 19 +- tests/third_party_tests.rs | 2 +- 19 files changed, 1428 insertions(+), 1360 deletions(-) create mode 100644 src/lexer/state.rs delete mode 100644 src/parser/comments.rs create mode 100644 src/parser/state.rs diff --git a/bin/snapshot.rs b/bin/snapshot.rs index 378af610..a738c5d0 100644 --- a/bin/snapshot.rs +++ b/bin/snapshot.rs @@ -3,6 +3,9 @@ use std::env; use std::fs::read_dir; use std::path::PathBuf; +static PARSER: Parser = Parser::new(); +static LEXER: Lexer = Lexer::new(); + fn main() { let manifest = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); let mut entries = read_dir(manifest.join("tests")) @@ -14,10 +17,6 @@ fn main() { entries.sort(); - let mut content = String::new(); - content.push_str("/// this file is auto-generated by the build script.\n"); - content.push_str("/// you should never manually change it.\n\n\n"); - for entry in entries { let code_filename = entry.join("code.php"); let ast_filename = entry.join("ast.txt"); @@ -46,8 +45,7 @@ fn main() { } let code = std::fs::read_to_string(&code_filename).unwrap(); - let mut lexer = Lexer::new(); - let tokens = lexer.tokenize(code.as_bytes()); + let tokens = LEXER.tokenize(code.as_bytes()); match tokens { Ok(tokens) => { @@ -57,8 +55,7 @@ fn main() { entry.to_string_lossy() ); - let mut parser = Parser::new(None); - let ast = parser.parse(tokens); + let ast = PARSER.parse(tokens); match ast { Ok(ast) => { std::fs::write(ast_filename, format!("{:#?}\n", ast)).unwrap(); diff --git a/build.rs b/build.rs index a4d3e13e..2e173c09 100644 --- a/build.rs +++ b/build.rs @@ -26,7 +26,11 @@ fn main() { let mut content = String::new(); content.push_str("/// this file is auto-generated by the build script.\n"); - content.push_str("/// you should never manually change it.\n\n\n"); + content.push_str("/// you should never manually change it.\n\n"); + content.push_str("use php_parser_rs::prelude::{Lexer, Parser};\n"); + content.push_str("use pretty_assertions::assert_str_eq;\n\n"); + content.push_str("static PARSER: Parser = Parser::new();\n"); + content.push_str("static LEXER: Lexer = Lexer::new();\n\n"); for entry in entries { let code_filename = entry.join("code.php"); @@ -98,10 +102,6 @@ fn build_success_test( format!( r#"#[test] fn test_success_{}() {{ - use php_parser_rs::prelude::Parser; - use php_parser_rs::prelude::Lexer; - use pretty_assertions::assert_str_eq; - let code_filename = "{}"; let ast_filename = "{}"; let tokens_filename = "{}"; @@ -110,13 +110,11 @@ fn test_success_{}() {{ let expected_ast = std::fs::read_to_string(&ast_filename).unwrap(); let expected_tokens = std::fs::read_to_string(&tokens_filename).unwrap(); - let mut lexer = Lexer::new(); - let tokens = lexer.tokenize(code.as_bytes()).unwrap(); + let tokens = LEXER.tokenize(code.as_bytes()).unwrap(); assert_str_eq!(expected_tokens.trim(), format!("{{:#?}}", tokens)); - let mut parser = Parser::new(None); - let ast = parser.parse(tokens).unwrap(); + let ast = PARSER.parse(tokens).unwrap(); assert_str_eq!(expected_ast.trim(), format!("{{:#?}}", ast)); }} @@ -137,17 +135,13 @@ fn build_lexer_error_test( format!( r#"#[test] fn test_lexer_error_{}() {{ - use php_parser_rs::prelude::Lexer; - use pretty_assertions::assert_str_eq; - let code_filename = "{}"; let lexer_error_filename = "{}"; let code = std::fs::read_to_string(&code_filename).unwrap(); let expected_error = std::fs::read_to_string(&lexer_error_filename).unwrap(); - let mut lexer = Lexer::new(); - let error = lexer.tokenize(code.as_bytes()).err().unwrap(); + let error = LEXER.tokenize(code.as_bytes()).err().unwrap(); assert_str_eq!( expected_error.trim(), @@ -171,10 +165,6 @@ fn build_parser_error_test( format!( r#"#[test] fn test_paser_error_{}() {{ - use php_parser_rs::prelude::Parser; - use php_parser_rs::prelude::Lexer; - use pretty_assertions::assert_str_eq; - let code_filename = "{}"; let tokens_filename = "{}"; let parser_error_filename = "{}"; @@ -183,13 +173,11 @@ fn test_paser_error_{}() {{ let expected_tokens = std::fs::read_to_string(&tokens_filename).unwrap(); let expected_error = std::fs::read_to_string(&parser_error_filename).unwrap(); - let mut lexer = Lexer::new(); - let tokens = lexer.tokenize(code.as_bytes()).unwrap(); + let tokens = LEXER.tokenize(code.as_bytes()).unwrap(); assert_str_eq!(expected_tokens.trim(), format!("{{:#?}}", tokens)); - let mut parser = Parser::new(None); - let error = parser.parse(tokens).err().unwrap(); + let error = PARSER.parse(tokens).err().unwrap(); assert_str_eq!( expected_error.trim(), diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index 46dc993b..de47b286 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs @@ -1,12 +1,16 @@ pub mod byte_string; pub mod error; -mod macros; pub mod token; +mod macros; +mod state; + use std::num::IntErrorKind; use crate::lexer::byte_string::ByteString; use crate::lexer::error::SyntaxError; +use crate::lexer::state::StackState; +use crate::lexer::state::State; use crate::lexer::token::OpenTagKind; use crate::lexer::token::Span; use crate::lexer::token::Token; @@ -15,104 +19,71 @@ use crate::lexer::token::TokenKind; use crate::ident; use crate::ident_start; -#[derive(Debug, PartialEq, Eq)] -pub enum LexerState { - Initial, - Scripting, - Halted, - DoubleQuote, - LookingForVarname, - LookingForProperty, - VarOffset, -} - -pub struct Lexer { - state_stack: Vec, - chars: Vec, - cursor: usize, - current: Option, - span: Span, -} - -impl Default for Lexer { - fn default() -> Self { - Self::new() - } -} +#[derive(Debug, PartialEq, Eq, Clone, Copy, Default)] +pub struct Lexer; impl Lexer { - pub fn new() -> Self { - Self { - state_stack: vec![LexerState::Initial], - chars: Vec::new(), - cursor: 0, - current: None, - span: (1, 1), - } + pub const fn new() -> Self { + Self {} } - pub fn tokenize>( - &mut self, - input: &B, - ) -> Result, SyntaxError> { + pub fn tokenize>(&self, input: &B) -> Result, SyntaxError> { + let mut state = State::new(input); let mut tokens = Vec::new(); - self.chars = input.as_ref().to_vec(); - self.current = self.chars.first().copied(); - - while self.current.is_some() { - match self.state_stack.last().unwrap() { + while state.current.is_some() { + match state.stack.last().unwrap() { // The "Initial" state is used to parse inline HTML. It is essentially a catch-all // state that will build up a single token buffer until it encounters an open tag // of some description. - LexerState::Initial => { - tokens.append(&mut self.initial()?); + StackState::Initial => { + tokens.append(&mut self.initial(&mut state)?); } // The scripting state is entered when an open tag is encountered in the source code. // This tells the lexer to start analysing characters at PHP tokens instead of inline HTML. - LexerState::Scripting => { - self.skip_whitespace(); + StackState::Scripting => { + self.skip_whitespace(&mut state); // If we have consumed whitespace and then reached the end of the file, we should break. - if self.current.is_none() { + if state.current.is_none() { break; } - tokens.push(self.scripting()?); + tokens.push(self.scripting(&mut state)?); } // The "Halted" state is entered when the `__halt_compiler` token is encountered. // In this state, all the text that follows is no longer parsed as PHP as is collected // into a single "InlineHtml" token (kind of cheating, oh well). - LexerState::Halted => { + StackState::Halted => { tokens.push(Token { - kind: TokenKind::InlineHtml(self.chars[self.cursor..].into()), - span: self.span, + kind: TokenKind::InlineHtml(state.chars[state.cursor..].into()), + span: state.span, }); break; } // The double quote state is entered when inside a double-quoted string that // contains variables. - LexerState::DoubleQuote => tokens.extend(self.double_quote()?), + StackState::DoubleQuote => tokens.extend(self.double_quote(&mut state)?), // LookingForProperty is entered inside double quotes, // backticks, or a heredoc, expecting a variable name. // If one isn't found, it switches to scripting. - LexerState::LookingForVarname => { - if let Some(token) = self.looking_for_varname() { + StackState::LookingForVarname => { + if let Some(token) = self.looking_for_varname(&mut state) { tokens.push(token); } } // LookingForProperty is entered inside double quotes, // backticks, or a heredoc, expecting an arrow followed by a // property name. - LexerState::LookingForProperty => { - tokens.push(self.looking_for_property()?); + StackState::LookingForProperty => { + tokens.push(self.looking_for_property(&mut state)?); } - LexerState::VarOffset => { - if self.current.is_none() { + StackState::VarOffset => { + if state.current.is_none() { break; } - tokens.push(self.var_offset()?); + tokens.push(self.var_offset(&mut state)?); } } } @@ -120,21 +91,21 @@ impl Lexer { Ok(tokens) } - fn skip_whitespace(&mut self) { - while let Some(b' ' | b'\n' | b'\r' | b'\t') = self.current { - self.next(); + fn skip_whitespace(&self, state: &mut State) { + while let Some(b' ' | b'\n' | b'\r' | b'\t') = state.current { + state.next(); } } - fn initial(&mut self) -> Result, SyntaxError> { - let inline_span = self.span; + fn initial(&self, state: &mut State) -> Result, SyntaxError> { + let inline_span = state.span; let mut buffer = Vec::new(); - while let Some(char) = self.current { - if self.try_read(b" Result { - let span = self.span; - let kind = match self.peek_buf() { + fn scripting(&self, state: &mut State) -> Result { + let span = state.span; + let kind = match state.peek_buf() { [b'@', ..] => { - self.next(); + state.next(); TokenKind::At } [b'!', b'=', b'=', ..] => { - self.skip(3); + state.skip(3); TokenKind::BangDoubleEquals } [b'!', b'=', ..] => { - self.skip(2); + state.skip(2); TokenKind::BangEquals } [b'!', ..] => { - self.next(); + state.next(); TokenKind::Bang } [b'&', b'&', ..] => { - self.skip(2); + state.skip(2); TokenKind::BooleanAnd } [b'&', b'=', ..] => { - self.skip(2); + state.skip(2); TokenKind::AmpersandEquals } [b'&', ..] => { - self.next(); + state.next(); TokenKind::Ampersand } [b'?', b'>', ..] => { // This is a close tag, we can enter "Initial" mode again. - self.skip(2); + state.skip(2); - self.enter_state(LexerState::Initial); + state.enter_state(StackState::Initial); TokenKind::CloseTag } [b'?', b'?', b'=', ..] => { - self.skip(3); + state.skip(3); TokenKind::CoalesceEqual } [b'?', b'?', ..] => { - self.skip(2); + state.skip(2); TokenKind::Coalesce } [b'?', b':', ..] => { - self.skip(2); + state.skip(2); TokenKind::QuestionColon } [b'?', b'-', b'>', ..] => { - self.skip(3); + state.skip(3); TokenKind::NullsafeArrow } [b'?', ..] => { - self.next(); + state.next(); TokenKind::Question } [b'=', b'>', ..] => { - self.skip(2); + state.skip(2); TokenKind::DoubleArrow } [b'=', b'=', b'=', ..] => { - self.skip(3); + state.skip(3); TokenKind::TripleEquals } [b'=', b'=', ..] => { - self.skip(2); + state.skip(2); TokenKind::DoubleEquals } [b'=', ..] => { - self.next(); + state.next(); TokenKind::Equals } // Single quoted string. [b'\'', ..] => { - self.next(); - self.tokenize_single_quote_string()? + state.next(); + self.tokenize_single_quote_string(state)? } [b'b' | b'B', b'\'', ..] => { - self.skip(2); - self.tokenize_single_quote_string()? + state.skip(2); + self.tokenize_single_quote_string(state)? } [b'"', ..] => { - self.next(); - self.tokenize_double_quote_string()? + state.next(); + self.tokenize_double_quote_string(state)? } [b'b' | b'B', b'"', ..] => { - self.skip(2); - self.tokenize_double_quote_string()? + state.skip(2); + self.tokenize_double_quote_string(state)? } [b'$', ident_start!(), ..] => { - self.next(); - self.tokenize_variable() + state.next(); + self.tokenize_variable(state) } [b'$', ..] => { - self.next(); + state.next(); TokenKind::Dollar } [b'.', b'=', ..] => { - self.skip(2); + state.skip(2); TokenKind::DotEquals } - [b'.', b'0'..=b'9', ..] => self.tokenize_number()?, + [b'.', b'0'..=b'9', ..] => self.tokenize_number(state)?, [b'.', b'.', b'.', ..] => { - self.skip(3); + state.skip(3); TokenKind::Ellipsis } [b'.', ..] => { - self.next(); + state.next(); TokenKind::Dot } - &[b'0'..=b'9', ..] => self.tokenize_number()?, + &[b'0'..=b'9', ..] => self.tokenize_number(state)?, &[b'\\', ident_start!(), ..] => { - self.next(); + state.next(); - match self.scripting()? { + match self.scripting(state)? { Token { kind: TokenKind::Identifier(ByteString(mut i)) @@ -295,19 +266,19 @@ impl Lexer { } } [b'\\', ..] => { - self.next(); + state.next(); TokenKind::NamespaceSeparator } &[b @ ident_start!(), ..] => { - self.next(); + state.next(); let mut qualified = false; let mut last_was_slash = false; let mut buffer = vec![b]; - while let Some(next) = self.current { + while let Some(next) = state.current { if next.is_ascii_alphanumeric() || next == b'_' { buffer.push(next); - self.next(); + state.next(); last_was_slash = false; continue; } @@ -316,7 +287,7 @@ impl Lexer { qualified = true; last_was_slash = true; buffer.push(next); - self.next(); + state.next(); continue; } @@ -330,12 +301,12 @@ impl Lexer { .unwrap_or_else(|| TokenKind::Identifier(buffer.into())); if kind == TokenKind::HaltCompiler { - match self.peek_buf() { + match state.peek_buf() { [b'(', b')', b';', ..] => { - self.skip(3); - self.enter_state(LexerState::Halted); + state.skip(3); + state.enter_state(StackState::Halted); } - _ => return Err(SyntaxError::InvalidHaltCompiler(self.span)), + _ => return Err(SyntaxError::InvalidHaltCompiler(state.span)), } } @@ -343,24 +314,24 @@ impl Lexer { } } [b'/', b'*', ..] => { - self.next(); + state.next(); let mut buffer = vec![b'/']; - while self.current.is_some() { - match self.peek_buf() { + while state.current.is_some() { + match state.peek_buf() { [b'*', b'/', ..] => { - self.skip(2); + state.skip(2); buffer.extend_from_slice(b"*/"); break; } &[t, ..] => { - self.next(); + state.next(); buffer.push(t); } [] => unreachable!(), } } - self.next(); + state.next(); if buffer.starts_with(b"/**") { TokenKind::DocComment(buffer.into()) @@ -369,287 +340,287 @@ impl Lexer { } } [b'#', b'[', ..] => { - self.skip(2); + state.skip(2); TokenKind::Attribute } &[ch @ b'/', b'/', ..] | &[ch @ b'#', ..] => { let mut buffer = if ch == b'/' { - self.skip(2); + state.skip(2); b"//".to_vec() } else { - self.next(); + state.next(); b"#".to_vec() }; - while let Some(c) = self.current { + while let Some(c) = state.current { if c == b'\n' { break; } buffer.push(c); - self.next(); + state.next(); } - self.next(); + state.next(); TokenKind::Comment(buffer.into()) } [b'/', b'=', ..] => { - self.skip(2); + state.skip(2); TokenKind::SlashEquals } [b'/', ..] => { - self.next(); + state.next(); TokenKind::Slash } [b'*', b'*', b'=', ..] => { - self.skip(3); + state.skip(3); TokenKind::PowEquals } [b'*', b'*', ..] => { - self.skip(2); + state.skip(2); TokenKind::Pow } [b'*', b'=', ..] => { - self.skip(2); + state.skip(2); TokenKind::AsteriskEqual } [b'*', ..] => { - self.next(); + state.next(); TokenKind::Asterisk } [b'|', b'|', ..] => { - self.skip(2); + state.skip(2); TokenKind::Pipe } [b'|', b'=', ..] => { - self.skip(2); + state.skip(2); TokenKind::PipeEquals } [b'|', ..] => { - self.next(); + state.next(); TokenKind::Pipe } [b'^', b'=', ..] => { - self.skip(2); + state.skip(2); TokenKind::CaretEquals } [b'^', ..] => { - self.next(); + state.next(); TokenKind::Caret } [b'{', ..] => { - self.next(); - self.push_state(LexerState::Scripting); + state.next(); + state.push_state(StackState::Scripting); TokenKind::LeftBrace } [b'}', ..] => { - self.next(); - self.pop_state(); + state.next(); + state.pop_state(); TokenKind::RightBrace } [b'(', ..] => { - self.next(); + state.next(); - if self.try_read(b"int)") { - self.skip(4); + if state.try_read(b"int)") { + state.skip(4); TokenKind::IntCast - } else if self.try_read(b"integer)") { - self.skip(8); + } else if state.try_read(b"integer)") { + state.skip(8); TokenKind::IntegerCast - } else if self.try_read(b"bool)") { - self.skip(5); + } else if state.try_read(b"bool)") { + state.skip(5); TokenKind::BoolCast - } else if self.try_read(b"boolean)") { - self.skip(8); + } else if state.try_read(b"boolean)") { + state.skip(8); TokenKind::BooleanCast - } else if self.try_read(b"float)") { - self.skip(6); + } else if state.try_read(b"float)") { + state.skip(6); TokenKind::FloatCast - } else if self.try_read(b"double)") { - self.skip(7); + } else if state.try_read(b"double)") { + state.skip(7); TokenKind::DoubleCast - } else if self.try_read(b"real)") { - self.skip(5); + } else if state.try_read(b"real)") { + state.skip(5); TokenKind::RealCast - } else if self.try_read(b"string)") { - self.skip(7); + } else if state.try_read(b"string)") { + state.skip(7); TokenKind::StringCast - } else if self.try_read(b"binary)") { - self.skip(7); + } else if state.try_read(b"binary)") { + state.skip(7); TokenKind::BinaryCast - } else if self.try_read(b"array)") { - self.skip(6); + } else if state.try_read(b"array)") { + state.skip(6); TokenKind::ArrayCast - } else if self.try_read(b"object)") { - self.skip(7); + } else if state.try_read(b"object)") { + state.skip(7); TokenKind::ObjectCast - } else if self.try_read(b"unset)") { - self.skip(6); + } else if state.try_read(b"unset)") { + state.skip(6); TokenKind::UnsetCast } else { TokenKind::LeftParen } } [b')', ..] => { - self.next(); + state.next(); TokenKind::RightParen } [b';', ..] => { - self.next(); + state.next(); TokenKind::SemiColon } [b'+', b'+', ..] => { - self.skip(2); + state.skip(2); TokenKind::Increment } [b'+', b'=', ..] => { - self.skip(2); + state.skip(2); TokenKind::PlusEquals } [b'+', ..] => { - self.next(); + state.next(); TokenKind::Plus } [b'%', b'=', ..] => { - self.skip(2); + state.skip(2); TokenKind::PercentEquals } [b'%', ..] => { - self.next(); + state.next(); TokenKind::Percent } [b'-', b'-', ..] => { - self.skip(2); + state.skip(2); TokenKind::Decrement } [b'-', b'>', ..] => { - self.skip(2); + state.skip(2); TokenKind::Arrow } [b'-', b'=', ..] => { - self.skip(2); + state.skip(2); TokenKind::MinusEquals } [b'-', ..] => { - self.next(); + state.next(); TokenKind::Minus } [b'<', b'<', b'<', ..] => { // TODO: Handle both heredocs and nowdocs. - self.skip(3); + state.skip(3); todo!("heredocs & nowdocs"); } [b'<', b'<', b'=', ..] => { - self.skip(3); + state.skip(3); TokenKind::LeftShiftEquals } [b'<', b'<', ..] => { - self.skip(2); + state.skip(2); TokenKind::LeftShift } [b'<', b'=', b'>', ..] => { - self.skip(3); + state.skip(3); TokenKind::Spaceship } [b'<', b'=', ..] => { - self.skip(2); + state.skip(2); TokenKind::LessThanEquals } [b'<', b'>', ..] => { - self.skip(2); + state.skip(2); TokenKind::AngledLeftRight } [b'<', ..] => { - self.next(); + state.next(); TokenKind::LessThan } [b'>', b'>', b'=', ..] => { - self.skip(3); + state.skip(3); TokenKind::RightShiftEquals } [b'>', b'>', ..] => { - self.skip(2); + state.skip(2); TokenKind::RightShift } [b'>', b'=', ..] => { - self.skip(2); + state.skip(2); TokenKind::GreaterThanEquals } [b'>', ..] => { - self.next(); + state.next(); TokenKind::GreaterThan } [b',', ..] => { - self.next(); + state.next(); TokenKind::Comma } [b'[', ..] => { - self.next(); + state.next(); TokenKind::LeftBracket } [b']', ..] => { - self.next(); + state.next(); TokenKind::RightBracket } [b':', b':', ..] => { - self.skip(2); + state.skip(2); TokenKind::DoubleColon } [b':', ..] => { - self.next(); + state.next(); TokenKind::Colon } &[b'~', ..] => { - self.next(); + state.next(); TokenKind::BitwiseNot } &[b, ..] => unimplemented!( " char: {}, line: {}, col: {}", b as char, - self.span.0, - self.span.1 + state.span.0, + state.span.1 ), // We should never reach this point since we have the empty checks surrounding // the call to this function, but it's better to be safe than sorry. - [] => return Err(SyntaxError::UnexpectedEndOfFile(self.span)), + [] => return Err(SyntaxError::UnexpectedEndOfFile(state.span)), }; Ok(Token { kind, span }) } - fn double_quote(&mut self) -> Result, SyntaxError> { - let span = self.span; + fn double_quote(&self, state: &mut State) -> Result, SyntaxError> { + let span = state.span; let mut buffer = Vec::new(); let kind = loop { - match self.peek_buf() { + match state.peek_buf() { [b'$', b'{', ..] => { - self.skip(2); - self.push_state(LexerState::LookingForVarname); + state.skip(2); + state.push_state(StackState::LookingForVarname); break TokenKind::DollarLeftBrace; } [b'{', b'$', ..] => { // Intentionally only consume the left brace. - self.next(); - self.push_state(LexerState::Scripting); + state.next(); + state.push_state(StackState::Scripting); break TokenKind::LeftBrace; } [b'"', ..] => { - self.next(); - self.enter_state(LexerState::Scripting); + state.next(); + state.enter_state(StackState::Scripting); break TokenKind::DoubleQuote; } [b'$', ident_start!(), ..] => { - self.next(); - let ident = self.consume_identifier(); + state.next(); + let ident = self.consume_identifier(state); - match self.peek_buf() { - [b'[', ..] => self.push_state(LexerState::VarOffset), + match state.peek_buf() { + [b'[', ..] => state.push_state(StackState::VarOffset), [b'-', b'>', ident_start!(), ..] | [b'?', b'-', b'>', ident_start!(), ..] => { - self.push_state(LexerState::LookingForProperty) + state.push_state(StackState::LookingForProperty) } _ => {} } @@ -657,10 +628,10 @@ impl Lexer { break TokenKind::Variable(ident.into()); } &[b, ..] => { - self.next(); + state.next(); buffer.push(b); } - [] => return Err(SyntaxError::UnexpectedEndOfFile(self.span)), + [] => return Err(SyntaxError::UnexpectedEndOfFile(state.span)), } }; @@ -676,13 +647,15 @@ impl Lexer { Ok(tokens) } - fn looking_for_varname(&mut self) -> Option { - if let Some(ident) = self.peek_identifier() { - if let Some(b'[' | b'}') = self.peek_byte(ident.len()) { + fn looking_for_varname(&self, state: &mut State) -> Option { + let identifier = self.peek_identifier(state); + + if let Some(ident) = identifier { + if let Some(b'[' | b'}') = state.peek_byte(ident.len()) { let ident = ident.to_vec(); - let span = self.span; - self.skip(ident.len()); - self.enter_state(LexerState::Scripting); + let span = state.span; + state.skip(ident.len()); + state.enter_state(StackState::Scripting); return Some(Token { kind: TokenKind::Identifier(ident.into()), span, @@ -690,24 +663,24 @@ impl Lexer { } } - self.enter_state(LexerState::Scripting); + state.enter_state(StackState::Scripting); None } - fn looking_for_property(&mut self) -> Result { - let span = self.span; - let kind = match self.peek_buf() { + fn looking_for_property(&self, state: &mut State) -> Result { + let span = state.span; + let kind = match state.peek_buf() { [b'-', b'>', ..] => { - self.skip(2); + state.skip(2); TokenKind::Arrow } [b'?', b'-', b'>', ..] => { - self.skip(3); + state.skip(3); TokenKind::NullsafeArrow } &[ident_start!(), ..] => { - let buffer = self.consume_identifier(); - self.pop_state(); + let buffer = self.consume_identifier(state); + state.pop_state(); TokenKind::Identifier(buffer.into()) } // Should be impossible as we already looked ahead this far inside double_quote. @@ -716,114 +689,114 @@ impl Lexer { Ok(Token { kind, span }) } - fn var_offset(&mut self) -> Result { - let span = self.span; - let kind = match self.peek_buf() { + fn var_offset(&self, state: &mut State) -> Result { + let span = state.span; + let kind = match state.peek_buf() { [b'$', ident_start!(), ..] => { - self.next(); - self.tokenize_variable() + state.next(); + self.tokenize_variable(state) } &[b'0'..=b'9', ..] => { // TODO: all integer literals are allowed, but only decimal integers with no underscores // are actually treated as numbers. Others are treated as strings. // Float literals are not allowed, but that could be handled in the parser. - self.tokenize_number()? + self.tokenize_number(state)? } [b'[', ..] => { - self.next(); + state.next(); TokenKind::LeftBracket } [b'-', ..] => { - self.next(); + state.next(); TokenKind::Minus } [b']', ..] => { - self.next(); - self.pop_state(); + state.next(); + state.pop_state(); TokenKind::RightBracket } &[ident_start!(), ..] => { - let label = self.consume_identifier(); + let label = self.consume_identifier(state); TokenKind::Identifier(label.into()) } &[b, ..] => unimplemented!( " char: {}, line: {}, col: {}", b as char, - self.span.0, - self.span.1 + state.span.0, + state.span.1 ), - [] => return Err(SyntaxError::UnexpectedEndOfFile(self.span)), + [] => return Err(SyntaxError::UnexpectedEndOfFile(state.span)), }; Ok(Token { kind, span }) } - fn tokenize_single_quote_string(&mut self) -> Result { + fn tokenize_single_quote_string(&self, state: &mut State) -> Result { let mut buffer = Vec::new(); loop { - match self.peek_buf() { + match state.peek_buf() { [b'\'', ..] => { - self.next(); + state.next(); break; } &[b'\\', b @ b'\'' | b @ b'\\', ..] => { - self.skip(2); + state.skip(2); buffer.push(b); } &[b, ..] => { - self.next(); + state.next(); buffer.push(b); } - [] => return Err(SyntaxError::UnexpectedEndOfFile(self.span)), + [] => return Err(SyntaxError::UnexpectedEndOfFile(state.span)), } } Ok(TokenKind::LiteralString(buffer.into())) } - fn tokenize_double_quote_string(&mut self) -> Result { + fn tokenize_double_quote_string(&self, state: &mut State) -> Result { let mut buffer = Vec::new(); let constant = loop { - match self.peek_buf() { + match state.peek_buf() { [b'"', ..] => { - self.next(); + state.next(); break true; } &[b'\\', b @ (b'"' | b'\\' | b'$'), ..] => { - self.skip(2); + state.skip(2); buffer.push(b); } &[b'\\', b'n', ..] => { - self.skip(2); + state.skip(2); buffer.push(b'\n'); } &[b'\\', b'r', ..] => { - self.skip(2); + state.skip(2); buffer.push(b'\r'); } &[b'\\', b't', ..] => { - self.skip(2); + state.skip(2); buffer.push(b'\t'); } &[b'\\', b'v', ..] => { - self.skip(2); + state.skip(2); buffer.push(b'\x0b'); } &[b'\\', b'e', ..] => { - self.skip(2); + state.skip(2); buffer.push(b'\x1b'); } &[b'\\', b'f', ..] => { - self.skip(2); + state.skip(2); buffer.push(b'\x0c'); } &[b'\\', b'x', b @ (b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F'), ..] => { - self.skip(3); + state.skip(3); let mut hex = String::from(b as char); - if let Some(b @ (b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F')) = self.current { - self.next(); + if let Some(b @ (b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F')) = state.current { + state.next(); hex.push(b as char); } @@ -831,23 +804,23 @@ impl Lexer { buffer.push(b); } &[b'\\', b'u', b'{', ..] => { - self.skip(3); + state.skip(3); let mut code_point = String::new(); - while let Some(b @ (b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F')) = self.current { - self.next(); + while let Some(b @ (b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F')) = state.current { + state.next(); code_point.push(b as char); } - if code_point.is_empty() || self.current != Some(b'}') { - return Err(SyntaxError::InvalidUnicodeEscape(self.span)); + if code_point.is_empty() || state.current != Some(b'}') { + return Err(SyntaxError::InvalidUnicodeEscape(state.span)); } - self.next(); + state.next(); let c = if let Ok(c) = u32::from_str_radix(&code_point, 16) { c } else { - return Err(SyntaxError::InvalidUnicodeEscape(self.span)); + return Err(SyntaxError::InvalidUnicodeEscape(state.span)); }; if let Some(c) = char::from_u32(c) { @@ -855,85 +828,85 @@ impl Lexer { let bytes = c.encode_utf8(&mut tmp); buffer.extend(bytes.as_bytes()); } else { - return Err(SyntaxError::InvalidUnicodeEscape(self.span)); + return Err(SyntaxError::InvalidUnicodeEscape(state.span)); } } &[b'\\', b @ b'0'..=b'7', ..] => { - self.skip(2); + state.skip(2); let mut octal = String::from(b as char); - if let Some(b @ b'0'..=b'7') = self.current { - self.next(); + if let Some(b @ b'0'..=b'7') = state.current { + state.next(); octal.push(b as char); } - if let Some(b @ b'0'..=b'7') = self.current { - self.next(); + if let Some(b @ b'0'..=b'7') = state.current { + state.next(); octal.push(b as char); } if let Ok(b) = u8::from_str_radix(&octal, 8) { buffer.push(b); } else { - return Err(SyntaxError::InvalidOctalEscape(self.span)); + return Err(SyntaxError::InvalidOctalEscape(state.span)); } } [b'$', ident_start!(), ..] | [b'{', b'$', ..] | [b'$', b'{', ..] => { break false; } &[b, ..] => { - self.next(); + state.next(); buffer.push(b); } - [] => return Err(SyntaxError::UnexpectedEndOfFile(self.span)), + [] => return Err(SyntaxError::UnexpectedEndOfFile(state.span)), } }; Ok(if constant { TokenKind::LiteralString(buffer.into()) } else { - self.enter_state(LexerState::DoubleQuote); + state.enter_state(StackState::DoubleQuote); TokenKind::StringPart(buffer.into()) }) } - fn peek_identifier(&self) -> Option<&[u8]> { - let mut cursor = self.cursor; - if let Some(ident_start!()) = self.chars.get(cursor) { + fn peek_identifier<'a>(&'a self, state: &'a State) -> Option<&[u8]> { + let mut cursor = state.cursor; + if let Some(ident_start!()) = state.chars.get(cursor) { cursor += 1; - while let Some(ident!()) = self.chars.get(cursor) { + while let Some(ident!()) = state.chars.get(cursor) { cursor += 1; } - Some(&self.chars[self.cursor..cursor]) + Some(&state.chars[state.cursor..cursor]) } else { None } } - fn consume_identifier(&mut self) -> Vec { - let ident = self.peek_identifier().unwrap().to_vec(); - self.skip(ident.len()); + fn consume_identifier(&self, state: &mut State) -> Vec { + let ident = self.peek_identifier(state).unwrap().to_vec(); + state.skip(ident.len()); ident } - fn tokenize_variable(&mut self) -> TokenKind { - TokenKind::Variable(self.consume_identifier().into()) + fn tokenize_variable(&self, state: &mut State) -> TokenKind { + TokenKind::Variable(self.consume_identifier(state).into()) } - fn tokenize_number(&mut self) -> Result { + fn tokenize_number(&self, state: &mut State) -> Result { let mut buffer = String::new(); - let (base, kind) = match self.peek_buf() { + let (base, kind) = match state.peek_buf() { [b'0', b'B' | b'b', ..] => { - self.skip(2); + state.skip(2); (2, NumberKind::Int) } [b'0', b'O' | b'o', ..] => { - self.skip(2); + state.skip(2); (8, NumberKind::Int) } [b'0', b'X' | b'x', ..] => { - self.skip(2); + state.skip(2); (16, NumberKind::Int) } [b'0', ..] => (10, NumberKind::OctalOrFloat), @@ -942,15 +915,15 @@ impl Lexer { }; if kind != NumberKind::Float { - self.read_digits(&mut buffer, base); + self.read_digits(state, &mut buffer, base); if kind == NumberKind::Int { - return parse_int(&buffer, base as u32, self.span); + return parse_int(&buffer, base as u32, state.span); } } // Remaining cases: decimal integer, legacy octal integer, or float. let is_float = matches!( - self.peek_buf(), + state.peek_buf(), [b'.', ..] | [b'e' | b'E', b'-' | b'+', b'0'..=b'9', ..] | [b'e' | b'E', b'0'..=b'9', ..] @@ -961,55 +934,60 @@ impl Lexer { } else { 10 }; - return parse_int(&buffer, base as u32, self.span); + return parse_int(&buffer, base as u32, state.span); } - if self.current == Some(b'.') { + if state.current == Some(b'.') { buffer.push('.'); - self.next(); - self.read_digits(&mut buffer, 10); + state.next(); + self.read_digits(state, &mut buffer, 10); } - if let Some(b'e' | b'E') = self.current { + if let Some(b'e' | b'E') = state.current { buffer.push('e'); - self.next(); - if let Some(b @ (b'-' | b'+')) = self.current { + state.next(); + if let Some(b @ (b'-' | b'+')) = state.current { buffer.push(b as char); - self.next(); + state.next(); } - self.read_digits(&mut buffer, 10); + self.read_digits(state, &mut buffer, 10); } Ok(TokenKind::LiteralFloat(buffer.parse().unwrap())) } - fn read_digits(&mut self, buffer: &mut String, base: usize) { + fn read_digits(&self, state: &mut State, buffer: &mut String, base: usize) { if base == 16 { - self.read_digits_fn(buffer, u8::is_ascii_hexdigit); + self.read_digits_fn(state, buffer, u8::is_ascii_hexdigit); } else { let max = b'0' + base as u8; - self.read_digits_fn(buffer, |b| (b'0'..max).contains(b)); + self.read_digits_fn(state, buffer, |b| (b'0'..max).contains(b)); }; } - fn read_digits_fn bool>(&mut self, buffer: &mut String, is_digit: F) { - if let Some(b) = self.current { + fn read_digits_fn bool>( + &self, + state: &mut State, + buffer: &mut String, + is_digit: F, + ) { + if let Some(b) = state.current { if is_digit(&b) { - self.next(); + state.next(); buffer.push(b as char); } else { return; } } loop { - match *self.peek_buf() { + match *state.peek_buf() { [b, ..] if is_digit(&b) => { - self.next(); + state.next(); buffer.push(b as char); } [b'_', b, ..] if is_digit(&b) => { - self.next(); - self.next(); + state.next(); + state.next(); buffer.push(b as char); } _ => { @@ -1018,49 +996,6 @@ impl Lexer { } } } - - fn enter_state(&mut self, state: LexerState) { - *self.state_stack.last_mut().unwrap() = state; - } - - fn push_state(&mut self, state: LexerState) { - self.state_stack.push(state); - } - - fn pop_state(&mut self) { - self.state_stack.pop(); - } - - fn peek_buf(&self) -> &[u8] { - &self.chars[self.cursor..] - } - - fn peek_byte(&self, delta: usize) -> Option { - self.chars.get(self.cursor + delta).copied() - } - - fn try_read(&self, search: &'static [u8]) -> bool { - self.peek_buf().starts_with(search) - } - - fn skip(&mut self, count: usize) { - for _ in 0..count { - self.next(); - } - } - - fn next(&mut self) { - match self.current { - Some(b'\n') => { - self.span.0 += 1; - self.span.1 = 1; - } - Some(_) => self.span.1 += 1, - _ => {} - } - self.cursor += 1; - self.current = self.chars.get(self.cursor).copied(); - } } // Parses an integer literal in the given base and converts errors to SyntaxError. diff --git a/src/lexer/state.rs b/src/lexer/state.rs new file mode 100644 index 00000000..59a0ba80 --- /dev/null +++ b/src/lexer/state.rs @@ -0,0 +1,80 @@ +use crate::lexer::token::Span; + +#[derive(Debug, PartialEq, Eq, Clone)] +pub enum StackState { + Initial, + Scripting, + Halted, + DoubleQuote, + LookingForVarname, + LookingForProperty, + VarOffset, +} + +// TODO(azjezz): make `chars` a `[u8, N]`, and `State`, `State` +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct State { + pub stack: Vec, + pub chars: Vec, + pub cursor: usize, + pub current: Option, + pub span: Span, +} + +impl State { + pub fn new>(input: &B) -> Self { + let chars = input.as_ref().to_vec(); + let current = chars.first().copied(); + + Self { + stack: vec![StackState::Initial], + chars, + current, + cursor: 0, + span: (1, 1), + } + } + + pub fn enter_state(&mut self, state: StackState) { + *self.stack.last_mut().unwrap() = state; + } + + pub fn push_state(&mut self, state: StackState) { + self.stack.push(state); + } + + pub fn pop_state(&mut self) { + self.stack.pop(); + } + + pub fn peek_buf(&self) -> &[u8] { + &self.chars[self.cursor..] + } + + pub fn peek_byte(&self, delta: usize) -> Option { + self.chars.get(self.cursor + delta).copied() + } + + pub fn try_read(&self, search: &'static [u8]) -> bool { + self.peek_buf().starts_with(search) + } + + pub fn skip(&mut self, count: usize) { + for _ in 0..count { + self.next(); + } + } + + pub fn next(&mut self) { + match self.current { + Some(b'\n') => { + self.span.0 += 1; + self.span.1 = 1; + } + Some(_) => self.span.1 += 1, + _ => {} + } + self.cursor += 1; + self.current = self.chars.get(self.cursor).copied(); + } +} diff --git a/src/main.rs b/src/main.rs index 676b7437..bd54b47d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -20,7 +20,7 @@ fn main() { } }; - let mut lexer = Lexer::new(); + let lexer = Lexer::new(); let tokens = match lexer.tokenize(contents.as_bytes()) { Ok(tokens) => tokens, Err(error) => { @@ -30,7 +30,7 @@ fn main() { } }; - let mut parser = Parser::new(None); + let parser = Parser::new(); let ast = match parser.parse(tokens) { Ok(ast) => ast, Err(error) => { diff --git a/src/parser/block.rs b/src/parser/block.rs index d1d4a3db..af388651 100644 --- a/src/parser/block.rs +++ b/src/parser/block.rs @@ -1,17 +1,22 @@ use crate::lexer::token::TokenKind; use crate::parser::ast::Block; use crate::parser::error::ParseResult; +use crate::parser::state::State; use crate::parser::Parser; impl Parser { - pub(in crate::parser) fn block(&mut self, until: &TokenKind) -> ParseResult { - self.skip_comments(); + pub(in crate::parser) fn block( + &self, + state: &mut State, + until: &TokenKind, + ) -> ParseResult { + state.skip_comments(); let mut block = Block::new(); - while !self.is_eof() && &self.current.kind != until { - block.push(self.statement()?); - self.skip_comments(); + while !state.is_eof() && &state.current.kind != until { + block.push(self.statement(state)?); + state.skip_comments(); } Ok(block) diff --git a/src/parser/classish.rs b/src/parser/classish.rs index a7c774e8..7dd5f033 100644 --- a/src/parser/classish.rs +++ b/src/parser/classish.rs @@ -6,49 +6,50 @@ use crate::parser::ast::Expression; use crate::parser::ast::Identifier; use crate::parser::ast::Statement; use crate::parser::error::ParseResult; +use crate::parser::state::State; use crate::parser::Parser; use crate::expect_token; use crate::expected_token_err; impl Parser { - pub(in crate::parser) fn class_definition(&mut self) -> ParseResult { - let flags: Vec = self.class_flags()?.iter().map(|f| f.into()).collect(); + pub(in crate::parser) fn class_definition(&self, state: &mut State) -> ParseResult { + let flags: Vec = self.class_flags(state)?.iter().map(|f| f.into()).collect(); - expect_token!([TokenKind::Class], self, ["`class`"]); + expect_token!([TokenKind::Class], state, ["`class`"]); - let name = self.ident()?; + let name = self.ident(state)?; let mut extends: Option = None; - if self.current.kind == TokenKind::Extends { - self.next(); - extends = Some(self.full_name()?.into()); + if state.current.kind == TokenKind::Extends { + state.next(); + extends = Some(self.full_name(state)?.into()); } - let implements = if self.current.kind == TokenKind::Implements { - self.next(); + let implements = if state.current.kind == TokenKind::Implements { + state.next(); - self.at_least_one_comma_separated::(&|parser| { - Ok(parser.full_name()?.into()) + self.at_least_one_comma_separated::(state, &|parser, state| { + Ok(parser.full_name(state)?.into()) })? } else { Vec::new() }; - self.lbrace()?; + self.lbrace(state)?; let mut body = Vec::new(); - while self.current.kind != TokenKind::RightBrace { - self.gather_comments(); + while state.current.kind != TokenKind::RightBrace { + state.gather_comments(); - if self.current.kind == TokenKind::RightBrace { - self.clear_comments(); + if state.current.kind == TokenKind::RightBrace { + state.clear_comments(); break; } - body.push(self.class_statement(flags.clone())?); + body.push(self.class_statement(state, flags.clone())?); } - self.rbrace()?; + self.rbrace(state)?; Ok(Statement::Class { name: name.into(), @@ -59,35 +60,38 @@ impl Parser { }) } - pub(in crate::parser) fn interface_definition(&mut self) -> ParseResult { - expect_token!([TokenKind::Interface], self, ["`interface`"]); + pub(in crate::parser) fn interface_definition( + &self, + state: &mut State, + ) -> ParseResult { + expect_token!([TokenKind::Interface], state, ["`interface`"]); - let name = self.ident()?; + let name = self.ident(state)?; - let extends = if self.current.kind == TokenKind::Extends { - self.next(); + let extends = if state.current.kind == TokenKind::Extends { + state.next(); - self.at_least_one_comma_separated::(&|parser| { - Ok(parser.full_name()?.into()) + self.at_least_one_comma_separated::(state, &|parser, state| { + Ok(parser.full_name(state)?.into()) })? } else { Vec::new() }; - self.lbrace()?; + self.lbrace(state)?; let mut body = Vec::new(); - while self.current.kind != TokenKind::RightBrace && !self.is_eof() { - self.gather_comments(); + while state.current.kind != TokenKind::RightBrace && !state.is_eof() { + state.gather_comments(); - if self.current.kind == TokenKind::RightBrace { - self.clear_comments(); + if state.current.kind == TokenKind::RightBrace { + state.clear_comments(); break; } - body.push(self.interface_statement()?); + body.push(self.interface_statement(state)?); } - self.rbrace()?; + self.rbrace(state)?; Ok(Statement::Interface { name: name.into(), @@ -96,25 +100,25 @@ impl Parser { }) } - pub(in crate::parser) fn trait_definition(&mut self) -> ParseResult { - expect_token!([TokenKind::Trait], self, ["`trait`"]); + pub(in crate::parser) fn trait_definition(&self, state: &mut State) -> ParseResult { + expect_token!([TokenKind::Trait], state, ["`trait`"]); - let name = self.ident()?; + let name = self.ident(state)?; - self.lbrace()?; + self.lbrace(state)?; let mut body = Vec::new(); - while self.current.kind != TokenKind::RightBrace && !self.is_eof() { - self.gather_comments(); + while state.current.kind != TokenKind::RightBrace && !state.is_eof() { + state.gather_comments(); - if self.current.kind == TokenKind::RightBrace { - self.clear_comments(); + if state.current.kind == TokenKind::RightBrace { + state.clear_comments(); break; } - body.push(self.trait_statement()?); + body.push(self.trait_statement(state)?); } - self.rbrace()?; + self.rbrace(state)?; Ok(Statement::Trait { name: name.into(), @@ -122,47 +126,50 @@ impl Parser { }) } - pub(in crate::parser) fn anonymous_class_definition(&mut self) -> ParseResult { - self.next(); + pub(in crate::parser) fn anonymous_class_definition( + &self, + state: &mut State, + ) -> ParseResult { + state.next(); - expect_token!([TokenKind::Class], self, ["`class`"]); + expect_token!([TokenKind::Class], state, ["`class`"]); let mut args = vec![]; - if self.current.kind == TokenKind::LeftParen { - self.lparen()?; + if state.current.kind == TokenKind::LeftParen { + self.lparen(state)?; - args = self.args_list()?; + args = self.args_list(state)?; - self.rparen()?; + self.rparen(state)?; } let mut extends: Option = None; - if self.current.kind == TokenKind::Extends { - self.next(); - extends = Some(self.full_name()?.into()); + if state.current.kind == TokenKind::Extends { + state.next(); + extends = Some(self.full_name(state)?.into()); } let mut implements = Vec::new(); - if self.current.kind == TokenKind::Implements { - self.next(); + if state.current.kind == TokenKind::Implements { + state.next(); - while self.current.kind != TokenKind::LeftBrace { - self.optional_comma()?; + while state.current.kind != TokenKind::LeftBrace { + self.optional_comma(state)?; - implements.push(self.full_name()?.into()); + implements.push(self.full_name(state)?.into()); } } - self.lbrace()?; + self.lbrace(state)?; let mut body = Vec::new(); - while self.current.kind != TokenKind::RightBrace && !self.is_eof() { - body.push(self.anonymous_class_statement()?); + while state.current.kind != TokenKind::RightBrace && !state.is_eof() { + body.push(self.anonymous_class_statement(state)?); } - self.rbrace()?; + self.rbrace(state)?; Ok(Expression::New { target: Box::new(Expression::AnonymousClass { @@ -174,17 +181,17 @@ impl Parser { }) } - pub(in crate::parser) fn enum_definition(&mut self) -> ParseResult { - self.next(); + pub(in crate::parser) fn enum_definition(&self, state: &mut State) -> ParseResult { + state.next(); - let name = self.ident()?; + let name = self.ident(state)?; - let backed_type: Option = if self.current.kind == TokenKind::Colon { - self.colon()?; + let backed_type: Option = if state.current.kind == TokenKind::Colon { + self.colon(state)?; - match self.current.kind.clone() { + match state.current.kind.clone() { TokenKind::Identifier(s) if s == b"string" || s == b"int" => { - self.next(); + state.next(); Some(match &s[..] { b"string" => BackedEnumType::String, @@ -193,7 +200,7 @@ impl Parser { }) } _ => { - return expected_token_err!(["`string`", "`int`"], self); + return expected_token_err!(["`string`", "`int`"], state); } } } else { @@ -201,25 +208,25 @@ impl Parser { }; let mut implements = Vec::new(); - if self.current.kind == TokenKind::Implements { - self.next(); + if state.current.kind == TokenKind::Implements { + state.next(); - while self.current.kind != TokenKind::LeftBrace { - implements.push(self.full_name()?.into()); + while state.current.kind != TokenKind::LeftBrace { + implements.push(self.full_name(state)?.into()); - self.optional_comma()?; + self.optional_comma(state)?; } } - self.lbrace()?; + self.lbrace(state)?; let mut body = Block::new(); - while self.current.kind != TokenKind::RightBrace { - self.skip_comments(); - body.push(self.enum_statement(backed_type.is_some())?); + while state.current.kind != TokenKind::RightBrace { + state.skip_comments(); + body.push(self.enum_statement(state, backed_type.is_some())?); } - self.rbrace()?; + self.rbrace(state)?; match backed_type { Some(backed_type) => Ok(Statement::BackedEnum { @@ -237,17 +244,18 @@ impl Parser { } fn at_least_one_comma_separated( - &mut self, - func: &(dyn Fn(&mut Parser) -> ParseResult), + &self, + state: &mut State, + func: &(dyn Fn(&Parser, &mut State) -> ParseResult), ) -> ParseResult> { let mut result: Vec = vec![]; loop { - result.push(func(self)?); - if self.current.kind != TokenKind::Comma { + result.push(func(self, state)?); + if state.current.kind != TokenKind::Comma { break; } - self.next(); + state.next(); } Ok(result) diff --git a/src/parser/classish_statement.rs b/src/parser/classish_statement.rs index 0f382281..f8f3966c 100644 --- a/src/parser/classish_statement.rs +++ b/src/parser/classish_statement.rs @@ -7,6 +7,7 @@ use crate::parser::ast::TraitAdaptation; use crate::parser::error::ParseError; use crate::parser::error::ParseResult; use crate::parser::precedence::Precedence; +use crate::parser::state::State; use crate::parser::Parser; use crate::expect_token; @@ -24,121 +25,136 @@ pub enum ClassishDefinitionType { impl Parser { pub(in crate::parser) fn class_statement( - &mut self, + &self, + state: &mut State, flags: Vec, ) -> ParseResult { - self.complete_class_statement(ClassishDefinitionType::Class(flags)) + self.complete_class_statement(state, ClassishDefinitionType::Class(flags)) } - pub(in crate::parser) fn interface_statement(&mut self) -> ParseResult { - if self.current.kind == TokenKind::Const { - return self.parse_classish_const(vec![]); + pub(in crate::parser) fn interface_statement( + &self, + state: &mut State, + ) -> ParseResult { + if state.current.kind == TokenKind::Const { + return self.parse_classish_const(state, vec![]); } - if self.current.kind == TokenKind::Function { - return self.method(ClassishDefinitionType::Interface, vec![]); + if state.current.kind == TokenKind::Function { + return self.method(state, ClassishDefinitionType::Interface, vec![]); } - let member_flags = self.interface_members_flags()?; + let member_flags = self.interface_members_flags(state)?; peek_token!([ - TokenKind::Const => self.parse_classish_const(member_flags), + TokenKind::Const => self.parse_classish_const(state, member_flags), TokenKind::Function => self.method( + state, ClassishDefinitionType::Interface, member_flags.iter().map(|t| t.clone().into()).collect(), ) - ], self, ["`const`", "`function`"]) + ], state, ["`const`", "`function`"]) } - pub(in crate::parser) fn trait_statement(&mut self) -> ParseResult { - self.complete_class_statement(ClassishDefinitionType::Trait) + pub(in crate::parser) fn trait_statement(&self, state: &mut State) -> ParseResult { + self.complete_class_statement(state, ClassishDefinitionType::Trait) } - pub(in crate::parser) fn anonymous_class_statement(&mut self) -> ParseResult { - self.complete_class_statement(ClassishDefinitionType::AnonymousClass) + pub(in crate::parser) fn anonymous_class_statement( + &self, + state: &mut State, + ) -> ParseResult { + self.complete_class_statement(state, ClassishDefinitionType::AnonymousClass) } - pub(in crate::parser) fn enum_statement(&mut self, backed: bool) -> ParseResult { - if self.current.kind == TokenKind::Case { - self.next(); + pub(in crate::parser) fn enum_statement( + &self, + state: &mut State, + backed: bool, + ) -> ParseResult { + if state.current.kind == TokenKind::Case { + state.next(); - let name = self.ident()?; + let name = self.ident(state)?; if backed { - expect_token!([TokenKind::Equals], self, "`=`"); + expect_token!([TokenKind::Equals], state, "`=`"); - let value = self.expression(Precedence::Lowest)?; - self.semi()?; + let value = self.expression(state, Precedence::Lowest)?; + self.semi(state)?; return Ok(Statement::BackedEnumCase { name: name.into(), value, }); } else { - self.semi()?; + self.semi(state)?; return Ok(Statement::UnitEnumCase { name: name.into() }); } } - if self.current.kind == TokenKind::Const { - return self.parse_classish_const(vec![]); + if state.current.kind == TokenKind::Const { + return self.parse_classish_const(state, vec![]); } - if self.current.kind == TokenKind::Function { - return self.method(ClassishDefinitionType::Enum, vec![]); + if state.current.kind == TokenKind::Function { + return self.method(state, ClassishDefinitionType::Enum, vec![]); } - let member_flags = self.enum_members_flags()?; + let member_flags = self.enum_members_flags(state)?; peek_token!([ - TokenKind::Const => self.parse_classish_const(member_flags), + TokenKind::Const => self.parse_classish_const(state, member_flags), TokenKind::Function => self.method( + state, ClassishDefinitionType::Enum, member_flags.iter().map(|t| t.clone().into()).collect(), ) - ], self, ["`const`", "`function`"]) + ], state, ["`const`", "`function`"]) } fn complete_class_statement( - &mut self, + &self, + state: &mut State, class_type: ClassishDefinitionType, ) -> ParseResult { - if self.current.kind == TokenKind::Use { - return self.parse_classish_uses(); + if state.current.kind == TokenKind::Use { + return self.parse_classish_uses(state); } - if self.current.kind == TokenKind::Var { - return self.parse_classish_var(); + if state.current.kind == TokenKind::Var { + return self.parse_classish_var(state); } - if self.current.kind == TokenKind::Const { - return self.parse_classish_const(vec![]); + if state.current.kind == TokenKind::Const { + return self.parse_classish_const(state, vec![]); } - if self.current.kind == TokenKind::Function { - return self.method(class_type, vec![]); + if state.current.kind == TokenKind::Function { + return self.method(state, class_type, vec![]); } - let member_flags = self.class_members_flags()?; + let member_flags = self.class_members_flags(state)?; - match &self.current.kind { - TokenKind::Const => self.parse_classish_const(member_flags), + match &state.current.kind { + TokenKind::Const => self.parse_classish_const(state, member_flags), TokenKind::Function => self.method( + state, class_type, member_flags.iter().map(|t| t.clone().into()).collect(), ), // TODO TokenKind::Variable(_) => { - let var = self.var()?; + let var = self.var(state)?; let mut value = None; - if self.current.kind == TokenKind::Equals { - self.next(); - value = Some(self.expression(Precedence::Lowest)?); + if state.current.kind == TokenKind::Equals { + state.next(); + value = Some(self.expression(state, Precedence::Lowest)?); } - self.semi()?; + self.semi(state)?; Ok(Statement::Property { var, @@ -153,19 +169,19 @@ impl Parser { | TokenKind::FullyQualifiedIdentifier(_) | TokenKind::Array | TokenKind::Null => { - let prop_type = self.type_string()?; - let var = self.var()?; + let prop_type = self.type_string(state)?; + let var = self.var(state)?; let mut value = None; - if self.current.kind == TokenKind::Equals { - self.next(); - value = Some(self.expression(Precedence::Lowest)?); + if state.current.kind == TokenKind::Equals { + state.next(); + value = Some(self.expression(state, Precedence::Lowest)?); } // TODO: Support comma-separated property declarations. // nikic/php-parser does this with a single Property statement // that is capable of holding multiple property declarations. - self.semi()?; + self.semi(state)?; Ok(Statement::Property { var, @@ -176,30 +192,30 @@ impl Parser { } _ => expected_token_err!( ["`const`", "`function`", "an identifier", "a varaible"], - self + state ), } } - fn parse_classish_var(&mut self) -> ParseResult { - self.next(); + fn parse_classish_var(&self, state: &mut State) -> ParseResult { + state.next(); let mut var_type = None; - if !matches!(self.current.kind, TokenKind::Variable(_)) || self.config.force_type_strings { - var_type = Some(self.type_string()?); + if !matches!(state.current.kind, TokenKind::Variable(_)) { + var_type = Some(self.type_string(state)?); } - let var = self.var()?; + let var = self.var(state)?; let mut value = None; - if self.current.kind == TokenKind::Equals { - self.next(); + if state.current.kind == TokenKind::Equals { + state.next(); - value = Some(self.expression(Precedence::Lowest)?); + value = Some(self.expression(state, Precedence::Lowest)?); } - self.semi()?; + self.semi(state)?; Ok(Statement::Var { var, @@ -208,51 +224,52 @@ impl Parser { }) } - fn parse_classish_uses(&mut self) -> ParseResult { - self.next(); + fn parse_classish_uses(&self, state: &mut State) -> ParseResult { + state.next(); let mut traits = Vec::new(); - while self.current.kind != TokenKind::SemiColon && self.current.kind != TokenKind::LeftBrace + while state.current.kind != TokenKind::SemiColon + && state.current.kind != TokenKind::LeftBrace { - self.optional_comma()?; + self.optional_comma(state)?; - let t = self.full_name()?; + let t = self.full_name(state)?; traits.push(t.into()); } let mut adaptations = Vec::new(); - if self.current.kind == TokenKind::LeftBrace { - self.lbrace()?; + if state.current.kind == TokenKind::LeftBrace { + self.lbrace(state)?; - while self.current.kind != TokenKind::RightBrace { - let (r#trait, method): (Option, Identifier) = match self.peek.kind { + while state.current.kind != TokenKind::RightBrace { + let (r#trait, method): (Option, Identifier) = match state.peek.kind { TokenKind::DoubleColon => { - let r#trait = self.full_name()?; - self.next(); - let method = self.ident()?; + let r#trait = self.full_name(state)?; + state.next(); + let method = self.ident(state)?; (Some(r#trait.into()), method.into()) } - _ => (None, self.ident()?.into()), + _ => (None, self.ident(state)?.into()), }; - match self.current.kind { + match state.current.kind { TokenKind::As => { - self.next(); + state.next(); - match self.current.kind { + match state.current.kind { TokenKind::Public | TokenKind::Protected | TokenKind::Private => { - let visibility: MethodFlag = self.current.kind.clone().into(); - self.next(); + let visibility: MethodFlag = state.current.kind.clone().into(); + state.next(); - if self.current.kind == TokenKind::SemiColon { + if state.current.kind == TokenKind::SemiColon { adaptations.push(TraitAdaptation::Visibility { r#trait, method, visibility, }); } else { - let alias: Identifier = self.name()?.into(); + let alias: Identifier = self.name(state)?.into(); adaptations.push(TraitAdaptation::Alias { r#trait, method, @@ -262,7 +279,7 @@ impl Parser { } } _ => { - let alias: Identifier = self.name()?.into(); + let alias: Identifier = self.name(state)?.into(); adaptations.push(TraitAdaptation::Alias { r#trait, method, @@ -273,13 +290,13 @@ impl Parser { } } TokenKind::Insteadof => { - self.next(); + state.next(); let mut insteadof = Vec::new(); - insteadof.push(self.full_name()?.into()); - while self.current.kind != TokenKind::SemiColon { - self.optional_comma()?; - insteadof.push(self.full_name()?.into()); + insteadof.push(self.full_name(state)?.into()); + while state.current.kind != TokenKind::SemiColon { + self.optional_comma(state)?; + insteadof.push(self.full_name(state)?.into()); } adaptations.push(TraitAdaptation::Precedence { @@ -290,18 +307,18 @@ impl Parser { } _ => { return Err(ParseError::UnexpectedToken( - self.current.kind.to_string(), - self.current.span, + state.current.kind.to_string(), + state.current.span, )) } }; - self.semi()?; + self.semi(state)?; } - self.rbrace()?; + self.rbrace(state)?; } else { - self.semi()?; + self.semi(state)?; } Ok(Statement::TraitUse { @@ -310,30 +327,34 @@ impl Parser { }) } - fn parse_classish_const(&mut self, const_flags: Vec) -> ParseResult { + fn parse_classish_const( + &self, + state: &mut State, + const_flags: Vec, + ) -> ParseResult { if const_flags.contains(&TokenKind::Static) { - return Err(ParseError::StaticModifierOnConstant(self.current.span)); + return Err(ParseError::StaticModifierOnConstant(state.current.span)); } if const_flags.contains(&TokenKind::Readonly) { - return Err(ParseError::ReadonlyModifierOnConstant(self.current.span)); + return Err(ParseError::ReadonlyModifierOnConstant(state.current.span)); } if const_flags.contains(&TokenKind::Final) && const_flags.contains(&TokenKind::Private) { return Err(ParseError::FinalModifierOnPrivateConstant( - self.current.span, + state.current.span, )); } - self.next(); + state.next(); - let name = self.ident()?; + let name = self.ident(state)?; - expect_token!([TokenKind::Equals], self, "`=`"); + expect_token!([TokenKind::Equals], state, "`=`"); - let value = self.expression(Precedence::Lowest)?; + let value = self.expression(state, Precedence::Lowest)?; - self.semi()?; + self.semi(state)?; Ok(Statement::ClassishConstant { name: name.into(), diff --git a/src/parser/comments.rs b/src/parser/comments.rs deleted file mode 100644 index 29cdc313..00000000 --- a/src/parser/comments.rs +++ /dev/null @@ -1,30 +0,0 @@ -use crate::lexer::token::Token; -use crate::lexer::token::TokenKind; -use crate::parser::Parser; - -impl Parser { - pub(in crate::parser) fn skip_comments(&mut self) { - while matches!( - self.current.kind, - TokenKind::Comment(_) | TokenKind::DocComment(_) - ) { - self.next(); - } - } - - pub(in crate::parser) fn gather_comments(&mut self) { - while matches!( - self.current.kind, - TokenKind::Comment(_) | TokenKind::DocComment(_) - ) { - self.comments.push(self.current.clone()); - self.next(); - } - } - - pub(in crate::parser) fn clear_comments(&mut self) -> Vec { - let c = self.comments.clone(); - self.comments = vec![]; - c - } -} diff --git a/src/parser/flags.rs b/src/parser/flags.rs index 847827ee..df1c3cd3 100644 --- a/src/parser/flags.rs +++ b/src/parser/flags.rs @@ -1,6 +1,7 @@ use crate::lexer::token::TokenKind; use crate::parser::error::ParseError; use crate::parser::error::ParseResult; +use crate::parser::state::State; use crate::parser::Parser; #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] @@ -13,22 +14,31 @@ enum FlagTarget { } impl Parser { - pub(in crate::parser) fn class_flags(&mut self) -> ParseResult> { + pub(in crate::parser) fn class_flags(&self, state: &mut State) -> ParseResult> { self.collect( + state, vec![TokenKind::Final, TokenKind::Abstract, TokenKind::Readonly], FlagTarget::Class, ) } - pub(in crate::parser) fn interface_members_flags(&mut self) -> ParseResult> { + pub(in crate::parser) fn interface_members_flags( + &self, + state: &mut State, + ) -> ParseResult> { self.collect( + state, vec![TokenKind::Public, TokenKind::Static], FlagTarget::InterfaceMember, ) } - pub(in crate::parser) fn class_members_flags(&mut self) -> ParseResult> { + pub(in crate::parser) fn class_members_flags( + &self, + state: &mut State, + ) -> ParseResult> { self.collect( + state, vec![ TokenKind::Final, TokenKind::Abstract, @@ -42,8 +52,12 @@ impl Parser { ) } - pub(in crate::parser) fn enum_members_flags(&mut self) -> ParseResult> { + pub(in crate::parser) fn enum_members_flags( + &self, + state: &mut State, + ) -> ParseResult> { self.collect( + state, vec![ TokenKind::Final, TokenKind::Private, @@ -55,8 +69,12 @@ impl Parser { ) } - pub(in crate::parser) fn promoted_property_flags(&mut self) -> ParseResult> { + pub(in crate::parser) fn promoted_property_flags( + &self, + state: &mut State, + ) -> ParseResult> { self.collect( + state, vec![ TokenKind::Private, TokenKind::Protected, @@ -68,63 +86,64 @@ impl Parser { } fn collect( - &mut self, + &self, + state: &mut State, flags: Vec, target: FlagTarget, ) -> ParseResult> { let mut collected: Vec = vec![]; loop { - if flags.contains(&self.current.kind) { - if collected.contains(&self.current.kind) { + if flags.contains(&state.current.kind) { + if collected.contains(&state.current.kind) { return Err(ParseError::MultipleModifiers( - self.current.kind.to_string(), - self.current.span, + state.current.kind.to_string(), + state.current.span, )); } - match self.current.kind { + match state.current.kind { TokenKind::Private if collected.contains(&TokenKind::Protected) || collected.contains(&TokenKind::Public) => { - return Err(ParseError::MultipleAccessModifiers(self.current.span)); + return Err(ParseError::MultipleAccessModifiers(state.current.span)); } TokenKind::Protected if collected.contains(&TokenKind::Private) || collected.contains(&TokenKind::Public) => { - return Err(ParseError::MultipleAccessModifiers(self.current.span)); + return Err(ParseError::MultipleAccessModifiers(state.current.span)); } TokenKind::Public if collected.contains(&TokenKind::Private) || collected.contains(&TokenKind::Protected) => { - return Err(ParseError::MultipleAccessModifiers(self.current.span)); + return Err(ParseError::MultipleAccessModifiers(state.current.span)); } _ => {} }; if matches!(target, FlagTarget::ClassMember | FlagTarget::Class) { - match self.current.kind { + match state.current.kind { TokenKind::Final if collected.contains(&TokenKind::Abstract) => { if target == FlagTarget::Class { return Err(ParseError::FinalModifierOnAbstractClass( - self.current.span, + state.current.span, )); } else { return Err(ParseError::FinalModifierOnAbstractClassMember( - self.current.span, + state.current.span, )); } } TokenKind::Abstract if collected.contains(&TokenKind::Final) => { if target == FlagTarget::Class { return Err(ParseError::FinalModifierOnAbstractClass( - self.current.span, + state.current.span, )); } else { return Err(ParseError::FinalModifierOnAbstractClassMember( - self.current.span, + state.current.span, )); } } @@ -132,8 +151,8 @@ impl Parser { }; } - collected.push(self.current.kind.clone()); - self.next(); + collected.push(state.current.kind.clone()); + state.next(); } else { break; } diff --git a/src/parser/functions.rs b/src/parser/functions.rs index eac96e29..a2093fda 100644 --- a/src/parser/functions.rs +++ b/src/parser/functions.rs @@ -7,40 +7,41 @@ use crate::parser::classish_statement::ClassishDefinitionType; use crate::parser::error::ParseError; use crate::parser::error::ParseResult; use crate::parser::params::ParamPosition; +use crate::parser::state::State; use crate::parser::Parser; impl Parser { - pub(in crate::parser) fn function(&mut self) -> ParseResult { - self.next(); + pub(in crate::parser) fn function(&self, state: &mut State) -> ParseResult { + state.next(); - let by_ref = if self.current.kind == TokenKind::Ampersand { - self.next(); + let by_ref = if state.current.kind == TokenKind::Ampersand { + state.next(); true } else { false }; - let name = self.ident()?; + let name = self.ident(state)?; - self.lparen()?; + self.lparen(state)?; - let params = self.param_list(ParamPosition::Function)?; + let params = self.param_list(state, ParamPosition::Function)?; - self.rparen()?; + self.rparen(state)?; let mut return_type = None; - if self.current.kind == TokenKind::Colon || self.config.force_type_strings { - self.colon()?; + if state.current.kind == TokenKind::Colon { + self.colon(state)?; - return_type = Some(self.type_string()?); + return_type = Some(self.type_string(state)?); } - self.lbrace()?; + self.lbrace(state)?; - let body = self.block(&TokenKind::RightBrace)?; + let body = self.block(state, &TokenKind::RightBrace)?; - self.rbrace()?; + self.rbrace(state)?; Ok(Statement::Function { name: name.into(), @@ -52,7 +53,8 @@ impl Parser { } pub(in crate::parser) fn method( - &mut self, + &self, + state: &mut State, class_type: ClassishDefinitionType, flags: Vec, ) -> ParseResult { @@ -62,13 +64,13 @@ impl Parser { if !cf.contains(&ClassFlag::Abstract) && flags.contains(&MethodFlag::Abstract) => { return Err(ParseError::AbstractModifierOnNonAbstractClassMethod( - self.current.span, + state.current.span, )); } _ => (), } - self.next(); + state.next(); let has_body = match &class_type { ClassishDefinitionType::Class(_) | ClassishDefinitionType::Trait => { @@ -78,33 +80,33 @@ impl Parser { ClassishDefinitionType::Enum | ClassishDefinitionType::AnonymousClass => true, }; - let by_ref = if self.current.kind == TokenKind::Ampersand { - self.next(); + let by_ref = if state.current.kind == TokenKind::Ampersand { + state.next(); true } else { false }; - let name = self.ident_maybe_reserved()?; + let name = self.ident_maybe_reserved(state)?; - self.lparen()?; + self.lparen(state)?; let position = position_from_flags_and_name(class_type, flags.clone(), name.clone()); - let params = self.param_list(position)?; + let params = self.param_list(state, position)?; - self.rparen()?; + self.rparen(state)?; let mut return_type = None; - if self.current.kind == TokenKind::Colon || self.config.force_type_strings { - self.colon()?; + if state.current.kind == TokenKind::Colon { + self.colon(state)?; - return_type = Some(self.type_string()?); + return_type = Some(self.type_string(state)?); } if !has_body { - self.semi()?; + self.semi(state)?; Ok(Statement::AbstractMethod { name: name.into(), @@ -114,11 +116,11 @@ impl Parser { by_ref, }) } else { - self.lbrace()?; + self.lbrace(state)?; - let body = self.block(&TokenKind::RightBrace)?; + let body = self.block(state, &TokenKind::RightBrace)?; - self.rbrace()?; + self.rbrace(state)?; Ok(Statement::Method { name: name.into(), diff --git a/src/parser/ident.rs b/src/parser/ident.rs index 21aa8f50..3000caf1 100644 --- a/src/parser/ident.rs +++ b/src/parser/ident.rs @@ -1,71 +1,78 @@ use crate::lexer::byte_string::ByteString; use crate::lexer::token::TokenKind; use crate::parser::error::ParseResult; +use crate::parser::state::State; use crate::parser::Parser; use crate::expect_token; impl Parser { /// Expect an unqualified identifier such as Foo or Bar. - pub(in crate::parser) fn ident(&mut self) -> ParseResult { + pub(in crate::parser) fn ident(&self, state: &mut State) -> ParseResult { Ok(expect_token!([ TokenKind::Identifier(identifier) => identifier, - ], self, "an identifier")) + ], state, "an identifier")) } /// Expect an unqualified or qualified identifier such as Foo, Bar or Foo\Bar. - pub(in crate::parser) fn name(&mut self) -> ParseResult { + pub(in crate::parser) fn name(&self, state: &mut State) -> ParseResult { Ok(expect_token!([ TokenKind::Identifier(identifier) => identifier, TokenKind::QualifiedIdentifier(qualified) => qualified, - ], self, "an identifier")) + ], state, "an identifier")) } /// Expect an unqualified, qualified or fully qualified identifier such as Foo, Foo\Bar or \Foo\Bar. - pub(in crate::parser) fn full_name(&mut self) -> ParseResult { + pub(in crate::parser) fn full_name(&self, state: &mut State) -> ParseResult { Ok(expect_token!([ TokenKind::Identifier(identifier) => identifier, TokenKind::QualifiedIdentifier(qualified) => qualified, TokenKind::FullyQualifiedIdentifier(fully_qualified) => fully_qualified, - ], self, "an identifier")) + ], state, "an identifier")) } - pub(in crate::parser) fn var(&mut self) -> ParseResult { + pub(in crate::parser) fn var(&self, state: &mut State) -> ParseResult { Ok(expect_token!([ TokenKind::Variable(v) => v, - ], self, "a variable")) + ], state, "a variable")) } - pub(in crate::parser) fn full_name_maybe_type_keyword(&mut self) -> ParseResult { - match self.current.kind { + pub(in crate::parser) fn full_name_maybe_type_keyword( + &self, + state: &mut State, + ) -> ParseResult { + match state.current.kind { TokenKind::Array | TokenKind::Callable => { - let r = Ok(self.current.kind.to_string().into()); - self.next(); + let r = Ok(state.current.kind.to_string().into()); + state.next(); r } - _ => self.full_name(), + _ => self.full_name(state), } } - pub(in crate::parser) fn type_with_static(&mut self) -> ParseResult { - Ok(match self.current.kind { + pub(in crate::parser) fn type_with_static(&self, state: &mut State) -> ParseResult { + Ok(match state.current.kind { TokenKind::Static | TokenKind::Null | TokenKind::True | TokenKind::False => { - let str = self.current.kind.to_string(); - self.next(); + let str = state.current.kind.to_string(); + state.next(); str.into() } - _ => self.full_name_maybe_type_keyword()?, + _ => self.full_name_maybe_type_keyword(state)?, }) } - pub(in crate::parser) fn ident_maybe_reserved(&mut self) -> ParseResult { - match self.current.kind { - _ if is_reserved_ident(&self.current.kind) => { - let string = self.current.kind.to_string().into(); - self.next(); + pub(in crate::parser) fn ident_maybe_reserved( + &self, + state: &mut State, + ) -> ParseResult { + match state.current.kind { + _ if is_reserved_ident(&state.current.kind) => { + let string = state.current.kind.to_string().into(); + state.next(); Ok(string) } - _ => self.ident(), + _ => self.ident(state), } } } diff --git a/src/parser/macros.rs b/src/parser/macros.rs index 1bc4f279..54bb0779 100644 --- a/src/parser/macros.rs +++ b/src/parser/macros.rs @@ -1,68 +1,68 @@ #[macro_export] macro_rules! peek_token { - ([ $($expected:pat => $out:expr),+ $(,)? ], $parser:expr, [ $($message:literal),+ $(,)? ]) => {{ - $parser.skip_comments(); - match $parser.current.kind.clone() { + ([ $($expected:pat => $out:expr),+ $(,)? ], $state:expr, [ $($message:literal),+ $(,)? ]) => {{ + $state.skip_comments(); + match $state.current.kind.clone() { $( $expected => $out, )+ _ => { - return $crate::expected_token_err!([ $($message,)+ ], $parser); + return $crate::expected_token_err!([ $($message,)+ ], $state); } } }}; - ([ $($expected:pat),+ $(,)? ], $parser:expr, [ $($message:literal),+ $(,)? ]) => {{ - $parser.skip_comments(); - if !matches!($parser.current.kind, $(| $expected )+) { - return $crate::expected_token_err!([ $($message,)+ ], $parser); + ([ $($expected:pat),+ $(,)? ], $state:expr, [ $($message:literal),+ $(,)? ]) => {{ + $state.skip_comments(); + if !matches!($state.current.kind, $(| $expected )+) { + return $crate::expected_token_err!([ $($message,)+ ], $state); } }}; - ([ $($expected:pat => $out:expr),+ $(,)? ], $parser:expr, $message:literal) => { - $crate::peek_token!([ $($expected => $out,)+ ], $parser, [$message]) + ([ $($expected:pat => $out:expr),+ $(,)? ], $state:expr, $message:literal) => { + $crate::peek_token!([ $($expected => $out,)+ ], $state, [$message]) }; - ([ $($expected:pat),+ $(,)? ], $parser:expr, $message:literal) => { - $crate::peek_token!([ $($expected,)+ ], $parser, [$message]) + ([ $($expected:pat),+ $(,)? ], $state:expr, $message:literal) => { + $crate::peek_token!([ $($expected,)+ ], $state, [$message]) }; } #[macro_export] macro_rules! expect_token { - ([ $($expected:pat => $out:expr),+ $(,)? ], $parser:expr, [ $($message:literal),+ $(,)? ]) => { - $crate::peek_token!([ $($expected => { $parser.next(); $out },)+ ], $parser, [$($message,)+]) + ([ $($expected:pat => $out:expr),+ $(,)? ], $state:expr, [ $($message:literal),+ $(,)? ]) => { + $crate::peek_token!([ $($expected => { $state.next(); $out },)+ ], $state, [$($message,)+]) }; - ([ $($expected:pat),+ $(,)? ], $parser:expr, [ $($message:literal),+ $(,)? ]) => { - $crate::peek_token!([ $($expected => { $parser.next(); },)+ ], $parser, [$($message,)+]) + ([ $($expected:pat),+ $(,)? ], $state:expr, [ $($message:literal),+ $(,)? ]) => { + $crate::peek_token!([ $($expected => { $state.next(); },)+ ], $state, [$($message,)+]) }; - ([ $($expected:pat => $out:expr),+ $(,)? ], $parser:expr, $message:literal) => { - $crate::peek_token!([ $($expected => { $parser.next(); $out },)+ ], $parser, [$message]) + ([ $($expected:pat => $out:expr),+ $(,)? ], $state:expr, $message:literal) => { + $crate::peek_token!([ $($expected => { $state.next(); $out },)+ ], $state, [$message]) }; - ([ $($expected:pat),+ $(,)? ], $parser:expr, $message:literal) => { - $crate::peek_token!([ $($expected => { $parser.next(); },)+ ], $parser, [$message]) + ([ $($expected:pat),+ $(,)? ], $state:expr, $message:literal) => { + $crate::peek_token!([ $($expected => { $state.next(); },)+ ], $state, [$message]) }; } #[macro_export] macro_rules! expect_literal { - ($parser:expr) => {{ - $parser.skip_comments(); - match $parser.current.kind.clone() { + ($state:expr) => {{ + $state.skip_comments(); + match $state.current.kind.clone() { TokenKind::LiteralInteger(i) => { let e = Expression::LiteralInteger { i }; - $parser.next(); + $state.next(); e } TokenKind::LiteralFloat(f) => { let e = Expression::LiteralFloat { f }; - $parser.next(); + $state.next(); e } TokenKind::LiteralString(s) => { let e = Expression::LiteralString { value: s.clone() }; - $parser.next(); + $state.next(); e } _ => { - return $crate::expected_token_err!(["a literal"], $parser); + return $crate::expected_token_err!(["a literal"], $state); } } }}; @@ -70,26 +70,26 @@ macro_rules! expect_literal { #[macro_export] macro_rules! expected_token_err { - ([ $($expected:literal),+ $(,)? ], $parser:expr $(,)?) => {{ - match &$parser.current.kind { + ([ $($expected:literal),+ $(,)? ], $state:expr $(,)?) => {{ + match &$state.current.kind { TokenKind::Eof => { Err($crate::parser::error::ParseError::ExpectedToken( vec![$($expected.into()),+], None, - $parser.current.span, + $state.current.span, )) }, _ => { Err($crate::parser::error::ParseError::ExpectedToken( vec![$($expected.into()),+], - Some($parser.current.kind.to_string()), - $parser.current.span, + Some($state.current.kind.to_string()), + $state.current.span, )) } } }}; - ($expected:literal, $parser:expr $(,)?) => { - $crate::expected_token_err!([$expected], $parser) + ($expected:literal, $state:expr $(,)?) => { + $crate::expected_token_err!([$expected], $state) }; } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 54b4d660..66884d50 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,5 +1,3 @@ -use std::vec::IntoIter; - use crate::expect_literal; use crate::expect_token; use crate::expected_token_err; @@ -16,6 +14,7 @@ use crate::parser::error::ParseResult; use crate::parser::ident::is_reserved_ident; use crate::parser::params::ParamPosition; use crate::parser::precedence::{Associativity, Precedence}; +use crate::parser::state::State; pub mod ast; pub mod error; @@ -23,7 +22,6 @@ pub mod error; mod block; mod classish; mod classish_statement; -mod comments; mod flags; mod functions; mod ident; @@ -31,88 +29,62 @@ mod macros; mod params; mod precedence; mod punc; +mod state; mod vars; -pub struct ParserConfig { - force_type_strings: bool, -} +#[derive(Debug, PartialEq, Eq, Clone, Copy, Default)] +pub struct Parser; -#[allow(clippy::derivable_impls)] -impl Default for ParserConfig { - fn default() -> Self { - Self { - force_type_strings: false, - } - } -} - -pub struct Parser { - config: ParserConfig, - pub current: Token, - pub peek: Token, - iter: IntoIter, - comments: Vec, -} - -#[allow(dead_code)] impl Parser { - pub fn new(config: Option) -> Self { - Self { - config: config.unwrap_or_default(), - current: Token::default(), - peek: Token::default(), - iter: vec![].into_iter(), - comments: vec![], - } + pub const fn new() -> Self { + Self {} } - pub fn parse(&mut self, tokens: Vec) -> ParseResult { - self.iter = tokens.into_iter(); - self.next(); - self.next(); + pub fn parse(&self, tokens: Vec) -> ParseResult { + let mut state = State::new(tokens); let mut ast = Program::new(); - while self.current.kind != TokenKind::Eof { + while state.current.kind != TokenKind::Eof { if matches!( - self.current.kind, + state.current.kind, TokenKind::OpenTag(_) | TokenKind::CloseTag ) { - self.next(); + state.next(); continue; } - self.gather_comments(); + state.gather_comments(); - if self.is_eof() { + if state.is_eof() { break; } - ast.push(self.top_level_statement()?); + ast.push(self.top_level_statement(&mut state)?); - self.clear_comments(); + state.clear_comments(); } Ok(ast.to_vec()) } - fn try_block_caught_type_string(&mut self) -> ParseResult { - let id = self.full_name()?; + fn try_block_caught_type_string(&self, state: &mut State) -> ParseResult { + let id = self.full_name(state)?; - if self.current.kind == TokenKind::Pipe { - self.next(); + if state.current.kind == TokenKind::Pipe { + state.next(); let mut types = vec![id.into()]; - while !self.is_eof() { - let id = self.full_name()?; + while !state.is_eof() { + let id = self.full_name(state)?; types.push(id.into()); - if self.current.kind != TokenKind::Pipe { + if state.current.kind != TokenKind::Pipe { break; } - self.next(); + state.next(); } return Ok(TryBlockCaughtType::Union(types)); @@ -121,81 +93,81 @@ impl Parser { Ok(TryBlockCaughtType::Identifier(id.into())) } - fn type_string(&mut self) -> ParseResult { - if self.current.kind == TokenKind::Question { - self.next(); - let t = self.type_with_static()?; + fn type_string(&self, state: &mut State) -> ParseResult { + if state.current.kind == TokenKind::Question { + state.next(); + let t = self.type_with_static(state)?; return Ok(Type::Nullable(Box::new(parse_simple_type(t)))); } - let id = self.type_with_static()?; + let id = self.type_with_static(state)?; - if self.current.kind == TokenKind::Pipe { - self.next(); + if state.current.kind == TokenKind::Pipe { + state.next(); let r#type = parse_simple_type(id); if r#type.standalone() { return Err(ParseError::StandaloneTypeUsedInCombination( r#type, - self.current.span, + state.current.span, )); } let mut types = vec![r#type]; - while !self.is_eof() { - let id = self.type_with_static()?; + while !state.is_eof() { + let id = self.type_with_static(state)?; let r#type = parse_simple_type(id); if r#type.standalone() { return Err(ParseError::StandaloneTypeUsedInCombination( r#type, - self.current.span, + state.current.span, )); } types.push(r#type); - if self.current.kind != TokenKind::Pipe { + if state.current.kind != TokenKind::Pipe { break; } else { - self.next(); + state.next(); } } return Ok(Type::Union(types)); } - if self.current.kind == TokenKind::Ampersand - && !matches!(self.peek.kind, TokenKind::Variable(_)) + if state.current.kind == TokenKind::Ampersand + && !matches!(state.peek.kind, TokenKind::Variable(_)) { - self.next(); + state.next(); let r#type = parse_simple_type(id); if r#type.standalone() { return Err(ParseError::StandaloneTypeUsedInCombination( r#type, - self.current.span, + state.current.span, )); } let mut types = vec![r#type]; - while !self.is_eof() { - let id = self.type_with_static()?; + while !state.is_eof() { + let id = self.type_with_static(state)?; let r#type = parse_simple_type(id); if r#type.standalone() { return Err(ParseError::StandaloneTypeUsedInCombination( r#type, - self.current.span, + state.current.span, )); } types.push(r#type); - if self.current.kind != TokenKind::Ampersand { + if state.current.kind != TokenKind::Ampersand { break; } else { - self.next(); + state.next(); } } @@ -205,75 +177,75 @@ impl Parser { Ok(parse_simple_type(id)) } - fn top_level_statement(&mut self) -> ParseResult { - self.skip_comments(); + fn top_level_statement(&self, state: &mut State) -> ParseResult { + state.skip_comments(); - let statement = match &self.current.kind { + let statement = match &state.current.kind { TokenKind::Namespace => { - self.next(); + state.next(); let mut braced = false; - let name = if self.current.kind == TokenKind::LeftBrace { + let name = if state.current.kind == TokenKind::LeftBrace { braced = true; - self.lbrace()?; + self.lbrace(state)?; None } else { - Some(self.name()?) + Some(self.name(state)?) }; if name.is_some() { - if self.current.kind == TokenKind::LeftBrace { + if state.current.kind == TokenKind::LeftBrace { braced = true; - self.next(); + state.next(); } else { - self.semi()?; + self.semi(state)?; } } let body = if braced { - self.block(&TokenKind::RightBrace)? + self.block(state, &TokenKind::RightBrace)? } else { let mut body = Block::new(); - while !self.is_eof() { - body.push(self.top_level_statement()?); + while !state.is_eof() { + body.push(self.top_level_statement(state)?); } body }; if braced { - self.rbrace()?; + self.rbrace(state)?; } Statement::Namespace { name, body } } TokenKind::Use => { - self.next(); + state.next(); - let kind = match self.current.kind { + let kind = match state.current.kind { TokenKind::Function => { - self.next(); + state.next(); UseKind::Function } TokenKind::Const => { - self.next(); + state.next(); UseKind::Const } _ => UseKind::Normal, }; - if self.peek.kind == TokenKind::LeftBrace { - let prefix = self.full_name()?; - self.next(); + if state.peek.kind == TokenKind::LeftBrace { + let prefix = self.full_name(state)?; + state.next(); let mut uses = Vec::new(); - while self.current.kind != TokenKind::RightBrace { - let name = self.full_name()?; + while state.current.kind != TokenKind::RightBrace { + let name = self.full_name(state)?; let mut alias = None; - if self.current.kind == TokenKind::As { - self.next(); - alias = Some(self.ident()?.into()); + if state.current.kind == TokenKind::As { + state.next(); + alias = Some(self.ident(state)?.into()); } uses.push(Use { @@ -281,14 +253,14 @@ impl Parser { alias, }); - if self.current.kind == TokenKind::Comma { - self.next(); + if state.current.kind == TokenKind::Comma { + state.next(); continue; } } - self.rbrace()?; - self.semi()?; + self.rbrace(state)?; + self.semi(state)?; Statement::GroupUse { prefix: prefix.into(), @@ -297,13 +269,13 @@ impl Parser { } } else { let mut uses = Vec::new(); - while !self.is_eof() { - let name = self.full_name()?; + while !state.is_eof() { + let name = self.full_name(state)?; let mut alias = None; - if self.current.kind == TokenKind::As { - self.next(); - alias = Some(self.ident()?.into()); + if state.current.kind == TokenKind::As { + state.next(); + alias = Some(self.ident(state)?.into()); } uses.push(Use { @@ -311,12 +283,12 @@ impl Parser { alias, }); - if self.current.kind == TokenKind::Comma { - self.next(); + if state.current.kind == TokenKind::Comma { + state.next(); continue; } - self.semi()?; + self.semi(state)?; break; } @@ -324,34 +296,34 @@ impl Parser { } } TokenKind::Const => { - self.next(); + state.next(); let mut constants = vec![]; - while self.current.kind != TokenKind::SemiColon { - let name = self.ident()?; + while state.current.kind != TokenKind::SemiColon { + let name = self.ident(state)?; - expect_token!([TokenKind::Equals], self, "`=`"); + expect_token!([TokenKind::Equals], state, "`=`"); - let value = self.expression(Precedence::Lowest)?; + let value = self.expression(state, Precedence::Lowest)?; constants.push(Constant { name: name.into(), value, }); - self.optional_comma()?; + self.optional_comma(state)?; } - self.semi()?; + self.semi(state)?; Statement::Constant { constants } } TokenKind::HaltCompiler => { - self.next(); + state.next(); - let content = if let TokenKind::InlineHtml(content) = self.current.kind.clone() { - self.next(); + let content = if let TokenKind::InlineHtml(content) = state.current.kind.clone() { + state.next(); Some(content) } else { None @@ -359,47 +331,47 @@ impl Parser { Statement::HaltCompiler { content } } - _ => self.statement()?, + _ => self.statement(state)?, }; - self.clear_comments(); + state.clear_comments(); Ok(statement) } - fn statement(&mut self) -> ParseResult { - self.skip_comments(); + fn statement(&self, state: &mut State) -> ParseResult { + state.skip_comments(); - let statement = match &self.current.kind { + let statement = match &state.current.kind { TokenKind::Goto => { - self.next(); + state.next(); - let label = self.ident()?.into(); + let label = self.ident(state)?.into(); - self.semi()?; + self.semi(state)?; Statement::Goto { label } } - TokenKind::Identifier(_) if self.peek.kind == TokenKind::Colon => { - let label = self.ident()?.into(); + TokenKind::Identifier(_) if state.peek.kind == TokenKind::Colon => { + let label = self.ident(state)?.into(); - self.colon()?; + self.colon(state)?; Statement::Label { label } } TokenKind::Declare => { - self.next(); - self.lparen()?; + state.next(); + self.lparen(state)?; let mut declares = Vec::new(); - while self.current.kind != TokenKind::RightParen { - let key = self.ident()?; + while state.current.kind != TokenKind::RightParen { + let key = self.ident(state)?; - expect_token!([TokenKind::Equals], self, "`=`"); + expect_token!([TokenKind::Equals], state, "`=`"); - let value = expect_literal!(self); + let value = expect_literal!(state); - self.optional_comma()?; + self.optional_comma(state)?; declares.push(DeclareItem { key: key.into(), @@ -407,113 +379,115 @@ impl Parser { }); } - self.rparen()?; + self.rparen(state)?; - let body = if self.current.kind == TokenKind::LeftBrace { - self.next(); - let b = self.block(&TokenKind::RightBrace)?; - self.rbrace()?; + let body = if state.current.kind == TokenKind::LeftBrace { + state.next(); + let b = self.block(state, &TokenKind::RightBrace)?; + self.rbrace(state)?; b - } else if self.current.kind == TokenKind::Colon { - self.colon()?; - let b = self.block(&TokenKind::EndDeclare)?; - expect_token!([TokenKind::EndDeclare], self, "`enddeclare`"); - self.semi()?; + } else if state.current.kind == TokenKind::Colon { + self.colon(state)?; + let b = self.block(state, &TokenKind::EndDeclare)?; + expect_token!([TokenKind::EndDeclare], state, "`enddeclare`"); + self.semi(state)?; b } else { - self.semi()?; + self.semi(state)?; vec![] }; Statement::Declare { declares, body } } TokenKind::Global => { - self.next(); + state.next(); let mut vars = vec![]; - while self.current.kind != TokenKind::SemiColon { - vars.push(self.var()?.into()); + while state.current.kind != TokenKind::SemiColon { + vars.push(self.var(state)?.into()); - self.optional_comma()?; + self.optional_comma(state)?; } - self.semi()?; + self.semi(state)?; Statement::Global { vars } } - TokenKind::Static if matches!(self.peek.kind, TokenKind::Variable(_)) => { - self.next(); + TokenKind::Static if matches!(state.peek.kind, TokenKind::Variable(_)) => { + state.next(); let mut vars = vec![]; - while self.current.kind != TokenKind::SemiColon { - let var = Expression::Variable { name: self.var()? }; + while state.current.kind != TokenKind::SemiColon { + let var = Expression::Variable { + name: self.var(state)?, + }; let mut default = None; - if self.current.kind == TokenKind::Equals { - expect_token!([TokenKind::Equals], self, "`=`"); - default = Some(self.expression(Precedence::Lowest)?); + if state.current.kind == TokenKind::Equals { + expect_token!([TokenKind::Equals], state, "`=`"); + default = Some(self.expression(state, Precedence::Lowest)?); } - self.optional_comma()?; + self.optional_comma(state)?; vars.push(StaticVar { var, default }) } - self.semi()?; + self.semi(state)?; Statement::Static { vars } } TokenKind::InlineHtml(html) => { let s = Statement::InlineHtml(html.clone()); - self.next(); + state.next(); s } TokenKind::Comment(comment) => { let s = Statement::Comment { comment: comment.clone(), }; - self.next(); + state.next(); s } TokenKind::Do => { - self.next(); + state.next(); - self.lbrace()?; - let body = self.block(&TokenKind::RightBrace)?; - self.rbrace()?; + self.lbrace(state)?; + let body = self.block(state, &TokenKind::RightBrace)?; + self.rbrace(state)?; - expect_token!([TokenKind::While], self, "`while`"); + expect_token!([TokenKind::While], state, "`while`"); - self.lparen()?; - let condition = self.expression(Precedence::Lowest)?; - self.rparen()?; - self.semi()?; + self.lparen(state)?; + let condition = self.expression(state, Precedence::Lowest)?; + self.rparen(state)?; + self.semi(state)?; Statement::DoWhile { condition, body } } TokenKind::While => { - self.next(); - self.lparen()?; + state.next(); + self.lparen(state)?; - let condition = self.expression(Precedence::Lowest)?; + let condition = self.expression(state, Precedence::Lowest)?; - self.rparen()?; + self.rparen(state)?; - let end_token = if self.current.kind == TokenKind::Colon { - self.colon()?; + let end_token = if state.current.kind == TokenKind::Colon { + self.colon(state)?; TokenKind::EndWhile } else { - self.lbrace()?; + self.lbrace(state)?; TokenKind::RightBrace }; - let body = self.block(&end_token)?; + let body = self.block(state, &end_token)?; if end_token == TokenKind::RightBrace { - self.rbrace()?; + self.rbrace(state)?; } else { - expect_token!([TokenKind::EndWhile], self, "`endwhile`"); - self.semi()?; + expect_token!([TokenKind::EndWhile], state, "`endwhile`"); + self.semi(state)?; } Statement::While { condition, body } @@ -522,54 +496,54 @@ impl Parser { | TokenKind::IncludeOnce | TokenKind::Require | TokenKind::RequireOnce => { - let kind: IncludeKind = (&self.current.kind).into(); - self.next(); + let kind: IncludeKind = (&state.current.kind).into(); + state.next(); - let path = self.expression(Precedence::Lowest)?; + let path = self.expression(state, Precedence::Lowest)?; - self.semi()?; + self.semi(state)?; Statement::Include { kind, path } } TokenKind::For => { - self.next(); + state.next(); - self.lparen()?; + self.lparen(state)?; let mut init = None; - if self.current.kind != TokenKind::SemiColon { - init = Some(self.expression(Precedence::Lowest)?); + if state.current.kind != TokenKind::SemiColon { + init = Some(self.expression(state, Precedence::Lowest)?); } - self.semi()?; + self.semi(state)?; let mut condition = None; - if self.current.kind != TokenKind::SemiColon { - condition = Some(self.expression(Precedence::Lowest)?); + if state.current.kind != TokenKind::SemiColon { + condition = Some(self.expression(state, Precedence::Lowest)?); } - self.semi()?; + self.semi(state)?; let mut r#loop = None; - if self.current.kind != TokenKind::RightParen { - r#loop = Some(self.expression(Precedence::Lowest)?); + if state.current.kind != TokenKind::RightParen { + r#loop = Some(self.expression(state, Precedence::Lowest)?); } - self.rparen()?; + self.rparen(state)?; - let end_token = if self.current.kind == TokenKind::Colon { - self.colon()?; + let end_token = if state.current.kind == TokenKind::Colon { + self.colon(state)?; TokenKind::EndFor } else { - self.lbrace()?; + self.lbrace(state)?; TokenKind::RightBrace }; - let then = self.block(&end_token)?; + let then = self.block(state, &end_token)?; if end_token == TokenKind::EndFor { - expect_token!([TokenKind::EndFor], self, "`endfor`"); - self.semi()?; + expect_token!([TokenKind::EndFor], state, "`endfor`"); + self.semi(state)?; } else { - self.rbrace()?; + self.rbrace(state)?; }; Statement::For { @@ -580,52 +554,52 @@ impl Parser { } } TokenKind::Foreach => { - self.next(); + state.next(); - self.lparen()?; + self.lparen(state)?; - let expr = self.expression(Precedence::Lowest)?; + let expr = self.expression(state, Precedence::Lowest)?; - expect_token!([TokenKind::As], self, ["`as`"]); + expect_token!([TokenKind::As], state, ["`as`"]); - let mut by_ref = self.current.kind == TokenKind::Ampersand; + let mut by_ref = state.current.kind == TokenKind::Ampersand; if by_ref { - self.next(); + state.next(); } let mut key_var = None; - let mut value_var = self.expression(Precedence::Lowest)?; + let mut value_var = self.expression(state, Precedence::Lowest)?; - if self.current.kind == TokenKind::DoubleArrow { - self.next(); + if state.current.kind == TokenKind::DoubleArrow { + state.next(); key_var = Some(value_var.clone()); - by_ref = self.current.kind == TokenKind::Ampersand; + by_ref = state.current.kind == TokenKind::Ampersand; if by_ref { - self.next(); + state.next(); } - value_var = self.expression(Precedence::Lowest)?; + value_var = self.expression(state, Precedence::Lowest)?; } - self.rparen()?; + self.rparen(state)?; - let end_token = if self.current.kind == TokenKind::Colon { - self.colon()?; + let end_token = if state.current.kind == TokenKind::Colon { + self.colon(state)?; TokenKind::EndForeach } else { - self.lbrace()?; + self.lbrace(state)?; TokenKind::RightBrace }; - let body = self.block(&end_token)?; + let body = self.block(state, &end_token)?; if end_token == TokenKind::EndForeach { - expect_token!([TokenKind::EndForeach], self, "`endforeach`"); - self.semi()?; + expect_token!([TokenKind::EndForeach], state, "`endforeach`"); + self.semi(state)?; } else { - self.rbrace()?; + self.rbrace(state)?; } Statement::Foreach { @@ -636,54 +610,54 @@ impl Parser { body, } } - TokenKind::Abstract => self.class_definition()?, - TokenKind::Readonly => self.class_definition()?, - TokenKind::Final => self.class_definition()?, - TokenKind::Class => self.class_definition()?, - TokenKind::Interface => self.interface_definition()?, - TokenKind::Trait => self.trait_definition()?, - TokenKind::Enum => self.enum_definition()?, + TokenKind::Abstract => self.class_definition(state)?, + TokenKind::Readonly => self.class_definition(state)?, + TokenKind::Final => self.class_definition(state)?, + TokenKind::Class => self.class_definition(state)?, + TokenKind::Interface => self.interface_definition(state)?, + TokenKind::Trait => self.trait_definition(state)?, + TokenKind::Enum => self.enum_definition(state)?, TokenKind::Switch => { - self.next(); + state.next(); - self.lparen()?; + self.lparen(state)?; - let condition = self.expression(Precedence::Lowest)?; + let condition = self.expression(state, Precedence::Lowest)?; - self.rparen()?; + self.rparen(state)?; - let end_token = if self.current.kind == TokenKind::Colon { - self.colon()?; + let end_token = if state.current.kind == TokenKind::Colon { + self.colon(state)?; TokenKind::EndSwitch } else { - self.lbrace()?; + self.lbrace(state)?; TokenKind::RightBrace }; let mut cases = Vec::new(); loop { - if self.current.kind == end_token { + if state.current.kind == end_token { break; } - match self.current.kind { + match state.current.kind { TokenKind::Case => { - self.next(); + state.next(); - let condition = self.expression(Precedence::Lowest)?; + let condition = self.expression(state, Precedence::Lowest)?; expect_token!( [TokenKind::Colon, TokenKind::SemiColon], - self, + state, ["`:`", "`;`"] ); let mut body = Block::new(); - while self.current.kind != TokenKind::Case - && self.current.kind != TokenKind::Default - && self.current.kind != TokenKind::RightBrace + while state.current.kind != TokenKind::Case + && state.current.kind != TokenKind::Default + && state.current.kind != TokenKind::RightBrace { - body.push(self.statement()?); + body.push(self.statement(state)?); } cases.push(Case { @@ -692,21 +666,21 @@ impl Parser { }); } TokenKind::Default => { - self.next(); + state.next(); expect_token!( [TokenKind::Colon, TokenKind::SemiColon], - self, + state, ["`:`", "`;`"] ); let mut body = Block::new(); - while self.current.kind != TokenKind::Case - && self.current.kind != TokenKind::Default - && self.current.kind != TokenKind::RightBrace + while state.current.kind != TokenKind::Case + && state.current.kind != TokenKind::Default + && state.current.kind != TokenKind::RightBrace { - body.push(self.statement()?); + body.push(self.statement(state)?); } cases.push(Case { @@ -715,81 +689,81 @@ impl Parser { }); } _ => { - return expected_token_err!(["`case`", "`default`"], self); + return expected_token_err!(["`case`", "`default`"], state); } } } if end_token == TokenKind::EndSwitch { - expect_token!([TokenKind::EndSwitch], self, ["`endswitch`"]); - self.semi()?; + expect_token!([TokenKind::EndSwitch], state, ["`endswitch`"]); + self.semi(state)?; } else { - self.rbrace()?; + self.rbrace(state)?; } Statement::Switch { condition, cases } } TokenKind::If => { - self.next(); + state.next(); - self.lparen()?; + self.lparen(state)?; - let condition = self.expression(Precedence::Lowest)?; + let condition = self.expression(state, Precedence::Lowest)?; - self.rparen()?; + self.rparen(state)?; // FIXME: Tidy up duplication and make the intent a bit clearer. - match self.current.kind { + match state.current.kind { TokenKind::Colon => { - self.next(); + state.next(); let mut then = vec![]; while !matches!( - self.current.kind, + state.current.kind, TokenKind::ElseIf | TokenKind::Else | TokenKind::EndIf ) { - then.push(self.statement()?); + then.push(self.statement(state)?); } let mut else_ifs = vec![]; loop { - if self.current.kind != TokenKind::ElseIf { + if state.current.kind != TokenKind::ElseIf { break; } - self.next(); + state.next(); - self.lparen()?; - let condition = self.expression(Precedence::Lowest)?; - self.rparen()?; + self.lparen(state)?; + let condition = self.expression(state, Precedence::Lowest)?; + self.rparen(state)?; - self.colon()?; + self.colon(state)?; let mut body = vec![]; while !matches!( - self.current.kind, + state.current.kind, TokenKind::ElseIf | TokenKind::Else | TokenKind::EndIf ) { - body.push(self.statement()?); + body.push(self.statement(state)?); } else_ifs.push(ElseIf { condition, body }); } let mut r#else = None; - if self.current.kind == TokenKind::Else { - self.next(); - self.colon()?; + if state.current.kind == TokenKind::Else { + state.next(); + self.colon(state)?; let mut body = vec![]; - while self.current.kind != TokenKind::EndIf { - body.push(self.statement()?); + while state.current.kind != TokenKind::EndIf { + body.push(self.statement(state)?); } r#else = Some(body); } - expect_token!([TokenKind::EndIf], self, ["`endif`"]); - self.semi()?; + expect_token!([TokenKind::EndIf], state, ["`endif`"]); + self.semi(state)?; Statement::If { condition, @@ -799,36 +773,36 @@ impl Parser { } } _ => { - let body_end_token = if self.current.kind == TokenKind::LeftBrace { - self.next(); + let body_end_token = if state.current.kind == TokenKind::LeftBrace { + state.next(); TokenKind::RightBrace } else { TokenKind::SemiColon }; - let then = self.block(&body_end_token)?; + let then = self.block(state, &body_end_token)?; if body_end_token == TokenKind::RightBrace { - self.rbrace()?; + self.rbrace(state)?; } let mut else_ifs: Vec = Vec::new(); loop { - if self.current.kind == TokenKind::ElseIf { - self.next(); + if state.current.kind == TokenKind::ElseIf { + state.next(); - self.lparen()?; + self.lparen(state)?; - let condition = self.expression(Precedence::Lowest)?; + let condition = self.expression(state, Precedence::Lowest)?; - self.rparen()?; + self.rparen(state)?; - self.lbrace()?; + self.lbrace(state)?; - let body = self.block(&TokenKind::RightBrace)?; + let body = self.block(state, &TokenKind::RightBrace)?; - self.rbrace()?; + self.rbrace(state)?; else_ifs.push(ElseIf { condition, body }); } else { @@ -836,7 +810,7 @@ impl Parser { } } - if self.current.kind != TokenKind::Else { + if state.current.kind != TokenKind::Else { return Ok(Statement::If { condition, then, @@ -845,13 +819,13 @@ impl Parser { }); } - expect_token!([TokenKind::Else], self, ["`else`"]); + expect_token!([TokenKind::Else], state, ["`else`"]); - self.lbrace()?; + self.lbrace(state)?; - let r#else = self.block(&TokenKind::RightBrace)?; + let r#else = self.block(state, &TokenKind::RightBrace)?; - self.rbrace()?; + self.rbrace(state)?; Statement::If { condition, @@ -863,72 +837,72 @@ impl Parser { } } TokenKind::Echo => { - self.next(); + state.next(); let mut values = Vec::new(); - while !self.is_eof() && self.current.kind != TokenKind::SemiColon { - values.push(self.expression(Precedence::Lowest)?); + while !state.is_eof() && state.current.kind != TokenKind::SemiColon { + values.push(self.expression(state, Precedence::Lowest)?); - self.optional_comma()?; + self.optional_comma(state)?; } - self.semi()?; + self.semi(state)?; Statement::Echo { values } } TokenKind::Continue => { - self.next(); + state.next(); let mut num = None; - if self.current.kind != TokenKind::SemiColon { - num = Some(self.expression(Precedence::Lowest)?); + if state.current.kind != TokenKind::SemiColon { + num = Some(self.expression(state, Precedence::Lowest)?); } - self.semi()?; + self.semi(state)?; Statement::Continue { num } } TokenKind::Break => { - self.next(); + state.next(); let mut num = None; - if self.current.kind != TokenKind::SemiColon { - num = Some(self.expression(Precedence::Lowest)?); + if state.current.kind != TokenKind::SemiColon { + num = Some(self.expression(state, Precedence::Lowest)?); } - self.semi()?; + self.semi(state)?; Statement::Break { num } } TokenKind::Return => { - self.next(); + state.next(); if let Token { kind: TokenKind::SemiColon, .. - } = self.current + } = state.current { let ret = Statement::Return { value: None }; - self.semi()?; + self.semi(state)?; ret } else { let ret = Statement::Return { - value: self.expression(Precedence::Lowest).ok(), + value: self.expression(state, Precedence::Lowest).ok(), }; - self.semi()?; + self.semi(state)?; ret } } TokenKind::Function if matches!( - self.peek.kind, + state.peek.kind, TokenKind::Identifier(_) | TokenKind::Ampersand ) => { // FIXME: This is incredibly hacky but we don't have a way to look at // the next N tokens right now. We could probably do with a `peek_buf()` // method like the Lexer has. - if self.peek.kind == TokenKind::Ampersand { - let mut cloned = self.iter.clone(); - if let Some((index, _)) = self.iter.clone().enumerate().next() { + if state.peek.kind == TokenKind::Ampersand { + let mut cloned = state.iter.clone(); + if let Some((index, _)) = state.iter.clone().enumerate().next() { if !matches!( cloned.nth(index), Some(Token { @@ -936,68 +910,68 @@ impl Parser { .. }) ) { - let expr = self.expression(Precedence::Lowest)?; + let expr = self.expression(state, Precedence::Lowest)?; - self.semi()?; + self.semi(state)?; return Ok(Statement::Expression { expr }); } } - self.function()? + self.function(state)? } else { - self.function()? + self.function(state)? } } TokenKind::SemiColon => { - self.next(); + state.next(); Statement::Noop } TokenKind::Try => { - let start_span = self.current.span; + let start_span = state.current.span; - self.next(); - self.lbrace()?; + state.next(); + self.lbrace(state)?; - let body = self.block(&TokenKind::RightBrace)?; + let body = self.block(state, &TokenKind::RightBrace)?; - self.rbrace()?; + self.rbrace(state)?; let mut catches = Vec::new(); loop { - if self.current.kind != TokenKind::Catch { + if state.current.kind != TokenKind::Catch { break; } - self.next(); - self.lparen()?; + state.next(); + self.lparen(state)?; - let types = self.try_block_caught_type_string()?; - let var = if self.current.kind == TokenKind::RightParen { + let types = self.try_block_caught_type_string(state)?; + let var = if state.current.kind == TokenKind::RightParen { None } else { - Some(self.expression(Precedence::Lowest)?) + Some(self.expression(state, Precedence::Lowest)?) }; - self.rparen()?; - self.lbrace()?; + self.rparen(state)?; + self.lbrace(state)?; - let body = self.block(&TokenKind::RightBrace)?; + let body = self.block(state, &TokenKind::RightBrace)?; - self.rbrace()?; + self.rbrace(state)?; catches.push(Catch { types, var, body }) } let mut finally = None; - if self.current.kind == TokenKind::Finally { - self.next(); - self.lbrace()?; + if state.current.kind == TokenKind::Finally { + state.next(); + self.lbrace(state)?; - finally = Some(self.block(&TokenKind::RightBrace)?); + finally = Some(self.block(state, &TokenKind::RightBrace)?); - self.rbrace()?; + self.rbrace(state)?; } if catches.is_empty() && finally.is_none() { @@ -1011,46 +985,46 @@ impl Parser { } } TokenKind::LeftBrace => { - self.next(); - let body = self.block(&TokenKind::RightBrace)?; - self.rbrace()?; + state.next(); + let body = self.block(state, &TokenKind::RightBrace)?; + self.rbrace(state)?; Statement::Block { body } } _ => { - let expr = self.expression(Precedence::Lowest)?; + let expr = self.expression(state, Precedence::Lowest)?; - self.semi()?; + self.semi(state)?; Statement::Expression { expr } } }; - self.skip_comments(); + state.skip_comments(); Ok(statement) } - fn expression(&mut self, precedence: Precedence) -> ParseResult { - if self.is_eof() { + fn expression(&self, state: &mut State, precedence: Precedence) -> ParseResult { + if state.is_eof() { return Err(ParseError::UnexpectedEndOfFile); } - self.skip_comments(); + state.skip_comments(); - let mut left = match &self.current.kind { + let mut left = match &state.current.kind { TokenKind::Throw => { - self.next(); + state.next(); - let value = self.expression(Precedence::Lowest)?; + let value = self.expression(state, Precedence::Lowest)?; Expression::Throw { value: Box::new(value), } } TokenKind::Yield => { - self.next(); + state.next(); - if self.current.kind == TokenKind::SemiColon { + if state.current.kind == TokenKind::SemiColon { Expression::Yield { key: None, value: None, @@ -1058,22 +1032,25 @@ impl Parser { } else { let mut from = false; - if self.current.kind == TokenKind::From { - self.next(); + if state.current.kind == TokenKind::From { + state.next(); from = true; } let mut key = None; - let mut value = Box::new(self.expression(if from { - Precedence::YieldFrom - } else { - Precedence::Yield - })?); + let mut value = Box::new(self.expression( + state, + if from { + Precedence::YieldFrom + } else { + Precedence::Yield + }, + )?); - if self.current.kind == TokenKind::DoubleArrow && !from { - self.next(); + if state.current.kind == TokenKind::DoubleArrow && !from { + state.next(); key = Some(value.clone()); - value = Box::new(self.expression(Precedence::Yield)?); + value = Box::new(self.expression(state, Precedence::Yield)?); } if from { @@ -1087,9 +1064,9 @@ impl Parser { } } TokenKind::Clone => { - self.next(); + state.next(); - let target = self.expression(Precedence::CloneOrNew)?; + let target = self.expression(state, Precedence::CloneOrNew)?; Expression::Clone { target: Box::new(target), @@ -1097,88 +1074,88 @@ impl Parser { } TokenKind::Variable(v) => { let e = Expression::Variable { name: v.clone() }; - self.next(); + state.next(); e } TokenKind::LiteralInteger(i) => { let e = Expression::LiteralInteger { i: *i }; - self.next(); + state.next(); e } TokenKind::LiteralFloat(f) => { let f = Expression::LiteralFloat { f: *f }; - self.next(); + state.next(); f } TokenKind::Identifier(i) | TokenKind::QualifiedIdentifier(i) | TokenKind::FullyQualifiedIdentifier(i) => { let e = Expression::Identifier { name: i.clone() }; - self.next(); + state.next(); e } - TokenKind::Static if matches!(self.peek.kind, TokenKind::DoubleColon) => { - self.next(); + TokenKind::Static if matches!(state.peek.kind, TokenKind::DoubleColon) => { + state.next(); Expression::Static } TokenKind::LiteralString(s) => { let e = Expression::LiteralString { value: s.clone() }; - self.next(); + state.next(); e } - TokenKind::StringPart(_) => self.interpolated_string()?, + TokenKind::StringPart(_) => self.interpolated_string(state)?, TokenKind::True => { let e = Expression::Bool { value: true }; - self.next(); + state.next(); e } TokenKind::False => { let e = Expression::Bool { value: false }; - self.next(); + state.next(); e } TokenKind::Null => { - self.next(); + state.next(); Expression::Null } TokenKind::LeftParen => { - self.next(); + state.next(); - let e = self.expression(Precedence::Lowest)?; + let e = self.expression(state, Precedence::Lowest)?; - self.rparen()?; + self.rparen(state)?; e } TokenKind::Match => { - self.next(); - self.lparen()?; + state.next(); + self.lparen(state)?; - let condition = Box::new(self.expression(Precedence::Lowest)?); + let condition = Box::new(self.expression(state, Precedence::Lowest)?); - self.rparen()?; - self.lbrace()?; + self.rparen(state)?; + self.lbrace(state)?; let mut arms = Vec::new(); - while self.current.kind != TokenKind::RightBrace { + while state.current.kind != TokenKind::RightBrace { let mut conditions = Vec::new(); - while self.current.kind != TokenKind::DoubleArrow { - if self.current.kind == TokenKind::Default { - self.next(); + while state.current.kind != TokenKind::DoubleArrow { + if state.current.kind == TokenKind::Default { + state.next(); break; } - conditions.push(self.expression(Precedence::Lowest)?); + conditions.push(self.expression(state, Precedence::Lowest)?); - self.optional_comma()?; + self.optional_comma(state)?; } - expect_token!([TokenKind::DoubleArrow], self, "`=>`"); + expect_token!([TokenKind::DoubleArrow], state, "`=>`"); - let body = self.expression(Precedence::Lowest)?; + let body = self.expression(state, Precedence::Lowest)?; - self.optional_comma()?; + self.optional_comma(state)?; arms.push(MatchArm { conditions: if conditions.is_empty() { @@ -1190,94 +1167,94 @@ impl Parser { }) } - self.rbrace()?; + self.rbrace(state)?; Expression::Match { condition, arms } } TokenKind::Array => { let mut items = vec![]; - self.next(); + state.next(); - self.lparen()?; + self.lparen(state)?; - while self.current.kind != TokenKind::RightParen { + while state.current.kind != TokenKind::RightParen { let mut key = None; - let unpack = if self.current.kind == TokenKind::Ellipsis { - self.next(); + let unpack = if state.current.kind == TokenKind::Ellipsis { + state.next(); true } else { false }; - let mut value = self.expression(Precedence::Lowest)?; + let mut value = self.expression(state, Precedence::Lowest)?; - if self.current.kind == TokenKind::DoubleArrow { - self.next(); + if state.current.kind == TokenKind::DoubleArrow { + state.next(); key = Some(value); - value = self.expression(Precedence::Lowest)?; + value = self.expression(state, Precedence::Lowest)?; } items.push(ArrayItem { key, value, unpack }); - self.optional_comma()?; + self.optional_comma(state)?; - self.skip_comments(); + state.skip_comments(); } - self.rparen()?; + self.rparen(state)?; Expression::Array { items } } TokenKind::LeftBracket => { let mut items = Vec::new(); - self.next(); + state.next(); - self.skip_comments(); + state.skip_comments(); - while self.current.kind != TokenKind::RightBracket { - if self.current.kind == TokenKind::Comma { + while state.current.kind != TokenKind::RightBracket { + if state.current.kind == TokenKind::Comma { items.push(ArrayItem { key: None, value: Expression::Empty, unpack: false, }); - self.next(); + state.next(); continue; } let mut key = None; - let unpack = if self.current.kind == TokenKind::Ellipsis { - self.next(); + let unpack = if state.current.kind == TokenKind::Ellipsis { + state.next(); true } else { false }; - let mut value = self.expression(Precedence::Lowest)?; + let mut value = self.expression(state, Precedence::Lowest)?; - if self.current.kind == TokenKind::DoubleArrow { - self.next(); + if state.current.kind == TokenKind::DoubleArrow { + state.next(); key = Some(value); - value = self.expression(Precedence::Lowest)?; + value = self.expression(state, Precedence::Lowest)?; } items.push(ArrayItem { key, value, unpack }); - self.optional_comma()?; + self.optional_comma(state)?; - self.skip_comments(); + state.skip_comments(); } - self.rbracket()?; + self.rbracket(state)?; Expression::Array { items } } - TokenKind::Static if matches!(self.peek.kind, TokenKind::Function | TokenKind::Fn) => { - self.next(); + TokenKind::Static if matches!(state.peek.kind, TokenKind::Function | TokenKind::Fn) => { + state.next(); - match self.expression(Precedence::Lowest)? { + match self.expression(state, Precedence::Lowest)? { Expression::Closure { params, uses, @@ -1310,33 +1287,33 @@ impl Parser { } } TokenKind::Function => { - self.next(); + state.next(); - let by_ref = if self.current.kind == TokenKind::Ampersand { - self.next(); + let by_ref = if state.current.kind == TokenKind::Ampersand { + state.next(); true } else { false }; - self.lparen()?; + self.lparen(state)?; - let params = self.param_list(ParamPosition::Function)?; + let params = self.param_list(state, ParamPosition::Function)?; - self.rparen()?; + self.rparen(state)?; let mut uses = vec![]; - if self.current.kind == TokenKind::Use { - self.next(); + if state.current.kind == TokenKind::Use { + state.next(); - self.lparen()?; + self.lparen(state)?; - while self.current.kind != TokenKind::RightParen { - let var = match self.current.kind { + while state.current.kind != TokenKind::RightParen { + let var = match state.current.kind { TokenKind::Ampersand => { - self.next(); + state.next(); - match self.expression(Precedence::Lowest)? { + match self.expression(state, Precedence::Lowest)? { s @ Expression::Variable { .. } => ClosureUse { var: s, by_ref: true, @@ -1344,12 +1321,12 @@ impl Parser { _ => { return Err(ParseError::UnexpectedToken( "expected variable".into(), - self.current.span, + state.current.span, )) } } } - _ => match self.expression(Precedence::Lowest)? { + _ => match self.expression(state, Precedence::Lowest)? { s @ Expression::Variable { .. } => ClosureUse { var: s, by_ref: false, @@ -1357,7 +1334,7 @@ impl Parser { _ => { return Err(ParseError::UnexpectedToken( "expected variable".into(), - self.current.span, + state.current.span, )) } }, @@ -1365,24 +1342,24 @@ impl Parser { uses.push(var); - self.optional_comma()?; + self.optional_comma(state)?; } - self.rparen()?; + self.rparen(state)?; } let mut return_type = None; - if self.current.kind == TokenKind::Colon || self.config.force_type_strings { - self.colon()?; + if state.current.kind == TokenKind::Colon { + self.colon(state)?; - return_type = Some(self.type_string()?); + return_type = Some(self.type_string(state)?); } - self.lbrace()?; + self.lbrace(state)?; - let body = self.block(&TokenKind::RightBrace)?; + let body = self.block(state, &TokenKind::RightBrace)?; - self.rbrace()?; + self.rbrace(state)?; Expression::Closure { params, @@ -1394,32 +1371,32 @@ impl Parser { } } TokenKind::Fn => { - self.next(); + state.next(); - let by_ref = if self.current.kind == TokenKind::Ampersand { - self.next(); + let by_ref = if state.current.kind == TokenKind::Ampersand { + state.next(); true } else { false }; - self.lparen()?; + self.lparen(state)?; - let params = self.param_list(ParamPosition::Function)?; + let params = self.param_list(state, ParamPosition::Function)?; - self.rparen()?; + self.rparen(state)?; let mut return_type = None; - if self.current.kind == TokenKind::Colon || self.config.force_type_strings { - self.colon()?; + if state.current.kind == TokenKind::Colon { + self.colon(state)?; - return_type = Some(self.type_string()?); + return_type = Some(self.type_string(state)?); } - expect_token!([TokenKind::DoubleArrow], self, ["`=>`"]); + expect_token!([TokenKind::DoubleArrow], state, ["`=>`"]); - let value = self.expression(Precedence::Lowest)?; + let value = self.expression(state, Precedence::Lowest)?; Expression::ArrowFunction { params, @@ -1429,21 +1406,21 @@ impl Parser { r#static: false, } } - TokenKind::New if self.peek.kind == TokenKind::Class => { - self.anonymous_class_definition()? + TokenKind::New if state.peek.kind == TokenKind::Class => { + self.anonymous_class_definition(state)? } TokenKind::New => { - self.next(); + state.next(); let mut args = vec![]; - let target = self.expression(Precedence::CloneOrNew)?; + let target = self.expression(state, Precedence::CloneOrNew)?; - if self.current.kind == TokenKind::LeftParen { - self.lparen()?; + if state.current.kind == TokenKind::LeftParen { + self.lparen(state)?; - args = self.args_list()?; + args = self.args_list(state)?; - self.rparen()?; + self.rparen(state)?; } Expression::New { @@ -1452,45 +1429,45 @@ impl Parser { } } TokenKind::DirConstant => { - self.next(); + state.next(); Expression::MagicConst { constant: MagicConst::Dir, } } - _ if is_prefix(&self.current.kind) => { - let op = self.current.kind.clone(); + _ if is_prefix(&state.current.kind) => { + let op = state.current.kind.clone(); - self.next(); + state.next(); let rpred = Precedence::prefix(&op); - let rhs = self.expression(rpred)?; + let rhs = self.expression(state, rpred)?; prefix(&op, rhs) } - TokenKind::Dollar => self.dynamic_variable()?, + TokenKind::Dollar => self.dynamic_variable(state)?, _ => { return Err(ParseError::UnexpectedToken( - self.current.kind.to_string(), - self.current.span, + state.current.kind.to_string(), + state.current.span, )) } }; - if self.current.kind == TokenKind::SemiColon { + if state.current.kind == TokenKind::SemiColon { return Ok(left); } - self.skip_comments(); + state.skip_comments(); loop { - self.skip_comments(); + state.skip_comments(); - if matches!(self.current.kind, TokenKind::SemiColon | TokenKind::Eof) { + if matches!(state.current.kind, TokenKind::SemiColon | TokenKind::Eof) { break; } - let span = self.current.span; - let kind = self.current.kind.clone(); + let span = state.current.span; + let kind = state.current.kind.clone(); if is_postfix(&kind) { let lpred = Precedence::postfix(&kind); @@ -1499,9 +1476,9 @@ impl Parser { break; } - self.next(); + state.next(); - left = self.postfix(left, &kind)?; + left = self.postfix(state, left, &kind)?; continue; } @@ -1522,13 +1499,13 @@ impl Parser { return Err(ParseError::UnexpectedToken(kind.to_string(), span)); } - self.next(); + state.next(); match kind { TokenKind::Question => { - let then = self.expression(Precedence::Lowest)?; - self.colon()?; - let otherwise = self.expression(rpred)?; + let then = self.expression(state, Precedence::Lowest)?; + self.colon(state)?; + let otherwise = self.expression(state, rpred)?; left = Expression::Ternary { condition: Box::new(left), then: Some(Box::new(then)), @@ -1536,7 +1513,7 @@ impl Parser { } } TokenKind::QuestionColon => { - let r#else = self.expression(Precedence::Lowest)?; + let r#else = self.expression(state, Precedence::Lowest)?; left = Expression::Ternary { condition: Box::new(left), then: None, @@ -1544,7 +1521,7 @@ impl Parser { } } _ => { - let rhs = self.expression(rpred)?; + let rhs = self.expression(state, rpred)?; left = infix(left, kind, rhs); } } @@ -1555,15 +1532,20 @@ impl Parser { break; } - self.skip_comments(); + state.skip_comments(); Ok(left) } - fn postfix(&mut self, lhs: Expression, op: &TokenKind) -> Result { + fn postfix( + &self, + state: &mut State, + lhs: Expression, + op: &TokenKind, + ) -> Result { Ok(match op { TokenKind::Coalesce => { - let rhs = self.expression(Precedence::NullCoalesce)?; + let rhs = self.expression(state, Precedence::NullCoalesce)?; Expression::Coalesce { lhs: Box::new(lhs), @@ -1571,9 +1553,9 @@ impl Parser { } } TokenKind::LeftParen => { - let args = self.args_list()?; + let args = self.args_list(state)?; - self.rparen()?; + self.rparen(state)?; Expression::Call { target: Box::new(lhs), @@ -1581,17 +1563,17 @@ impl Parser { } } TokenKind::LeftBracket => { - if self.current.kind == TokenKind::RightBracket { - self.next(); + if state.current.kind == TokenKind::RightBracket { + state.next(); Expression::ArrayIndex { array: Box::new(lhs), index: None, } } else { - let index = self.expression(Precedence::Lowest)?; + let index = self.expression(state, Precedence::Lowest)?; - expect_token!([TokenKind::RightBracket], self, ["`]`"]); + expect_token!([TokenKind::RightBracket], state, ["`]`"]); Expression::ArrayIndex { array: Box::new(lhs), @@ -1602,40 +1584,40 @@ impl Parser { TokenKind::DoubleColon => { let mut must_be_method_call = false; - let property = match self.current.kind.clone() { - TokenKind::Dollar => self.dynamic_variable()?, + let property = match state.current.kind.clone() { + TokenKind::Dollar => self.dynamic_variable(state)?, TokenKind::Variable(var) => { - self.next(); + state.next(); Expression::Variable { name: var } } TokenKind::LeftBrace => { must_be_method_call = true; - self.next(); + state.next(); - let name = self.expression(Precedence::Lowest)?; + let name = self.expression(state, Precedence::Lowest)?; - self.rbrace()?; + self.rbrace(state)?; Expression::DynamicVariable { name: Box::new(name), } } TokenKind::Identifier(ident) => { - self.next(); + state.next(); Expression::Identifier { name: ident } } TokenKind::Class => { - self.next(); + state.next(); // FIXME: Can this be represented in a nicer way? Kind of hacky. Expression::Identifier { name: "class".into(), } } - _ if is_reserved_ident(&self.current.kind) => Expression::Identifier { - name: self.ident_maybe_reserved()?, + _ if is_reserved_ident(&state.current.kind) => Expression::Identifier { + name: self.ident_maybe_reserved(state)?, }, _ => { - return expected_token_err!(["`{`", "`$`", "an identifier"], self); + return expected_token_err!(["`{`", "`$`", "an identifier"], state); } }; @@ -1645,7 +1627,7 @@ impl Parser { // 1. If we have an identifier and the current token is not a left paren, // the resulting expression must be a constant fetch. Expression::Identifier { name } - if self.current.kind != TokenKind::LeftParen => + if state.current.kind != TokenKind::LeftParen => { Expression::ConstFetch { target: lhs, @@ -1655,12 +1637,12 @@ impl Parser { // 2. If the current token is a left paren, or if we know the property expression // is only valid a method call context, we can assume we're parsing a static // method call. - _ if self.current.kind == TokenKind::LeftParen || must_be_method_call => { - self.lparen()?; + _ if state.current.kind == TokenKind::LeftParen || must_be_method_call => { + self.lparen(state)?; - let args = self.args_list()?; + let args = self.args_list(state)?; - self.rparen()?; + self.rparen(state)?; Expression::StaticMethodCall { target: lhs, @@ -1677,30 +1659,30 @@ impl Parser { } } TokenKind::Arrow | TokenKind::NullsafeArrow => { - let property = match self.current.kind { + let property = match state.current.kind { TokenKind::LeftBrace => { - self.lbrace()?; - let expr = self.expression(Precedence::Lowest)?; - self.rbrace()?; + self.lbrace(state)?; + let expr = self.expression(state, Precedence::Lowest)?; + self.rbrace(state)?; expr } TokenKind::Variable(ref var) => { let var = Expression::Variable { name: var.clone() }; - self.next(); + state.next(); var } - TokenKind::Dollar => self.dynamic_variable()?, + TokenKind::Dollar => self.dynamic_variable(state)?, _ => Expression::Identifier { - name: self.ident_maybe_reserved()?, + name: self.ident_maybe_reserved(state)?, }, }; - if self.current.kind == TokenKind::LeftParen { - self.next(); + if state.current.kind == TokenKind::LeftParen { + state.next(); - let args = self.args_list()?; + let args = self.args_list(state)?; - self.rparen()?; + self.rparen(state)?; if op == &TokenKind::NullsafeArrow { Expression::NullsafeMethodCall { @@ -1737,35 +1719,35 @@ impl Parser { }) } - fn interpolated_string(&mut self) -> ParseResult { + fn interpolated_string(&self, state: &mut State) -> ParseResult { let mut parts = Vec::new(); - while self.current.kind != TokenKind::DoubleQuote { - match &self.current.kind { + while state.current.kind != TokenKind::DoubleQuote { + match &state.current.kind { TokenKind::StringPart(s) => { if s.len() > 0 { parts.push(StringPart::Const(s.clone())); } - self.next(); + state.next(); } TokenKind::DollarLeftBrace => { - self.next(); - let e = match (&self.current.kind, &self.peek.kind) { + state.next(); + let e = match (&state.current.kind, &state.peek.kind) { (TokenKind::Identifier(var), TokenKind::RightBrace) => { // "${var}" let e = Expression::Variable { name: var.clone() }; - self.next(); - self.next(); + state.next(); + state.next(); e } (TokenKind::Identifier(var), TokenKind::LeftBracket) => { // "${var[e]}" let var = Expression::Variable { name: var.clone() }; - self.next(); - self.next(); - let e = self.expression(Precedence::Lowest)?; - expect_token!([TokenKind::RightBracket], self, "`]`"); - expect_token!([TokenKind::RightBrace], self, "`}`"); + state.next(); + state.next(); + let e = self.expression(state, Precedence::Lowest)?; + expect_token!([TokenKind::RightBracket], state, "`]`"); + expect_token!([TokenKind::RightBrace], state, "`}`"); Expression::ArrayIndex { array: Box::new(var), index: Some(Box::new(e)), @@ -1773,8 +1755,8 @@ impl Parser { } _ => { // Arbitrary expressions are allowed, but are treated as variable variables. - let e = self.expression(Precedence::Lowest)?; - expect_token!([TokenKind::RightBrace], self, "`}`"); + let e = self.expression(state, Precedence::Lowest)?; + expect_token!([TokenKind::RightBrace], state, "`}`"); Expression::DynamicVariable { name: Box::new(e) } } @@ -1783,77 +1765,77 @@ impl Parser { } TokenKind::LeftBrace => { // "{$expr}" - self.next(); - let e = self.expression(Precedence::Lowest)?; - expect_token!([TokenKind::RightBrace], self, "`}`"); + state.next(); + let e = self.expression(state, Precedence::Lowest)?; + expect_token!([TokenKind::RightBrace], state, "`}`"); parts.push(StringPart::Expr(Box::new(e))); } TokenKind::Variable(var) => { // "$expr", "$expr[0]", "$expr[name]", "$expr->a" let var = Expression::Variable { name: var.clone() }; - self.next(); - let e = match self.current.kind { + state.next(); + let e = match state.current.kind { TokenKind::LeftBracket => { - self.next(); + state.next(); // Full expression syntax is not allowed here, // so we can't call self.expression. - let index = match &self.current.kind { + let index = match &state.current.kind { &TokenKind::LiteralInteger(i) => { - self.next(); + state.next(); Expression::LiteralInteger { i } } TokenKind::Minus => { - self.next(); - if let TokenKind::LiteralInteger(i) = self.current.kind { - self.next(); + state.next(); + if let TokenKind::LiteralInteger(i) = state.current.kind { + state.next(); Expression::Negate { value: Box::new(Expression::LiteralInteger { i }), } } else { - return expected_token_err!("an integer", self); + return expected_token_err!("an integer", state); } } TokenKind::Identifier(ident) => { let e = Expression::LiteralString { value: ident.clone(), }; - self.next(); + state.next(); e } TokenKind::Variable(var) => { let e = Expression::Variable { name: var.clone() }; - self.next(); + state.next(); e } _ => { return expected_token_err!( ["`-`", "an integer", "an identifier", "a variable"], - self + state ); } }; - expect_token!([TokenKind::RightBracket], self, "`]`"); + expect_token!([TokenKind::RightBracket], state, "`]`"); Expression::ArrayIndex { array: Box::new(var), index: Some(Box::new(index)), } } TokenKind::Arrow => { - self.next(); + state.next(); Expression::PropertyFetch { target: Box::new(var), property: Box::new(Expression::Identifier { - name: self.ident_maybe_reserved()?, + name: self.ident_maybe_reserved(state)?, }), } } TokenKind::NullsafeArrow => { - self.next(); + state.next(); Expression::NullsafePropertyFetch { target: Box::new(var), property: Box::new(Expression::Identifier { - name: self.ident_maybe_reserved()?, + name: self.ident_maybe_reserved(state)?, }), } } @@ -1862,22 +1844,14 @@ impl Parser { parts.push(StringPart::Expr(Box::new(e))); } _ => { - return expected_token_err!(["`${`", "`{$", "`\"`", "a variable"], self); + return expected_token_err!(["`${`", "`{$", "`\"`", "a variable"], state); } } } - self.next(); - Ok(Expression::InterpolatedString { parts }) - } - - fn is_eof(&self) -> bool { - self.current.kind == TokenKind::Eof - } + state.next(); - pub fn next(&mut self) { - self.current = self.peek.clone(); - self.peek = self.iter.next().unwrap_or_default() + Ok(Expression::InterpolatedString { parts }) } } diff --git a/src/parser/params.rs b/src/parser/params.rs index 1d1a7dba..d09820a4 100644 --- a/src/parser/params.rs +++ b/src/parser/params.rs @@ -7,6 +7,7 @@ use crate::parser::ast::PropertyFlag; use crate::parser::error::ParseError; use crate::parser::error::ParseResult; use crate::parser::precedence::Precedence; +use crate::parser::state::State; use crate::parser::Parser; use crate::expect_token; @@ -20,16 +21,17 @@ pub enum ParamPosition { impl Parser { pub(in crate::parser) fn param_list( - &mut self, + &self, + state: &mut State, position: ParamPosition, ) -> Result { let mut params = ParamList::new(); - while !self.is_eof() && self.current.kind != TokenKind::RightParen { + while !state.is_eof() && state.current.kind != TokenKind::RightParen { let mut param_type = None; let flags: Vec = self - .promoted_property_flags()? + .promoted_property_flags(state)? .iter() .map(|f| f.into()) .collect(); @@ -38,17 +40,17 @@ impl Parser { match position { ParamPosition::Method(name) if name != "__construct" => { return Err(ParseError::PromotedPropertyOutsideConstructor( - self.current.span, + state.current.span, )); } ParamPosition::AbstractMethod(name) => { if name == "__construct" { return Err(ParseError::PromotedPropertyOnAbstractConstructor( - self.current.span, + state.current.span, )); } else { return Err(ParseError::PromotedPropertyOutsideConstructor( - self.current.span, + state.current.span, )); } } @@ -57,29 +59,28 @@ impl Parser { } // If this is a readonly promoted property, or we don't see a variable - if self.config.force_type_strings - || flags.contains(&PropertyFlag::Readonly) + if flags.contains(&PropertyFlag::Readonly) || !matches!( - self.current.kind, + state.current.kind, TokenKind::Variable(_) | TokenKind::Ellipsis | TokenKind::Ampersand ) { // Try to parse the type. - param_type = Some(self.type_string()?); + param_type = Some(self.type_string(state)?); } let mut variadic = false; let mut by_ref = false; - if matches!(self.current.kind, TokenKind::Ampersand) { - self.next(); + if matches!(state.current.kind, TokenKind::Ampersand) { + state.next(); by_ref = true; } - if matches!(self.current.kind, TokenKind::Ellipsis) { - self.next(); + if matches!(state.current.kind, TokenKind::Ellipsis) { + state.next(); if !flags.is_empty() { - return Err(ParseError::VariadicPromotedProperty(self.current.span)); + return Err(ParseError::VariadicPromotedProperty(state.current.span)); } variadic = true; @@ -88,12 +89,12 @@ impl Parser { // 2. Then expect a variable. let var = expect_token!([ TokenKind::Variable(v) => v - ], self, "a varaible"); + ], state, "a varaible"); let mut default = None; - if self.current.kind == TokenKind::Equals { - self.next(); - default = Some(self.expression(Precedence::Lowest)?); + if state.current.kind == TokenKind::Equals { + state.next(); + default = Some(self.expression(state, Precedence::Lowest)?); } params.push(Param { @@ -105,29 +106,29 @@ impl Parser { by_ref, }); - self.optional_comma()?; + self.optional_comma(state)?; } Ok(params) } - pub(in crate::parser) fn args_list(&mut self) -> ParseResult> { + pub(in crate::parser) fn args_list(&self, state: &mut State) -> ParseResult> { let mut args = Vec::new(); - while !self.is_eof() && self.current.kind != TokenKind::RightParen { + while !state.is_eof() && state.current.kind != TokenKind::RightParen { let mut name = None; let mut unpack = false; - if matches!(self.current.kind, TokenKind::Identifier(_)) - && self.peek.kind == TokenKind::Colon + if matches!(state.current.kind, TokenKind::Identifier(_)) + && state.peek.kind == TokenKind::Colon { - name = Some(self.ident_maybe_reserved()?); - self.next(); - } else if self.current.kind == TokenKind::Ellipsis { - self.next(); + name = Some(self.ident_maybe_reserved(state)?); + state.next(); + } else if state.current.kind == TokenKind::Ellipsis { + state.next(); unpack = true; } - if unpack && self.current.kind == TokenKind::RightParen { + if unpack && state.current.kind == TokenKind::RightParen { args.push(Arg { name: None, unpack: false, @@ -137,7 +138,7 @@ impl Parser { break; } - let value = self.expression(Precedence::Lowest)?; + let value = self.expression(state, Precedence::Lowest)?; args.push(Arg { name, @@ -145,7 +146,7 @@ impl Parser { value, }); - self.optional_comma()?; + self.optional_comma(state)?; } Ok(args) diff --git a/src/parser/punc.rs b/src/parser/punc.rs index 1eb5b9dc..e8a060d5 100644 --- a/src/parser/punc.rs +++ b/src/parser/punc.rs @@ -1,50 +1,51 @@ use crate::lexer::token::TokenKind; use crate::parser::error::ParseResult; +use crate::parser::state::State; use crate::parser::Parser; use crate::expect_token; impl Parser { - pub(in crate::parser) fn semi(&mut self) -> ParseResult<()> { - expect_token!([TokenKind::SemiColon], self, "`;`"); + pub(in crate::parser) fn semi(&self, state: &mut State) -> ParseResult<()> { + expect_token!([TokenKind::SemiColon], state, "`;`"); Ok(()) } - pub(in crate::parser) fn lbrace(&mut self) -> ParseResult<()> { - expect_token!([TokenKind::LeftBrace], self, "`{`"); + pub(in crate::parser) fn lbrace(&self, state: &mut State) -> ParseResult<()> { + expect_token!([TokenKind::LeftBrace], state, "`{`"); Ok(()) } - pub(in crate::parser) fn rbrace(&mut self) -> ParseResult<()> { - expect_token!([TokenKind::RightBrace], self, "`}`"); + pub(in crate::parser) fn rbrace(&self, state: &mut State) -> ParseResult<()> { + expect_token!([TokenKind::RightBrace], state, "`}`"); Ok(()) } - pub(in crate::parser) fn lparen(&mut self) -> ParseResult<()> { - expect_token!([TokenKind::LeftParen], self, "`(`"); + pub(in crate::parser) fn lparen(&self, state: &mut State) -> ParseResult<()> { + expect_token!([TokenKind::LeftParen], state, "`(`"); Ok(()) } - pub(in crate::parser) fn rparen(&mut self) -> ParseResult<()> { - expect_token!([TokenKind::RightParen], self, "`)`"); + pub(in crate::parser) fn rparen(&self, state: &mut State) -> ParseResult<()> { + expect_token!([TokenKind::RightParen], state, "`)`"); Ok(()) } - pub(in crate::parser) fn rbracket(&mut self) -> ParseResult<()> { - expect_token!([TokenKind::RightBracket], self, "`]`"); + pub(in crate::parser) fn rbracket(&self, state: &mut State) -> ParseResult<()> { + expect_token!([TokenKind::RightBracket], state, "`]`"); Ok(()) } - pub(in crate::parser) fn optional_comma(&mut self) -> ParseResult<()> { - if self.current.kind == TokenKind::Comma { - expect_token!([TokenKind::Comma], self, "`,`"); + pub(in crate::parser) fn optional_comma(&self, state: &mut State) -> ParseResult<()> { + if state.current.kind == TokenKind::Comma { + expect_token!([TokenKind::Comma], state, "`,`"); } Ok(()) } - pub(in crate::parser) fn colon(&mut self) -> ParseResult<()> { - expect_token!([TokenKind::Colon], self, "`:`"); + pub(in crate::parser) fn colon(&self, state: &mut State) -> ParseResult<()> { + expect_token!([TokenKind::Colon], state, "`:`"); Ok(()) } diff --git a/src/parser/state.rs b/src/parser/state.rs new file mode 100644 index 00000000..2eb25d82 --- /dev/null +++ b/src/parser/state.rs @@ -0,0 +1,59 @@ +use std::vec::IntoIter; + +use crate::lexer::token::Token; +use crate::lexer::token::TokenKind; + +#[derive(Debug, Clone)] +pub struct State { + pub current: Token, + pub peek: Token, + pub iter: IntoIter, + pub comments: Vec, +} + +impl State { + pub fn new(tokens: Vec) -> Self { + let mut iter = tokens.into_iter(); + + Self { + current: iter.next().unwrap_or_default(), + peek: iter.next().unwrap_or_default(), + iter, + comments: vec![], + } + } + + pub fn skip_comments(&mut self) { + while matches!( + self.current.kind, + TokenKind::Comment(_) | TokenKind::DocComment(_) + ) { + self.next(); + } + } + + pub fn gather_comments(&mut self) { + while matches!( + self.current.kind, + TokenKind::Comment(_) | TokenKind::DocComment(_) + ) { + self.comments.push(self.current.clone()); + self.next(); + } + } + + pub fn clear_comments(&mut self) -> Vec { + let c = self.comments.clone(); + self.comments = vec![]; + c + } + + pub fn is_eof(&mut self) -> bool { + self.current.kind == TokenKind::Eof + } + + pub fn next(&mut self) { + self.current = self.peek.clone(); + self.peek = self.iter.next().unwrap_or_default() + } +} diff --git a/src/parser/vars.rs b/src/parser/vars.rs index 29da2f78..58d5b5be 100644 --- a/src/parser/vars.rs +++ b/src/parser/vars.rs @@ -3,19 +3,20 @@ use crate::parser::ast::Expression; use crate::parser::error::ParseError; use crate::parser::error::ParseResult; use crate::parser::precedence::Precedence; +use crate::parser::state::State; use crate::parser::Parser; impl Parser { - pub(in crate::parser) fn dynamic_variable(&mut self) -> ParseResult { - self.next(); + pub(in crate::parser) fn dynamic_variable(&self, state: &mut State) -> ParseResult { + state.next(); - Ok(match &self.current.kind { + Ok(match &state.current.kind { TokenKind::LeftBrace => { - self.next(); + state.next(); - let name = self.expression(Precedence::Lowest)?; + let name = self.expression(state, Precedence::Lowest)?; - self.rbrace()?; + self.rbrace(state)?; Expression::DynamicVariable { name: Box::new(name), @@ -24,7 +25,7 @@ impl Parser { TokenKind::Variable(variable) => { let variable = variable.clone(); - self.next(); + state.next(); Expression::DynamicVariable { name: Box::new(Expression::Variable { name: variable }), @@ -32,8 +33,8 @@ impl Parser { } _ => { return Err(ParseError::UnexpectedToken( - self.current.kind.to_string(), - self.current.span, + state.current.kind.to_string(), + state.current.span, )) } }) diff --git a/tests/third_party_tests.rs b/tests/third_party_tests.rs index 6cc27983..4522bfaa 100644 --- a/tests/third_party_tests.rs +++ b/tests/third_party_tests.rs @@ -76,7 +76,7 @@ fn test_file(name: &str, filename: PathBuf) { Lexer::new() .tokenize(code.as_bytes()) .map(|tokens| { - Parser::new(None) + Parser::new() .parse(tokens) .map(|_| { println!("✅ successfully parsed file: `\"{}\"`.", name);