Removed regular expression flag lexing
Razican committed Dec 25, 2021
1 parent ac882f1 commit dee5b3e
Showing 4 changed files with 23 additions and 158 deletions.
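
In short: the lexer no longer eagerly parses regular expression flags into a `RegExpFlags` bitflag value. It now interns the raw flag text as a `Sym`, just like the body, and `TokenKind::RegularExpressionLiteral` becomes a struct variant holding two interned strings, deferring flag validation to whoever consumes the token. A minimal sketch of the new token shape (simplified stand-in types, not Boa's actual definitions):

    // `Sym` is Boa's interned-string handle, simplified here to a plain String.
    type Sym = String;

    #[derive(Debug)]
    enum TokenKind {
        // Before this commit: RegularExpressionLiteral(Sym, RegExpFlags),
        // where RegExpFlags was a validated bitset built during lexing.
        RegularExpressionLiteral { body: Sym, flags: Sym },
    }

    fn main() {
        // Lexing `/\d+/gi` now just captures "\d+" and "gi" verbatim;
        // the lexer itself no longer rejects bad flag strings.
        let tok = TokenKind::RegularExpressionLiteral {
            body: r"\d+".to_string(),
            flags: "gi".to_string(),
        };
        println!("{:?}", tok);
    }
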
boa/src/syntax/lexer/regex.rs (3 additions & 127 deletions)
@@ -9,17 +9,11 @@ use crate::{
     },
     Interner,
 };
-use bitflags::bitflags;
-use std::io::{self, ErrorKind};
-use std::str;
 use std::{
-    fmt::{self, Display, Formatter},
-    io::Read,
+    io::{self, ErrorKind, Read},
+    str,
 };
-
-#[cfg(feature = "deser")]
-use serde::{Deserialize, Deserializer, Serialize, Serializer};
 
 /// Regex literal lexing.
 ///
 /// Lexes Division, Assigndiv or Regex literal.
@@ -114,15 +108,14 @@ impl<R> Tokenizer<R> for RegexLiteral {
         }
 
         let mut flags = Vec::new();
-        let flags_start = cursor.pos();
         cursor.take_while_ascii_pred(&mut flags, &|c: char| c.is_alphabetic())?;
 
         let flags_str = unsafe { str::from_utf8_unchecked(flags.as_slice()) };
         if let Ok(body_str) = str::from_utf8(body.as_slice()) {
             Ok(Token::new(
                 TokenKind::regular_expression_literal(
                     interner.get_or_intern(body_str),
-                    parse_regex_flags(flags_str, flags_start)?,
+                    interner.get_or_intern(flags_str),
                 ),
                 Span::new(start_pos, cursor.pos()),
             ))
@@ -134,120 +127,3 @@
         }
     }
 }
-
-bitflags! {
-    /// Flags of a regular expression.
-    #[derive(Default)]
-    pub struct RegExpFlags: u8 {
-        const GLOBAL = 0b0000_0001;
-        const IGNORE_CASE = 0b0000_0010;
-        const MULTILINE = 0b0000_0100;
-        const DOT_ALL = 0b0000_1000;
-        const UNICODE = 0b0001_0000;
-        const STICKY = 0b0010_0000;
-    }
-}
-
-pub(crate) fn parse_regex_flags(s: &str, start: Position) -> Result<RegExpFlags, Error> {
-    let mut flags = RegExpFlags::default();
-    for c in s.bytes() {
-        let new_flag = match c {
-            b'g' => RegExpFlags::GLOBAL,
-            b'i' => RegExpFlags::IGNORE_CASE,
-            b'm' => RegExpFlags::MULTILINE,
-            b's' => RegExpFlags::DOT_ALL,
-            b'u' => RegExpFlags::UNICODE,
-            b'y' => RegExpFlags::STICKY,
-            _ => {
-                return Err(Error::syntax(
-                    format!("invalid regular expression flag {}", char::from(c)),
-                    start,
-                ))
-            }
-        };
-
-        if !flags.contains(new_flag) {
-            flags.insert(new_flag);
-        } else {
-            return Err(Error::syntax(
-                format!("invalid regular expression flag {}", char::from(c)),
-                start,
-            ));
-        }
-    }
-    Ok(flags)
-}
-
-impl Display for RegExpFlags {
-    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
-        use fmt::Write;
-
-        if self.contains(Self::GLOBAL) {
-            f.write_char('g')?;
-        }
-        if self.contains(Self::IGNORE_CASE) {
-            f.write_char('i')?;
-        }
-        if self.contains(Self::MULTILINE) {
-            f.write_char('m')?;
-        }
-        if self.contains(Self::DOT_ALL) {
-            f.write_char('s')?;
-        }
-        if self.contains(Self::UNICODE) {
-            f.write_char('u')?;
-        }
-        if self.contains(Self::STICKY) {
-            f.write_char('y')?;
-        }
-        Ok(())
-    }
-}
-
-#[cfg(feature = "deser")]
-impl Serialize for RegExpFlags {
-    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: Serializer,
-    {
-        serializer.serialize_str(&self.to_string())
-    }
-}
-
-#[cfg(feature = "deser")]
-impl<'de> Deserialize<'de> for RegExpFlags {
-    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
-    where
-        D: Deserializer<'de>,
-    {
-        use serde::de::{self, Visitor};
-
-        /// Deserializer visitor implementation for `RegExpFlags`.
-        #[derive(Debug, Clone, Copy)]
-        struct RegExpFlagsVisitor;
-
-        impl<'de> Visitor<'de> for RegExpFlagsVisitor {
-            type Value = RegExpFlags;
-
-            fn expecting(&self, formatter: &mut Formatter<'_>) -> fmt::Result {
-                formatter.write_str("a string representing JavaScript regular expression flags")
-            }
-
-            fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
-            where
-                E: de::Error,
-            {
-                parse_regex_flags(value, Position::new(0, 0)).map_err(E::custom)
-            }
-
-            fn visit_string<E>(self, value: String) -> Result<Self::Value, E>
-            where
-                E: de::Error,
-            {
-                self.visit_str(&value)
-            }
-        }
-
-        deserializer.deserialize_str(RegExpFlagsVisitor)
-    }
-}
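
The deleted `parse_regex_flags` is what enforced the `g`/`i`/`m`/`s`/`u`/`y` alphabet and rejected duplicates at lex time; after this commit that check has to happen wherever the flags `Sym` is consumed. A standalone sketch of the removed validation logic, using plain `u8` masks instead of the `bitflags` crate and `String` errors instead of `Error::syntax`:

    /// Mirrors the removed `parse_regex_flags`: unknown flags and
    /// duplicate flags are both syntax errors.
    fn parse_regex_flags(s: &str) -> Result<u8, String> {
        let mut flags = 0u8;
        for c in s.bytes() {
            let new_flag = match c {
                b'g' => 0b0000_0001, // GLOBAL
                b'i' => 0b0000_0010, // IGNORE_CASE
                b'm' => 0b0000_0100, // MULTILINE
                b's' => 0b0000_1000, // DOT_ALL
                b'u' => 0b0001_0000, // UNICODE
                b'y' => 0b0010_0000, // STICKY
                _ => return Err(format!("invalid regular expression flag {}", char::from(c))),
            };
            // A repeated flag is rejected just like an unknown one.
            if flags & new_flag != 0 {
                return Err(format!("invalid regular expression flag {}", char::from(c)));
            }
            flags |= new_flag;
        }
        Ok(flags)
    }

    fn main() {
        assert!(parse_regex_flags("gmi").is_ok());
        assert!(parse_regex_flags("gg").is_err()); // duplicate flag
        assert!(parse_regex_flags("x").is_err()); // unknown flag
    }
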
boa/src/syntax/lexer/tests.rs (10 additions & 17 deletions)
@@ -1,12 +1,10 @@
 //! Tests for the lexer.
 #![allow(clippy::indexing_slicing)]
 
-use super::regex::RegExpFlags;
-use super::token::Numeric;
-use super::*;
-use super::{Error, Position};
-use crate::syntax::ast::Keyword;
-use crate::syntax::lexer::template::TemplateString;
+use super::{
+    token::Numeric, Cursor, Error, Interner, Lexer, Position, Punctuator, Read, Span, TokenKind,
+};
+use crate::syntax::{ast::Keyword, lexer::template::TemplateString};
 use std::str;
 
 fn span(start: (u32, u32), end: (u32, u32)) -> Span {
@@ -625,11 +623,9 @@ fn regex_literal() {
     let mut lexer = Lexer::new(&b"/(?:)/"[..]);
     let mut interner = Interner::new();
 
-    let sym = interner.get_or_intern("(?:)");
-    let expected = [TokenKind::regular_expression_literal(
-        sym,
-        RegExpFlags::default(),
-    )];
+    let body_sym = interner.get_or_intern("(?:)");
+    let flags_sym = interner.get_or_intern("");
+    let expected = [TokenKind::regular_expression_literal(body_sym, flags_sym)];
 
     expect_tokens(&mut lexer, &expected, &mut interner);
 }
@@ -639,13 +635,10 @@ fn regex_literal_flags() {
     let mut lexer = Lexer::new(&br"/\/[^\/]*\/*/gmi"[..]);
     let mut interner = Interner::new();
 
-    let mut flags = RegExpFlags::default();
-    flags.insert(RegExpFlags::GLOBAL);
-    flags.insert(RegExpFlags::MULTILINE);
-    flags.insert(RegExpFlags::IGNORE_CASE);
+    let body_sym = interner.get_or_intern("\\/[^\\/]*\\/*");
+    let flags_sym = interner.get_or_intern("gmi");
 
-    let sym = interner.get_or_intern("\\/[^\\/]*\\/*");
-    let expected = [TokenKind::regular_expression_literal(sym, flags)];
+    let expected = [TokenKind::regular_expression_literal(body_sym, flags_sym)];
 
     expect_tokens(&mut lexer, &expected, &mut interner);
 }
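
Note that the flag-less literal in `regex_literal` now interns the empty string rather than using `RegExpFlags::default()`. A toy interner sketch (Boa's real `Interner` is more elaborate) showing why this is cheap — equal strings, including `""`, resolve to the same stable symbol:

    use std::collections::HashMap;

    #[derive(Default)]
    struct Interner {
        map: HashMap<String, usize>,
        strings: Vec<String>,
    }

    impl Interner {
        fn get_or_intern(&mut self, s: &str) -> usize {
            if let Some(&sym) = self.map.get(s) {
                return sym; // already interned: same symbol back
            }
            let sym = self.strings.len();
            self.strings.push(s.to_string());
            self.map.insert(s.to_string(), sym);
            sym
        }

        fn resolve(&self, sym: usize) -> Option<&str> {
            self.strings.get(sym).map(String::as_str)
        }
    }

    fn main() {
        let mut interner = Interner::default();
        let body = interner.get_or_intern("(?:)");
        let flags = interner.get_or_intern(""); // a flag-less literal interns ""
        assert_eq!(interner.resolve(body), Some("(?:)"));
        assert_eq!(interner.resolve(flags), Some(""));
    }
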
boa/src/syntax/lexer/token.rs (5 additions & 10 deletions)
@@ -5,8 +5,6 @@
 //!
 //! [spec]: https://tc39.es/ecma262/#sec-tokens
 
-use super::regex::RegExpFlags;
-
 use crate::{
     syntax::ast::{Keyword, Punctuator, Span},
     syntax::lexer::template::TemplateString,
@@ -126,7 +124,7 @@ pub enum TokenKind {
     TemplateMiddle(TemplateString),
 
     /// A regular expression, consisting of body and flags.
-    RegularExpressionLiteral(Sym, RegExpFlags),
+    RegularExpressionLiteral { body: Sym, flags: Sym },
 
     /// Indicates the end of a line (`\n`).
     LineTerminator,
@@ -207,11 +205,8 @@ impl TokenKind {
     }
 
     /// Creates a `RegularExpressionLiteral` token kind.
-    pub fn regular_expression_literal<R>(body: Sym, flags: R) -> Self
-    where
-        R: Into<RegExpFlags>,
-    {
-        Self::RegularExpressionLiteral(body, flags.into())
+    pub fn regular_expression_literal(body: Sym, flags: Sym) -> Self {
+        Self::RegularExpressionLiteral { body, flags }
     }
 
     /// Creates a `LineTerminator` token kind.
@@ -251,11 +246,11 @@ impl TokenKind {
                 .resolve(ts.as_raw())
                 .expect("string disappeared")
                 .to_owned(),
-            Self::RegularExpressionLiteral(body, ref flags) => {
+            Self::RegularExpressionLiteral { body, flags } => {
                 format!(
                     "/{}/{}",
                     interner.resolve(body).expect("string disappeared"),
-                    flags
+                    interner.resolve(flags).expect("string disappeared")
                 )
             }
             Self::LineTerminator => "line terminator".to_owned(),
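
With the `Display` impl for `RegExpFlags` gone, rendering the token is just two interner lookups formatted as `/body/flags`. A simplified sketch of the new match arm's output, with `&str` standing in for resolved `Sym`s:

    // `body` and `flags` stand for strings already resolved through the interner.
    fn regex_token_to_string(body: &str, flags: &str) -> String {
        format!("/{}/{}", body, flags)
    }

    fn main() {
        assert_eq!(regex_token_to_string("(?:)", ""), "/(?:)/");
        assert_eq!(regex_token_to_string(r"\d+", "gmi"), "/\\d+/gmi");
    }
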
boa/src/syntax/parser/expression/primary/mod.rs (5 additions & 4 deletions)
@@ -173,25 +173,26 @@ where
             TokenKind::NumericLiteral(Numeric::Integer(num)) => Ok(Const::from(*num).into()),
             TokenKind::NumericLiteral(Numeric::Rational(num)) => Ok(Const::from(*num).into()),
             TokenKind::NumericLiteral(Numeric::BigInt(num)) => Ok(Const::from(num.clone()).into()),
-            TokenKind::RegularExpressionLiteral(body, flags) => {
+            TokenKind::RegularExpressionLiteral { body, flags } => {
                 Ok(Node::from(New::from(Call::new(
                     Identifier::from("RegExp"),
                     vec![
                         Const::from(interner.resolve(*body).expect("string disappeared")).into(),
-                        Const::from(flags.to_string()).into(),
+                        Const::from(interner.resolve(*flags).expect("string disappeared")).into(),
                     ],
                 ))))
             }
             TokenKind::Punctuator(Punctuator::Div) => {
                 let tok = cursor.lex_regex(tok.span().start(), interner)?;
 
-                if let TokenKind::RegularExpressionLiteral(body, flags) = tok.kind() {
+                if let TokenKind::RegularExpressionLiteral { body, flags } = tok.kind() {
                     Ok(Node::from(New::from(Call::new(
                         Identifier::from("RegExp"),
                         vec![
                             Const::from(interner.resolve(*body).expect("string disappeared"))
                                 .into(),
-                            Const::from(flags.to_string()).into(),
+                            Const::from(interner.resolve(*flags).expect("string disappeared"))
+                                .into(),
                         ],
                     ))))
                 } else {
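
In the parser, both arms now lower a regex literal to the equivalent of `new RegExp(body, flags)` with two plain resolved strings, where the second argument previously came from `RegExpFlags::to_string()`. A rough illustration of that lowering, emitting source text instead of Boa's `Node::from(New::from(Call::new(...)))` AST (the helper name is hypothetical):

    // Hypothetical helper: renders the `new RegExp(...)` call the parser
    // builds, as JavaScript source text rather than AST nodes.
    fn lower_regex_literal(body: &str, flags: &str) -> String {
        format!("new RegExp({:?}, {:?})", body, flags)
    }

    fn main() {
        assert_eq!(
            lower_regex_literal("(?:)", "gmi"),
            r#"new RegExp("(?:)", "gmi")"#
        );
    }
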
