Skip to content

Commit

Permalink
challenge(formatter): (also parser) Respect, parse, and format BOM ch…
Browse files Browse the repository at this point in the history
…aracters from source files (#750)
  • Loading branch information
faultyserver committed Nov 17, 2023
1 parent d7bee75 commit 3aa88e8
Show file tree
Hide file tree
Showing 1,026 changed files with 5,266 additions and 3,864 deletions.
9 changes: 9 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,8 @@ serde_json = "1.0.96"
smallvec = { version = "1.10.0", features = ["union", "const_new"] }
tracing = { version = "0.1.37", default-features = false, features = ["std"] }
# pinning to version 1.18 to avoid multiple versions of windows-sys as dependency
tokio = { version = "~1.18.5" }
tokio = { version = "~1.18.5" }
unicode-bom = "2.0.3"


[profile.dev.package.biome_wasm]
Expand Down
35 changes: 27 additions & 8 deletions crates/biome_css_factory/src/generated/node_factory.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 8 additions & 1 deletion crates/biome_css_factory/src/generated/syntax_factory.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/biome_css_parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ biome_js_unicode_table = { workspace = true }
biome_parser = { workspace = true }
biome_rowan = { workspace = true }
tracing = { workspace = true }
unicode-bom = { workspace = true }

[dev-dependencies]
biome_test_utils = { path = "../biome_test_utils" }
Expand Down
43 changes: 43 additions & 0 deletions crates/biome_css_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use biome_js_unicode_table::{is_id_continue, is_id_start, lookup_byte, Dispatch:
use biome_parser::diagnostic::ParseDiagnostic;
use biome_parser::lexer::{LexContext, Lexer, LexerCheckpoint, TokenFlags};
use std::char::REPLACEMENT_CHARACTER;
use unicode_bom::Bom;

#[derive(Debug, Copy, Clone, Eq, PartialEq, Default)]
pub enum CssLexContext {
Expand Down Expand Up @@ -41,6 +42,9 @@ pub(crate) struct CssLexer<'src> {
/// `true` if there has been a line break between the last non-trivia token and the next non-trivia token.
after_newline: bool,

/// If the source starts with a Unicode BOM, this is the number of bytes for that token.
unicode_bom_length: usize,

/// Byte offset of the current token from the start of the source
/// The range of the current token can be computed by `self.position - self.current_start`
current_start: TextSize,
Expand Down Expand Up @@ -78,6 +82,7 @@ impl<'src> Lexer<'src> for CssLexer<'src> {
current_flags: self.current_flags,
current_kind: self.current_kind,
after_line_break: self.after_newline,
unicode_bom_length: self.unicode_bom_length,
diagnostics_pos: self.diagnostics.len() as u32,
}
}
Expand Down Expand Up @@ -139,6 +144,7 @@ impl<'src> Lexer<'src> for CssLexer<'src> {
current_flags,
current_kind,
after_line_break,
unicode_bom_length,
diagnostics_pos,
} = checkpoint;

Expand All @@ -149,6 +155,7 @@ impl<'src> Lexer<'src> for CssLexer<'src> {
self.current_start = current_start;
self.current_flags = current_flags;
self.after_newline = after_line_break;
self.unicode_bom_length = unicode_bom_length;
self.diagnostics.truncate(diagnostics_pos as usize);
}

Expand All @@ -167,6 +174,7 @@ impl<'src> CssLexer<'src> {
Self {
source,
after_newline: false,
unicode_bom_length: 0,
current_kind: TOMBSTONE,
current_start: TextSize::from(0),
current_flags: TokenFlags::empty(),
Expand Down Expand Up @@ -259,6 +267,31 @@ impl<'src> CssLexer<'src> {
}
}

/// Detects a Unicode BOM at the very beginning of the source. When one is
/// found it is consumed and the `UNICODE_BOM` token kind is returned;
/// otherwise `None` is returned.
///
/// The detected BOM length (zero when no BOM is recognized) is recorded in
/// `self.unicode_bom_length`.
///
/// ## Safety
/// Must be called at a valid UTF-8 char boundary (and realistically only at
/// the start position of the source).
fn consume_potential_bom(&mut self) -> Option<CssSyntaxKind> {
    // The first three bytes of the source are what `Bom` needs to tell the
    // UTF-8 BOM apart from an alternative. The window can be widened if
    // Biome ever decides to support other encodings such as UTF-16.
    // If those three bytes are not available as a string slice, there is
    // nothing to detect and the lexer state is left untouched.
    let prefix = self.source().get(0..3)?;

    let detected = Bom::from(prefix.as_bytes());
    let bom_length = detected.len();

    // Remember how long the BOM was and step over it. Both happen even when
    // no BOM was recognized, exactly as before the rewrite.
    self.unicode_bom_length = bom_length;
    self.advance(bom_length);

    if matches!(detected, Bom::Null) {
        None
    } else {
        Some(UNICODE_BOM)
    }
}

/// Get the UTF8 char which starts at the current byte
///
/// ## Safety
Expand Down Expand Up @@ -451,6 +484,16 @@ impl<'src> CssLexer<'src> {
PIP => self.consume_pipe(),
EQL => self.eat_byte(T![=]),

UNI => {
// A BOM can only appear at the start of a file, so if we haven't advanced at all yet,
// perform the check. At any other position (or if no BOM is found), the character is
// consumed as an unexpected character.
if self.position == 0 && self.consume_potential_bom().is_some() {
UNICODE_BOM
} else {
self.eat_unexpected_character()
}
}

_ => self.eat_unexpected_character(),
}
}
Expand Down
1 change: 1 addition & 0 deletions crates/biome_css_parser/src/syntax/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ const BODY_RECOVERY_SET: TokenSet<CssSyntaxKind> =

pub(crate) fn parse_root(p: &mut CssParser) {
let m = p.start();
p.eat(UNICODE_BOM);

parse_rule_list(p);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ expression: snapshot
```
CssRoot {
bom_token: missing (optional),
rules: CssRuleList [
CssRule {
prelude: CssSelectorList [
Expand Down Expand Up @@ -46,7 +47,8 @@ CssRoot {
```
0: CSS_ROOT@0..10
0: CSS_RULE_LIST@0..9
0: (empty)
1: CSS_RULE_LIST@0..9
0: CSS_RULE@0..9
0: CSS_SELECTOR_LIST@0..8
0: CSS_COMPOUND_SELECTOR@0..8
Expand All @@ -61,7 +63,7 @@ CssRoot {
0: L_CURLY@8..9 "{" [] []
1: CSS_DECLARATION_LIST@9..9
2: (empty)
1: EOF@9..10 "" [Newline("\n")] []
2: EOF@9..10 "" [Newline("\n")] []
```
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ expression: snapshot

```
CssRoot {
bom_token: missing (optional),
rules: CssRuleList [
CssRule {
prelude: CssSelectorList [
Expand Down Expand Up @@ -150,7 +151,8 @@ CssRoot {

```
0: CSS_ROOT@0..66
0: CSS_RULE_LIST@0..65
0: (empty)
1: CSS_RULE_LIST@0..65
0: CSS_RULE@0..11
0: CSS_SELECTOR_LIST@0..9
0: CSS_COMPOUND_SELECTOR@0..9
Expand Down Expand Up @@ -225,7 +227,7 @@ CssRoot {
0: CSS_STRING_LITERAL@58..64 "\"foo\"" [] [Whitespace(" ")]
2: S_KW@64..65 "s" [] []
3: (empty)
1: EOF@65..66 "" [Newline("\n")] []
2: EOF@65..66 "" [Newline("\n")] []
```
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ expression: snapshot

```
CssRoot {
bom_token: missing (optional),
rules: CssRuleList [
CssRule {
prelude: CssSelectorList [
Expand Down Expand Up @@ -50,7 +51,8 @@ CssRoot {

```
0: CSS_ROOT@0..12
0: CSS_RULE_LIST@0..11
0: (empty)
1: CSS_RULE_LIST@0..11
0: CSS_RULE@0..11
0: CSS_SELECTOR_LIST@0..9
0: CSS_COMPOUND_SELECTOR@0..9
Expand All @@ -68,7 +70,7 @@ CssRoot {
0: L_CURLY@9..10 "{" [] []
1: CSS_DECLARATION_LIST@10..10
2: R_CURLY@10..11 "}" [] []
1: EOF@11..12 "" [Newline("\n")] []
2: EOF@11..12 "" [Newline("\n")] []
```
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ expression: snapshot

```
CssRoot {
bom_token: missing (optional),
rules: CssRuleList [
CssRule {
prelude: CssSelectorList [
Expand Down Expand Up @@ -50,7 +51,8 @@ CssRoot {

```
0: CSS_ROOT@0..12
0: CSS_RULE_LIST@0..11
0: (empty)
1: CSS_RULE_LIST@0..11
0: CSS_RULE@0..11
0: CSS_SELECTOR_LIST@0..9
0: CSS_COMPOUND_SELECTOR@0..9
Expand All @@ -68,7 +70,7 @@ CssRoot {
0: L_CURLY@9..10 "{" [] []
1: CSS_DECLARATION_LIST@10..10
2: R_CURLY@10..11 "}" [] []
1: EOF@11..12 "" [Newline("\n")] []
2: EOF@11..12 "" [Newline("\n")] []
```
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ expression: snapshot

```
CssRoot {
bom_token: missing (optional),
rules: CssRuleList [
CssRule {
prelude: CssSelectorList [
Expand Down Expand Up @@ -54,7 +55,8 @@ CssRoot {

```
0: CSS_ROOT@0..16
0: CSS_RULE_LIST@0..15
0: (empty)
1: CSS_RULE_LIST@0..15
0: CSS_RULE@0..15
0: CSS_SELECTOR_LIST@0..11
0: CSS_BOGUS_SELECTOR@0..1
Expand All @@ -72,7 +74,7 @@ CssRoot {
0: L_CURLY@11..12 "{" [] []
1: CSS_DECLARATION_LIST@12..12
2: R_CURLY@12..15 "}" [Newline("\n"), Newline("\n")] []
1: EOF@15..16 "" [Newline("\n")] []
2: EOF@15..16 "" [Newline("\n")] []
```
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ expression: snapshot

```
CssRoot {
bom_token: missing (optional),
rules: CssRuleList [
CssRule {
prelude: CssSelectorList [
Expand Down Expand Up @@ -55,7 +56,8 @@ CssRoot {

```
0: CSS_ROOT@0..9
0: CSS_RULE_LIST@0..8
0: (empty)
1: CSS_RULE_LIST@0..8
0: CSS_RULE@0..8
0: CSS_SELECTOR_LIST@0..6
0: CSS_COMPOUND_SELECTOR@0..2
Expand All @@ -77,7 +79,7 @@ CssRoot {
0: L_CURLY@6..7 "{" [] []
1: CSS_DECLARATION_LIST@7..7
2: R_CURLY@7..8 "}" [] []
1: EOF@8..9 "" [Newline("\n")] []
2: EOF@8..9 "" [Newline("\n")] []
```
Expand Down

0 comments on commit 3aa88e8

Please sign in to comment.