From 632a34754ed3aa66e046d706302b75ee6d4105d8 Mon Sep 17 00:00:00 2001
From: Micha Reiser
Date: Wed, 7 Feb 2024 16:50:40 -0500
Subject: [PATCH] Box `LexicalError` to reduce Result size

`LexicalError` previously stored its `LexicalErrorType` and `TextSize`
fields inline, which made the error larger than `Tok` and inflated every
`Result<Tok, LexicalError>` the lexer returns. This change moves both
fields into a private `LexicalErrorInner` behind a `Box`, so the error is
a single pointer and the `Result` stays at 32 bytes, the size of `Tok`
itself; a `static_assertions` check in `token.rs` now enforces both sizes
on 64-bit targets. Because the fields are now private, construction goes
through `LexicalError::new` and reads go through the `error()`,
`into_error()`, and `location()` accessors.

---
 crates/ruff_python_parser/src/lexer.rs        | 228 +++++++++---------
 ...exer__tests__invalid_leading_zero_big.snap |  14 +-
 ...er__tests__invalid_leading_zero_small.snap |  14 +-
 ...ser__lexer__tests__tet_too_low_dedent.snap |  10 +-
 crates/ruff_python_parser/src/token.rs        |  11 +-
 5 files changed, 143 insertions(+), 134 deletions(-)

diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs
index ef454455ce10b2..4fd9df80d2ad4e 100644
--- a/crates/ruff_python_parser/src/lexer.rs
+++ b/crates/ruff_python_parser/src/lexer.rs
@@ -107,10 +107,10 @@ where
     fn next(&mut self) -> Option<Self::Item> {
         let result = match self.inner.next()? {
             Ok((tok, range)) => Ok((tok, range + self.start_offset)),
-            Err(error) => Err(LexicalError {
-                location: error.location + self.start_offset,
-                ..error
-            }),
+            Err(error) => {
+                let location = error.location() + self.start_offset;
+                Err(LexicalError::new(error.into_error(), location))
+            }
         };
 
         Some(result)
@@ -284,10 +284,10 @@ impl<'source> Lexer<'source> {
         let value = match Int::from_str_radix(number.as_str(), radix.as_u32(), token) {
             Ok(int) => int,
             Err(err) => {
-                return Err(LexicalError {
-                    error: LexicalErrorType::OtherError(format!("{err:?}")),
-                    location: self.token_range().start(),
-                });
+                return Err(LexicalError::new(
+                    LexicalErrorType::OtherError(format!("{err:?}")),
+                    self.token_range().start(),
+                ));
             }
         };
         Ok(Tok::Int { value })
@@ -309,10 +309,10 @@ impl<'source> Lexer<'source> {
             number.push('.');
 
             if self.cursor.eat_char('_') {
-                return Err(LexicalError {
-                    error: LexicalErrorType::OtherError("Invalid Syntax".to_owned()),
-                    location: self.offset() - TextSize::new(1),
-                });
+                return Err(LexicalError::new(
+                    LexicalErrorType::OtherError("Invalid Syntax".to_owned()),
+                    self.offset() - TextSize::new(1),
+                ));
             }
 
             self.radix_run(&mut number, Radix::Decimal);
@@ -340,9 +340,11 @@ impl<'source> Lexer<'source> {
 
         if is_float {
             // Improvement: Use `Cow` instead of pushing to value text
-            let value = f64::from_str(number.as_str()).map_err(|_| LexicalError {
-                error: LexicalErrorType::OtherError("Invalid decimal literal".to_owned()),
-                location: self.token_start(),
+            let value = f64::from_str(number.as_str()).map_err(|_| {
+                LexicalError::new(
+                    LexicalErrorType::OtherError("Invalid decimal literal".to_owned()),
+                    self.token_start(),
+                )
             })?;
 
             // Parse trailing 'j':
@@ -364,18 +366,18 @@ impl<'source> Lexer<'source> {
                 Ok(value) => {
                     if start_is_zero && value.as_u8() != Some(0) {
                         // Leading zeros in decimal integer literals are not permitted.
-                        return Err(LexicalError {
-                            error: LexicalErrorType::OtherError("Invalid Token".to_owned()),
-                            location: self.token_range().start(),
-                        });
+                        return Err(LexicalError::new(
+                            LexicalErrorType::OtherError("Invalid Token".to_owned()),
+                            self.token_range().start(),
+                        ));
                     }
                     value
                 }
                 Err(err) => {
-                    return Err(LexicalError {
-                        error: LexicalErrorType::OtherError(format!("{err:?}")),
-                        location: self.token_range().start(),
-                    })
+                    return Err(LexicalError::new(
+                        LexicalErrorType::OtherError(format!("{err:?}")),
+                        self.token_range().start(),
+                    ))
                 }
             };
             Ok(Tok::Int { value })
@@ -584,10 +586,10 @@ impl<'source> Lexer<'source> {
                     } else {
                         FStringErrorType::UnterminatedString
                     };
-                    return Err(LexicalError {
-                        error: LexicalErrorType::FStringError(error),
-                        location: self.offset(),
-                    });
+                    return Err(LexicalError::new(
+                        LexicalErrorType::FStringError(error),
+                        self.offset(),
+                    ));
                 }
                 '\n' | '\r' if !fstring.is_triple_quoted() => {
                     // If we encounter a newline while we're in a format spec, then
@@ -597,10 +599,10 @@ impl<'source> Lexer<'source> {
                     if in_format_spec {
                         break;
                     }
-                    return Err(LexicalError {
-                        error: LexicalErrorType::FStringError(FStringErrorType::UnterminatedString),
-                        location: self.offset(),
-                    });
+                    return Err(LexicalError::new(
+                        LexicalErrorType::FStringError(FStringErrorType::UnterminatedString),
+                        self.offset(),
+                    ));
                 }
                 '\\' => {
                     self.cursor.bump(); // '\'
@@ -705,20 +707,18 @@ impl<'source> Lexer<'source> {
                     // matches with f-strings quotes and if it is, then this must be a
                     // missing '}' token so raise the proper error.
                     if fstring.quote_char() == quote && !fstring.is_triple_quoted() {
-                        return Err(LexicalError {
-                            error: LexicalErrorType::FStringError(
-                                FStringErrorType::UnclosedLbrace,
-                            ),
-                            location: self.offset() - TextSize::new(1),
-                        });
+                        return Err(LexicalError::new(
+                            LexicalErrorType::FStringError(FStringErrorType::UnclosedLbrace),
+                            self.offset() - TextSize::new(1),
+                        ));
                     }
                 }
-                return Err(LexicalError {
-                    error: LexicalErrorType::OtherError(
+                return Err(LexicalError::new(
+                    LexicalErrorType::OtherError(
                         "EOL while scanning string literal".to_owned(),
                     ),
-                    location: self.offset() - TextSize::new(1),
-                });
+                    self.offset() - TextSize::new(1),
+                ));
             }
             Some(c) if c == quote => {
                 if triple_quoted {
@@ -739,22 +739,20 @@ impl<'source> Lexer<'source> {
                     if fstring.quote_char() == quote
                         && fstring.is_triple_quoted() == triple_quoted
                     {
-                        return Err(LexicalError {
-                            error: LexicalErrorType::FStringError(
-                                FStringErrorType::UnclosedLbrace,
-                            ),
-                            location: self.offset(),
-                        });
+                        return Err(LexicalError::new(
+                            LexicalErrorType::FStringError(FStringErrorType::UnclosedLbrace),
+                            self.offset(),
+                        ));
                     }
                 }
-                return Err(LexicalError {
-                    error: if triple_quoted {
+                return Err(LexicalError::new(
+                    if triple_quoted {
                         LexicalErrorType::Eof
                     } else {
                         LexicalErrorType::StringError
                     },
-                    location: self.offset(),
-                });
+                    self.offset(),
+                ));
             }
         }
     };
@@ -829,10 +827,10 @@ impl<'source> Lexer<'source> {
 
             Ok((identifier, self.token_range()))
         } else {
-            Err(LexicalError {
-                error: LexicalErrorType::UnrecognizedToken { tok: c },
-                location: self.token_start(),
-            })
+            Err(LexicalError::new(
+                LexicalErrorType::UnrecognizedToken { tok: c },
+                self.token_start(),
+            ))
         }
     } else {
         // Reached the end of the file. Emit a trailing newline token if not at the beginning of a logical line,
@@ -855,15 +853,12 @@ impl<'source> Lexer<'source> {
             if self.cursor.eat_char('\r') {
                 self.cursor.eat_char('\n');
             } else if self.cursor.is_eof() {
-                return Err(LexicalError {
-                    error: LexicalErrorType::Eof,
-                    location: self.token_start(),
-                });
+                return Err(LexicalError::new(LexicalErrorType::Eof, self.token_start()));
             } else if !self.cursor.eat_char('\n') {
-                return Err(LexicalError {
-                    error: LexicalErrorType::LineContinuationError,
-                    location: self.token_start(),
-                });
+                return Err(LexicalError::new(
+                    LexicalErrorType::LineContinuationError,
+                    self.token_start(),
+                ));
             }
         }
         // Form feed
@@ -896,15 +891,12 @@ impl<'source> Lexer<'source> {
                 if self.cursor.eat_char('\r') {
                     self.cursor.eat_char('\n');
                 } else if self.cursor.is_eof() {
-                    return Err(LexicalError {
-                        error: LexicalErrorType::Eof,
-                        location: self.token_start(),
-                    });
+                    return Err(LexicalError::new(LexicalErrorType::Eof, self.token_start()));
                 } else if !self.cursor.eat_char('\n') {
-                    return Err(LexicalError {
-                        error: LexicalErrorType::LineContinuationError,
-                        location: self.token_start(),
-                    });
+                    return Err(LexicalError::new(
+                        LexicalErrorType::LineContinuationError,
+                        self.token_start(),
+                    ));
                 }
                 indentation = Indentation::root();
             }
@@ -955,10 +947,10 @@ impl<'source> Lexer<'source> {
                 Some((Tok::Indent, self.token_range()))
             }
            Err(_) => {
-                return Err(LexicalError {
-                    error: LexicalErrorType::IndentationError,
-                    location: self.offset(),
-                });
+                return Err(LexicalError::new(
+                    LexicalErrorType::IndentationError,
+                    self.offset(),
+                ));
            }
        };
 
@@ -971,10 +963,7 @@ impl<'source> Lexer<'source> {
         if self.nesting > 0 {
             // Reset the nesting to avoid going into infinite loop.
             self.nesting = 0;
-            return Err(LexicalError {
-                error: LexicalErrorType::Eof,
-                location: self.offset(),
-            });
+            return Err(LexicalError::new(LexicalErrorType::Eof, self.offset()));
         }
 
         // Next, insert a trailing newline, if required.
@@ -1139,10 +1128,10 @@ impl<'source> Lexer<'source> {
            '}' => {
                if let Some(fstring) = self.fstrings.current_mut() {
                    if fstring.nesting() == self.nesting {
-                        return Err(LexicalError {
-                            error: LexicalErrorType::FStringError(FStringErrorType::SingleRbrace),
-                            location: self.token_start(),
-                        });
+                        return Err(LexicalError::new(
+                            LexicalErrorType::FStringError(FStringErrorType::SingleRbrace),
+                            self.token_start(),
+                        ));
                    }
                    fstring.try_end_format_spec(self.nesting);
                }
@@ -1233,10 +1222,10 @@ impl<'source> Lexer<'source> {
            _ => {
                self.state = State::Other;
 
-                return Err(LexicalError {
-                    error: LexicalErrorType::UnrecognizedToken { tok: c },
-                    location: self.token_start(),
-                });
+                return Err(LexicalError::new(
+                    LexicalErrorType::UnrecognizedToken { tok: c },
+                    self.token_start(),
+                ));
            }
        };
 
@@ -1295,43 +1284,46 @@ impl FusedIterator for Lexer<'_> {}
 ///
 /// [lexer]: crate::lexer
 #[derive(Debug, Clone, PartialEq)]
-pub struct LexicalError {
-    /// The type of error that occurred.
-    error: LexicalErrorType,
-    /// The location of the error.
-    location: TextSize,
-}
+pub struct LexicalError(Box<LexicalErrorInner>);
 
 impl LexicalError {
     /// Creates a new `LexicalError` with the given error type and location.
     pub fn new(error: LexicalErrorType, location: TextSize) -> Self {
-        Self { error, location }
+        Self(Box::new(LexicalErrorInner { error, location }))
     }
 
     pub fn error(&self) -> &LexicalErrorType {
-        &self.error
+        &self.0.error
     }
 
     pub fn into_error(self) -> LexicalErrorType {
-        self.error
+        self.0.error
     }
 
     pub fn location(&self) -> TextSize {
-        self.location
+        self.0.location
     }
 }
 
+#[derive(Debug, Clone, PartialEq)]
+struct LexicalErrorInner {
+    /// The type of error that occurred.
+    error: LexicalErrorType,
+    /// The location of the error.
+    location: TextSize,
+}
+
 impl std::ops::Deref for LexicalError {
     type Target = LexicalErrorType;
 
     fn deref(&self) -> &Self::Target {
-        &self.error
+        self.error()
     }
 }
 
 impl std::error::Error for LexicalError {
     fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
-        Some(&self.error)
+        Some(self.error())
     }
 }
@@ -1340,8 +1332,8 @@ impl std::fmt::Display for LexicalError {
         write!(
             f,
             "{} at byte offset {}",
-            &self.error,
-            u32::from(self.location)
+            self.error(),
+            u32::from(self.location())
         )
     }
 }
@@ -2005,8 +1997,8 @@ def f(arg=%timeit a = b):
         match lexed.as_slice() {
             [Err(error)] => {
                 assert_eq!(
-                    error.error,
-                    LexicalErrorType::UnrecognizedToken { tok: '🐦' }
+                    error.error(),
+                    &LexicalErrorType::UnrecognizedToken { tok: '🐦' }
                 );
             }
             result => panic!("Expected an error token but found {result:?}"),
@@ -2219,7 +2211,7 @@ f"{(lambda x:{x})}"
     }
 
     fn lex_fstring_error(source: &str) -> FStringErrorType {
-        match lex_error(source).error {
+        match lex_error(source).into_error() {
             LexicalErrorType::FStringError(error) => error,
             err => panic!("Expected FStringError: {err:?}"),
         }
@@ -2270,21 +2262,25 @@ f"{(lambda x:{x})}"
     #[test]
     fn test_fstring_error_location() {
         assert_debug_snapshot!(lex_error("f'{'"), @r###"
-        LexicalError {
-            error: FStringError(
-                UnclosedLbrace,
-            ),
-            location: 4,
-        }
+        LexicalError(
+            LexicalErrorInner {
+                error: FStringError(
+                    UnclosedLbrace,
+                ),
+                location: 4,
+            },
+        )
         "###);
 
         assert_debug_snapshot!(lex_error("f'{'α"), @r###"
-        LexicalError {
-            error: FStringError(
-                UnclosedLbrace,
-            ),
-            location: 6,
-        }
+        LexicalError(
+            LexicalErrorInner {
+                error: FStringError(
+                    UnclosedLbrace,
+                ),
+                location: 6,
+            },
+        )
         "###);
     }
 }
diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__invalid_leading_zero_big.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__invalid_leading_zero_big.snap
index c2906398a51415..449450d4f65943 100644
--- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__invalid_leading_zero_big.snap
+++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__invalid_leading_zero_big.snap
@@ -3,10 +3,12 @@ source: crates/ruff_python_parser/src/lexer.rs
 expression: tokens
 ---
 Err(
-    LexicalError {
-        error: OtherError(
-            "Invalid Token",
-        ),
-        location: 0,
-    },
+    LexicalError(
+        LexicalErrorInner {
+            error: OtherError(
+                "Invalid Token",
+            ),
+            location: 0,
+        },
+    ),
 )
diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__invalid_leading_zero_small.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__invalid_leading_zero_small.snap
index c2906398a51415..449450d4f65943 100644
--- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__invalid_leading_zero_small.snap
+++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__invalid_leading_zero_small.snap
@@ -3,10 +3,12 @@ source: crates/ruff_python_parser/src/lexer.rs
 expression: tokens
 ---
 Err(
-    LexicalError {
-        error: OtherError(
-            "Invalid Token",
-        ),
-        location: 0,
-    },
+    LexicalError(
+        LexicalErrorInner {
+            error: OtherError(
+                "Invalid Token",
+            ),
+            location: 0,
+        },
+    ),
 )
diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__tet_too_low_dedent.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__tet_too_low_dedent.snap
index 8a9ba410ae92da..55d67c5867f392 100644
--- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__tet_too_low_dedent.snap
+++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__tet_too_low_dedent.snap
@@ -46,10 +46,12 @@ expression: tokens
         ),
     ),
     Err(
-        LexicalError {
-            error: IndentationError,
-            location: 20,
-        },
+        LexicalError(
+            LexicalErrorInner {
+                error: IndentationError,
+                location: 20,
+            },
+        ),
     ),
     Ok(
         (
diff --git a/crates/ruff_python_parser/src/token.rs b/crates/ruff_python_parser/src/token.rs
index 07843bf1713b1c..9472eb1ed20f09 100644
--- a/crates/ruff_python_parser/src/token.rs
+++ b/crates/ruff_python_parser/src/token.rs
@@ -8,7 +8,6 @@ use crate::Mode;
 use ruff_python_ast::{Int, IpyEscapeKind};
 use ruff_text_size::TextSize;
 
-use static_assertions::assert_eq_size;
 use std::fmt;
 
 /// The set of tokens the Python source code can be tokenized in.
@@ -914,4 +913,12 @@ impl From<&Tok> for TokenKind {
     }
 }
 
-assert_eq_size!(Tok, [u8; 32]);
+#[cfg(target_pointer_width = "64")]
+mod sizes {
+    use crate::lexer::LexicalError;
+    use crate::Tok;
+    use static_assertions::assert_eq_size;
+
+    assert_eq_size!(Tok, [u8; 32]);
+    assert_eq_size!(Result<Tok, LexicalError>, [u8; 32]);
+}
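
Reviewer note, appended after the patch rather than inside it: the sketch below is a minimal, self-contained illustration of the size argument in the commit message, not code from ruff. The type names (`FatPayload`, `FatError`, `ThinError`) are stand-ins for `LexicalErrorInner` and `LexicalError`, and the byte counts assume a 64-bit target.

    #![allow(dead_code)] // the fields exist only to give the types their size
    use std::mem::size_of;

    // Stand-in for `LexicalErrorInner`: an error payload carrying heap data
    // (ruff's `LexicalErrorType` holds `String`s) plus a byte offset.
    struct FatPayload {
        message: String, // 24 bytes on 64-bit targets
        location: u32,
    }

    // Storing the payload inline makes the error as large as the payload...
    struct FatError(FatPayload);
    // ...while boxing it shrinks the error to a single (non-null) pointer.
    struct ThinError(Box<FatPayload>);

    fn main() {
        // The boxed error is pointer-sized; the inline one is not.
        assert_eq!(size_of::<ThinError>(), size_of::<usize>());
        assert!(size_of::<FatError>() > size_of::<ThinError>());

        // With the boxed error, the `Err` variant no longer dominates the
        // `Result`'s size (`u64` stands in for the 32-byte `Tok`).
        assert!(size_of::<Result<u64, ThinError>>() <= size_of::<Result<u64, FatError>>());
        println!(
            "Result<u64, FatError>: {} bytes, Result<u64, ThinError>: {} bytes",
            size_of::<Result<u64, FatError>>(),
            size_of::<Result<u64, ThinError>>(),
        );
    }

The trade-off is one heap allocation per lexical error; since errors are rare compared to successfully lexed tokens, shrinking the `Result` that every call returns is the better deal.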