Skip to content

Commit 2080cc2

Browse files
committed
Refactor common::str::repr using common::escape
1 parent 9cc8f2d commit 2080cc2

File tree

10 files changed

+292
-203
lines changed

10 files changed

+292
-203
lines changed

common/src/bytes.rs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
use crate::str::{Quote, ReprOverflowError};
1+
use crate::escape::Quote;
2+
use crate::str::ReprOverflowError;
23

34
pub fn repr(b: &[u8]) -> Result<String, ReprOverflowError> {
45
repr_with(b, &[], "", Quote::Single)
@@ -37,7 +38,7 @@ pub fn repr_with(
3738
out_len = out_len.checked_add(incr).ok_or(ReprOverflowError)?;
3839
}
3940

40-
let (quote, num_escaped_quotes) = crate::str::choose_quotes_for_repr(squote, dquote, quote);
41+
let (quote, num_escaped_quotes) = crate::escape::choose_quote(squote, dquote, quote);
4142
// we'll be adding backslashes in front of the existing inner quotes
4243
out_len += num_escaped_quotes;
4344

@@ -47,7 +48,7 @@ pub fn repr_with(
4748
let mut res = String::with_capacity(out_len);
4849
res.extend(prefixes.iter().copied());
4950
res.push('b');
50-
res.push(quote);
51+
res.push(quote.to_char());
5152
for &ch in b {
5253
match ch {
5354
b'\t' => res.push_str("\\t"),
@@ -56,15 +57,15 @@ pub fn repr_with(
5657
// printable ascii range
5758
0x20..=0x7e => {
5859
let ch = ch as char;
59-
if ch == quote || ch == '\\' {
60+
if ch == quote.to_char() || ch == '\\' {
6061
res.push('\\');
6162
}
6263
res.push(ch);
6364
}
6465
_ => write!(res, "\\x{ch:02x}").unwrap(),
6566
}
6667
}
67-
res.push(quote);
68+
res.push(quote.to_char());
6869
res.push_str(suffix);
6970

7071
Ok(res)

common/src/escape.rs

Lines changed: 268 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,268 @@
1+
/// The quote character used to delimit an escaped literal.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Quote {
    /// The single quote, `'`.
    Single,
    /// The double quote, `"`.
    Double,
}

impl Quote {
    /// Returns the other quote kind: `Single` becomes `Double` and vice versa.
    #[inline]
    pub const fn swap(self) -> Quote {
        match self {
            Quote::Single => Quote::Double,
            Quote::Double => Quote::Single,
        }
    }

    /// Returns the quote as an ASCII byte (`b'\''` or `b'"'`).
    #[inline]
    pub const fn to_byte(&self) -> u8 {
        match self {
            Quote::Single => b'\'',
            Quote::Double => b'"',
        }
    }

    /// Returns the quote as a `char` (`'\''` or `'"'`).
    #[inline]
    pub const fn to_char(&self) -> char {
        // Both quotes are ASCII, so widening the byte is lossless and keeps
        // the byte/char mapping defined in a single place.
        self.to_byte() as char
    }
}
32+
33+
pub struct EscapeLayout {
34+
pub quote: Quote,
35+
pub len: Option<usize>,
36+
}
37+
38+
pub trait Escape {
39+
type Source: ?Sized;
40+
41+
fn source_len(&self) -> usize;
42+
fn layout(&self) -> &EscapeLayout;
43+
fn changed(&self) -> bool {
44+
self.layout().len != Some(self.source_len())
45+
}
46+
47+
fn output_layout_with_checker(
48+
source: &Self::Source,
49+
preferred_quote: Quote,
50+
reserved_len: usize,
51+
length_add: impl Fn(usize, usize) -> Option<usize>,
52+
) -> EscapeLayout;
53+
// fn output_layout(source: &Self::Source, preferred_quote: Quote) -> EscapeLayout {
54+
// Self::output_layout_with_checker(source, preferred_quote, 2, |a, b| a.checked_add(b))
55+
// }
56+
fn output_layout(source: &Self::Source, preferred_quote: Quote) -> EscapeLayout {
57+
Self::output_layout_with_checker(source, preferred_quote, 2, |a, b| {
58+
Some((a as isize).checked_add(b as isize)? as usize)
59+
})
60+
}
61+
62+
fn write_source(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result;
63+
fn write_body_slow(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result;
64+
fn write_body(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result {
65+
if self.changed() {
66+
self.write_body_slow(formatter)
67+
} else {
68+
self.write_source(formatter)
69+
}
70+
}
71+
fn write_quoted(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result {
72+
let quote = self.layout().quote.to_char();
73+
formatter.write_char(quote)?;
74+
self.write_body(formatter)?;
75+
formatter.write_char(quote)
76+
}
77+
fn to_quoted_string(&self) -> Option<String> {
78+
let len = self.layout().len?.checked_add(2)?;
79+
let mut s = String::with_capacity(len);
80+
self.write_quoted(&mut s).unwrap();
81+
Some(s)
82+
}
83+
}
84+
85+
/// Returns the outer quotes to use and the number of quotes that need to be
86+
/// escaped.
87+
pub(crate) const fn choose_quote(
88+
single_count: usize,
89+
double_count: usize,
90+
preferred_quote: Quote,
91+
) -> (Quote, usize) {
92+
let (primary_count, secondary_count) = match preferred_quote {
93+
Quote::Single => (single_count, double_count),
94+
Quote::Double => (double_count, single_count),
95+
};
96+
97+
// always use primary unless we have primary but no seconday
98+
let use_secondary = primary_count > 0 && secondary_count == 0;
99+
if use_secondary {
100+
(preferred_quote.swap(), secondary_count)
101+
} else {
102+
(preferred_quote, primary_count)
103+
}
104+
}
105+
106+
pub struct UnicodeEscape<'a> {
107+
source: &'a str,
108+
layout: EscapeLayout,
109+
}
110+
111+
impl<'a> UnicodeEscape<'a> {
112+
pub fn with_forced_quote(source: &'a str, quote: Quote) -> Self {
113+
let layout = EscapeLayout { quote, len: None };
114+
Self { source, layout }
115+
}
116+
pub fn new_repr(source: &'a str) -> Self {
117+
let layout = Self::output_layout(source, Quote::Single);
118+
Self { source, layout }
119+
}
120+
pub fn repr<'r>(&'a self) -> UnicodeRepr<'r, 'a> {
121+
UnicodeRepr(self)
122+
}
123+
}
124+
125+
pub struct UnicodeRepr<'r, 'a>(&'r UnicodeEscape<'a>);
126+
127+
impl std::fmt::Display for UnicodeRepr<'_, '_> {
128+
fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
129+
self.0.write_quoted(formatter)
130+
}
131+
}
132+
133+
impl UnicodeEscape<'_> {
134+
fn escaped_char_len(ch: char) -> usize {
135+
match ch {
136+
'\\' | '\t' | '\r' | '\n' => 2,
137+
ch if ch < ' ' || ch as u32 == 0x7f => 4, // \xHH
138+
ch if ch.is_ascii() => 1,
139+
ch if crate::char::is_printable(ch) => {
140+
// max = std::cmp::max(ch, max);
141+
ch.len_utf8()
142+
}
143+
ch if (ch as u32) < 0x100 => 4, // \xHH
144+
ch if (ch as u32) < 0x10000 => 6, // \uHHHH
145+
_ => 10, // \uHHHHHHHH
146+
}
147+
}
148+
149+
fn write_char(
150+
ch: char,
151+
quote: Quote,
152+
formatter: &mut impl std::fmt::Write,
153+
) -> std::fmt::Result {
154+
match ch {
155+
'\n' => formatter.write_str("\\n"),
156+
'\t' => formatter.write_str("\\t"),
157+
'\r' => formatter.write_str("\\r"),
158+
// these 2 branches *would* be handled below, but we shouldn't have to do a
159+
// unicodedata lookup just for ascii characters
160+
'\x20'..='\x7e' => {
161+
// printable ascii range
162+
if ch == quote.to_char() || ch == '\\' {
163+
formatter.write_char('\\')?;
164+
}
165+
formatter.write_char(ch)
166+
}
167+
ch if ch.is_ascii() => {
168+
write!(formatter, "\\x{:02x}", ch as u8)
169+
}
170+
ch if crate::char::is_printable(ch) => formatter.write_char(ch),
171+
'\0'..='\u{ff}' => {
172+
write!(formatter, "\\x{:02x}", ch as u32)
173+
}
174+
'\0'..='\u{ffff}' => {
175+
write!(formatter, "\\u{:04x}", ch as u32)
176+
}
177+
_ => {
178+
write!(formatter, "\\U{:08x}", ch as u32)
179+
}
180+
}
181+
}
182+
}
183+
184+
impl<'a> Escape for UnicodeEscape<'a> {
185+
type Source = str;
186+
187+
fn source_len(&self) -> usize {
188+
self.source.len()
189+
}
190+
191+
fn layout(&self) -> &EscapeLayout {
192+
&self.layout
193+
}
194+
195+
fn output_layout_with_checker(
196+
source: &str,
197+
preferred_quote: Quote,
198+
reserved_len: usize,
199+
length_add: impl Fn(usize, usize) -> Option<usize>,
200+
) -> EscapeLayout {
201+
let mut out_len = reserved_len;
202+
let mut single_count = 0;
203+
let mut double_count = 0;
204+
205+
for ch in source.chars() {
206+
let incr = match ch {
207+
'\'' => {
208+
single_count += 1;
209+
1
210+
}
211+
'"' => {
212+
double_count += 1;
213+
1
214+
}
215+
c => Self::escaped_char_len(c),
216+
};
217+
let Some(new_len) = length_add(out_len, incr) else {
218+
#[cold]
219+
fn stop(single_count: usize, double_count: usize, preferred_quote: Quote) -> EscapeLayout {
220+
EscapeLayout { quote: choose_quote(single_count, double_count, preferred_quote).0, len: None }
221+
}
222+
return stop(single_count, double_count, preferred_quote);
223+
};
224+
out_len = new_len;
225+
}
226+
227+
let (quote, num_escaped_quotes) = choose_quote(single_count, double_count, preferred_quote);
228+
// we'll be adding backslashes in front of the existing inner quotes
229+
let Some(out_len) = length_add(out_len, num_escaped_quotes) else {
230+
return EscapeLayout { quote, len: None };
231+
};
232+
233+
EscapeLayout {
234+
quote,
235+
len: Some(out_len - reserved_len),
236+
}
237+
}
238+
239+
fn write_source(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result {
240+
formatter.write_str(self.source)
241+
}
242+
243+
#[cold]
244+
fn write_body_slow(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result {
245+
for ch in self.source.chars() {
246+
Self::write_char(ch, self.layout().quote, formatter)?;
247+
}
248+
Ok(())
249+
}
250+
}
251+
252+
#[cfg(test)]
mod unicode_escape_tests {
    use super::*;

    /// `changed()` is false exactly when the body can be copied verbatim.
    #[test]
    fn changed() {
        fn test(s: &str) -> bool {
            UnicodeEscape::new_repr(s).changed()
        }
        // Nothing to escape, including strings where switching the outer
        // quote avoids escaping the inner one.
        assert!(!test(""));
        assert!(!test("hello"));
        assert!(!test("'hello'"));
        assert!(!test("\"hello\""));

        // Both quote kinds present, or a character that needs an escape.
        assert!(test("'\"hello"));
        assert!(test("hello\n"));
        assert!(test("back\\slash"));
    }

    /// The quoted output uses the chosen quote and the escaped body.
    #[test]
    fn quoted_string() {
        fn quoted(s: &str) -> Option<String> {
            UnicodeEscape::new_repr(s).to_quoted_string()
        }
        assert_eq!(quoted("hello").as_deref(), Some("'hello'"));
        assert_eq!(quoted("it's").as_deref(), Some("\"it's\""));
        assert_eq!(quoted("a\nb").as_deref(), Some("'a\\nb'"));
    }
}

common/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ pub mod cmp;
1414
#[cfg(any(unix, windows, target_os = "wasi"))]
1515
pub mod crt_fd;
1616
pub mod encodings;
17+
pub mod escape;
1718
pub mod float_ops;
1819
pub mod format;
1920
pub mod hash;

0 commit comments

Comments
 (0)