From 9ce488737baa3666e8a3b601f3581a2560197684 Mon Sep 17 00:00:00 2001 From: Yikai Zhao Date: Sun, 7 Jan 2024 22:34:35 +0800 Subject: [PATCH] slightly reduce bytecode size using smaller oct escape --- src/bytecode.rs | 17 +++++++++++++++-- tests/bytecode_test.rs | 2 +- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/bytecode.rs b/src/bytecode.rs index 98f3b44..d674381 100644 --- a/src/bytecode.rs +++ b/src/bytecode.rs @@ -63,7 +63,9 @@ impl LispObject { let mut result = String::new(); result.reserve(vec.len() * 4 + 2); result.push('"'); + let mut last_oct_escape_not_full = false; for c in vec { + let mut oct_escape_not_full = false; match *c { 7 => result += "\\a", 8 => result += "\\b", @@ -78,10 +80,21 @@ impl LispObject { result += &format!("\\^{}", (*c as u32 + 64) as u8 as char); }, 27..=31 | 128..=255 | 34 | 92 => { // oct, for unprintable and '"' and '\\' - result += &format!("\\{:03o}", *c as u32); + let oct_s = format!("\\{:o}", *c as u32); + if oct_s.len() < 4 { + oct_escape_not_full = true; + } + result += &oct_s; + }, + _ => { // printable + // https://www.gnu.org/software/emacs/manual/html_node/elisp/Non_002dASCII-in-Strings.html + if last_oct_escape_not_full && ('0'..='7').contains(&(*c as char)) { + result += "\\ "; + } + result.push(*c as char); }, - _ => result.push(*c as char), // printable } + last_oct_escape_not_full = oct_escape_not_full; } result.push('"'); result diff --git a/tests/bytecode_test.rs b/tests/bytecode_test.rs index dad11ac..e2da5f3 100644 --- a/tests/bytecode_test.rs +++ b/tests/bytecode_test.rs @@ -50,7 +50,7 @@ fn run_one_test(json_str: &str, object_type: bytecode::ObjectType) -> Result<()> #[test] fn test_bytecode() { // unicode test - run_one_test(r#"{"a":"ÀÁÂÃÄÅÆÇÈÉÊËÌ abcd \n 你好世界"}"#, bytecode::ObjectType::Plist).unwrap(); + run_one_test(r#"{"a":"ÀÁÂÃÄÅÆÇÈÉÊËÌ\u0000 abcd \n 你好世界"}"#, bytecode::ObjectType::Plist).unwrap(); for object_type in vec![bytecode::ObjectType::Plist, bytecode::ObjectType::Alist,