From 68d235be5fc28fe985f21488765bff9d1b7d98cb Mon Sep 17 00:00:00 2001 From: iequidoo Date: Sun, 30 Jul 2023 02:05:49 -0300 Subject: [PATCH] feat: Remove original file stem from filenames in the blobstorage (#4309) This way filenames in the blobstorage are just random hex numbers. This also allows us to get rid of the `sanitize-filename` dependency. This also requires `Param::Filename` to be set to "debug_logging*.xdc" for messages containing logging webxdc-s, otherwise they are not detected properly. This is done in "fix: Message::set_file_from_bytes(): Set Param::Filename", so don't forget to update senders as well. --- Cargo.lock | 1 - Cargo.toml | 1 - node/test/test.mjs | 1 - python/tests/test_1_online.py | 3 - python/tests/test_2_increation.py | 1 - src/blob.rs | 123 ++++++++---------------------- src/chat.rs | 2 +- src/debug_logging.rs | 19 ++--- src/mimeparser.rs | 2 +- src/param.rs | 2 - src/receive_imf.rs | 4 +- src/receive_imf/tests.rs | 2 - 12 files changed, 43 insertions(+), 118 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8e642c5b22..1324b5642f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1144,7 +1144,6 @@ dependencies = [ "reqwest", "rusqlite", "rust-hsluv", - "sanitize-filename", "serde", "serde_json", "sha-1", diff --git a/Cargo.toml b/Cargo.toml index d22a496301..53b8216a05 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -80,7 +80,6 @@ regex = "1.10" reqwest = { version = "0.11.24", features = ["json"] } rusqlite = { version = "0.31", features = ["sqlcipher"] } rust-hsluv = "0.1" -sanitize-filename = "0.5" serde_json = "1" serde = { version = "1.0", features = ["derive"] } sha-1 = "0.10" diff --git a/node/test/test.mjs b/node/test/test.mjs index 8ead548ff0..6ab4268000 100644 --- a/node/test/test.mjs +++ b/node/test/test.mjs @@ -442,7 +442,6 @@ describe('Offline Tests with unconfigured account', function () { context.setChatProfileImage(chatId, imagePath) const blobPath = context.getChat(chatId).getProfileImage() expect(blobPath.startsWith(blobs)).to.be.true - expect(blobPath.includes('image')).to.be.true expect(blobPath.endsWith('.jpeg')).to.be.true context.setChatProfileImage(chatId, null) diff --git a/python/tests/test_1_online.py b/python/tests/test_1_online.py index 8da04bcfe4..5ccfa8b5a7 100644 --- a/python/tests/test_1_online.py +++ b/python/tests/test_1_online.py @@ -182,13 +182,11 @@ def send_and_receive_message(): msg = send_and_receive_message() assert msg.text == "withfile" assert open(msg.file_path).read() == "some data" - msg.file_path.index(basename) assert msg.file_path.endswith(ext) msg2 = send_and_receive_message() assert msg2.text == "withfile" assert open(msg2.file_path).read() == "some data" - msg2.file_path.index(basename) assert msg2.file_path.endswith(ext) assert msg.file_path != msg2.file_path @@ -215,7 +213,6 @@ def test_send_file_html_attachment(tmp_path, acfactory, lp): msg = ac2.get_message_by_id(ev.data2) assert open(msg.file_path).read() == content - msg.file_path.index(basename) assert msg.file_path.endswith(ext) diff --git a/python/tests/test_2_increation.py b/python/tests/test_2_increation.py index 432a696217..e7f7d10e5e 100644 --- a/python/tests/test_2_increation.py +++ b/python/tests/test_2_increation.py @@ -50,7 +50,6 @@ def test_no_increation_copies_to_blobdir(self, tmp_path, acfactory, lp): src = tmp_path / "file.txt" src.write_text("hello there\n") msg = chat.send_file(str(src)) - assert msg.file_path.startswith(os.path.join(ac1.get_blobdir(), "file")) assert msg.file_path.endswith(".txt") def test_forward_increation(self, acfactory, data, lp): diff --git a/src/blob.rs b/src/blob.rs index cdb88c41ca..0d99fff053 100644 --- a/src/blob.rs +++ b/src/blob.rs @@ -51,8 +51,8 @@ impl<'a> BlobObject<'a> { data: &[u8], ) -> Result> { let blobdir = context.get_blobdir(); - let (stem, ext) = BlobObject::sanitise_name(suggested_name); - let (name, mut file) = BlobObject::create_new_file(context, blobdir, &stem, &ext).await?; + let ext = BlobObject::get_extension(suggested_name); + let (name, mut file) = BlobObject::create_new_file(context, blobdir, &ext).await?; file.write_all(data).await.context("file write failure")?; // workaround a bug in async-std @@ -72,12 +72,11 @@ impl<'a> BlobObject<'a> { async fn create_new_file( context: &Context, dir: &Path, - stem: &str, ext: &str, ) -> Result<(String, fs::File)> { let mut attempt = 0; loop { - let name = format!("{}-{:016x}{}", stem, rand::random::(), ext); + let name = format!("{:016x}{}", rand::random::(), ext); attempt += 1; let path = dir.join(&name); match fs::OpenOptions::new() @@ -108,9 +107,9 @@ impl<'a> BlobObject<'a> { let mut src_file = fs::File::open(src) .await .with_context(|| format!("failed to open file {}", src.display()))?; - let (stem, ext) = BlobObject::sanitise_name(&src.to_string_lossy()); + let ext = BlobObject::get_extension(&src.to_string_lossy()); let (name, mut dst_file) = - BlobObject::create_new_file(context, context.get_blobdir(), &stem, &ext).await?; + BlobObject::create_new_file(context, context.get_blobdir(), &ext).await?; let name_for_err = name.clone(); if let Err(err) = io::copy(&mut src_file, &mut dst_file).await { // Attempt to remove the failed file, swallow errors resulting from that. @@ -134,10 +133,8 @@ impl<'a> BlobObject<'a> { /// /// If the source file is not a path to into the blob directory /// the file will be copied into the blob directory first. If the - /// source file is already in the blobdir it will not be copied - /// and only be created if it is a valid blobname, that is no - /// subdirectory is used and [BlobObject::sanitise_name] does not - /// modify the filename. + /// source file is already in the blobdir (but not in a subdirectory) + /// it will not be copied and only be created if it is a valid blobname. /// /// Paths into the blob directory may be either defined by an absolute path /// or by the relative prefix `$BLOBDIR`. @@ -154,8 +151,7 @@ impl<'a> BlobObject<'a> { /// Returns a [BlobObject] for an existing blob from a path. /// /// The path must designate a file directly in the blobdir and - /// must use a valid blob name. That is after sanitisation the - /// name must still be the same, that means it must be valid UTF-8 + /// must use a valid blob name. That means it must be valid UTF-8 /// and not have any special characters in it. pub fn from_path(context: &'a Context, path: &Path) -> Result> { let rel_path = path @@ -229,21 +225,8 @@ impl<'a> BlobObject<'a> { } } - /// Create a safe name based on a messy input string. - /// - /// The safe name will be a valid filename on Unix and Windows and - /// not contain any path separators. The input can contain path - /// segments separated by either Unix or Windows path separators, - /// the rightmost non-empty segment will be used as name, - /// sanitised for special characters. - /// - /// The resulting name is returned as a tuple, the first part - /// being the stem or basename and the second being an extension, - /// including the dot. E.g. "foo.txt" is returned as `("foo", - /// ".txt")` while "bar" is returned as `("bar", "")`. - /// - /// The extension part will always be lowercased. - fn sanitise_name(name: &str) -> (String, String) { + /// Get a file extension if any, including the dot, in lower case, otherwise an empty string. + fn get_extension(name: &str) -> String { let mut name = name.to_string(); for part in name.rsplit('/') { if !part.is_empty() { @@ -257,20 +240,12 @@ impl<'a> BlobObject<'a> { break; } } - let opts = sanitize_filename::Options { - truncate: true, - windows: true, - replacement: "", - }; - let clean = sanitize_filename::sanitize_with_options(name, opts); // Let's take the tricky filename // "file.with_lots_of_characters_behind_point_and_double_ending.tar.gz" as an example. // Split it into "file" and "with_lots_of_characters_behind_point_and_double_ending.tar.gz": - let mut iter = clean.splitn(2, '.'); - - let stem: String = iter.next().unwrap_or_default().chars().take(64).collect(); - // stem == "file" + let mut iter = name.splitn(2, '.'); + iter.next(); let ext_chars = iter.next().unwrap_or_default().chars(); let ext: String = ext_chars @@ -283,11 +258,10 @@ impl<'a> BlobObject<'a> { // ext == "d_point_and_double_ending.tar.gz" if ext.is_empty() { - (stem, "".to_string()) + ext } else { - (stem, format!(".{ext}").to_lowercase()) - // Return ("file", ".d_point_and_double_ending.tar.gz") - // which is not perfect but acceptable. + format!(".{ext}").to_lowercase() + // Return ".d_point_and_double_ending.tar.gz", which is not perfect but acceptable. } } @@ -743,7 +717,7 @@ mod tests { async fn test_create() { let t = TestContext::new().await; let blob = BlobObject::create(&t, "foo", b"hello").await.unwrap(); - let re = Regex::new("^foo-[[:xdigit:]]{16}$").unwrap(); + let re = Regex::new("^[[:xdigit:]]{16}$").unwrap(); assert!(re.is_match(blob.as_file_name())); let fname = t.get_blobdir().join(blob.as_file_name()); let data = fs::read(fname).await.unwrap(); @@ -762,7 +736,7 @@ mod tests { async fn test_lowercase_ext() { let t = TestContext::new().await; let blob = BlobObject::create(&t, "foo.TXT", b"hello").await.unwrap(); - let re = Regex::new("^\\$BLOBDIR/foo-[[:xdigit:]]{16}.txt$").unwrap(); + let re = Regex::new("^\\$BLOBDIR/[[:xdigit:]]{16}.txt$").unwrap(); assert!(re.is_match(blob.as_name())); } @@ -770,7 +744,7 @@ mod tests { async fn test_as_file_name() { let t = TestContext::new().await; let blob = BlobObject::create(&t, "foo.txt", b"hello").await.unwrap(); - let re = Regex::new("^foo-[[:xdigit:]]{16}.txt$").unwrap(); + let re = Regex::new("^[[:xdigit:]]{16}.txt$").unwrap(); assert!(re.is_match(blob.as_file_name())); } @@ -778,7 +752,7 @@ mod tests { async fn test_as_rel_path() { let t = TestContext::new().await; let blob = BlobObject::create(&t, "foo.txt", b"hello").await.unwrap(); - let re = Regex::new("^foo-[[:xdigit:]]{16}.txt$").unwrap(); + let re = Regex::new("^[[:xdigit:]]{16}.txt$").unwrap(); assert!(re.is_match(blob.as_rel_path().to_str().unwrap())); } @@ -794,7 +768,7 @@ mod tests { #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn test_create_dup() { let t = TestContext::new().await; - let re = Regex::new("^foo-[[:xdigit:]]{16}.txt$").unwrap(); + let re = Regex::new("^[[:xdigit:]]{16}.txt$").unwrap(); let blob = BlobObject::create(&t, "foo.txt", b"hello").await.unwrap(); assert!(re.is_match(blob.as_rel_path().to_str().unwrap())); @@ -815,7 +789,7 @@ mod tests { let blob = BlobObject::create(&t, "foo.tar.gz", b"hello") .await .unwrap(); - let re = Regex::new("^foo-[[:xdigit:]]{16}.tar.gz$").unwrap(); + let re = Regex::new("^[[:xdigit:]]{16}.tar.gz$").unwrap(); assert!(re.is_match(blob.as_file_name())); let foo_path = t.get_blobdir().join(blob.as_file_name()); assert!(foo_path.exists()); @@ -830,7 +804,6 @@ mod tests { } else { let name = fname.to_str().unwrap(); println!("{name}"); - assert!(name.starts_with("foo")); assert!(name.ends_with(".tar.gz")); } } @@ -851,7 +824,7 @@ mod tests { let src = t.dir.path().join("src"); fs::write(&src, b"boo").await.unwrap(); let blob = BlobObject::create_and_copy(&t, src.as_ref()).await.unwrap(); - let re = Regex::new("^\\$BLOBDIR/src-[[:xdigit:]]{16}$").unwrap(); + let re = Regex::new("^\\$BLOBDIR/[[:xdigit:]]{16}$").unwrap(); assert!(re.is_match(blob.as_name())); let data = fs::read(blob.to_abs_path()).await.unwrap(); assert_eq!(data, b"boo"); @@ -873,7 +846,7 @@ mod tests { let blob = BlobObject::new_from_path(&t, src_ext.as_ref()) .await .unwrap(); - let re = Regex::new("^\\$BLOBDIR/external-[[:xdigit:]]{16}$").unwrap(); + let re = Regex::new("^\\$BLOBDIR/[[:xdigit:]]{16}$").unwrap(); assert!(re.is_match(blob.as_name())); let data = fs::read(blob.to_abs_path()).await.unwrap(); assert_eq!(data, b"boo"); @@ -886,20 +859,6 @@ mod tests { assert_eq!(data, b"boo"); } - #[tokio::test(flavor = "multi_thread", worker_threads = 2)] - async fn test_create_from_name_long() { - let t = TestContext::new().await; - let src_ext = t.dir.path().join("autocrypt-setup-message-4137848473.html"); - fs::write(&src_ext, b"boo").await.unwrap(); - let blob = BlobObject::new_from_path(&t, src_ext.as_ref()) - .await - .unwrap(); - let re = - Regex::new("^\\$BLOBDIR/autocrypt-setup-message-4137848473-[[:xdigit:]]{16}.html$") - .unwrap(); - assert!(re.is_match(blob.as_name())); - } - #[test] fn test_is_blob_name() { assert!(BlobObject::is_acceptible_blob_name("foo")); @@ -911,42 +870,24 @@ mod tests { } #[test] - fn test_sanitise_name() { - let (stem, ext) = - BlobObject::sanitise_name("Я ЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯ.txt"); + fn test_get_extension() { + let ext = BlobObject::get_extension("Я ЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯ.txt"); assert_eq!(ext, ".txt"); - assert!(!stem.is_empty()); - // the extensions are kept together as between stem and extension a number may be added - - // and `foo.tar.gz` should become `foo-1234.tar.gz` and not `foo.tar-1234.gz` - let (stem, ext) = BlobObject::sanitise_name("wot.tar.gz"); - assert_eq!(stem, "wot"); + let ext = BlobObject::get_extension("wot.tar.gz"); assert_eq!(ext, ".tar.gz"); - let (stem, ext) = BlobObject::sanitise_name(".foo.bar"); - assert_eq!(stem, ""); + let ext = BlobObject::get_extension(".foo.bar"); assert_eq!(ext, ".foo.bar"); - let (stem, ext) = BlobObject::sanitise_name("foo?.bar"); - assert!(stem.contains("foo")); - assert!(!stem.contains('?')); + let ext = BlobObject::get_extension("foo?.bar"); assert_eq!(ext, ".bar"); - let (stem, ext) = BlobObject::sanitise_name("no-extension"); - assert_eq!(stem, "no-extension"); + let ext = BlobObject::get_extension("no-extension"); assert_eq!(ext, ""); - let (stem, ext) = BlobObject::sanitise_name("path/ignored\\this: is* forbidden?.c"); + let ext = BlobObject::get_extension("path/ignored\\this: is* forbidden?.c"); assert_eq!(ext, ".c"); - assert!(!stem.contains("path")); - assert!(!stem.contains("ignored")); - assert!(stem.contains("this")); - assert!(stem.contains("forbidden")); - assert!(!stem.contains('/')); - assert!(!stem.contains('\\')); - assert!(!stem.contains(':')); - assert!(!stem.contains('*')); - assert!(!stem.contains('?')); } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] @@ -995,7 +936,7 @@ mod tests { let avatar_blob = t.get_config(Config::Selfavatar).await.unwrap().unwrap(); let blobdir = t.get_blobdir().to_str().unwrap(); assert!(avatar_blob.starts_with(blobdir)); - let re = Regex::new("avatar-[[:xdigit:]]{16}.jpg$").unwrap(); + let re = Regex::new("[[:xdigit:]]{16}.jpg$").unwrap(); assert!(re.is_match(&avatar_blob)); let avatar_blob = Path::new(&avatar_blob); assert!(avatar_blob.exists()); @@ -1072,7 +1013,7 @@ mod tests { let avatar_blob = t.get_config(Config::Selfavatar).await.unwrap().unwrap(); let blobdir = t.get_blobdir().to_str().unwrap(); assert!(avatar_blob.starts_with(blobdir)); - let re = Regex::new("avatar-[[:xdigit:]]{16}.png$").unwrap(); + let re = Regex::new("[[:xdigit:]]{16}.png$").unwrap(); assert!(re.is_match(&avatar_blob)); assert!(Path::new(&avatar_blob).exists()); assert_eq!( diff --git a/src/chat.rs b/src/chat.rs index d0389ed0e0..7e64a74910 100644 --- a/src/chat.rs +++ b/src/chat.rs @@ -7030,7 +7030,7 @@ mod tests { msg.param.get(Param::Filename).unwrap(), "harmless_file.txt.exe" ); - let re = Regex::new("^\\$BLOBDIR/harmless_file-[[:xdigit:]]{16}.txt.exe$").unwrap(); + let re = Regex::new("^\\$BLOBDIR/[[:xdigit:]]{16}.txt.exe$").unwrap(); assert!(re.is_match(msg.param.get(Param::File).unwrap())); Ok(()) } diff --git a/src/debug_logging.rs b/src/debug_logging.rs index bcccaaf9f5..2315476eb8 100644 --- a/src/debug_logging.rs +++ b/src/debug_logging.rs @@ -9,7 +9,6 @@ use crate::tools::time; use crate::webxdc::StatusUpdateItem; use async_channel::{self as channel, Receiver, Sender}; use serde_json::json; -use std::path::PathBuf; use tokio::task; #[derive(Debug)] @@ -97,7 +96,7 @@ pub async fn maybe_set_logging_xdc( context, msg.get_viewtype(), chat_id, - msg.param.get_path(Param::File, context).unwrap_or_default(), + msg.param.get(Param::Filename), msg.get_id(), ) .await?; @@ -110,18 +109,16 @@ pub async fn maybe_set_logging_xdc_inner( context: &Context, viewtype: Viewtype, chat_id: ChatId, - file: Option, + file_name: Option<&str>, msg_id: MsgId, ) -> anyhow::Result<()> { if viewtype == Viewtype::Webxdc { - if let Some(file) = file { - if let Some(file_name) = file.file_name().and_then(|name| name.to_str()) { - if file_name.starts_with("debug_logging") - && file_name.ends_with(".xdc") - && chat_id.is_self_talk(context).await? - { - set_debug_logging_xdc(context, Some(msg_id)).await?; - } + if let Some(file_name) = file_name { + if file_name.starts_with("debug_logging") + && file_name.ends_with(".xdc") + && chat_id.is_self_talk(context).await? + { + set_debug_logging_xdc(context, Some(msg_id)).await?; } } } diff --git a/src/mimeparser.rs b/src/mimeparser.rs index 5960f83623..f58aaf19ac 100644 --- a/src/mimeparser.rs +++ b/src/mimeparser.rs @@ -3807,7 +3807,7 @@ Message. mime_message.parts[0].msg, "this is a classic email – I attached the .EML file".to_string() ); - let re = Regex::new("^\\$BLOBDIR/-[[:xdigit:]]{16}.eml$").unwrap(); + let re = Regex::new("^\\$BLOBDIR/[[:xdigit:]]{16}.eml$").unwrap(); assert!(re.is_match(mime_message.parts[0].param.get(Param::File).unwrap())); assert_eq!(mime_message.parts[0].org_filename, Some(".eml".to_string())); diff --git a/src/param.rs b/src/param.rs index d035d2d112..b5b4d5dd9f 100644 --- a/src/param.rs +++ b/src/param.rs @@ -542,8 +542,6 @@ mod tests { assert!(p.get_blob(Param::File, &t, false).await.is_err()); fs::write(fname, b"boo").await.unwrap(); - let blob = p.get_blob(Param::File, &t, true).await.unwrap().unwrap(); - assert!(blob.as_file_name().starts_with("foo")); // Blob in blobdir, expect blob. let bar_path = t.get_blobdir().join("bar"); diff --git a/src/receive_imf.rs b/src/receive_imf.rs index 4383b5f0cb..55ca840cc7 100644 --- a/src/receive_imf.rs +++ b/src/receive_imf.rs @@ -1536,9 +1536,7 @@ RETURNING id context, part.typ, chat_id, - part.param - .get_path(Param::File, context) - .unwrap_or_default(), + part.param.get(Param::Filename), *msg_id, ) .await?; diff --git a/src/receive_imf/tests.rs b/src/receive_imf/tests.rs index dcf9a12214..8e1fff4b40 100644 --- a/src/receive_imf/tests.rs +++ b/src/receive_imf/tests.rs @@ -1564,7 +1564,6 @@ async fn test_pdf_filename_simple() { assert_eq!(msg.viewtype, Viewtype::File); assert_eq!(msg.text, "mail body"); let file_path = msg.param.get(Param::File).unwrap(); - assert!(file_path.starts_with("$BLOBDIR/simple")); assert!(file_path.ends_with(".pdf")); } @@ -1580,7 +1579,6 @@ async fn test_pdf_filename_continuation() { assert_eq!(msg.viewtype, Viewtype::File); assert_eq!(msg.text, "mail body"); let file_path = msg.param.get(Param::File).unwrap(); - assert!(file_path.starts_with("$BLOBDIR/test pdf äöüß")); assert!(file_path.ends_with(".pdf")); }