From 55ed7437e555c0f390d8370dc0a7986241efe1ad Mon Sep 17 00:00:00 2001 From: David Sherret Date: Mon, 1 Apr 2024 10:39:39 -0400 Subject: [PATCH 1/6] perf: reduce allocations in MediaType::from_specifier --- rust-toolchain.toml | 2 +- src/lib.rs | 225 +++++++++++++++++++++++++++----------------- 2 files changed, 142 insertions(+), 85 deletions(-) diff --git a/rust-toolchain.toml b/rust-toolchain.toml index 514569a..b4fc41c 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,4 +1,4 @@ [toolchain] -channel = "1.71.1" +channel = "1.77.1" components = ["clippy", "rustfmt"] profile = "minimal" diff --git a/src/lib.rs b/src/lib.rs index 6ae54b3..7a385f9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,9 +5,6 @@ use serde::Serializer; use std::fmt; use std::path::Path; -#[cfg(feature = "module_specifier")] -use std::path::PathBuf; - #[cfg(feature = "module_specifier")] type ModuleSpecifier = url::Url; @@ -34,22 +31,20 @@ pub enum MediaType { /// Definition files don't have separate content types and so we have to "guess" /// at what they are meant to be. fn map_typescript_like( - path: &Path, + path: impl PathLike, base_type: MediaType, definition_type: MediaType, ) -> MediaType { match path.file_stem() { None => base_type, - Some(os_str) => { - if let Some(file_stem) = os_str.to_str() { - // .ts files that contain .d. in the file name are always considered a typescript declaration file. - // See: https://github.com/microsoft/TypeScript/issues/53319#issuecomment-1474174018 - if file_stem.ends_with(".d") - || (path.to_string_lossy().ends_with(".ts") - && file_stem.contains(".d.")) - { - return definition_type; - } + Some(file_stem) => { + // .ts files that contain .d. in the file name are always considered a typescript declaration file. + // See: https://github.com/microsoft/TypeScript/issues/53319#issuecomment-1474174018 + if file_stem.ends_with(".d") + || (path.ext().map(|ext| ext == "ts").unwrap_or(false) + && file_stem.contains(".d.")) + { + return definition_type; } base_type } @@ -204,32 +199,37 @@ impl MediaType { } pub fn from_path(path: &Path) -> Self { - match path.extension() { + Self::from_path_like(path) + } + + fn from_path_like(path: impl PathLike) -> Self { + match path.ext() { None => match path.file_name() { None => Self::Unknown, - Some(os_str) => { - let lowercase_str = os_str.to_str().map(|s| s.to_lowercase()); - match lowercase_str.as_deref() { - Some(".tsbuildinfo") => Self::TsBuildInfo, + Some(file_name) => { + let lowercase_str = file_name.to_lowercase(); + match lowercase_str.as_str() { + ".tsbuildinfo" => Self::TsBuildInfo, _ => Self::Unknown, } } }, - Some(os_str) => { - let lowercase_str = os_str.to_str().map(|s| s.to_lowercase()); - match lowercase_str.as_deref() { - Some("ts") => map_typescript_like(path, Self::TypeScript, Self::Dts), - Some("mts") => map_typescript_like(path, Self::Mts, Self::Dmts), - Some("cts") => map_typescript_like(path, Self::Cts, Self::Dcts), - Some("tsx") => Self::Tsx, - Some("js") => Self::JavaScript, - Some("jsx") => Self::Jsx, - Some("mjs") => Self::Mjs, - Some("cjs") => Self::Cjs, - Some("json") => Self::Json, - Some("wasm") => Self::Wasm, - Some("tsbuildinfo") => Self::TsBuildInfo, - Some("map") => Self::SourceMap, + Some(ext) => { + let lowercase_str = ext.to_lowercase(); + eprintln!("STR: {}", lowercase_str); + match lowercase_str.as_str() { + "ts" => map_typescript_like(path, Self::TypeScript, Self::Dts), + "mts" => map_typescript_like(path, Self::Mts, Self::Dmts), + "cts" => map_typescript_like(path, Self::Cts, Self::Dcts), + "tsx" => Self::Tsx, + "js" => Self::JavaScript, + "jsx" => Self::Jsx, + "mjs" => Self::Mjs, + "cjs" => Self::Cjs, + "json" => Self::Json, + "wasm" => Self::Wasm, + "tsbuildinfo" => Self::TsBuildInfo, + "map" => Self::SourceMap, _ => Self::Unknown, } } @@ -246,8 +246,7 @@ impl MediaType { use data_url::DataUrl; if specifier.scheme() != "data" { - let path = specifier_to_path(specifier); - Self::from_path(&path) + Self::from_path_like(specifier) } else if let Ok(data_url) = DataUrl::process(specifier.as_str()) { Self::from_content_type(specifier, data_url.mime_type().to_string()) } else { @@ -295,53 +294,20 @@ impl fmt::Display for MediaType { } } -#[cfg(feature = "module_specifier")] -#[cfg(not(target_arch = "wasm32"))] -fn specifier_to_path(specifier: &ModuleSpecifier) -> PathBuf { - if let Ok(path) = specifier.to_file_path() { - path - } else { - specifier_path_to_path(specifier) - } -} - -#[cfg(feature = "module_specifier")] -#[cfg(target_arch = "wasm32")] -fn specifier_to_path(specifier: &ModuleSpecifier) -> PathBuf { - specifier_path_to_path(specifier) -} - -#[cfg(feature = "module_specifier")] -fn specifier_path_to_path(specifier: &ModuleSpecifier) -> PathBuf { - let path = specifier.path(); - if path.is_empty() { - if let Some(domain) = specifier.domain() { - // ex. deno://lib.deno.d.ts - PathBuf::from(domain) - } else { - PathBuf::from("") - } - } else { - PathBuf::from(path) - } -} - /// Used to augment media types by using the path part of a module specifier to /// resolve to a more accurate media type. #[cfg(feature = "module_specifier")] fn map_js_like_extension( - specifier: &ModuleSpecifier, + path: &ModuleSpecifier, default: MediaType, ) -> MediaType { - let path = specifier_to_path(specifier); - match path.extension() { + match path.ext() { None => default, - Some(os_str) => match os_str.to_str() { - None => default, - Some("jsx") => MediaType::Jsx, - Some("mjs") => MediaType::Mjs, - Some("cjs") => MediaType::Cjs, - Some("tsx") => MediaType::Tsx, + Some(ext) => match ext { + "jsx" => MediaType::Jsx, + "mjs" => MediaType::Mjs, + "cjs" => MediaType::Cjs, + "tsx" => MediaType::Tsx, // This preserves legacy behavior, where if a file is served with a // content type of `application/javascript`, but it ends only with a `.ts` // we will assume that it is JavaScript and not TypeScript, but if it ends @@ -349,28 +315,91 @@ fn map_js_like_extension( // // This handles situations where the file is transpiled on the server and // is explicitly providing a media type. - Some("ts") => map_typescript_like(&path, default, MediaType::Dts), - Some("mts") => { + "ts" => map_typescript_like(path, default, MediaType::Dts), + "mts" => { let base_type = if default == MediaType::JavaScript { MediaType::Mjs } else { MediaType::Mts }; - map_typescript_like(&path, base_type, MediaType::Dmts) + map_typescript_like(path, base_type, MediaType::Dmts) } - Some("cts") => { + "cts" => { let base_type = if default == MediaType::JavaScript { MediaType::Cjs } else { MediaType::Cts }; - map_typescript_like(&path, base_type, MediaType::Dcts) + map_typescript_like(path, base_type, MediaType::Dcts) } - Some(_) => default, + _ => default, }, } } +/// Used to reduce allocations when doing MediaType operations on Urls. +trait PathLike { + fn ext(&self) -> Option<&str>; + fn file_name(&self) -> Option<&str>; + fn file_stem(&self) -> Option<&str>; +} + +impl<'a> PathLike for &'a Path { + fn ext(&self) -> Option<&str> { + Path::extension(self).and_then(|ext| ext.to_str()) + } + + fn file_name(&self) -> Option<&str> { + Path::file_name(self).and_then(|os_str| os_str.to_str()) + } + + fn file_stem(&self) -> Option<&str> { + Path::file_stem(self).and_then(|os_str| os_str.to_str()) + } +} + +#[cfg(feature = "module_specifier")] +impl<'a> PathLike for &'a url::Url { + fn ext(&self) -> Option<&str> { + let file_name = self.file_name()?; + let period_index = file_name.rfind('.')?; + if period_index == 0 { + None + } else { + Some(&file_name[period_index + 1..]) + } + } + + fn file_name(&self) -> Option<&str> { + let path = self.path(); + let path = if self.path().is_empty() { + // ex. deno://lib.deno.d.ts + self.domain()? + } else { + path + }; + let path = path.trim_end_matches('/'); + if path.is_empty() { + None + } else { + match path.rfind('/') { + Some(last_slash_index) => Some(&path[last_slash_index + 1..]), + None => Some(path), + } + } + } + + fn file_stem(&self) -> Option<&str> { + let file_name = self.file_name()?; + let period_index = file_name.rfind('.')?; + if period_index == 0 { + Some(file_name) + } else { + Some(&file_name[..period_index]) + } + } +} + #[cfg(test)] mod tests { use super::*; @@ -383,8 +412,9 @@ mod tests { /// Taken from Cargo /// https://github.com/rust-lang/cargo/blob/af307a38c20a753ec60f0ad18be5abed3db3c9ac/src/cargo/util/paths.rs#L60-L85 #[cfg(feature = "module_specifier")] - fn normalize_path>(path: P) -> PathBuf { + fn normalize_path>(path: P) -> std::path::PathBuf { use std::path::Component; + use std::path::PathBuf; let mut components = path.as_ref().components().peekable(); let mut ret = @@ -528,7 +558,12 @@ mod tests { for (specifier, expected) in fixtures { let actual = resolve_url_or_path(specifier); - assert_eq!(MediaType::from_specifier(&actual), expected); + assert_eq!( + MediaType::from_specifier(&actual), + expected, + "specifier: {}", + specifier + ); assert_eq!( MediaType::from_specifier_and_headers(&actual, None), @@ -691,4 +726,26 @@ mod tests { assert_eq!(MediaType::SourceMap.to_string(), "SourceMap"); assert_eq!(MediaType::Unknown.to_string(), "Unknown"); } + + #[test] + fn test_url_path_like_file_stem() { + let url = ModuleSpecifier::parse("file:///.test").unwrap(); + assert_eq!((&url).file_stem(), Some(".test")); + let url = ModuleSpecifier::parse("file:///.test.other").unwrap(); + assert_eq!((&url).file_stem(), Some(".test")); + let url = ModuleSpecifier::parse("file:///").unwrap(); + assert_eq!((&url).file_stem(), None); + } + + #[test] + fn test_url_path_like_extension() { + let url = ModuleSpecifier::parse("file:///.test").unwrap(); + assert_eq!((&url).ext(), None); + let url = ModuleSpecifier::parse("file:///.test.other").unwrap(); + assert_eq!((&url).ext(), Some("other")); + let url = ModuleSpecifier::parse("file:///").unwrap(); + assert_eq!((&url).ext(), None); + let url = ModuleSpecifier::parse("file:///.").unwrap(); + assert_eq!((&url).ext(), None); + } } From 25fcd94ea4f95b189bae3a0850575e59a7f0e6ae Mon Sep 17 00:00:00 2001 From: David Sherret Date: Mon, 1 Apr 2024 10:42:17 -0400 Subject: [PATCH 2/6] Fix --- src/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 7a385f9..a0cd1a8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -727,6 +727,7 @@ mod tests { assert_eq!(MediaType::Unknown.to_string(), "Unknown"); } + #[cfg(feature = "module_specifier")] #[test] fn test_url_path_like_file_stem() { let url = ModuleSpecifier::parse("file:///.test").unwrap(); @@ -737,6 +738,7 @@ mod tests { assert_eq!((&url).file_stem(), None); } + #[cfg(feature = "module_specifier")] #[test] fn test_url_path_like_extension() { let url = ModuleSpecifier::parse("file:///.test").unwrap(); From 5b1dbdb769246a0b82f16a4ce8cb5269ab6593b2 Mon Sep 17 00:00:00 2001 From: David Sherret Date: Mon, 1 Apr 2024 11:13:45 -0400 Subject: [PATCH 3/6] remove eprintln --- src/lib.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index a0cd1a8..84572e9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,8 @@ // Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. +#![deny(clippy::print_stderr)] +#![deny(clippy::print_stdout)] + use serde::Serialize; use serde::Serializer; use std::fmt; @@ -216,7 +219,6 @@ impl MediaType { }, Some(ext) => { let lowercase_str = ext.to_lowercase(); - eprintln!("STR: {}", lowercase_str); match lowercase_str.as_str() { "ts" => map_typescript_like(path, Self::TypeScript, Self::Dts), "mts" => map_typescript_like(path, Self::Mts, Self::Dmts), From b314b22285609334492ece437896ca275fb3b324 Mon Sep 17 00:00:00 2001 From: David Sherret Date: Mon, 1 Apr 2024 11:21:06 -0400 Subject: [PATCH 4/6] 334ns -> 264ns --- src/lib.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 84572e9..08448f6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -210,10 +210,10 @@ impl MediaType { None => match path.file_name() { None => Self::Unknown, Some(file_name) => { - let lowercase_str = file_name.to_lowercase(); - match lowercase_str.as_str() { - ".tsbuildinfo" => Self::TsBuildInfo, - _ => Self::Unknown, + if file_name.eq_ignore_ascii_case(".tsbuildinfo") { + Self::TsBuildInfo + } else { + Self::Unknown } } }, From dbc6a0d83afcf6df7fb6c1a81479f50598f4f00a Mon Sep 17 00:00:00 2001 From: David Sherret Date: Mon, 1 Apr 2024 11:29:06 -0400 Subject: [PATCH 5/6] Add comment --- src/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 08448f6..412ccf4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -218,6 +218,8 @@ impl MediaType { } }, Some(ext) => { + // using eq_ignore_ascii_case with if/elses seems to be ~40ns + // slower here, so continue to use to_lowercase() let lowercase_str = ext.to_lowercase(); match lowercase_str.as_str() { "ts" => map_typescript_like(path, Self::TypeScript, Self::Dts), From 60fd7ea7924efc97ed452ce656034e57f9c9ddac Mon Sep 17 00:00:00 2001 From: David Sherret Date: Mon, 1 Apr 2024 11:57:14 -0400 Subject: [PATCH 6/6] Remove vector allocation (329ns -> 300ns) --- src/lib.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 412ccf4..e2d70e3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -166,11 +166,14 @@ impl MediaType { specifier: &ModuleSpecifier, content_type: S, ) -> Self { - match content_type.as_ref().split(';').collect::>()[0] - .trim() - .to_lowercase() + let first_part = content_type .as_ref() - { + .split(';') + .next() + .unwrap() + .trim() + .to_lowercase(); + match first_part.as_str() { "application/typescript" | "text/typescript" | "video/vnd.dlna.mpeg-tts"