diff --git a/src/librustdoc/html/render/mod.rs b/src/librustdoc/html/render/mod.rs index 118c6eeb289bb..bea5ccd7c860d 100644 --- a/src/librustdoc/html/render/mod.rs +++ b/src/librustdoc/html/render/mod.rs @@ -58,7 +58,7 @@ use rustc_span::{ symbol::{sym, Symbol}, BytePos, FileName, RealFileName, }; -use serde::ser::{SerializeMap, SerializeSeq}; +use serde::ser::SerializeMap; use serde::{Serialize, Serializer}; use crate::clean::{self, ItemId, RenderedLink, SelfTy}; @@ -123,44 +123,58 @@ pub(crate) struct IndexItem { } /// A type used for the search index. -#[derive(Debug)] +#[derive(Debug, Eq, PartialEq)] pub(crate) struct RenderType { id: Option, generics: Option>, bindings: Option)>>, } -impl Serialize for RenderType { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - let id = match &self.id { +impl RenderType { + // Types are rendered as lists of lists, because that's pretty compact. + // The contents of the lists are always integers in self-terminating hex + // form, handled by `RenderTypeId::write_to_string`, so no commas are + // needed to separate the items. + pub fn write_to_string(&self, string: &mut String) { + fn write_optional_id(id: Option, string: &mut String) { // 0 is a sentinel, everything else is one-indexed - None => 0, - // concrete type - Some(RenderTypeId::Index(idx)) if *idx >= 0 => idx + 1, - // generic type parameter - Some(RenderTypeId::Index(idx)) => *idx, - _ => panic!("must convert render types to indexes before serializing"), - }; + match id { + Some(id) => id.write_to_string(string), + None => string.push('`'), + } + } + // Either just the type id, or `{type, generics, bindings?}` + // where generics is a list of types, + // and bindings is a list of `{id, typelist}` pairs. if self.generics.is_some() || self.bindings.is_some() { - let mut seq = serializer.serialize_seq(None)?; - seq.serialize_element(&id)?; - seq.serialize_element(self.generics.as_ref().map(Vec::as_slice).unwrap_or_default())?; + string.push('{'); + write_optional_id(self.id, string); + string.push('{'); + for generic in &self.generics.as_ref().map(Vec::as_slice).unwrap_or_default()[..] { + generic.write_to_string(string); + } + string.push('}'); if self.bindings.is_some() { - seq.serialize_element( - self.bindings.as_ref().map(Vec::as_slice).unwrap_or_default(), - )?; + string.push('{'); + for binding in &self.bindings.as_ref().map(Vec::as_slice).unwrap_or_default()[..] { + string.push('{'); + binding.0.write_to_string(string); + string.push('{'); + for constraint in &binding.1[..] { + constraint.write_to_string(string); + } + string.push_str("}}"); + } + string.push('}'); } - seq.end() + string.push('}'); } else { - id.serialize(serializer) + write_optional_id(self.id, string); } } } -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy, Debug, Eq, PartialEq)] pub(crate) enum RenderTypeId { DefId(DefId), Primitive(clean::PrimitiveType), @@ -168,70 +182,122 @@ pub(crate) enum RenderTypeId { Index(isize), } -impl Serialize for RenderTypeId { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - let id = match &self { +impl RenderTypeId { + pub fn write_to_string(&self, string: &mut String) { + // (sign, value) + let (sign, id): (bool, u32) = match &self { // 0 is a sentinel, everything else is one-indexed // concrete type - RenderTypeId::Index(idx) if *idx >= 0 => idx + 1, + RenderTypeId::Index(idx) if *idx >= 0 => (false, (idx + 1isize).try_into().unwrap()), // generic type parameter - RenderTypeId::Index(idx) => *idx, + RenderTypeId::Index(idx) => (true, (-*idx).try_into().unwrap()), _ => panic!("must convert render types to indexes before serializing"), }; - id.serialize(serializer) + // zig-zag encoding + let value: u32 = (id << 1) | (if sign { 1 } else { 0 }); + // Self-terminating hex use capital letters for everything but the + // least significant digit, which is lowercase. For example, decimal 17 + // would be `` Aa `` if zig-zag encoding weren't used. + // + // Zig-zag encoding, however, stores the sign bit as the last bit. + // This means, in the last hexit, 1 is actually `c`, -1 is `b` + // (`a` is the imaginary -0), and, because all the bits are shifted + // by one, `` A` `` is actually 8 and `` Aa `` is -8. + // + // https://rust-lang.github.io/rustc-dev-guide/rustdoc-internals/search.html + // describes the encoding in more detail. + let mut shift: u32 = 28; + let mut mask: u32 = 0xF0_00_00_00; + while shift < 32 { + let hexit = (value & mask) >> shift; + if hexit != 0 || shift == 0 { + let hex = + char::try_from(if shift == 0 { '`' } else { '@' } as u32 + hexit).unwrap(); + string.push(hex); + } + shift = shift.wrapping_sub(4); + mask = mask >> 4; + } } } /// Full type of functions/methods in the search index. -#[derive(Debug)] +#[derive(Debug, Eq, PartialEq)] pub(crate) struct IndexItemFunctionType { inputs: Vec, output: Vec, where_clause: Vec>, } -impl Serialize for IndexItemFunctionType { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - // If we couldn't figure out a type, just write `0`. +impl IndexItemFunctionType { + pub fn write_to_string<'a>( + &'a self, + string: &mut String, + backref_queue: &mut VecDeque<&'a IndexItemFunctionType>, + ) { + assert!(backref_queue.len() <= 16); + // If we couldn't figure out a type, just write 0, + // which is encoded as `` ` `` (see RenderTypeId::write_to_string). let has_missing = self .inputs .iter() .chain(self.output.iter()) .any(|i| i.id.is_none() && i.generics.is_none()); if has_missing { - 0.serialize(serializer) + string.push('`'); + } else if let Some(idx) = backref_queue.iter().position(|other| *other == self) { + // The backref queue has 16 items, so backrefs use + // a single hexit, disjoint from the ones used for numbers. + string.push( + char::try_from('0' as u32 + u32::try_from(idx).unwrap()) + .expect("last possible value is '?'"), + ); } else { - let mut seq = serializer.serialize_seq(None)?; + backref_queue.push_front(self); + if backref_queue.len() > 16 { + backref_queue.pop_back(); + } + string.push('{'); match &self.inputs[..] { [one] if one.generics.is_none() && one.bindings.is_none() => { - seq.serialize_element(one)? + one.write_to_string(string); + } + _ => { + string.push('{'); + for item in &self.inputs[..] { + item.write_to_string(string); + } + string.push('}'); } - _ => seq.serialize_element(&self.inputs)?, } match &self.output[..] { [] if self.where_clause.is_empty() => {} [one] if one.generics.is_none() && one.bindings.is_none() => { - seq.serialize_element(one)? + one.write_to_string(string); + } + _ => { + string.push('{'); + for item in &self.output[..] { + item.write_to_string(string); + } + string.push('}'); } - _ => seq.serialize_element(&self.output)?, } for constraint in &self.where_clause { if let [one] = &constraint[..] && one.generics.is_none() && one.bindings.is_none() { - seq.serialize_element(one)?; + one.write_to_string(string); } else { - seq.serialize_element(constraint)?; + string.push('{'); + for item in &constraint[..] { + item.write_to_string(string); + } + string.push('}'); } } - seq.end() + string.push('}'); } } } diff --git a/src/librustdoc/html/render/search_index.rs b/src/librustdoc/html/render/search_index.rs index a1029320d2d27..e49df400c83ed 100644 --- a/src/librustdoc/html/render/search_index.rs +++ b/src/librustdoc/html/render/search_index.rs @@ -1,5 +1,5 @@ use std::collections::hash_map::Entry; -use std::collections::BTreeMap; +use std::collections::{BTreeMap, VecDeque}; use rustc_data_structures::fx::{FxHashMap, FxIndexMap}; use rustc_middle::ty::TyCtxt; @@ -409,9 +409,11 @@ pub(crate) fn build_index<'tcx>( let mut full_paths = Vec::with_capacity(self.items.len()); let mut descriptions = Vec::with_capacity(self.items.len()); let mut parents = Vec::with_capacity(self.items.len()); - let mut functions = Vec::with_capacity(self.items.len()); + let mut functions = String::with_capacity(self.items.len()); let mut deprecated = Vec::with_capacity(self.items.len()); + let mut backref_queue = VecDeque::new(); + for (index, item) in self.items.iter().enumerate() { let n = item.ty as u8; let c = char::try_from(n + b'A').expect("item types must fit in ASCII"); @@ -434,27 +436,10 @@ pub(crate) fn build_index<'tcx>( full_paths.push((index, &item.path)); } - // Fake option to get `0` out as a sentinel instead of `null`. - // We want to use `0` because it's three less bytes. - enum FunctionOption<'a> { - Function(&'a IndexItemFunctionType), - None, - } - impl<'a> Serialize for FunctionOption<'a> { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - match self { - FunctionOption::None => 0.serialize(serializer), - FunctionOption::Function(ty) => ty.serialize(serializer), - } - } + match &item.search_type { + Some(ty) => ty.write_to_string(&mut functions, &mut backref_queue), + None => functions.push('`'), } - functions.push(match &item.search_type { - Some(ty) => FunctionOption::Function(ty), - None => FunctionOption::None, - }); if item.deprecation.is_some() { deprecated.push(index); diff --git a/src/librustdoc/html/static/js/externs.js b/src/librustdoc/html/static/js/externs.js index 93709e4e830ad..d24148b9556b9 100644 --- a/src/librustdoc/html/static/js/externs.js +++ b/src/librustdoc/html/static/js/externs.js @@ -200,3 +200,59 @@ let FunctionSearchType; * }} */ let FunctionType; + +/** + * The raw search data for a given crate. `n`, `t`, `d`, `i`, and `f` + * are arrays with the same length. `q`, `a`, and `c` use a sparse + * representation for compactness. + * + * `n[i]` contains the name of an item. + * + * `t[i]` contains the type of that item + * (as a string of characters that represent an offset in `itemTypes`). + * + * `d[i]` contains the description of that item. + * + * `q` contains the full paths of the items. For compactness, it is a set of + * (index, path) pairs used to create a map. If a given index `i` is + * not present, this indicates "same as the last index present". + * + * `i[i]` contains an item's parent, usually a module. For compactness, + * it is a set of indexes into the `p` array. + * + * `f` contains function signatures, or `0` if the item isn't a function. + * More information on how they're encoded can be found in rustc-dev-guide + * + * Functions are themselves encoded as arrays. The first item is a list of + * types representing the function's inputs, and the second list item is a list + * of types representing the function's output. Tuples are flattened. + * Types are also represented as arrays; the first item is an index into the `p` + * array, while the second is a list of types representing any generic parameters. + * + * b[i] contains an item's impl disambiguator. This is only present if an item + * is defined in an impl block and, the impl block's type has more than one associated + * item with the same name. + * + * `a` defines aliases with an Array of pairs: [name, offset], where `offset` + * points into the n/t/d/q/i/f arrays. + * + * `doc` contains the description of the crate. + * + * `p` is a list of path/type pairs. It is used for parents and function parameters. + * + * `c` is an array of item indices that are deprecated. + * @typedef {{ + * doc: string, + * a: Object, + * n: Array, + * t: String, + * d: Array, + * q: Array<[Number, string]>, + * i: Array, + * f: string, + * p: Array, + * b: Array<[Number, String]>, + * c: Array + * }} + */ +let RawSearchIndexCrate; diff --git a/src/librustdoc/html/static/js/search.js b/src/librustdoc/html/static/js/search.js index e6263db32835d..e0708485fc058 100644 --- a/src/librustdoc/html/static/js/search.js +++ b/src/librustdoc/html/static/js/search.js @@ -2767,19 +2767,65 @@ ${item.displayPath}${name}\ * The raw function search type format is generated using serde in * librustdoc/html/render/mod.rs: impl Serialize for IndexItemFunctionType * - * @param {RawFunctionSearchType} functionSearchType + * @param {{ + * string: string, + * offset: number, + * backrefQueue: FunctionSearchType[] + * }} itemFunctionDecoder * @param {Array<{name: string, ty: number}>} lowercasePaths * @param {Map} * * @return {null|FunctionSearchType} */ - function buildFunctionSearchType(functionSearchType, lowercasePaths) { - const INPUTS_DATA = 0; - const OUTPUT_DATA = 1; - // `0` is used as a sentinel because it's fewer bytes than `null` - if (functionSearchType === 0) { + function buildFunctionSearchType(itemFunctionDecoder, lowercasePaths) { + const c = itemFunctionDecoder.string.charCodeAt(itemFunctionDecoder.offset); + itemFunctionDecoder.offset += 1; + const [zero, ua, la, ob, cb] = ["0", "@", "`", "{", "}"].map(c => c.charCodeAt(0)); + // `` ` `` is used as a sentinel because it's fewer bytes than `null`, and decodes to zero + // `0` is a backref + if (c === la) { return null; } + // sixteen characters after "0" are backref + if (c >= zero && c < ua) { + return itemFunctionDecoder.backrefQueue[c - zero]; + } + if (c !== ob) { + throw ["Unexpected ", c, " in function: expected ", "{", "; this is a bug"]; + } + // call after consuming `{` + function decodeList() { + let c = itemFunctionDecoder.string.charCodeAt(itemFunctionDecoder.offset); + const ret = []; + while (c !== cb) { + ret.push(decode()); + c = itemFunctionDecoder.string.charCodeAt(itemFunctionDecoder.offset); + } + itemFunctionDecoder.offset += 1; // eat cb + return ret; + } + // consumes and returns a list or integer + function decode() { + let n = 0; + let c = itemFunctionDecoder.string.charCodeAt(itemFunctionDecoder.offset); + if (c === ob) { + itemFunctionDecoder.offset += 1; + return decodeList(); + } + while (c < la) { + n = (n << 4) | (c & 0xF); + itemFunctionDecoder.offset += 1; + c = itemFunctionDecoder.string.charCodeAt(itemFunctionDecoder.offset); + } + // last character >= la + n = (n << 4) | (c & 0xF); + const [sign, value] = [n & 1, n >> 1]; + itemFunctionDecoder.offset += 1; + return sign ? -value : value; + } + const functionSearchType = decodeList(); + const INPUTS_DATA = 0; + const OUTPUT_DATA = 1; let inputs, output; if (typeof functionSearchType[INPUTS_DATA] === "number") { inputs = [buildItemSearchType(functionSearchType[INPUTS_DATA], lowercasePaths)]; @@ -2808,9 +2854,14 @@ ${item.displayPath}${name}\ ? [buildItemSearchType(functionSearchType[i], lowercasePaths)] : buildItemSearchTypeAll(functionSearchType[i], lowercasePaths)); } - return { + const ret = { inputs, output, where_clause, }; + itemFunctionDecoder.backrefQueue.unshift(ret); + if (itemFunctionDecoder.backrefQueue.length > 16) { + itemFunctionDecoder.backrefQueue.pop(); + } + return ret; } /** @@ -2924,6 +2975,11 @@ ${item.displayPath}${name}\ return functionTypeFingerprint[(fullId * 4) + 3]; } + /** + * Convert raw search index into in-memory search index. + * + * @param {[string, RawSearchIndexCrate][]} rawSearchIndex + */ function buildIndex(rawSearchIndex) { searchIndex = []; typeNameIdMap = new Map(); @@ -2950,59 +3006,7 @@ ${item.displayPath}${name}\ // This loop actually generates the search item indexes, including // normalized names, type signature objects and fingerprints, and aliases. id = 0; - /** - * The raw search data for a given crate. `n`, `t`, `d`, `i`, and `f` - * are arrays with the same length. `q`, `a`, and `c` use a sparse - * representation for compactness. - * - * `n[i]` contains the name of an item. - * - * `t[i]` contains the type of that item - * (as a string of characters that represent an offset in `itemTypes`). - * - * `d[i]` contains the description of that item. - * - * `q` contains the full paths of the items. For compactness, it is a set of - * (index, path) pairs used to create a map. If a given index `i` is - * not present, this indicates "same as the last index present". - * - * `i[i]` contains an item's parent, usually a module. For compactness, - * it is a set of indexes into the `p` array. - * - * `f[i]` contains function signatures, or `0` if the item isn't a function. - * Functions are themselves encoded as arrays. The first item is a list of - * types representing the function's inputs, and the second list item is a list - * of types representing the function's output. Tuples are flattened. - * Types are also represented as arrays; the first item is an index into the `p` - * array, while the second is a list of types representing any generic parameters. - * - * b[i] contains an item's impl disambiguator. This is only present if an item - * is defined in an impl block and, the impl block's type has more than one associated - * item with the same name. - * - * `a` defines aliases with an Array of pairs: [name, offset], where `offset` - * points into the n/t/d/q/i/f arrays. - * - * `doc` contains the description of the crate. - * - * `p` is a list of path/type pairs. It is used for parents and function parameters. - * - * `c` is an array of item indices that are deprecated. - * - * @type {{ - * doc: string, - * a: Object, - * n: Array, - * t: String, - * d: Array, - * q: Array<[Number, string]>, - * i: Array, - * f: Array, - * p: Array, - * b: Array<[Number, String]>, - * c: Array - * }} - */ + for (const [crate, crateCorpus] of rawSearchIndex) { // This object should have exactly the same set of fields as the "row" // object defined below. Your JavaScript runtime will thank you. @@ -3039,8 +3043,12 @@ ${item.displayPath}${name}\ const itemDescs = crateCorpus.d; // an array of (Number) the parent path index + 1 to `paths`, or 0 if none const itemParentIdxs = crateCorpus.i; - // an array of (Object | null) the type of the function, if any - const itemFunctionSearchTypes = crateCorpus.f; + // a string representing the list of function types + const itemFunctionDecoder = { + string: crateCorpus.f, + offset: 0, + backrefQueue: [], + }; // an array of (Number) indices for the deprecated items const deprecatedItems = new Set(crateCorpus.c); // an array of (Number) indices for the deprecated items @@ -3088,12 +3096,8 @@ ${item.displayPath}${name}\ word = itemNames[i].toLowerCase(); } const path = itemPaths.has(i) ? itemPaths.get(i) : lastPath; - let type = null; - if (itemFunctionSearchTypes[i] !== 0) { - type = buildFunctionSearchType( - itemFunctionSearchTypes[i], - lowercasePaths - ); + const type = buildFunctionSearchType(itemFunctionDecoder, lowercasePaths); + if (type !== null) { if (type) { const fp = functionTypeFingerprint.subarray(id * 4, (id + 1) * 4); const fps = new Set();