From 636536bebb7ca68590ad67bdb75d35435f024d93 Mon Sep 17 00:00:00 2001 From: Alex Hornby Date: Thu, 26 Nov 2020 08:06:30 -0800 Subject: [PATCH] mononoke: add SkeletonManifestMapping to walker Summary: So we can scrub and inspect them Reviewed By: StanislavGlebik Differential Revision: D25120995 fbshipit-source-id: d150e55f0d72f584c15dbaf2bd017d19130312b2 --- .../integration/test-walker-count-objects.t | 22 ++++++++++- eden/mononoke/walker/Cargo.toml | 1 + eden/mononoke/walker/src/corpus.rs | 1 + eden/mononoke/walker/src/graph.rs | 13 ++++++- eden/mononoke/walker/src/parse_node.rs | 10 +++++ eden/mononoke/walker/src/sampling.rs | 1 + eden/mononoke/walker/src/setup.rs | 2 + eden/mononoke/walker/src/state.rs | 18 +++++++++ eden/mononoke/walker/src/walk.rs | 38 +++++++++++++++++++ 9 files changed, 102 insertions(+), 4 deletions(-) diff --git a/eden/mononoke/tests/integration/test-walker-count-objects.t b/eden/mononoke/tests/integration/test-walker-count-objects.t index e5d9d2b4c5b1a..dd3451c0830a0 100644 --- a/eden/mononoke/tests/integration/test-walker-count-objects.t +++ b/eden/mononoke/tests/integration/test-walker-count-objects.t @@ -15,7 +15,7 @@ setup configuration │ o A [draft;rev=0;426bada5c675] $ - $ blobimport repo-hg/.hg repo --derived-data-type=blame --derived-data-type=changeset_info --derived-data-type=deleted_manifest --derived-data-type=fsnodes --derived-data-type=unodes + $ blobimport repo-hg/.hg repo --derived-data-type=blame --derived-data-type=changeset_info --derived-data-type=deleted_manifest --derived-data-type=fsnodes --derived-data-type=skeleton_manifests --derived-data-type=unodes check blobstore numbers, walk will do some more steps for mappings $ BLOBPREFIX="$TESTTMP/blobstore/blobs/blob-repo0000" @@ -27,7 +27,7 @@ check blobstore numbers, walk will do some more steps for mappings 12 $ BLOBCOUNT=$(ls $BLOBPREFIX.* | grep -v .alias. | wc -l) $ echo "$BLOBCOUNT" - 49 + 55 count-objects, bonsai core data. 
total nodes is BONSAICOUNT plus one for the root bookmark step. $ mononoke_walker --readonly-storage scrub -q --bookmark master_bookmark -I bonsai 2>&1 | strip_glog @@ -154,3 +154,21 @@ count-objects, deep walk across deleted files manifest Final count: (8, 8) Bytes/s,* (glob) * Type:Walked,Checks,Children Bookmark:1,1,1 Changeset:3,* DeletedManifest:1,* DeletedManifestMapping:3,* (glob) + +count-objects, shallow walk across skeleton manifest + $ mononoke_walker --readonly-storage scrub -q --bookmark master_bookmark -I shallow -i bonsai -i derived_skeleton_manifests -X ChangesetToFileContent 2>&1 | strip_glog + Walking roots * (glob) + Walking edge types [BookmarkToChangeset, ChangesetToSkeletonManifestMapping] + Walking node types [Bookmark, Changeset, SkeletonManifestMapping] + Final count: (3, 3) + Bytes/s,* (glob) + * Type:Walked,Checks,Children Bookmark:1,1,1 Changeset:1,* SkeletonManifestMapping:1,* (glob) + +count-objects, deep walk across skeleton manifest + $ mononoke_walker --readonly-storage scrub -q --bookmark master_bookmark -I deep -i bonsai -i derived_skeleton_manifests 2>&1 | strip_glog + Walking roots * (glob) + Walking edge types [BookmarkToChangeset, ChangesetToBonsaiParent, ChangesetToSkeletonManifestMapping] + Walking node types [Bookmark, Changeset, SkeletonManifestMapping] + Final count: (7, 7) + Bytes/s,* (glob) + * Type:Walked,Checks,Children Bookmark:1,1,1 Changeset:3,* SkeletonManifestMapping:3,* (glob) diff --git a/eden/mononoke/walker/Cargo.toml b/eden/mononoke/walker/Cargo.toml index ae87f3ab5bbde..e96f6b88cfbdc 100644 --- a/eden/mononoke/walker/Cargo.toml +++ b/eden/mononoke/walker/Cargo.toml @@ -35,6 +35,7 @@ phases = { path = "../phases" } prefixblob = { path = "../blobstore/prefixblob" } samplingblob = { path = "../blobstore/samplingblob" } scuba_ext = { path = "../common/scuba_ext" } +skeleton_manifest = { path = "../derived_data/skeleton_manifest" } sql_ext = { path = "../common/rust/sql_ext" } unodes = { path = 
"../derived_data/unodes" } async_compression = { git = "https://github.com/facebookexperimental/rust-shed.git", branch = "master" } diff --git a/eden/mononoke/walker/src/corpus.rs b/eden/mononoke/walker/src/corpus.rs index 5c3bd44aa2743..1cbc6c1894a8b 100644 --- a/eden/mononoke/walker/src/corpus.rs +++ b/eden/mononoke/walker/src/corpus.rs @@ -174,6 +174,7 @@ fn dump_with_extension(node_type: NodeType) -> bool { NodeType::DeletedManifestMapping => false, NodeType::Fsnode => false, NodeType::FsnodeMapping => false, + NodeType::SkeletonManifestMapping => false, NodeType::UnodeFile => false, NodeType::UnodeManifest => false, NodeType::UnodeMapping => false, diff --git a/eden/mononoke/walker/src/graph.rs b/eden/mononoke/walker/src/graph.rs index 4e258f7f07888..c3af5e291f445 100644 --- a/eden/mononoke/walker/src/graph.rs +++ b/eden/mononoke/walker/src/graph.rs @@ -30,10 +30,11 @@ use mononoke_types::{ fsnode::Fsnode, unode::{FileUnode, ManifestUnode}, BlameId, BonsaiChangeset, ChangesetId, ContentId, ContentMetadata, DeletedManifestId, - FileUnodeId, FsnodeId, MPath, MPathHash, ManifestUnodeId, MononokeId, + FileUnodeId, FsnodeId, MPath, MPathHash, ManifestUnodeId, MononokeId, SkeletonManifestId, }; use once_cell::sync::OnceCell; use phases::Phase; +use skeleton_manifest::RootSkeletonManifestId; use std::{ fmt, hash::{Hash, Hasher}, @@ -232,6 +233,7 @@ create_graph!( DeletedManifestMapping, Fsnode, FsnodeMapping, + SkeletonManifestMapping, UnodeFile, UnodeManifest, UnodeMapping @@ -251,6 +253,7 @@ create_graph!( ChangesetInfoMapping, DeletedManifestMapping, FsnodeMapping, + SkeletonManifestMapping, UnodeMapping ] ), @@ -324,6 +327,7 @@ create_graph!( [ChildFsnode(Fsnode), FileContent] ), (FsnodeMapping, ChangesetId, [RootFsnode(Fsnode)]), + (SkeletonManifestMapping, ChangesetId, []), ( UnodeFile, PathKey>, @@ -374,6 +378,7 @@ impl NodeType { NodeType::DeletedManifestMapping => Some(RootDeletedManifestId::NAME), NodeType::Fsnode => Some(RootFsnodeId::NAME), 
NodeType::FsnodeMapping => Some(RootFsnodeId::NAME), + NodeType::SkeletonManifestMapping => Some(RootSkeletonManifestId::NAME), NodeType::UnodeFile => Some(RootUnodeManifestId::NAME), NodeType::UnodeManifest => Some(RootUnodeManifestId::NAME), NodeType::UnodeMapping => Some(RootUnodeManifestId::NAME), @@ -530,6 +535,7 @@ pub enum NodeData { DeletedManifestMapping(Option), Fsnode(Fsnode), FsnodeMapping(Option), + SkeletonManifestMapping(Option), UnodeFile(FileUnode), UnodeManifest(ManifestUnode), UnodeMapping(Option), @@ -563,6 +569,7 @@ impl Node { Node::DeletedManifestMapping(k) => k.blobstore_key(), Node::Fsnode(PathKey { id, path: _ }) => id.blobstore_key(), Node::FsnodeMapping(k) => k.blobstore_key(), + Node::SkeletonManifestMapping(k) => k.blobstore_key(), Node::UnodeFile(PathKey { id, path: _ }) => id.blobstore_key(), Node::UnodeManifest(PathKey { id, path: _ }) => id.blobstore_key(), Node::UnodeMapping(k) => k.blobstore_key(), @@ -596,6 +603,7 @@ impl Node { Node::DeletedManifestMapping(_) => None, Node::Fsnode(PathKey { id: _, path }) => Some(&path), Node::FsnodeMapping(_) => None, + Node::SkeletonManifestMapping(_) => None, Node::UnodeFile(PathKey { id: _, path }) => Some(&path), Node::UnodeManifest(PathKey { id: _, path }) => Some(&path), Node::UnodeMapping(_) => None, @@ -630,6 +638,7 @@ impl Node { Node::DeletedManifestMapping(k) => Some(k.sampling_fingerprint()), Node::Fsnode(PathKey { id, path: _ }) => Some(id.sampling_fingerprint()), Node::FsnodeMapping(k) => Some(k.sampling_fingerprint()), + Node::SkeletonManifestMapping(k) => Some(k.sampling_fingerprint()), Node::UnodeFile(PathKey { id, path: _ }) => Some(id.sampling_fingerprint()), Node::UnodeManifest(PathKey { id, path: _ }) => Some(id.sampling_fingerprint()), Node::UnodeMapping(k) => Some(k.sampling_fingerprint()), @@ -721,7 +730,7 @@ mod tests { // list, otherwise it won't get scrubbed and thus you would be unaware of different representation // in different stores let grandfathered: 
HashSet<&'static str> = - HashSet::from_iter(vec!["fastlog", "git_trees", "skeleton_manifests"].into_iter()); + HashSet::from_iter(vec!["fastlog", "git_trees"].into_iter()); let mut missing = HashSet::new(); for t in &a { if s.contains(t.as_str()) { diff --git a/eden/mononoke/walker/src/parse_node.rs b/eden/mononoke/walker/src/parse_node.rs index d71ea62bd71e9..4dd295803e6b7 100644 --- a/eden/mononoke/walker/src/parse_node.rs +++ b/eden/mononoke/walker/src/parse_node.rs @@ -323,6 +323,16 @@ mod tests { .get_type() ); } + NodeType::SkeletonManifestMapping => { + assert_eq!( + node_type, + &parse_node(&format!( + "SkeletonManifestMapping{}{}", + NODE_SEP, SAMPLE_BLAKE2 + ))? + .get_type() + ); + } NodeType::UnodeFile => { assert_eq!( node_type, diff --git a/eden/mononoke/walker/src/sampling.rs b/eden/mononoke/walker/src/sampling.rs index ad45ea564bed2..a1d67a9b76368 100644 --- a/eden/mononoke/walker/src/sampling.rs +++ b/eden/mononoke/walker/src/sampling.rs @@ -120,6 +120,7 @@ fn filter_repo_path(node_type: NodeType, path: Option<&'_ WrappedPath>) -> Optio NodeType::DeletedManifestMapping => None, NodeType::Fsnode => path, NodeType::FsnodeMapping => None, + NodeType::SkeletonManifestMapping => None, NodeType::UnodeFile => path, NodeType::UnodeManifest => path, NodeType::UnodeMapping => None, diff --git a/eden/mononoke/walker/src/setup.rs b/eden/mononoke/walker/src/setup.rs index 5a4f348c45b0a..563a69917b3bd 100644 --- a/eden/mononoke/walker/src/setup.rs +++ b/eden/mononoke/walker/src/setup.rs @@ -153,6 +153,7 @@ const DEEP_INCLUDE_EDGE_TYPES: &[EdgeType] = &[ EdgeType::ChangesetToChangesetInfoMapping, EdgeType::ChangesetToDeletedManifestMapping, EdgeType::ChangesetToFsnodeMapping, + EdgeType::ChangesetToSkeletonManifestMapping, EdgeType::ChangesetToUnodeMapping, // Hg EdgeType::HgBonsaiMappingToChangeset, @@ -205,6 +206,7 @@ const SHALLOW_INCLUDE_EDGE_TYPES: &[EdgeType] = &[ EdgeType::ChangesetToChangesetInfoMapping, EdgeType::ChangesetToDeletedManifestMapping, 
EdgeType::ChangesetToFsnodeMapping, + EdgeType::ChangesetToSkeletonManifestMapping, EdgeType::ChangesetToUnodeMapping, // Hg EdgeType::HgBonsaiMappingToChangeset, diff --git a/eden/mononoke/walker/src/state.rs b/eden/mononoke/walker/src/state.rs index 0e98409270ce9..66f2c08e1eb08 100644 --- a/eden/mononoke/walker/src/state.rs +++ b/eden/mononoke/walker/src/state.rs @@ -178,6 +178,7 @@ pub struct WalkState { visited_deleted_manifest_mapping: StateMap>, visited_fsnode: StateMap<(InternedId>, InternedId)>, visited_fsnode_mapping: StateMap>, + visited_skeleton_manifest_mapping: StateMap>, visited_unode_file: StateMap<(InternedId>, UnodeInterned)>, visited_unode_manifest: StateMap<( InternedId>, @@ -231,6 +232,7 @@ impl WalkState { visited_deleted_manifest_mapping: StateMap::with_hasher(fac.clone()), visited_fsnode: StateMap::with_hasher(fac.clone()), visited_fsnode_mapping: StateMap::with_hasher(fac.clone()), + visited_skeleton_manifest_mapping: StateMap::with_hasher(fac.clone()), visited_unode_file: StateMap::with_hasher(fac.clone()), visited_unode_manifest: StateMap::with_hasher(fac.clone()), visited_unode_mapping: StateMap::with_hasher(fac), @@ -302,6 +304,15 @@ impl WalkState { (Node::FsnodeMapping(bcs_id), Some(NodeData::FsnodeMapping(Some(_)))) => { self.record(&self.visited_fsnode_mapping, &self.bcs_ids.interned(bcs_id)); } + ( + Node::SkeletonManifestMapping(bcs_id), + Some(NodeData::SkeletonManifestMapping(Some(_))), + ) => { + self.record( + &self.visited_skeleton_manifest_mapping, + &self.bcs_ids.interned(bcs_id), + ); + } (Node::UnodeMapping(bcs_id), Some(NodeData::UnodeMapping(Some(_)))) => { self.record(&self.visited_unode_mapping, &self.bcs_ids.interned(bcs_id)); } @@ -452,6 +463,13 @@ impl VisitOne for WalkState { true } } + Node::SkeletonManifestMapping(bcs_id) => { + if let Some(id) = self.bcs_ids.get(bcs_id) { + !self.visited_skeleton_manifest_mapping.contains_key(&id) // Does not insert, see record_resolved_visit + } else { + true + } + } 
Node::UnodeFile(k) => self.record_with_path( &self.visited_unode_file, ( diff --git a/eden/mononoke/walker/src/walk.rs b/eden/mononoke/walker/src/walk.rs index 0ab3431f38c06..79acc6ef4ebc5 100644 --- a/eden/mononoke/walker/src/walk.rs +++ b/eden/mononoke/walker/src/walk.rs @@ -43,6 +43,7 @@ use mononoke_types::{ }; use phases::{HeadsFetcher, Phase, Phases}; use scuba_ext::ScubaSampleBuilder; +use skeleton_manifest::RootSkeletonManifestId; use slog::warn; use std::{ collections::{HashMap, HashSet}, @@ -338,6 +339,12 @@ async fn bonsai_changeset_step( checker.add_edge(&mut edges, EdgeType::ChangesetToFsnodeMapping, || { Node::FsnodeMapping(*bcs_id) }); + // Skeleton manifest mapping is 1:1 but from there expands less than unodes + checker.add_edge( + &mut edges, + EdgeType::ChangesetToSkeletonManifestMapping, + || Node::SkeletonManifestMapping(*bcs_id), + ); // Deleted manifest mapping is 1:1 but from their expands less than unodes checker.add_edge( &mut edges, @@ -1057,6 +1064,34 @@ async fn deleted_manifest_mapping_step( } } +async fn skeleton_manifest_mapping_step( + ctx: &CoreContext, + repo: &BlobRepo, + checker: &Checker, + bcs_id: ChangesetId, + enable_derive: bool, +) -> Result { + let root_manifest_id = + maybe_derived::(ctx, repo, bcs_id, enable_derive).await?; + + if let Some(root_manifest_id) = root_manifest_id { + let edges = vec![]; + Ok(StepOutput( + checker.step_data(NodeType::SkeletonManifestMapping, || { + NodeData::SkeletonManifestMapping(Some(*root_manifest_id.skeleton_manifest_id())) + }), + edges, + )) + } else { + Ok(StepOutput( + checker.step_data(NodeType::SkeletonManifestMapping, || { + NodeData::SkeletonManifestMapping(None) + }), + vec![], + )) + } +} + /// Expand nodes where check for a type is used as a check for other types. /// e.g. to make sure metadata looked up/considered for files. 
pub fn expand_checked_nodes(children: &mut Vec) -> () { @@ -1388,6 +1423,9 @@ where Node::FsnodeMapping(bcs_id) => { bonsai_to_fsnode_mapping_step(&ctx, &repo, &checker, bcs_id, enable_derive).await } + Node::SkeletonManifestMapping(bcs_id) => { + skeleton_manifest_mapping_step(&ctx, &repo, &checker, bcs_id, enable_derive).await + } Node::UnodeFile(PathKey { id, path }) => { unode_file_step(&ctx, &repo, &checker, path, id).await }