Skip to content

Commit

Permalink
mononoke: add SkeletonManifestMapping to walker
Browse files Browse the repository at this point in the history
Summary: So we can scrub and inspect them

Reviewed By: StanislavGlebik

Differential Revision: D25120995

fbshipit-source-id: d150e55f0d72f584c15dbaf2bd017d19130312b2
  • Loading branch information
ahornby authored and facebook-github-bot committed Nov 26, 2020
1 parent 3d03808 commit 636536b
Show file tree
Hide file tree
Showing 9 changed files with 102 additions and 4 deletions.
22 changes: 20 additions & 2 deletions eden/mononoke/tests/integration/test-walker-count-objects.t
Expand Up @@ -15,7 +15,7 @@ setup configuration
o A [draft;rev=0;426bada5c675]
$
$ blobimport repo-hg/.hg repo --derived-data-type=blame --derived-data-type=changeset_info --derived-data-type=deleted_manifest --derived-data-type=fsnodes --derived-data-type=unodes
$ blobimport repo-hg/.hg repo --derived-data-type=blame --derived-data-type=changeset_info --derived-data-type=deleted_manifest --derived-data-type=fsnodes --derived-data-type=skeleton_manifests --derived-data-type=unodes

check blobstore numbers, walk will do some more steps for mappings
$ BLOBPREFIX="$TESTTMP/blobstore/blobs/blob-repo0000"
Expand All @@ -27,7 +27,7 @@ check blobstore numbers, walk will do some more steps for mappings
12
$ BLOBCOUNT=$(ls $BLOBPREFIX.* | grep -v .alias. | wc -l)
$ echo "$BLOBCOUNT"
49
55

count-objects, bonsai core data. total nodes is BONSAICOUNT plus one for the root bookmark step.
$ mononoke_walker --readonly-storage scrub -q --bookmark master_bookmark -I bonsai 2>&1 | strip_glog
Expand Down Expand Up @@ -154,3 +154,21 @@ count-objects, deep walk across deleted files manifest
Final count: (8, 8)
Bytes/s,* (glob)
* Type:Walked,Checks,Children Bookmark:1,1,1 Changeset:3,* DeletedManifest:1,* DeletedManifestMapping:3,* (glob)

count-objects, shallow walk across skeleton manifest
$ mononoke_walker --readonly-storage scrub -q --bookmark master_bookmark -I shallow -i bonsai -i derived_skeleton_manifests -X ChangesetToFileContent 2>&1 | strip_glog
Walking roots * (glob)
Walking edge types [BookmarkToChangeset, ChangesetToSkeletonManifestMapping]
Walking node types [Bookmark, Changeset, SkeletonManifestMapping]
Final count: (3, 3)
Bytes/s,* (glob)
* Type:Walked,Checks,Children Bookmark:1,1,1 Changeset:1,* SkeletonManifestMapping:1,* (glob)

count-objects, deep walk across skeleton manifest
$ mononoke_walker --readonly-storage scrub -q --bookmark master_bookmark -I deep -i bonsai -i derived_skeleton_manifests 2>&1 | strip_glog
Walking roots * (glob)
Walking edge types [BookmarkToChangeset, ChangesetToBonsaiParent, ChangesetToSkeletonManifestMapping]
Walking node types [Bookmark, Changeset, SkeletonManifestMapping]
Final count: (7, 7)
Bytes/s,* (glob)
* Type:Walked,Checks,Children Bookmark:1,1,1 Changeset:3,* SkeletonManifestMapping:3,* (glob)
1 change: 1 addition & 0 deletions eden/mononoke/walker/Cargo.toml
Expand Up @@ -35,6 +35,7 @@ phases = { path = "../phases" }
prefixblob = { path = "../blobstore/prefixblob" }
samplingblob = { path = "../blobstore/samplingblob" }
scuba_ext = { path = "../common/scuba_ext" }
skeleton_manifest = { path = "../derived_data/skeleton_manifest" }
sql_ext = { path = "../common/rust/sql_ext" }
unodes = { path = "../derived_data/unodes" }
async_compression = { git = "https://github.com/facebookexperimental/rust-shed.git", branch = "master" }
Expand Down
1 change: 1 addition & 0 deletions eden/mononoke/walker/src/corpus.rs
Expand Up @@ -174,6 +174,7 @@ fn dump_with_extension(node_type: NodeType) -> bool {
NodeType::DeletedManifestMapping => false,
NodeType::Fsnode => false,
NodeType::FsnodeMapping => false,
NodeType::SkeletonManifestMapping => false,
NodeType::UnodeFile => false,
NodeType::UnodeManifest => false,
NodeType::UnodeMapping => false,
Expand Down
13 changes: 11 additions & 2 deletions eden/mononoke/walker/src/graph.rs
Expand Up @@ -30,10 +30,11 @@ use mononoke_types::{
fsnode::Fsnode,
unode::{FileUnode, ManifestUnode},
BlameId, BonsaiChangeset, ChangesetId, ContentId, ContentMetadata, DeletedManifestId,
FileUnodeId, FsnodeId, MPath, MPathHash, ManifestUnodeId, MononokeId,
FileUnodeId, FsnodeId, MPath, MPathHash, ManifestUnodeId, MononokeId, SkeletonManifestId,
};
use once_cell::sync::OnceCell;
use phases::Phase;
use skeleton_manifest::RootSkeletonManifestId;
use std::{
fmt,
hash::{Hash, Hasher},
Expand Down Expand Up @@ -232,6 +233,7 @@ create_graph!(
DeletedManifestMapping,
Fsnode,
FsnodeMapping,
SkeletonManifestMapping,
UnodeFile,
UnodeManifest,
UnodeMapping
Expand All @@ -251,6 +253,7 @@ create_graph!(
ChangesetInfoMapping,
DeletedManifestMapping,
FsnodeMapping,
SkeletonManifestMapping,
UnodeMapping
]
),
Expand Down Expand Up @@ -324,6 +327,7 @@ create_graph!(
[ChildFsnode(Fsnode), FileContent]
),
(FsnodeMapping, ChangesetId, [RootFsnode(Fsnode)]),
(SkeletonManifestMapping, ChangesetId, []),
(
UnodeFile,
PathKey<UnodeKey<FileUnodeId>>,
Expand Down Expand Up @@ -374,6 +378,7 @@ impl NodeType {
NodeType::DeletedManifestMapping => Some(RootDeletedManifestId::NAME),
NodeType::Fsnode => Some(RootFsnodeId::NAME),
NodeType::FsnodeMapping => Some(RootFsnodeId::NAME),
NodeType::SkeletonManifestMapping => Some(RootSkeletonManifestId::NAME),
NodeType::UnodeFile => Some(RootUnodeManifestId::NAME),
NodeType::UnodeManifest => Some(RootUnodeManifestId::NAME),
NodeType::UnodeMapping => Some(RootUnodeManifestId::NAME),
Expand Down Expand Up @@ -530,6 +535,7 @@ pub enum NodeData {
DeletedManifestMapping(Option<DeletedManifestId>),
Fsnode(Fsnode),
FsnodeMapping(Option<FsnodeId>),
SkeletonManifestMapping(Option<SkeletonManifestId>),
UnodeFile(FileUnode),
UnodeManifest(ManifestUnode),
UnodeMapping(Option<ManifestUnodeId>),
Expand Down Expand Up @@ -563,6 +569,7 @@ impl Node {
Node::DeletedManifestMapping(k) => k.blobstore_key(),
Node::Fsnode(PathKey { id, path: _ }) => id.blobstore_key(),
Node::FsnodeMapping(k) => k.blobstore_key(),
Node::SkeletonManifestMapping(k) => k.blobstore_key(),
Node::UnodeFile(PathKey { id, path: _ }) => id.blobstore_key(),
Node::UnodeManifest(PathKey { id, path: _ }) => id.blobstore_key(),
Node::UnodeMapping(k) => k.blobstore_key(),
Expand Down Expand Up @@ -596,6 +603,7 @@ impl Node {
Node::DeletedManifestMapping(_) => None,
Node::Fsnode(PathKey { id: _, path }) => Some(&path),
Node::FsnodeMapping(_) => None,
Node::SkeletonManifestMapping(_) => None,
Node::UnodeFile(PathKey { id: _, path }) => Some(&path),
Node::UnodeManifest(PathKey { id: _, path }) => Some(&path),
Node::UnodeMapping(_) => None,
Expand Down Expand Up @@ -630,6 +638,7 @@ impl Node {
Node::DeletedManifestMapping(k) => Some(k.sampling_fingerprint()),
Node::Fsnode(PathKey { id, path: _ }) => Some(id.sampling_fingerprint()),
Node::FsnodeMapping(k) => Some(k.sampling_fingerprint()),
Node::SkeletonManifestMapping(k) => Some(k.sampling_fingerprint()),
Node::UnodeFile(PathKey { id, path: _ }) => Some(id.sampling_fingerprint()),
Node::UnodeManifest(PathKey { id, path: _ }) => Some(id.sampling_fingerprint()),
Node::UnodeMapping(k) => Some(k.sampling_fingerprint()),
Expand Down Expand Up @@ -721,7 +730,7 @@ mod tests {
// list, otherwise it won't get scrubbed and thus you would be unaware of different representation
// in different stores
let grandfathered: HashSet<&'static str> =
HashSet::from_iter(vec!["fastlog", "git_trees", "skeleton_manifests"].into_iter());
HashSet::from_iter(vec!["fastlog", "git_trees"].into_iter());
let mut missing = HashSet::new();
for t in &a {
if s.contains(t.as_str()) {
Expand Down
10 changes: 10 additions & 0 deletions eden/mononoke/walker/src/parse_node.rs
Expand Up @@ -323,6 +323,16 @@ mod tests {
.get_type()
);
}
NodeType::SkeletonManifestMapping => {
assert_eq!(
node_type,
&parse_node(&format!(
"SkeletonManifestMapping{}{}",
NODE_SEP, SAMPLE_BLAKE2
))?
.get_type()
);
}
NodeType::UnodeFile => {
assert_eq!(
node_type,
Expand Down
1 change: 1 addition & 0 deletions eden/mononoke/walker/src/sampling.rs
Expand Up @@ -120,6 +120,7 @@ fn filter_repo_path(node_type: NodeType, path: Option<&'_ WrappedPath>) -> Optio
NodeType::DeletedManifestMapping => None,
NodeType::Fsnode => path,
NodeType::FsnodeMapping => None,
NodeType::SkeletonManifestMapping => None,
NodeType::UnodeFile => path,
NodeType::UnodeManifest => path,
NodeType::UnodeMapping => None,
Expand Down
2 changes: 2 additions & 0 deletions eden/mononoke/walker/src/setup.rs
Expand Up @@ -153,6 +153,7 @@ const DEEP_INCLUDE_EDGE_TYPES: &[EdgeType] = &[
EdgeType::ChangesetToChangesetInfoMapping,
EdgeType::ChangesetToDeletedManifestMapping,
EdgeType::ChangesetToFsnodeMapping,
EdgeType::ChangesetToSkeletonManifestMapping,
EdgeType::ChangesetToUnodeMapping,
// Hg
EdgeType::HgBonsaiMappingToChangeset,
Expand Down Expand Up @@ -205,6 +206,7 @@ const SHALLOW_INCLUDE_EDGE_TYPES: &[EdgeType] = &[
EdgeType::ChangesetToChangesetInfoMapping,
EdgeType::ChangesetToDeletedManifestMapping,
EdgeType::ChangesetToFsnodeMapping,
EdgeType::ChangesetToSkeletonManifestMapping,
EdgeType::ChangesetToUnodeMapping,
// Hg
EdgeType::HgBonsaiMappingToChangeset,
Expand Down
18 changes: 18 additions & 0 deletions eden/mononoke/walker/src/state.rs
Expand Up @@ -178,6 +178,7 @@ pub struct WalkState {
visited_deleted_manifest_mapping: StateMap<InternedId<ChangesetId>>,
visited_fsnode: StateMap<(InternedId<Option<MPathHash>>, InternedId<FsnodeId>)>,
visited_fsnode_mapping: StateMap<InternedId<ChangesetId>>,
visited_skeleton_manifest_mapping: StateMap<InternedId<ChangesetId>>,
visited_unode_file: StateMap<(InternedId<Option<MPathHash>>, UnodeInterned<FileUnodeId>)>,
visited_unode_manifest: StateMap<(
InternedId<Option<MPathHash>>,
Expand Down Expand Up @@ -231,6 +232,7 @@ impl WalkState {
visited_deleted_manifest_mapping: StateMap::with_hasher(fac.clone()),
visited_fsnode: StateMap::with_hasher(fac.clone()),
visited_fsnode_mapping: StateMap::with_hasher(fac.clone()),
visited_skeleton_manifest_mapping: StateMap::with_hasher(fac.clone()),
visited_unode_file: StateMap::with_hasher(fac.clone()),
visited_unode_manifest: StateMap::with_hasher(fac.clone()),
visited_unode_mapping: StateMap::with_hasher(fac),
Expand Down Expand Up @@ -302,6 +304,15 @@ impl WalkState {
(Node::FsnodeMapping(bcs_id), Some(NodeData::FsnodeMapping(Some(_)))) => {
self.record(&self.visited_fsnode_mapping, &self.bcs_ids.interned(bcs_id));
}
(
Node::SkeletonManifestMapping(bcs_id),
Some(NodeData::SkeletonManifestMapping(Some(_))),
) => {
self.record(
&self.visited_skeleton_manifest_mapping,
&self.bcs_ids.interned(bcs_id),
);
}
(Node::UnodeMapping(bcs_id), Some(NodeData::UnodeMapping(Some(_)))) => {
self.record(&self.visited_unode_mapping, &self.bcs_ids.interned(bcs_id));
}
Expand Down Expand Up @@ -452,6 +463,13 @@ impl VisitOne for WalkState {
true
}
}
Node::SkeletonManifestMapping(bcs_id) => {
if let Some(id) = self.bcs_ids.get(bcs_id) {
!self.visited_skeleton_manifest_mapping.contains_key(&id) // Does not insert, see record_resolved_visit
} else {
true
}
}
Node::UnodeFile(k) => self.record_with_path(
&self.visited_unode_file,
(
Expand Down
38 changes: 38 additions & 0 deletions eden/mononoke/walker/src/walk.rs
Expand Up @@ -43,6 +43,7 @@ use mononoke_types::{
};
use phases::{HeadsFetcher, Phase, Phases};
use scuba_ext::ScubaSampleBuilder;
use skeleton_manifest::RootSkeletonManifestId;
use slog::warn;
use std::{
collections::{HashMap, HashSet},
Expand Down Expand Up @@ -338,6 +339,12 @@ async fn bonsai_changeset_step<V: VisitOne>(
checker.add_edge(&mut edges, EdgeType::ChangesetToFsnodeMapping, || {
Node::FsnodeMapping(*bcs_id)
});
// Skeleton manifest mapping is 1:1, but from there it expands less than unodes
checker.add_edge(
&mut edges,
EdgeType::ChangesetToSkeletonManifestMapping,
|| Node::SkeletonManifestMapping(*bcs_id),
);
// Deleted manifest mapping is 1:1, but from there it expands less than unodes
checker.add_edge(
&mut edges,
Expand Down Expand Up @@ -1057,6 +1064,34 @@ async fn deleted_manifest_mapping_step<V: VisitOne>(
}
}

/// Walk step for a `SkeletonManifestMapping` node.
///
/// Asks `maybe_derived` for the `RootSkeletonManifestId` of `bcs_id`
/// (`enable_derive` is forwarded to it unchanged) and reports the outcome as
/// `NodeData::SkeletonManifestMapping`: `Some(id)` when a root was returned,
/// `None` otherwise. This step never produces outgoing edges.
///
/// Errors from `maybe_derived` are propagated to the caller.
async fn skeleton_manifest_mapping_step<V: VisitOne>(
    ctx: &CoreContext,
    repo: &BlobRepo,
    checker: &Checker<V>,
    bcs_id: ChangesetId,
    enable_derive: bool,
) -> Result<StepOutput, Error> {
    let derived_root =
        maybe_derived::<RootSkeletonManifestId>(ctx, repo, bcs_id, enable_derive).await?;

    // Both outcomes go through `checker.step_data`; only the payload differs.
    let node_data = match derived_root {
        Some(root) => checker.step_data(NodeType::SkeletonManifestMapping, || {
            NodeData::SkeletonManifestMapping(Some(*root.skeleton_manifest_id()))
        }),
        None => checker.step_data(NodeType::SkeletonManifestMapping, || {
            NodeData::SkeletonManifestMapping(None)
        }),
    };

    // No edges are expanded from the mapping in this step.
    Ok(StepOutput(node_data, vec![]))
}

/// Expand nodes where check for a type is used as a check for other types.
/// e.g. to make sure metadata looked up/considered for files.
pub fn expand_checked_nodes(children: &mut Vec<OutgoingEdge>) -> () {
Expand Down Expand Up @@ -1388,6 +1423,9 @@ where
Node::FsnodeMapping(bcs_id) => {
bonsai_to_fsnode_mapping_step(&ctx, &repo, &checker, bcs_id, enable_derive).await
}
Node::SkeletonManifestMapping(bcs_id) => {
skeleton_manifest_mapping_step(&ctx, &repo, &checker, bcs_id, enable_derive).await
}
Node::UnodeFile(PathKey { id, path }) => {
unode_file_step(&ctx, &repo, &checker, path, id).await
}
Expand Down

0 comments on commit 636536b

Please sign in to comment.