From 9cff2a0cf66f037bf10866b7787496c6cf02ad94 Mon Sep 17 00:00:00 2001 From: Tom Clarkson Date: Mon, 16 Dec 2019 09:58:38 +1100 Subject: [PATCH 1/7] subtree: handle multiple parents passed to cache_miss Signed-off-by: Tom Clarkson --- contrib/subtree/git-subtree.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/subtree/git-subtree.sh b/contrib/subtree/git-subtree.sh index 868e18b9a1ab85..9867718503c948 100755 --- a/contrib/subtree/git-subtree.sh +++ b/contrib/subtree/git-subtree.sh @@ -238,7 +238,7 @@ cache_miss () { } check_parents () { - missed=$(cache_miss "$1") + missed=$(cache_miss $1) local indent=$(($2 + 1)) for miss in $missed do From 79b5f4a65197cea26ddc080c19dd2c5c7d424fc1 Mon Sep 17 00:00:00 2001 From: Tom Clarkson Date: Mon, 16 Dec 2019 10:02:53 +1100 Subject: [PATCH 2/7] subtree: exclude commits predating add from recursive processing Include recursion depth in debug logs so we can see when the recursion is getting out of hand. Making the cache handle null mappings correctly and adding older commits to the cache allows the recursive algorithm to terminate at any point on mainline rather than needing to reach either the add point or the initial commit. Signed-off-by: Tom Clarkson --- contrib/subtree/git-subtree.sh | 35 +++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/contrib/subtree/git-subtree.sh b/contrib/subtree/git-subtree.sh index 9867718503c948..160bad95c1a949 100755 --- a/contrib/subtree/git-subtree.sh +++ b/contrib/subtree/git-subtree.sh @@ -244,7 +244,7 @@ check_parents () { do if ! test -r "$cachedir/notree/$miss" then - debug " incorrect order: $miss" + debug " unprocessed parent commit: $miss ($indent)" process_split_commit "$miss" "" "$indent" fi done @@ -392,6 +392,24 @@ find_existing_splits () { done } +find_mainline_ref () { + debug "Looking for first split..." 
+ dir="$1" + revs="$2" + + git log --reverse --grep="^git-subtree-dir: $dir/*\$" \ + --no-show-signature --pretty=format:'START %H%n%s%n%n%b%nEND%n' $revs | + while read a b junk + do + case "$a" in + git-subtree-mainline:) + echo "$b" + return + ;; + esac + done +} + copy_commit () { # We're going to set some environment vars here, so # do it in a subshell to get rid of them safely later @@ -646,9 +664,9 @@ process_split_commit () { progress "$revcount/$revmax ($createcount) [$extracount]" - debug "Processing commit: $rev" + debug "Processing commit: $rev ($indent)" exists=$(cache_get "$rev") - if test -n "$exists" + if test -z "$(cache_miss "$rev")" then debug " prior: $exists" return @@ -773,6 +791,17 @@ cmd_split () { unrevs="$(find_existing_splits "$dir" "$revs")" + mainline="$(find_mainline_ref "$dir" "$revs")" + if test -n "$mainline" + then + debug "Mainline $mainline predates subtree add" + git rev-list --topo-order --skip=1 $mainline | + while read rev + do + cache_set "$rev" "" + done || exit $? + fi + # We can't restrict rev-list to only $dir here, because some of our # parents have the $dir contents the root, and those won't match. # (and rev-list --follow doesn't seem to solve this) From 8eec18388c86071db47512b84118e3b9111bd34d Mon Sep 17 00:00:00 2001 From: Tom Clarkson Date: Thu, 19 Dec 2019 14:10:58 +1100 Subject: [PATCH 3/7] subtree: persist cache between split runs Provide a mechanism for handling problematic commits. If the algorithm in process_split_commit is getting something wrong, you can write a corrected value to the cache before running split. 
Signed-off-by: Tom Clarkson --- contrib/subtree/git-subtree.sh | 37 ++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/contrib/subtree/git-subtree.sh b/contrib/subtree/git-subtree.sh index 160bad95c1a949..c21d620610c034 100755 --- a/contrib/subtree/git-subtree.sh +++ b/contrib/subtree/git-subtree.sh @@ -27,6 +27,7 @@ b,branch= create a new branch from the split subtree ignore-joins ignore prior --rejoin commits onto= try connecting new tree to an existing one rejoin merge the new branch back into HEAD +clear-cache reset the subtree mapping cache options for 'add', 'merge', and 'pull' squash merge subtree changes as a single commit " @@ -48,6 +49,7 @@ annotate= squash= message= prefix= +clearcache= debug () { if test -n "$debug" @@ -131,6 +133,9 @@ do --no-rejoin) rejoin= ;; + --clear-cache) + clearcache=1 + ;; --ignore-joins) ignore_joins=1 ;; @@ -206,9 +211,13 @@ debug "opts: {$*}" debug cache_setup () { - cachedir="$GIT_DIR/subtree-cache/$$" - rm -rf "$cachedir" || - die "Can't delete old cachedir: $cachedir" + cachedir="$GIT_DIR/subtree-cache/$prefix" + if test -n "$clearcache" + then + debug "Clearing cache" + rm -rf "$cachedir" || + die "Can't delete old cachedir: $cachedir" + fi mkdir -p "$cachedir" || die "Can't create new cachedir: $cachedir" mkdir -p "$cachedir/notree" || @@ -266,6 +275,16 @@ cache_set () { echo "$newrev" >"$cachedir/$oldrev" } +cache_set_if_unset () { + oldrev="$1" + newrev="$2" + if test -e "$cachedir/$oldrev" + then + return + fi + echo "$newrev" >"$cachedir/$oldrev" +} + rev_exists () { if git rev-parse "$1" >/dev/null 2>&1 then @@ -375,13 +394,13 @@ find_existing_splits () { then # squash commits refer to a subtree debug " Squash: $sq from $sub" - cache_set "$sq" "$sub" + cache_set_if_unset "$sq" "$sub" fi if test -n "$main" -a -n "$sub" then debug " Prior: $main -> $sub" - cache_set $main $sub - cache_set $sub $sub + cache_set_if_unset $main $sub + cache_set_if_unset $sub $sub 
try_remove_previous "$main" try_remove_previous "$sub" fi @@ -688,6 +707,8 @@ process_split_commit () { if test -n "$newparents" then cache_set "$rev" "$rev" + else + cache_set "$rev" "" fi return fi @@ -785,7 +806,7 @@ cmd_split () { # the 'onto' history is already just the subdir, so # any parent we find there can be used verbatim debug " cache: $rev" - cache_set "$rev" "$rev" + cache_set_if_unset "$rev" "$rev" done fi @@ -798,7 +819,7 @@ cmd_split () { git rev-list --topo-order --skip=1 $mainline | while read rev do - cache_set "$rev" "" + cache_set_if_unset "$rev" "" done || exit $? fi From 1490ce111463b56eed35016c2e6232eea1faa3db Mon Sep 17 00:00:00 2001 From: Tom Clarkson Date: Thu, 19 Dec 2019 14:43:03 +1100 Subject: [PATCH 4/7] subtree: add git subtree map command Adds an entry to the subtree cache so that subsequent split runs can skip any commits that turn out to be problematic. Signed-off-by: Tom Clarkson --- contrib/subtree/git-subtree.sh | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/contrib/subtree/git-subtree.sh b/contrib/subtree/git-subtree.sh index c21d620610c034..1559100c0e4c4d 100755 --- a/contrib/subtree/git-subtree.sh +++ b/contrib/subtree/git-subtree.sh @@ -15,6 +15,7 @@ git subtree merge --prefix= git subtree pull --prefix= git subtree push --prefix= git subtree split --prefix= +git subtree map --prefix= -- h,help show the help q quiet @@ -161,7 +162,7 @@ command="$1" shift case "$command" in -add|merge|pull) +add|merge|pull|map) default= ;; split|push) @@ -192,7 +193,8 @@ dir="$(dirname "$prefix/.")" if test "$command" != "pull" && test "$command" != "add" && - test "$command" != "push" + test "$command" != "push" && + test "$command" != "map" then revs=$(git rev-parse $default --revs-only "$@") || exit $? dirs=$(git rev-parse --no-revs --no-flags "$@") || exit $? 
@@ -793,6 +795,21 @@ cmd_add_commit () { say "Added dir '$dir'" } +cmd_map () { + oldrev="$1" + newrev="$2" + + if test -z "$oldrev" + then + die "You must provide a revision to map" + fi + + cache_setup || exit $? + cache_set "$oldrev" "$newrev" + + say "Mapped $oldrev => $newrev" +} + cmd_split () { debug "Splitting $dir..." cache_setup || exit $? From 2d103292cecfef3b29d221b3d8f05adb0add4475 Mon Sep 17 00:00:00 2001 From: Tom Clarkson Date: Thu, 19 Dec 2019 15:45:34 +1100 Subject: [PATCH 5/7] subtree: add git subtree use and ignore commands Tell split to use or ignore larger sections of the history. In most cases split does this automatically based on metadata from subtree add. Signed-off-by: Tom Clarkson --- contrib/subtree/git-subtree.sh | 78 ++++++++++++++++++++++++++++------ 1 file changed, 66 insertions(+), 12 deletions(-) diff --git a/contrib/subtree/git-subtree.sh b/contrib/subtree/git-subtree.sh index 1559100c0e4c4d..e56621a986748c 100755 --- a/contrib/subtree/git-subtree.sh +++ b/contrib/subtree/git-subtree.sh @@ -9,13 +9,15 @@ then set -- -h fi OPTS_SPEC="\ -git subtree add --prefix= -git subtree add --prefix= -git subtree merge --prefix= -git subtree pull --prefix= -git subtree push --prefix= -git subtree split --prefix= -git subtree map --prefix= +git subtree add --prefix= +git subtree add --prefix= +git subtree merge --prefix= +git subtree pull --prefix= +git subtree push --prefix= +git subtree split --prefix= +git subtree map --prefix= +git subtree ignore --prefix= +git subtree use --prefix= -- h,help show the help q quiet @@ -162,7 +164,7 @@ command="$1" shift case "$command" in -add|merge|pull|map) +add|merge|pull|map|ignore|use) default= ;; split|push) @@ -431,6 +433,18 @@ find_mainline_ref () { done } +exclude_processed_refs () { + if test -r "$cachedir/processed" + then + cat "$cachedir/processed" | + while read rev + do + debug "read $rev" + echo "^$rev" + done + fi +} + copy_commit () { # We're going to set some environment vars here, so # 
do it in a subshell to get rid of them safely later @@ -796,20 +810,60 @@ cmd_add_commit () { } cmd_map () { - oldrev="$1" - newrev="$2" - if test -z "$oldrev" + if test -z "$1" then die "You must provide a revision to map" fi + oldrev=$(git rev-parse --revs-only "$1") || exit $? + newrev= + + if test -n "$2" + then + newrev=$(git rev-parse --revs-only "$2") || exit $? + fi + cache_setup || exit $? cache_set "$oldrev" "$newrev" say "Mapped $oldrev => $newrev" } +cmd_ignore () { + revs=$(git rev-parse $default --revs-only "$@") || exit $? + ensure_single_rev $revs + + say "Ignoring $revs" + + cache_setup || exit $? + + git rev-list $revs | + while read rev + do + cache_set "$rev" "" + done + + echo "$revs" >>"$cachedir/processed" +} + +cmd_use () { + revs=$(git rev-parse $default --revs-only "$@") || exit $? + ensure_single_rev $revs + + say "Using existing subtree $revs" + + cache_setup || exit $? + + git rev-list $revs | + while read rev + do + cache_set "$rev" "$rev" + done + + echo "$revs" >>"$cachedir/processed" +} + cmd_split () { debug "Splitting $dir..." cache_setup || exit $? @@ -827,7 +881,7 @@ cmd_split () { done fi - unrevs="$(find_existing_splits "$dir" "$revs")" + unrevs="$(find_existing_splits "$dir" "$revs") $(exclude_processed_refs)" mainline="$(find_mainline_ref "$dir" "$revs")" if test -n "$mainline" From a7aaedfed3785c6ca693f60f05e76156f68a5d39 Mon Sep 17 00:00:00 2001 From: Tom Clarkson Date: Fri, 20 Dec 2019 15:26:02 +1100 Subject: [PATCH 6/7] subtree: more robustly distinguish subtree and mainline commits Prevent a mainline commit without $dir being treated as a subtree commit and pulling in the entire mainline history. Any valid subtree commit will have only valid subtree commits as parents, which will be unchanged by check_parents. 
Signed-off-by: Tom Clarkson --- contrib/subtree/git-subtree.sh | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/contrib/subtree/git-subtree.sh b/contrib/subtree/git-subtree.sh index e56621a986748c..fa6293b3727ea8 100755 --- a/contrib/subtree/git-subtree.sh +++ b/contrib/subtree/git-subtree.sh @@ -224,8 +224,6 @@ cache_setup () { fi mkdir -p "$cachedir" || die "Can't create new cachedir: $cachedir" - mkdir -p "$cachedir/notree" || - die "Can't create new cachedir: $cachedir/notree" debug "Using cachedir: $cachedir" >&2 } @@ -255,18 +253,11 @@ check_parents () { local indent=$(($2 + 1)) for miss in $missed do - if ! test -r "$cachedir/notree/$miss" - then - debug " unprocessed parent commit: $miss ($indent)" - process_split_commit "$miss" "" "$indent" - fi + debug " unprocessed parent commit: $miss ($indent)" + process_split_commit "$miss" "" "$indent" done } -set_notree () { - echo "1" > "$cachedir/notree/$1" -} - cache_set () { oldrev="$1" newrev="$2" @@ -719,11 +710,18 @@ process_split_commit () { # vs. a mainline commit? Does it matter? 
if test -z "$tree" then - set_notree "$rev" if test -n "$newparents" then - cache_set "$rev" "$rev" + if test "$newparents" = "$parents" + then + # if all parents were subtrees, this can be a subtree commit + cache_set "$rev" "$rev" + else + # a mainline commit with tree missing is equivalent to the initial commit + cache_set "$rev" "" + fi else + # no parents with valid subtree mappings means a commit prior to subtree add cache_set "$rev" "" fi return From fe2e4819b869725f870cd3ce99f1f8150fe17dc1 Mon Sep 17 00:00:00 2001 From: Tom Clarkson Date: Mon, 11 May 2020 12:24:44 +1000 Subject: [PATCH 7/7] subtree: document new subtree commands Signed-off-by: Tom Clarkson --- contrib/subtree/git-subtree.txt | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/contrib/subtree/git-subtree.txt b/contrib/subtree/git-subtree.txt index 352deda69dcfd0..a5a76e8ce69c46 100644 --- a/contrib/subtree/git-subtree.txt +++ b/contrib/subtree/git-subtree.txt @@ -52,6 +52,12 @@ useful elsewhere, you can extract its entire history and publish that as its own git repository, without accidentally intermingling the history of your application project. +Although the relationship between subtree and mainline commits is stored +in regular git history, it is also cached between subtree runs. In most +cases this is merely a performance improvement, but for projects with +large and complex histories the cache can be manipulated directly +with the use, ignore and map commands. + [TIP] In order to keep your commit messages clean, we recommend that people split their commits between the subtrees and the main @@ -120,6 +126,21 @@ and friends will work as expected. Note that if you use '--squash' when you merge, you should usually not just '--rejoin' when you split. +ignore:: + Mark a commit and all of its history as irrelevant to subtree split. + In most cases this would be handled automatically based on metadata + from subtree join commits. 
Intended for improving performance on + extremely large repos and excluding complex history that turns out + to be otherwise problematic. + +use:: + Mark a commit and all of its history as part of an existing subtree. + In normal circumstances this would be handled based on the metadata + from the subtree join commit. Similar to the --onto option of split. + +map:: + Manually override the normal output of split for a particular commit. + Provides extreme flexibility; intended for advanced troubleshooting only. OPTIONS ------- @@ -142,6 +163,9 @@ OPTIONS This option is only valid for add, merge and pull (unsure). Specify <message> as the commit message for the merge commit. +--clear-cache:: + Reset the subtree cache and recalculate all subtree mappings from the + commit history. OPTIONS FOR add, merge, push, pull ----------------------------------