From 9fcaf64126f8e2c27e9e473e6fb2d418fa8384c7 Mon Sep 17 00:00:00 2001 From: Adam Saponara Date: Mon, 8 May 2023 16:44:34 -0400 Subject: [PATCH] Memoize git shell commands We have a shell script that invokes `chef install` and `chef export` against a setup with ~30 cookbooks. It was annoyingly slow so we dug into why that was the case. We found that these commands call `ChefCLI::CookbookProfiler::Git` in a loop which ends up shelling out the same handful of git commands repeatedly. For example, one run produced the following duplicate git commands (sorted by dupe count): ``` 74 execve("/usr/bin/git", ["git", "rev-parse", "HEAD"], 46 execve("/usr/bin/git", ["git", "rev-parse", "HEAD"], 41 execve("/usr/bin/git", ["git", "rev-parse", "--abbrev-ref", "HEAD"], 39 execve("/usr/bin/git", ["git", "diff-files", "--quiet"], 25 execve("/usr/bin/git", ["git", "branch", "-r", "--contains", ""], 23 execve("/usr/lib/git-core/git", ["/usr/lib/git-core/git", "status", "--porcelain=2", "-uno"], 23 execve("/usr/bin/git", ["git", "config", "--get", "branch.HEAD.remote"], 21 execve("/usr/bin/git", ["git", "diff-files", "--quiet"], 19 execve("/usr/bin/git", ["git", "rev-parse", "--abbrev-ref", "HEAD"], 17 execve("/usr/bin/git", ["git", "config", "--get", "branch..remote"], 16 execve("/usr/bin/git", ["git", "config", "--get", "remote.origin.url"], 14 execve("/usr/bin/git", ["git", "branch", "-r", "--contains", ""], 14 execve("/usr/bin/git", ["git", "branch", "-r", "--contains", ""], 12 execve("/usr/bin/git", ["git", "config", "--get", "remote.origin.url"], 11 execve("/usr/bin/git", ["git", "config", "--get", "branch..remote"], 9 execve("/usr/bin/git", ["git", "config", "--get", "branch.HEAD.remote"], 7 execve("/usr/bin/git", ["git", "branch", "-r", "--contains", ""], 5 execve("/usr/lib/git-core/git", ["/usr/lib/git-core/git", "status", "--porcelain=2", "-uno"], ``` Adding a simple memoization brought down our run from ~45s to ~1s. To be a little safer, we could wrap this is a `git_memo` method to make it more explicit. I thought I'd collect some initial feedback on the general idea first. Signed-off-by: Adam Saponara --- lib/chef-cli/cookbook_profiler/git.rb | 17 +++++++++++++++-- spec/unit/cookbook_profiler/git_spec.rb | 3 ++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/lib/chef-cli/cookbook_profiler/git.rb b/lib/chef-cli/cookbook_profiler/git.rb index ba0a52f2..65893e19 100644 --- a/lib/chef-cli/cookbook_profiler/git.rb +++ b/lib/chef-cli/cookbook_profiler/git.rb @@ -23,8 +23,14 @@ class Git include Helpers + @@git_memo = {} + attr_reader :cookbook_path + def self.uncache + @@git_memo = {} + end + def initialize(cookbook_path) @cookbook_path = cookbook_path @unborn_branch = nil @@ -111,8 +117,15 @@ def git!(subcommand, options = {}) end def git(subcommand, options = {}) - options = { cwd: cookbook_path }.merge(options) - system_command("git #{subcommand}", options) + memo_key = [subcommand, options] + if @@git_memo.has_key?(memo_key) + rv = @@git_memo[memo_key] + else + options = { cwd: cookbook_path }.merge(options) + rv = system_command("git #{subcommand}", options) + @@git_memo[memo_key] = rv + end + rv end def detect_current_branch diff --git a/spec/unit/cookbook_profiler/git_spec.rb b/spec/unit/cookbook_profiler/git_spec.rb index 1a45f6b3..9a960b00 100644 --- a/spec/unit/cookbook_profiler/git_spec.rb +++ b/spec/unit/cookbook_profiler/git_spec.rb @@ -25,7 +25,8 @@ include ChefCLI::Helpers - let(:git_profiler) do + let!(:git_profiler) do + ChefCLI::CookbookProfiler::Git.uncache ChefCLI::CookbookProfiler::Git.new(cookbook_path) end