Skip to content

Cache command output

Jan Katins edited this page Jun 5, 2023 · 2 revisions

Sometimes you need to run a command and use its result, e.g. ask poetry for the path of the virtualenv that should be loaded. Such a command can be slow, which in turn makes cd'ing into the directory slow. Below is a recipe that caches the output of a command and replays it if the cache exists:

#######################################
# Run a command and cache its output; replay the cached value on later calls.
#
# The cache key is built from the current directory, the command line and the
# content of every --file-dependency file, so changing any of them causes the
# command to be run again.
#
# Usage:
#   cache_output [options] -- command [args...]
#
# Options:
#   -n, --name NAME             Prefix of the cache file name (default: cache).
#   -f, --file-dependency FILE  Mix FILE's content into the cache key
#                               (may be given several times).
#   --cache-dir DIR             Directory holding the cache files
#                               (default: ${HOME}/.cache/direnv_command_cache).
#   --expect-file               Cached value must be an existing file (default).
#   --expect-dir                Cached value must be an existing directory.
#   --validation-func FUNC      Command called with the cached value as its
#                               argument; non-zero means the cache is stale.
#   --capture-stderr            Capture stderr together with stdout
#                               (default: stderr is discarded).
#
# Globals:   HOME, PWD (read)
# Requires:  log_status and log_error to be defined by the environment
#            (presumably direnv's stdlib - they are not defined here).
# Outputs:   The (cached or fresh) command output on stdout, without trailing
#            newlines.
# Returns:   0 on success, 1 on usage errors, otherwise the exit status of the
#            command when it fails (its output is then printed but not cached).
#######################################
cache_output() {
  local file_dependencies=()
  local name="cache"
  local cache_dir="${HOME}/.cache/direnv_command_cache"
  local validation_func=_expect_file
  local capture_stderr=false

  # Validator: the cached value must name an existing regular file.
  _expect_file() {
    if [[ ! -f ${1} ]]; then
      log_error "Cached value (${1}) is not a file."
      return 1
    fi
    return 0
  }

  # Validator: the cached value must name an existing directory.
  _expect_dir() {
    if [[ ! -d ${1} ]]; then
      log_error "Cached value (${1}) is not a directory."
      return 1
    fi
    return 0
  }

  # Print a hash of stdin. `md5` (macOS/BSD) is tried first so existing cache
  # file names stay valid; `md5sum` and POSIX `cksum` are fallbacks for systems
  # without it.
  _hash_stdin() {
    if command -v md5 >/dev/null 2>&1; then
      md5
    elif command -v md5sum >/dev/null 2>&1; then
      md5sum | cut -d ' ' -f 1
    else
      cksum | cut -d ' ' -f 1
    fi
  }

  while [[ $# -gt 0 ]]; do
    # Options that take a value must actually be followed by one.
    case "$1" in
      -n|--name|-f|--file-dependency|--cache-dir|--validation-func)
        if [[ $# -lt 2 ]]; then
          log_error "Option ${1} requires a value"
          return 1
        fi
        ;;
    esac

    case "$1" in
      -n|--name)
        name="$2"
        shift 2
        ;;
      -f|--file-dependency)
        file_dependencies+=("$2")
        shift 2
        ;;
      --cache-dir)
        # Plain assignment: `+=(...)` would turn the scalar into an array and
        # ${cache_dir} would keep returning the default directory.
        cache_dir="$2"
        shift 2
        ;;
      --expect-file)
        validation_func=_expect_file
        shift
        ;;
      --expect-dir)
        validation_func=_expect_dir
        shift
        ;;
      --validation-func)
        validation_func="$2"
        shift 2
        ;;
      --capture-stderr)
        capture_stderr=true
        shift
        ;;
      --)
        shift # everything after "--" is the command to run
        break
        ;;
      *)
        log_error  "Unknown option ${1}"
        return 1
        ;;
    esac
  done

  if [[ $# -eq 0 ]]; then
    log_error "No command given (usage: cache_output [options] -- command [args...])"
    return 1
  fi

  # Cache key: current directory + command line, then each dependency file's
  # content folded in one after another.
  local pwd_basename="${PWD##*/}"
  local hash_part
  hash_part=$(printf '%s %s %s\n' "${PWD}" '$$$' "$*" | _hash_stdin)
  local f
  for f in "${file_dependencies[@]}"; do
    hash_part=$( { cat -- "${f}"; printf '%s\n' "${hash_part}"; } | _hash_stdin)
  done

  mkdir -p -- "${cache_dir}"
  local cachefile="${cache_dir}/${name}_${pwd_basename}_${hash_part}"
  local result=""

  if [[ -f "${cachefile}" ]]; then
    result=$(< "${cachefile}")
    # Deliberately unquoted so a validation command given with arguments is
    # still split into words.
    if ${validation_func} "${result}"; then
      log_status "Using cached value for '${*}'"
      printf '%s' "${result}"
      return 0
    fi
    rm -f -- "${cachefile}"
    log_error "Cachefile content '${result}' did not validate. Deleted cachefile ${cachefile}."
  else
    log_status "No cache found (${cachefile})."
  fi

  # No cachefile, or its content did not validate: run the command.
  # "$@" keeps every argument intact (spaces, globs) instead of re-splitting.
  local rc=0
  if [[ "${capture_stderr}" == true ]]; then
    result=$("$@" 2>&1) || rc=$?
  else
    result=$("$@" 2>/dev/null) || rc=$?
  fi

  if (( rc != 0 )); then
    # Never cache the output of a failed command; hand the failure to the caller.
    log_error "Command '${*}' failed with exit status ${rc}. Not caching its output."
    printf '%s' "${result}"
    return "${rc}"
  fi

  log_status "Caching value '${result}' to '${cachefile}'"
  printf '%s' "${result}" > "${cachefile}"
  printf '%s' "${result}"
  return 0
}

An example usage:

# Look up the poetry virtualenv path, caching the output of `poetry env info --path`.
#   --name poetry                     the name becomes part of the cache file name (easier debugging)
#   --file-dependency pyproject.toml  ties the cache to pyproject.toml: if that file changes, a new run is needed
#   --expect-dir                      the cached value is only used if it is a directory
local poetry_envdir=$(
  cache_output \
    --name poetry \
    --file-dependency pyproject.toml \
    --expect-dir \
    -- poetry env info --path
)