Skip to content

Commit

Permalink
fix(git): fix repo scan result caching (#6179)
Browse files Browse the repository at this point in the history
* chore: remove unnecessary field from `ScanRepoParams`

* refactor: extract function to compute filter params hash

* test(git): tests for `getHashedFilterParams` function

* fix(git): fix stability of cache keys for include/exclude filters

* chore: allow profiling for `GitRepoHandler`

* chore: amend some comments
  • Loading branch information
vvagaytsev committed Jun 19, 2024
1 parent 70b8ca8 commit c276e86
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 12 deletions.
39 changes: 28 additions & 11 deletions core/src/vcs/git-repo.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,11 @@ import { FileTree } from "./file-tree.js"
import { normalize, sep } from "path"
import { stableStringify } from "../util/string.js"
import { hashString } from "../util/util.js"
import { Profile } from "../util/profiling.js"

const { pathExists } = fsExtra

type ScanRepoParams = Pick<GetFilesParams, "log" | "path" | "pathDescription" | "failOnPrompt" | "exclude">
type ScanRepoParams = Pick<GetFilesParams, "log" | "path" | "pathDescription" | "failOnPrompt">

interface GitRepoGetFilesParams extends GetFilesParams {
scanFromProjectRoot: boolean
Expand All @@ -44,7 +45,7 @@ const getIncludeExcludeFiles: IncludeExcludeFilesHandler<GitRepoGetFilesParams,
// Make sure action config is not mutated.
let exclude = !params.exclude ? [] : [...params.exclude]

// Do the same normalization of the excluded paths like in `GitHandler`.
// Do the same normalization of the excluded paths like in "subtree" scanning mode.
// This might be redundant because the non-normalized paths will be handled by `augmentGlobs` below.
// But this brings no harm and makes the implementation more clear.
exclude = exclude.map(normalize)
Expand All @@ -60,7 +61,25 @@ const getIncludeExcludeFiles: IncludeExcludeFilesHandler<GitRepoGetFilesParams,
return { include, exclude, augmentedIncludes, augmentedExcludes }
}

// @Profile()
export function getHashedFilterParams({
filter,
augmentedIncludes,
augmentedExcludes,
}: {
filter: ((path: string) => boolean) | undefined
augmentedIncludes: string[]
augmentedExcludes: string[]
}) {
return hashString(
stableStringify({
filter: filter ? filter.toString() : undefined, // We hash the source code of the filter function if provided.
augmentedIncludes: augmentedIncludes.sort(),
augmentedExcludes: augmentedExcludes.sort(),
})
)
}

@Profile()
export class GitRepoHandler extends AbstractGitHandler {
private readonly gitHandlerDelegate: GitSubTreeHandler
override readonly name = "git-repo"
Expand All @@ -71,7 +90,7 @@ export class GitRepoHandler extends AbstractGitHandler {
}

/**
* This has the same signature as the GitHandler super class method but instead of scanning the individual directory
* This has the same signature as the `GitSubTreeHandler` class method but instead of scanning the individual directory
* path directly, we scan the entire enclosing git repository, cache that file list and then filter down to the
* sub-path. This results in far fewer git process calls but in turn collects more data in memory.
*/
Expand All @@ -97,13 +116,11 @@ export class GitRepoHandler extends AbstractGitHandler {
const scanFromProjectRoot = scanRoot === this.projectRoot
const { augmentedExcludes, augmentedIncludes } = await getIncludeExcludeFiles({ ...params, scanFromProjectRoot })

const hashedFilterParams = hashString(
stableStringify({
filter: filter ? filter.toString() : undefined, // We hash the source code of the filter function if provided.
augmentedIncludes,
augmentedExcludes,
})
)
const hashedFilterParams = getHashedFilterParams({
filter,
augmentedIncludes,
augmentedExcludes,
})
const filteredFilesCacheKey = ["git-repo-files", path, hashedFilterParams]

const cached = this.cache.get(log, filteredFilesCacheKey) as VcsFile[] | undefined
Expand Down
2 changes: 1 addition & 1 deletion core/src/vcs/vcs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,7 @@ export function describeConfig(config: ModuleConfig | BaseActionConfig): ActionD
* Checks if the {@code subPathCandidate} is a sub-path of {@code basePath}.
* Sub-path means that a candidate must be located inside a reference path.
*
* Both {@basePath} and {@ subPathCandidate} must be absolute paths
* Both {@code basePath} and {@code subPathCandidate} must be absolute paths
*
* @param basePath the reference path (absolute)
* @param subPathCandidate the path to be checked (absolute)
Expand Down
41 changes: 41 additions & 0 deletions core/test/unit/src/vcs/vcs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ import { DEFAULT_BUILD_TIMEOUT_SEC, GardenApiVersion } from "../../../../src/con
import { defaultDotIgnoreFile, fixedProjectExcludes } from "../../../../src/util/fs.js"
import type { BaseActionConfig } from "../../../../src/actions/types.js"
import { TreeCache } from "../../../../src/cache.js"
import { getHashedFilterParams } from "../../../../src/vcs/git-repo.js"

const { readFile, writeFile, rm, rename } = fsExtra

Expand Down Expand Up @@ -671,4 +672,44 @@ describe("helpers", () => {
expect(subPath).to.be.true
})
})

describe("getHashedFilterParams", () => {
it("should return the same hashes for fully equal objects", () => {
const params1 = { filter: undefined, augmentedIncludes: ["yes.txt"], augmentedExcludes: ["no.txt"] }
const hash1 = getHashedFilterParams(params1)

const params2 = { filter: undefined, augmentedIncludes: ["yes.txt"], augmentedExcludes: ["no.txt"] }
const hash2 = getHashedFilterParams(params2)

expect(hash1).to.eql(hash2)
})

it("should return the different hashes for non-equal objects", () => {
const params1 = { filter: undefined, augmentedIncludes: ["yes1.txt"], augmentedExcludes: ["no1.txt"] }
const hash1 = getHashedFilterParams(params1)

const params2 = { filter: undefined, augmentedIncludes: ["yes2.txt"], augmentedExcludes: ["no2.txt"] }
const hash2 = getHashedFilterParams(params2)

expect(hash1).not.to.eql(hash2)
})

it("should not depend on the order of the include/exclude file lists", () => {
const params1 = {
filter: undefined,
augmentedIncludes: ["yes1.txt", "yes2.txt"],
augmentedExcludes: ["no1.txt", "no2.txt"],
}
const hash1 = getHashedFilterParams(params1)

const params2 = {
filter: undefined,
augmentedIncludes: ["yes2.txt", "yes1.txt"],
augmentedExcludes: ["no2.txt", "no1.txt"],
}
const hash2 = getHashedFilterParams(params2)

expect(hash1).to.eql(hash2)
})
})
})

0 comments on commit c276e86

Please sign in to comment.