Skip to content

Commit

Permalink
thoughtworks#416 Fix - Files of the same name in sub-folders will be …
Browse files Browse the repository at this point in the history
…considered as different files
  • Loading branch information
deepthirera committed May 21, 2024
1 parent d3de10b commit 262dc6e
Show file tree
Hide file tree
Showing 6 changed files with 116 additions and 26 deletions.
36 changes: 33 additions & 3 deletions checksumcalculator/checksumcalculator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,48 @@ func TestNewChecksumCalculator(t *testing.T) {
t.Run("should return CollectiveChecksum when existing file name pattern is sent", func(t *testing.T) {
gitAdditions := []gitrepo.Addition{
{
Path: "GitRepoPath1",
Path: "GitRepoPath1/GitRepoName1",
Name: "GitRepoName1",
},
}
expectedCC := "54bbf09e5c906e2d7cc0808729f8120cfa3c4bad3fb6a85689ae23ca00e5a3c8"
fileNamePattern := "*RepoName1"
expectedCC := "19250a996e1200d33e91454bc662efae7682410e5347cfc56b0ff386dfbc10ae"
fileNamePattern := "GitRepoPath1/"
cc := NewChecksumCalculator(defaultSHA256Hasher, gitAdditions)

actualCC := cc.CalculateCollectiveChecksumForPattern(fileNamePattern)

assert.Equal(t, expectedCC, actualCC)
})

t.Run("should return the files own CollectiveChecksum when same file name is present in subfolders", func(t *testing.T) {
gitAdditions := []gitrepo.Addition{
{
Path: "hello.txt",
Name: "hello.txt",
},
{
Path: "subfolder/hello.txt",
Name: "hello.txt",
},
}
hello_expectedCC := "9d30c2e4bcf181bba07374cc416f1892d89918038bce5172776475347c4d2d69"
fileNamePattern1 := "hello.txt"
cc1 := NewChecksumCalculator(defaultSHA256Hasher, gitAdditions)
actualCC := cc1.CalculateCollectiveChecksumForPattern(fileNamePattern1)
assert.Equal(t, hello_expectedCC, actualCC)

subfolder_hello_expectedCC := "6c779c16bcc2e63c659be7649a531650210d6b96ae590a146f9ccdca383587f6"
fileNamePattern2 := "subfolder/hello.txt"
cc2 := NewChecksumCalculator(defaultSHA256Hasher, gitAdditions)
subfolder_actualCC := cc2.CalculateCollectiveChecksumForPattern(fileNamePattern2)
assert.Equal(t, subfolder_hello_expectedCC, subfolder_actualCC)

all_txt_expectedCC := "aba77c9077539130e21a8f275fc5f1f43b7f0589e392bc89e4b96c578f0a9184"
fileNamePattern3 := "*.txt"
cc3 := NewChecksumCalculator(defaultSHA256Hasher, gitAdditions)
all_txt_actualCC := cc3.CalculateCollectiveChecksumForPattern(fileNamePattern3)
assert.Equal(t, all_txt_expectedCC, all_txt_actualCC)
})
}

func TestDefaultChecksumCalculator_SuggestTalismanRC(t *testing.T) {
Expand Down
24 changes: 24 additions & 0 deletions cmd/acceptance_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,13 @@ fileignoreconfig:
ignore_detectors: []
`

const talismanRCForHelloTxtFile = `
fileignoreconfig:
- filename: hello.txt
checksum: edf37f1d33525d1710c3f5dc3437778c483def1d2e98b5a3495fc627eb1be588
ignore_detectors: []
`

func init() {
git_testing.Logger = logrus.WithField("Environment", "Debug")
git_testing.Logger.Debug("Acceptance test started")
Expand Down Expand Up @@ -261,6 +268,23 @@ func TestPatternFindsSecretInNestedFile(t *testing.T) {
})
}

func TestFilesWithSameNameWithinRepositoryAreHandledAsSeparateFiles(t *testing.T) {
withNewTmpGitRepo(func(git *git_testing.GitTesting) {
git.SetupBaselineFiles("simple-file", "some-dir/hello.txt")
git.CreateFileWithContents("hello.txt", awsAccessKeyIDExample)
git.AddAndcommit("*", "Start of Scan before talismanrc")
assert.Equal(t, 1, runTalismanInPrePushMode(git), "Expected run() to return 1 since secret is detected in hello")
git.CreateFileWithContents(".talismanrc", talismanRCForHelloTxtFile)

git.AppendFileContent("some-dir/hello.txt", "More safe content")
assert.Equal(t, 0, runTalismanInPrePushMode(git), "Expected run() to return 0 since hello checksum is added to fileignoreconfig in talismanrc")
git.AddAndcommit("some-dir/hello.txt", "Start of second scan")
assert.Equal(t, 0, runTalismanInPrePushMode(git), "Expected run() to return 0 since hello in subfolder was only changed")
git.AppendFileContent("hello.txt", "More safe content in unsafe file")
git.AddAndcommit("hello.txt", "Start of third scan - new hash is required")
assert.Equal(t, 1, runTalismanInPrePushMode(git), "Expected run() to return 1 since hello checksum is changed")
})
}
func TestIgnoreHistoryDoesNotDetectRemovedSecrets(t *testing.T) {
withNewTmpGitRepo(func(git *git_testing.GitTesting) {
options.Debug = false
Expand Down
2 changes: 1 addition & 1 deletion detector/helpers/checksum_compare_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ func TestChecksumCompare_IsScanNotRequired(t *testing.T) {
},
}
cc := NewChecksumCompare(checksumCalculator, mockSHA256Hasher, &ignoreConfig)
addition := gitrepo.Addition{Name: "some.txt"}
addition := gitrepo.Addition{Name: "some.txt", Path: "some.txt"}
mockSHA256Hasher.EXPECT().CollectiveSHA256Hash([]string{string(addition.Path)}).Return("somesha")
checksumCalculator.EXPECT().CalculateCollectiveChecksumForPattern("some.txt").Return("sha1")

Expand Down
42 changes: 22 additions & 20 deletions gitrepo/gitrepo.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,27 +12,27 @@ import (
log "github.com/sirupsen/logrus"
)

//FilePath represents the absolute path of an added file
// FilePath represents the absolute path of an added file
type FilePath string

//FileName represents the base name of an added file
// FileName represents the base name of an added file
type FileName string

//Addition represents the end state of a file
// Addition represents the end state of a file
type Addition struct {
Path FilePath
Name FileName
Commits []string
Data []byte
}

//GitRepo represents a Git repository located at the absolute path represented by root
// GitRepo represents a Git repository located at the absolute path represented by root
type GitRepo struct {
root string
}

//RepoLocatedAt returns a new GitRepo with it's root located at the location specified by the argument.
//If the argument is not an absolute path, it will be turned into one.
// RepoLocatedAt returns a new GitRepo with it's root located at the location specified by the argument.
// If the argument is not an absolute path, it will be turned into one.
func RepoLocatedAt(path string) GitRepo {
absoluteRoot, _ := filepath.Abs(path)
return GitRepo{absoluteRoot}
Expand Down Expand Up @@ -117,7 +117,7 @@ func MatchGitDiffLine(gitDiffString string) (bool, string) {
return false, ""
}

//StagedAdditions returns the files staged for commit in a GitRepo
// StagedAdditions returns the files staged for commit in a GitRepo
func (repo GitRepo) StagedAdditions() []Addition {
files := repo.stagedFiles()
result := make([]Addition, len(files))
Expand All @@ -132,7 +132,7 @@ func (repo GitRepo) StagedAdditions() []Addition {
return result
}

//AllAdditions returns all the outgoing additions and modifications in a GitRepo. This does not include files that were deleted.
// AllAdditions returns all the outgoing additions and modifications in a GitRepo. This does not include files that were deleted.
func (repo GitRepo) AllAdditions() []Addition {
result := string(repo.executeRepoCommand("git", "rev-parse", "--abbrev-ref", "origin/HEAD"))
log.Debugf("Result of getting default branch %v", result)
Expand All @@ -141,7 +141,7 @@ func (repo GitRepo) AllAdditions() []Addition {
return repo.AdditionsWithinRange(oldCommit, newCommit)
}

//AdditionsWithinRange returns the outgoing additions and modifications in a GitRepo that are in the given commit range. This does not include files that were deleted.
// AdditionsWithinRange returns the outgoing additions and modifications in a GitRepo that are in the given commit range. This does not include files that were deleted.
func (repo GitRepo) AdditionsWithinRange(oldCommit string, newCommit string) []Addition {
files := repo.outgoingNonDeletedFiles(oldCommit, newCommit)
result := make([]Addition, len(files))
Expand All @@ -157,7 +157,7 @@ func (repo GitRepo) AdditionsWithinRange(oldCommit string, newCommit string) []A
return result
}

//NewAddition returns a new Addition for a file with supplied name and contents
// NewAddition returns a new Addition for a file with supplied name and contents
func NewAddition(filePath string, content []byte) Addition {
return Addition{
Path: FilePath(filePath),
Expand All @@ -166,7 +166,7 @@ func NewAddition(filePath string, content []byte) Addition {
}
}

//NewScannerAddition returns an new Addition for a file with supplied contents and all of the commits the file is in
// NewScannerAddition returns an new Addition for a file with supplied contents and all of the commits the file is in
func NewScannerAddition(filePath string, commits []string, content []byte) Addition {
return Addition{
Path: FilePath(filePath),
Expand All @@ -176,9 +176,9 @@ func NewScannerAddition(filePath string, commits []string, content []byte) Addit
}
}

//CheckIfFileExists checks if the file exists on the file system. Does not look into the file contents
//Returns TRUE if file exists
//Returns FALSE if the file is not found
// CheckIfFileExists checks if the file exists on the file system. Does not look into the file contents
// Returns TRUE if file exists
// Returns FALSE if the file is not found
func (repo GitRepo) CheckIfFileExists(fileName string) bool {
filepath := path.Join(repo.root, fileName)
if _, err := os.Stat(filepath); os.IsNotExist(err) {
Expand All @@ -187,18 +187,20 @@ func (repo GitRepo) CheckIfFileExists(fileName string) bool {
return true
}

//Matches states whether the addition matches the given pattern.
//If the pattern ends in a path separator, then all files inside a directory with that name are matched. However, files with that name itself will not be matched.
//If a pattern contains the path separator in any other location, the match works according to the pattern logic of the default golang glob mechanism
//If there is no path separator anywhere in the pattern, the pattern is matched against the base name of the file. Thus, the pattern will match files with that name anywhere in the repository.
// Matches states whether the addition matches the given pattern.
// If the pattern ends in a path separator, then all files inside a directory with that name are matched. However, files with that name itself will not be matched.
// If a pattern contains the path separator in any other location, the match works according to the pattern logic of the default golang glob mechanism
// If there is no path separator anywhere in the pattern, the pattern is matched against the base name of the file. Thus, the pattern will match files with that name anywhere in the repository.
func (a Addition) Matches(pattern string) bool {
var result bool
if pattern[len(pattern)-1] == '/' {
result = strings.HasPrefix(string(a.Path), pattern)
} else if strings.ContainsRune(pattern, '/') {
result, _ = path.Match(pattern, string(a.Path))
} else {
} else if strings.ContainsRune(pattern, '*') {
result, _ = path.Match(pattern, string(a.Name))
} else {
result = strings.Compare(string(a.Path), pattern) == 0
}
log.WithFields(log.Fields{
"pattern": pattern,
Expand All @@ -208,7 +210,7 @@ func (a Addition) Matches(pattern string) bool {
return result
}

//TrackedFilesAsAdditions returns all of the tracked files in a GitRepo as Additions
// TrackedFilesAsAdditions returns all of the tracked files in a GitRepo as Additions
func (repo GitRepo) TrackedFilesAsAdditions() []Addition {
trackedFilePaths := repo.trackedFilePaths()
var additions []Addition
Expand Down
36 changes: 35 additions & 1 deletion gitrepo/gitrepo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -215,11 +215,45 @@ func TestMatchShouldAllowWildcardPatternMatches(t *testing.T) {
assert.False(t, file3.Matches(pattern))
}

func TestMatchShouldMatchExactFileIfNoPatternIsProvided(t *testing.T) {
file1 := Addition{Path: "bigfile", Name: "bigfile"}
file2 := Addition{Path: "subfolder/bigfile", Name: "bigfile"}
file3 := Addition{Path: "somefile", Name: "somefile"}
pattern := "bigfile"

assert.True(t, file1.Matches(pattern))
assert.False(t, file2.Matches(pattern))
assert.False(t, file3.Matches(pattern))
}

func TestMatchShouldAllowStarPattern(t *testing.T) {
file1 := Addition{Path: "GitRepoPath1/File1.txt", Name: "File1.txt"}
file2 := Addition{Path: "GitRepoPath1/File2.txt", Name: "File2.txt"}
file3 := Addition{Path: "GitRepoPath1/somefile", Name: "somefile"}
file4 := Addition{Path: "somefile.jpg", Name: "somefile.jpg"}
file5 := Addition{Path: "somefile.txt", Name: "somefile.txt"}

pattern := "GitRepoPath1/*.txt"

assert.True(t, file1.Matches(pattern))
assert.True(t, file2.Matches(pattern))
assert.False(t, file3.Matches(pattern))
assert.False(t, file4.Matches(pattern))
assert.False(t, file5.Matches(pattern))

pattern1 := "*.txt"
assert.True(t, file1.Matches(pattern1))
assert.True(t, file2.Matches(pattern1))
assert.False(t, file3.Matches(pattern1))
assert.False(t, file4.Matches(pattern1))
assert.True(t, file5.Matches(pattern1))
}

func setupOriginAndClones(originLocation, cloneLocation string) (*git_testing.GitTesting, GitRepo) {
origin := RepoLocatedAt(originLocation)
git := git_testing.Init(origin.root)
git.SetupBaselineFiles("a.txt", filepath.Join("alice", "bob", "b.txt"))
git.SetupBaselineFiles("c.txt", filepath.Join( "folder b","c.txt"))
git.SetupBaselineFiles("c.txt", filepath.Join("folder b", "c.txt"))
cwd, _ := os.Getwd()
gitClone := git.GitClone(filepath.Join(cwd, cloneLocation))
return gitClone, RepoLocatedAt(cloneLocation)
Expand Down
2 changes: 1 addition & 1 deletion talismanrc/scopes.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ var knownScopes = map[string][]string{
"go": {"makefile", "go.mod", "go.sum", "Gopkg.toml", "Gopkg.lock", "glide.yaml", "glide.lock"},
"images": {"*.jpeg", "*.jpg", "*.png", "*.tiff", "*.bmp"},
"bazel": {"*.bzl"},
"terraform": {".terraform.lock.hcl"},
"terraform": {".terraform.lock.hcl", "*.terraform.lock.hcl"},
"php": {"composer.lock"},
"python": {"poetry.lock", "Pipfile.lock", "requirements.txt"},
}

0 comments on commit 262dc6e

Please sign in to comment.