Skip to content

Commit

Permalink
fix #5 (#6)
Browse files Browse the repository at this point in the history
fix #5
  • Loading branch information
davidt99 authored and glaslos committed Dec 6, 2017
1 parent 3debc12 commit cf3f9ba
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 18 deletions.
39 changes: 26 additions & 13 deletions ssdeep.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ const (
spamSumLength = 64
minFileSize = 4096
hashPrime uint32 = 0x01000193
hashIinit uint32 = 0x28021967
hashInit uint32 = 0x28021967
b64String = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
)

Expand All @@ -28,6 +28,10 @@ type rollingState struct {
n uint32
}

// rollSum reports the current rolling hash value, which is the sum of the
// h1, h2 and h3 components of the rolling state. The addition is performed
// in uint32 arithmetic, so it wraps around on overflow.
func (rs rollingState) rollSum() uint32 {
	total := rs.h1
	total += rs.h2
	total += rs.h3
	return total
}

// FuzzyHash struct for comparison
type FuzzyHash struct {
blockSize int
Expand Down Expand Up @@ -56,8 +60,8 @@ type SSDEEP struct {
// NewSSDEEP creates a new SSDEEP hash
func NewSSDEEP() SSDEEP {
return SSDEEP{
blockHash1: hashIinit,
blockHash2: hashIinit,
blockHash1: hashInit,
blockHash2: hashInit,
rollingState: rollingState{
window: make([]byte, rollingWindow),
},
Expand All @@ -75,7 +79,7 @@ func sumHash(c byte, h uint32) uint32 {
}

// rollHash feeds the byte c into the rolling hash, which is based on the Adler checksum
func (sdeep *SSDEEP) rollHash(c byte) uint32 {
func (sdeep *SSDEEP) rollHash(c byte) {
rs := &sdeep.rollingState
rs.h2 -= rs.h1
rs.h2 += rollingWindow * uint32(c)
Expand All @@ -88,7 +92,6 @@ func (sdeep *SSDEEP) rollHash(c byte) uint32 {
}
rs.h3 = rs.h3 << 5
rs.h3 ^= uint32(c)
return rs.h1 + rs.h2 + rs.h3
}

// GetBlockSize calculates the block size based on file size
Expand All @@ -112,18 +115,17 @@ func GetFileSize(f *os.File) (int, error) {
func (sdeep *SSDEEP) processByte(b byte) {
sdeep.blockHash1 = sumHash(b, sdeep.blockHash1)
sdeep.blockHash2 = sumHash(b, sdeep.blockHash2)
rh := int(sdeep.rollHash(b))
sdeep.rollHash(b)
rh := int(sdeep.rollingState.rollSum())
if rh%sdeep.blockSize == (sdeep.blockSize - 1) {
if len(sdeep.hashString1) < spamSumLength-1 {
sdeep.hashString1 += string(b64[sdeep.blockHash1%64])
sdeep.blockHash1 = hashIinit
sdeep.newRollingState()
sdeep.blockHash1 = hashInit
}
if rh%(sdeep.blockSize*2) == ((sdeep.blockSize * 2) - 1) {
if len(sdeep.hashString2) < spamSumLength/2-1 {
sdeep.hashString2 += string(b64[sdeep.blockHash2%64])
sdeep.blockHash2 = hashIinit
sdeep.newRollingState()
sdeep.blockHash2 = hashInit
}
}
}
Expand All @@ -142,9 +144,12 @@ func (sdeep *SSDEEP) process(r *bufio.Reader) {
sdeep.processByte(b)
b, err = r.ReadByte()
}
// Finalize the hash string with the remaining data
sdeep.hashString1 += string(b64[sdeep.blockHash1%64])
sdeep.hashString2 += string(b64[sdeep.blockHash2%64])
rh := sdeep.rollingState.rollSum()
if rh != 0 {
// Finalize the hash string with the remaining data
sdeep.hashString1 += string(b64[sdeep.blockHash1%64])
sdeep.hashString2 += string(b64[sdeep.blockHash2%64])
}
}

// FuzzyReader hash of a provided reader
Expand All @@ -161,6 +166,10 @@ func (sdeep *SSDEEP) FuzzyReader(f fuzzyReader, fileLocation string) (*FuzzyHash
}
if len(sdeep.hashString1) < spamSumLength/2 {
sdeep.blockSize = sdeep.blockSize / 2
sdeep.blockHash1 = hashInit
sdeep.blockHash2 = hashInit
sdeep.hashString1 = ""
sdeep.hashString2 = ""
} else {
break
}
Expand Down Expand Up @@ -195,6 +204,10 @@ func (sdeep *SSDEEP) FuzzyFile(f *os.File, fileLocation string) (*FuzzyHash, err
}
if len(sdeep.hashString1) < spamSumLength/2 {
sdeep.blockSize = sdeep.blockSize / 2
sdeep.blockHash1 = hashInit
sdeep.blockHash2 = hashInit
sdeep.hashString1 = ""
sdeep.hashString2 = ""
} else {
break
}
Expand Down
12 changes: 7 additions & 5 deletions ssdeep_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ func TestRollingHash(t *testing.T) {
window: make([]byte, rollingWindow),
},
}
if s.rollHash(byte('A')) != 585 {
s.rollHash(byte('A'))
rh := s.rollingState.rollSum()
if rh != 585 {
t.Error("Rolling hash not matching")
}
}
Expand All @@ -61,10 +63,10 @@ func TestCompareHashFile(t *testing.T) {
if err != nil {
t.Error(err)
}
if libhash.String() != "96:PuNQzo6pYsJWsJ6NA5xpYTYqhuNQzo6pYsJWsJ6NA5xpYTYA:+QzrpYgWg6NQ7aYZQzrpYgWg6NQ7aYA" {
expectedResult := "96:PuNQHTo6pYrYJWrYJ6N3w53hpYTdhuNQHTo6pYrYJWrYJ6N3w53hpYTP:+QHTrpYrsWrs6N3g3LaGQHTrpYrsWrsa"
if libhash.String() != expectedResult {
t.Errorf(
"Hash mismatch: %s vs %s", libhash.String(),
"96:PuNQzo6pYsJWsJ6NA5xpYTYqhuNQzo6pYsJWsJ6NA5xpYTYA:+QzrpYgWg6NQ7aYZQzrpYgWg6NQ7aYA",
"Hash mismatch: %s vs %s", libhash.String(), expectedResult,
)
}
}
Expand Down Expand Up @@ -127,7 +129,7 @@ func BenchmarkRollingHash(b *testing.B) {
}

func BenchmarkSumHash(b *testing.B) {
testHash := hashIinit
testHash := hashInit
data := []byte("Hereyougojustsomedatatomakeyouhappy")
for i := 0; i < b.N; i++ {
testHash = sumHash(data[rand.Intn(len(data))], testHash)
Expand Down

0 comments on commit cf3f9ba

Please sign in to comment.