Skip to content

Commit

Permalink
Add SHA256 and MD5 hashing support (close #4)
Browse files Browse the repository at this point in the history
  • Loading branch information
etix committed Apr 27, 2015
1 parent 81313b7 commit e566e93
Show file tree
Hide file tree
Showing 6 changed files with 86 additions and 26 deletions.
4 changes: 3 additions & 1 deletion cache.go
Expand Up @@ -116,7 +116,7 @@ func (c *Cache) GetFileInfo(path string) (f FileInfo, err error) {
func (c *Cache) fetchFileInfo(path string) (f FileInfo, err error) {
rconn := c.r.pool.Get()
defer rconn.Close()
reply, err := redis.Strings(rconn.Do("HMGET", fmt.Sprintf("FILE_%s", path), "size", "modTime", "sha1"))
reply, err := redis.Strings(rconn.Do("HMGET", fmt.Sprintf("FILE_%s", path), "size", "modTime", "sha1", "sha256", "md5"))
if err != nil {
// Put at least the path in the response
f.Path = path
Expand All @@ -126,6 +126,8 @@ func (c *Cache) fetchFileInfo(path string) (f FileInfo, err error) {
f.Size, _ = strconv.ParseInt(reply[0], 10, 64)
f.ModTime, _ = time.Parse("2006-01-02 15:04:05.999999999 -0700 MST", reply[1])
f.Sha1 = reply[2]
f.Sha256 = reply[3]
f.Md5 = reply[4]
c.fiCache.Set(path, &fileInfoValue{value: f})
return
}
Expand Down
36 changes: 24 additions & 12 deletions config.go
Expand Up @@ -14,18 +14,23 @@ import (

var (
defaultConfig = configuration{
Repository: "",
Templates: "",
OutputMode: "auto",
ListenAddress: ":8080",
Gzip: false,
RedisAddress: "127.0.0.1:6379",
RedisPassword: "",
LogDir: "",
GeoipDatabasePath: "/usr/share/GeoIP/",
ConcurrentSync: 2,
ScanInterval: 30,
CheckInterval: 1,
Repository: "",
Templates: "",
OutputMode: "auto",
ListenAddress: ":8080",
Gzip: false,
RedisAddress: "127.0.0.1:6379",
RedisPassword: "",
LogDir: "",
GeoipDatabasePath: "/usr/share/GeoIP/",
ConcurrentSync: 2,
ScanInterval: 30,
CheckInterval: 1,
Hashes: hashing{
SHA1: true,
SHA256: false,
MD5: false,
},
DisallowRedirects: false,
WeightDistributionRange: 1.5,
DisableOnMissingFile: false,
Expand All @@ -47,6 +52,7 @@ type configuration struct {
ConcurrentSync int `yaml:"ConcurrentSync"`
ScanInterval int `yaml:"ScanInterval"`
CheckInterval int `yaml:"CheckInterval"`
Hashes hashing `yaml:"Hashes"`
DisallowRedirects bool `yaml:"DisallowRedirects"`
WeightDistributionRange float32 `yaml:"WeightDistributionRange"`
DisableOnMissingFile bool `yaml:"DisableOnMissingFile"`
Expand All @@ -66,6 +72,12 @@ type sentinels struct {
Host string `yaml:"Host"`
}

// hashing selects which file hash algorithms are enabled. It is the type of
// the Hashes field of configuration and is populated from the "Hashes" YAML
// key. An enabled algorithm is computed by hashFile and, during a scan, a
// file whose stored value for an enabled algorithm is empty gets rehashed.
type hashing struct {
// SHA1 enables SHA-1 hashing (true in defaultConfig).
SHA1 bool `yaml:"SHA1"`
// SHA256 enables SHA-256 hashing (false in defaultConfig).
SHA256 bool `yaml:"SHA256"`
// MD5 enables MD5 hashing (false in defaultConfig).
MD5 bool `yaml:"MD5"`
}

// LoadConfig loads the configuration file if it has not yet been loaded
func LoadConfig() {
if config != nil {
Expand Down
2 changes: 2 additions & 0 deletions http.go
Expand Up @@ -44,6 +44,8 @@ type FileInfo struct {
Size int64 `redis:"size" json:",omitempty"`
ModTime time.Time `redis:"modTime" json:",omitempty"`
Sha1 string `redis:"sha1" json:",omitempty"`
Sha256 string `redis:"sha256" json:",omitempty"`
Md5 string `redis:"md5" json:",omitempty"`
}

// MirrorlistPage is the resulting struct of a request and is
Expand Down
4 changes: 4 additions & 0 deletions mirrorbits.conf
Expand Up @@ -17,6 +17,10 @@ GeoipDatabasePath: /usr/share/GeoIP/
ConcurrentSync: 5
ScanInterval: 30
CheckInterval: 1
Hashes:
    SHA1: On
    SHA256: Off
    MD5: Off
DisallowRedirects: false
WeightDistributionRange: 1.5
DisableOnMissingFile: false
Expand Down
36 changes: 29 additions & 7 deletions scan.go
Expand Up @@ -32,6 +32,8 @@ type Scanner interface {
// filedata holds the properties recorded for a single file during a scan:
// its path, size, modification time and hex-encoded hashes.
type filedata struct {
// path identifies the file; it is appended to the configured Repository
// to open the file and used to build the "FILE_<path>" Redis key.
path string
// sha1, sha256 and md5 are the file's digests as strings; a digest is left
// empty when it was not computed.
sha1 string
sha256 string
md5 string
// size is compared against the previously stored size to detect changes.
// NOTE(review): presumably the size in bytes from os.FileInfo — confirm
// against the assignment in walkSource, which is not visible here.
size int64
// modTime is the modification time reported by os.FileInfo.ModTime().
modTime time.Time
}
Expand Down Expand Up @@ -224,7 +226,7 @@ func (s *scan) walkSource(path string, f os.FileInfo, err error) error {
d.modTime = f.ModTime()

// Get the previous file properties
properties, err := redis.Strings(s.walkRedisConn.Do("HMGET", fmt.Sprintf("FILE_%s", d.path), "size", "modTime", "sha1"))
properties, err := redis.Strings(s.walkRedisConn.Do("HMGET", fmt.Sprintf("FILE_%s", d.path), "size", "modTime", "sha1", "sha256", "md5"))
if err != nil && err != redis.ErrNil {
return err
}
Expand All @@ -233,17 +235,35 @@ func (s *scan) walkSource(path string, f os.FileInfo, err error) error {
size, _ := strconv.ParseInt(properties[0], 10, 64)
modTime, _ := time.Parse("2006-01-02 15:04:05.999999999 -0700 MST", properties[1])
sha1 := properties[2]
if size != d.size || modTime != d.modTime {
sha256 := properties[3]
md5 := properties[4]

rehash := (GetConfig().Hashes.SHA1 && len(sha1) == 0) ||
(GetConfig().Hashes.SHA256 && len(sha256) == 0) ||
(GetConfig().Hashes.MD5 && len(md5) == 0)

if rehash || size != d.size || modTime != d.modTime {
h, err := hashFile(GetConfig().Repository + d.path)
if err != nil {
log.Warning("%s: hashing failed: %s", d.path, err.Error())
} else {
log.Info("%s: %s", d.path, h)
d.sha1 = h
d.sha1 = h.Sha1
d.sha256 = h.Sha256
d.md5 = h.Md5
if len(d.sha1) > 0 {
log.Info("%s: SHA1 %s", d.path, d.sha1)
}
if len(d.sha256) > 0 {
log.Info("%s: SHA256 %s", d.path, d.sha256)
}
if len(d.md5) > 0 {
log.Info("%s: MD5 %s", d.path, d.md5)
}
}
}
if d.sha1 == "" {
} else {
d.sha1 = sha1
d.sha256 = sha256
d.md5 = md5
}
}

Expand Down Expand Up @@ -311,7 +331,9 @@ func ScanSource(r *redisobj, stop chan bool) (err error) {
s.walkRedisConn.Send("HMSET", fmt.Sprintf("FILE_%s", e.path),
"size", e.size,
"modTime", e.modTime,
"sha1", e.sha1)
"sha1", e.sha1,
"sha256", e.sha256,
"md5", e.md5)

// Publish update
SendPublish(s.walkRedisConn, FILE_UPDATE, e.path)
Expand Down
30 changes: 24 additions & 6 deletions utils.go
Expand Up @@ -6,7 +6,9 @@ package main
import (
"bufio"
"bytes"
"crypto/md5"
"crypto/sha1"
"crypto/sha256"
"encoding/hex"
"fmt"
"github.com/garyburd/redigo/redis"
Expand Down Expand Up @@ -93,20 +95,36 @@ func normalizeURL(url string) string {
}

// Generate a human readable sha1 hash of the given file path
func hashFile(path string) (hash string, err error) {
func hashFile(path string) (hashes FileInfo, err error) {
f, err := os.Open(path)
if err != nil {
return
}
defer f.Close()

reader := bufio.NewReader(f)
sha1Hash := sha1.New()
_, err = io.Copy(sha1Hash, reader)
if err != nil {
return

if GetConfig().Hashes.SHA1 {
sha1Hash := sha1.New()
_, err = io.Copy(sha1Hash, reader)
if err == nil {
hashes.Sha1 = hex.EncodeToString(sha1Hash.Sum(nil))
}
}
if GetConfig().Hashes.SHA256 {
sha256Hash := sha256.New()
_, err = io.Copy(sha256Hash, reader)
if err == nil {
hashes.Sha256 = hex.EncodeToString(sha256Hash.Sum(nil))
}
}
if GetConfig().Hashes.MD5 {
md5Hash := md5.New()
_, err = io.Copy(md5Hash, reader)
if err == nil {
hashes.Md5 = hex.EncodeToString(md5Hash.Sum(nil))
}
}
hash = hex.EncodeToString(sha1Hash.Sum(nil))
return
}

Expand Down

0 comments on commit e566e93

Please sign in to comment.