diff --git a/duplicacy/duplicacy_main.go b/duplicacy/duplicacy_main.go index de29f2ed..09bfa8a1 100644 --- a/duplicacy/duplicacy_main.go +++ b/duplicacy/duplicacy_main.go @@ -371,6 +371,8 @@ func configRepository(context *cli.Context, init bool) { + } else if existingConfig.CompressionLevel >= duplicacy.ZSTD_COMPRESSION_LEVEL_FASTEST && existingConfig.CompressionLevel <= duplicacy.ZSTD_COMPRESSION_LEVEL_BEST { + duplicacy.LOG_INFO("STORAGE_COMPRESSION", "Compression level: %d", existingConfig.CompressionLevel) } else if existingConfig.CompressionLevel != 100 { duplicacy.LOG_ERROR("STORAGE_COMPRESSION", "This storage is configured with an invalid compression level %d", existingConfig.CompressionLevel) return } // Don't print config in the background mode @@ -378,8 +380,6 @@ func configRepository(context *cli.Context, init bool) { existingConfig.Print() } } else { - compressionLevel := 100 - averageChunkSize := duplicacy.AtoSize(context.String("chunk-size")) if averageChunkSize == 0 { fmt.Fprintf(context.App.Writer, "Invalid average chunk size: %s.\n\n", context.String("chunk-size")) @@ -487,6 +487,18 @@ func configRepository(context *cli.Context, init bool) { } } + compressionLevel := 100 + zstdLevel := context.String("zstd-level") + if zstdLevel != "" { + if level, found := duplicacy.ZSTD_COMPRESSION_LEVELS[zstdLevel]; found { + compressionLevel = level + } else { + duplicacy.LOG_ERROR("STORAGE_COMPRESSION", "Invalid zstd compression level: %s", zstdLevel) + } + } else if context.Bool("zstd") { + compressionLevel = duplicacy.ZSTD_COMPRESSION_LEVEL_DEFAULT + } + duplicacy.ConfigStorage(storage, iterations, compressionLevel, averageChunkSize, maximumChunkSize, minimumChunkSize, storagePassword, otherConfig, bitCopy, context.String("key"), dataShards, parityShards) } @@ -786,6 +798,17 @@ func backupRepository(context *cli.Context) { backupManager.SetupSnapshotCache(preference.Name) backupManager.SetDryRun(dryRun) + zstdLevel := context.String("zstd-level") + if zstdLevel != "" { + if level, found := duplicacy.ZSTD_COMPRESSION_LEVELS[zstdLevel]; 
found { + backupManager.SetCompressionLevel(level) + } else { + duplicacy.LOG_ERROR("STORAGE_COMPRESSION", "Invalid zstd compression level: %s", zstdLevel) + } + } else if context.Bool("zstd") { + backupManager.SetCompressionLevel(duplicacy.ZSTD_COMPRESSION_LEVEL_DEFAULT) + } + metadataChunkSize := context.Int("metadata-chunk-size") maximumInMemoryEntries := context.Int("max-in-memory-entries") backupManager.Backup(repository, quickMode, threads, context.String("t"), showStatistics, enableVSS, vssTimeout, enumOnly, metadataChunkSize, maximumInMemoryEntries) @@ -1428,6 +1451,15 @@ func main() { Usage: "the minimum size of chunks (defaults to chunk-size/4)", Argument: "", }, + cli.StringFlag{ + Name: "zstd-level", + Usage: "set zstd compression level (fastest, default, better, or best)", + Argument: "", + }, + cli.BoolFlag{ + Name: "zstd", + Usage: "short for -zstd-level default", + }, cli.IntFlag{ Name: "iterations", Usage: "the number of iterations used in storage key derivation (default is 16384)", @@ -1495,6 +1527,15 @@ func main() { Name: "dry-run", Usage: "dry run for testing, don't backup anything. 
Use with -stats and -d", }, + cli.StringFlag{ + Name: "zstd-level", + Usage: "set zstd compression level (fastest, default, better, or best)", + Argument: "", + }, + cli.BoolFlag{ + Name: "zstd", + Usage: "short for -zstd-level default", + }, cli.BoolFlag{ Name: "vss", Usage: "enable the Volume Shadow Copy service (Windows and macOS using APFS only)", }, @@ -1938,6 +1979,15 @@ func main() { Usage: "the minimum size of chunks (default is chunk-size/4)", Argument: "", }, + cli.StringFlag{ + Name: "zstd-level", + Usage: "set zstd compression level (fastest, default, better, or best)", + Argument: "", + }, + cli.BoolFlag{ + Name: "zstd", + Usage: "short for -zstd-level default", + }, cli.IntFlag{ Name: "iterations", Usage: "the number of iterations used in storage key derivation (default is 16384)", diff --git a/go.mod b/go.mod index 0e121a6f..8a6bfe69 100644 --- a/go.mod +++ b/go.mod @@ -41,6 +41,7 @@ require ( github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e // indirect github.com/googleapis/gax-go/v2 v2.0.5 // indirect github.com/jmespath/go-jmespath v0.3.0 // indirect + github.com/klauspost/compress v1.16.3 // indirect github.com/klauspost/cpuid v1.3.1 // indirect github.com/kr/fs v0.1.0 // indirect github.com/kr/text v0.2.0 // indirect diff --git a/go.sum b/go.sum index 89529e38..86dc9a19 100644 --- a/go.sum +++ b/go.sum @@ -143,6 +143,8 @@ github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCV github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/compress v1.16.3 h1:XuJt9zzcnaz6a16/OU53ZjWp/v7/42WcR5t2a0PcNQY= +github.com/klauspost/compress v1.16.3/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= github.com/klauspost/cpuid v1.2.4/go.mod 
h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= github.com/klauspost/cpuid v1.3.1 h1:5JNjFYYQrZeKRJ0734q51WCEEn2huer72Dc7K+R/b6s= github.com/klauspost/cpuid v1.3.1/go.mod h1:bYW4mA6ZgKPob1/Dlai2LviZJO7KGI3uoWLd42rAQw4= diff --git a/src/duplicacy_backupmanager.go b/src/duplicacy_backupmanager.go index 0f8e659e..2413494d 100644 --- a/src/duplicacy_backupmanager.go +++ b/src/duplicacy_backupmanager.go @@ -47,6 +47,10 @@ func (manager *BackupManager) SetDryRun(dryRun bool) { manager.config.dryRun = dryRun } +func (manager *BackupManager) SetCompressionLevel(level int) { + manager.config.CompressionLevel = level +} + // CreateBackupManager creates a backup manager using the specified 'storage'. 'snapshotID' is a unique id to // identify snapshots created for this repository. 'top' is the top directory of the repository. 'password' is the // master key which can be nil if encryption is not enabled. @@ -138,6 +142,8 @@ func (manager *BackupManager) Backup(top string, quickMode bool, threads int, ta LOG_DEBUG("BACKUP_PARAMETERS", "top: %s, quick: %t, tag: %s", top, quickMode, tag) + manager.config.PrintCompressionLevel() + if manager.config.DataShards != 0 && manager.config.ParityShards != 0 { LOG_INFO("BACKUP_ERASURECODING", "Erasure coding is enabled with %d data shards and %d parity shards", manager.config.DataShards, manager.config.ParityShards) diff --git a/src/duplicacy_chunk.go b/src/duplicacy_chunk.go index 06eec322..2a8be66e 100644 --- a/src/duplicacy_chunk.go +++ b/src/duplicacy_chunk.go @@ -24,6 +24,7 @@ import ( "github.com/bkaradzic/go-lz4" "github.com/minio/highwayhash" "github.com/klauspost/reedsolomon" + "github.com/klauspost/compress/zstd" // This is a fork of github.com/minio/highwayhash at 1.0.1 that computes incorrect hash on // arm64 machines. 
We need this fork to be able to read the chunks created by Duplicacy @@ -267,6 +268,38 @@ func (chunk *Chunk) Encrypt(encryptionKey []byte, derivationKey string, isMetada deflater, _ := zlib.NewWriterLevel(encryptedBuffer, chunk.config.CompressionLevel) deflater.Write(chunk.buffer.Bytes()) deflater.Close() + } else if chunk.config.CompressionLevel >= ZSTD_COMPRESSION_LEVEL_FASTEST && chunk.config.CompressionLevel <= ZSTD_COMPRESSION_LEVEL_BEST { + encryptedBuffer.Write([]byte("ZSTD")) + + compressionLevel := zstd.SpeedDefault + if chunk.config.CompressionLevel == ZSTD_COMPRESSION_LEVEL_FASTEST { + compressionLevel = zstd.SpeedFastest + } else if chunk.config.CompressionLevel == ZSTD_COMPRESSION_LEVEL_BETTER { + compressionLevel = zstd.SpeedBetterCompression + } else if chunk.config.CompressionLevel == ZSTD_COMPRESSION_LEVEL_BEST { + compressionLevel = zstd.SpeedBestCompression + } + + deflater, err := zstd.NewWriter(encryptedBuffer, zstd.WithEncoderLevel(compressionLevel)) + if err != nil { + return err + } + + // Make sure we have enough space in encryptedBuffer + availableLength := encryptedBuffer.Cap() - len(encryptedBuffer.Bytes()) + maximumLength := deflater.MaxEncodedSize(chunk.buffer.Len()) + if availableLength < maximumLength { + encryptedBuffer.Grow(maximumLength - availableLength) + } + _, err = deflater.Write(chunk.buffer.Bytes()) + if err != nil { + return fmt.Errorf("ZSTD compression error: %v", err) + } + + err = deflater.Close() + if err != nil { + return fmt.Errorf("ZSTD compression error: %v", err) + } } else if chunk.config.CompressionLevel == DEFAULT_COMPRESSION_LEVEL { encryptedBuffer.Write([]byte("LZ4 ")) // Make sure we have enough space in encryptedBuffer @@ -361,7 +394,6 @@ func (chunk *Chunk) Encrypt(encryptionKey []byte, derivationKey string, isMetada chunk.buffer.Write(header) return nil - } // This is to ensure compatibility with Vertical Backup, which still uses HMAC-SHA256 (instead of HMAC-BLAKE2) to @@ -633,6 +665,24 @@ func (chunk 
*Chunk) Decrypt(encryptionKey []byte, derivationKey string) (err err chunk.hash = nil return nil, rewriteNeeded } + + if len(compressed) > 4 && string(compressed[:4]) == "ZSTD" { + chunk.buffer.Reset() + chunk.hasher = chunk.config.NewKeyedHasher(chunk.config.HashKey) + chunk.hash = nil + + encryptedBuffer.Read(encryptedBuffer.Bytes()[:4]) + inflater, err := zstd.NewReader(encryptedBuffer) + if err != nil { + return err, false + } + defer inflater.Close() + if _, err = io.Copy(chunk, inflater); err != nil { + return err, false + } + return nil, rewriteNeeded + } + inflater, err := zlib.NewReader(encryptedBuffer) if err != nil { return err, false diff --git a/src/duplicacy_config.go b/src/duplicacy_config.go index 81e4b64f..e75ee401 100644 --- a/src/duplicacy_config.go +++ b/src/duplicacy_config.go @@ -35,6 +35,19 @@ var DEFAULT_KEY = []byte("duplicacy") // standard zlib levels of -1 to 9. var DEFAULT_COMPRESSION_LEVEL = 100 +// zstd compression levels starting from 200 +var ZSTD_COMPRESSION_LEVEL_FASTEST = 200 +var ZSTD_COMPRESSION_LEVEL_DEFAULT = 201 +var ZSTD_COMPRESSION_LEVEL_BETTER = 202 +var ZSTD_COMPRESSION_LEVEL_BEST = 203 + +var ZSTD_COMPRESSION_LEVELS = map[string]int { + "fastest": ZSTD_COMPRESSION_LEVEL_FASTEST, + "default": ZSTD_COMPRESSION_LEVEL_DEFAULT, + "better": ZSTD_COMPRESSION_LEVEL_BETTER, + "best": ZSTD_COMPRESSION_LEVEL_BEST, +} + // The new banner of the config file (to differentiate from the old format where the salt and iterations are fixed) var CONFIG_BANNER = "duplicacy\001" @@ -202,6 +215,14 @@ func (config *Config) Print() { } +func (config *Config) PrintCompressionLevel() { + for name, level := range ZSTD_COMPRESSION_LEVELS { + if level == config.CompressionLevel { + LOG_INFO("COMPRESSION_LEVEL", "Zstd compression is enabled (level: %s)", name) + } + } +} + func CreateConfigFromParameters(compressionLevel int, averageChunkSize int, maximumChunkSize int, mininumChunkSize int, isEncrypted bool, copyFrom *Config, bitCopy bool) (config 
*Config) { @@ -294,7 +315,10 @@ func (config *Config) PutChunk(chunk *Chunk) { } func (config *Config) NewKeyedHasher(key []byte) hash.Hash { - if config.CompressionLevel == DEFAULT_COMPRESSION_LEVEL { + // Early versions of Duplicacy used SHA256 as the hash function for chunk IDs at the time when + // only zlib compression was supported. Later SHA256 was replaced by Blake2b and LZ4 was used + // for compression (with compression level set to 100). + if config.CompressionLevel >= DEFAULT_COMPRESSION_LEVEL { hasher, err := blake2.New(&blake2.Config{Size: 32, Key: key}) if err != nil { LOG_ERROR("HASH_KEY", "Invalid hash key: %x", key) @@ -339,7 +363,7 @@ func (hasher *DummyHasher) BlockSize() int { func (config *Config) NewFileHasher() hash.Hash { if SkipFileHash { return &DummyHasher{} - } else if config.CompressionLevel == DEFAULT_COMPRESSION_LEVEL { + } else if config.CompressionLevel >= DEFAULT_COMPRESSION_LEVEL { hasher, _ := blake2.New(&blake2.Config{Size: 32}) return hasher } else {