From 2e13146043dc94607c40eed7ac4619964e9c7fc4 Mon Sep 17 00:00:00 2001 From: micha Date: Thu, 26 Mar 2026 15:33:03 +0100 Subject: [PATCH 1/3] feat: wire up existing IgnoreTables support to CLI and env vars The mysql dump library already had IgnoreTables support (Data.IgnoreTables field and isIgnoredTable() method), but it was never exposed to users. This commit threads IgnoreTables through all layers: - pkg/database/dump.go: add IgnoreTables to DumpOpts, pass to mysql.Data - pkg/core/dumpoptions.go: add IgnoreTables to DumpOptions - pkg/core/dump.go: pass IgnoreTables to database.DumpOpts - cmd/dump.go: add --ignore-tables CLI flag (auto-binds to DB_DUMP_IGNORE_TABLES env var via viper) Also fixes isIgnoredTable() to support qualified "database.table" format (e.g. "mydb.mytable") in addition to bare table names, so tables can be excluded per-database rather than globally. Usage: CLI: --ignore-tables=mydb.mytable,otherdb.bigtable ENV: DB_DUMP_IGNORE_TABLES=mydb.mytable,otherdb.bigtable Closes #309 Signed-off-by: micha --- cmd/dump.go | 9 ++++++++ cmd/dump_test.go | 32 +++++++++++++++++++++++++++ pkg/core/dump.go | 1 + pkg/core/dumpoptions.go | 3 ++- pkg/database/dump.go | 2 ++ pkg/database/mysql/dump.go | 7 +++++- pkg/database/mysql/dump_test.go | 38 +++++++++++++++++++++++++++++++++ 7 files changed, 90 insertions(+), 2 deletions(-) create mode 100644 pkg/database/mysql/dump_test.go diff --git a/cmd/dump.go b/cmd/dump.go index 9668659..bacf5b3 100644 --- a/cmd/dump.go +++ b/cmd/dump.go @@ -133,6 +133,11 @@ func dumpCmd(passedExecs execs, cmdConfig *cmdConfiguration) (*cobra.Command, er if !v.IsSet("routines") && dumpConfig != nil && dumpConfig.Routines != nil { routines = *dumpConfig.Routines } + ignoreTables := v.GetStringSlice("ignore-tables") + if len(ignoreTables) == 0 { + ignoreTables = nil + } + maxAllowedPacket := v.GetInt("max-allowed-packet") if !v.IsSet("max-allowed-packet") && dumpConfig != nil && dumpConfig.MaxAllowedPacket != nil && *dumpConfig.MaxAllowedPacket != 0 { maxAllowedPacket = *dumpConfig.MaxAllowedPacket @@ -305,6 +310,7 @@ func dumpCmd(passedExecs execs, cmdConfig *cmdConfiguration) (*cobra.Command, er Run: uid, FilenamePattern: filenamePattern, Parallelism: parallel, + IgnoreTables: ignoreTables, } results, err := executor.Dump(tracerCtx, dumpOpts) if err != nil { @@ -403,6 +409,9 @@ S3: If it is a URL of the format s3://bucketname/path then it will connect via S // retention flags.String("retention", "", "Retention period for backups. Optional. If not specified, no pruning will be done. Can be number of backups or time-based. For time-based, the format is: 1d, 1w, 1m, 1y for days, weeks, months, years, respectively. For number-based, the format is: 1c, 2c, 3c, etc. for the count of backups to keep.") + // ignore-tables: tables to exclude from the dump (format: database.table) + flags.StringSlice("ignore-tables", []string{}, "Tables to exclude from the dump. Format: database.table (e.g. mydb.mytable). Can be specified multiple times or as a comma-separated list.") + // encryption options flags.String("encryption", "", fmt.Sprintf("Encryption algorithm to use, none if blank. Supported are: %s. Format must match the specific algorithm.", strings.Join(encrypt.All, ", "))) flags.String("encryption-key", "", "Encryption key to use, base64-encoded. Useful for debugging, not recommended for production. If encryption is enabled, and both are provided or neither is provided, returns an error.") diff --git a/cmd/dump_test.go b/cmd/dump_test.go index 3b11ca8..7386051 100644 --- a/cmd/dump_test.go +++ b/cmd/dump_test.go @@ -196,6 +196,38 @@ func TestDumpCmd(t *testing.T) { Routines: true, Parallelism: 1, }, core.TimerOptions{Frequency: defaultFrequency, Begin: defaultBegin}, nil}, + + // ignore-tables + {"ignore-tables single", []string{"--server", "abc", "--target", "file:///foo/bar", "--ignore-tables", "mydb.mytable"}, "", false, core.DumpOptions{ + Targets: []storage.Storage{file.New(*fileTargetURL)}, + MaxAllowedPacket: defaultMaxAllowedPacket, + Compressor: &compression.GzipCompressor{}, + DBConn: &database.Connection{Host: "abc", Port: defaultPort}, + FilenamePattern: "db_backup_{{ .now }}.{{ .compression }}", + Routines: true, + Parallelism: 1, + IgnoreTables: []string{"mydb.mytable"}, + }, core.TimerOptions{Frequency: defaultFrequency, Begin: defaultBegin}, nil}, + {"ignore-tables comma-separated", []string{"--server", "abc", "--target", "file:///foo/bar", "--ignore-tables", "db1.table1,db2.table2"}, "", false, core.DumpOptions{ + Targets: []storage.Storage{file.New(*fileTargetURL)}, + MaxAllowedPacket: defaultMaxAllowedPacket, + Compressor: &compression.GzipCompressor{}, + DBConn: &database.Connection{Host: "abc", Port: defaultPort}, + FilenamePattern: "db_backup_{{ .now }}.{{ .compression }}", + Routines: true, + Parallelism: 1, + IgnoreTables: []string{"db1.table1", "db2.table2"}, + }, core.TimerOptions{Frequency: defaultFrequency, Begin: defaultBegin}, nil}, + {"ignore-tables multiple flags", []string{"--server", "abc", "--target", "file:///foo/bar", "--ignore-tables", "db1.table1", "--ignore-tables", "db2.table2"}, "", false, core.DumpOptions{ + Targets: []storage.Storage{file.New(*fileTargetURL)}, + MaxAllowedPacket: defaultMaxAllowedPacket, + Compressor: &compression.GzipCompressor{}, + DBConn: &database.Connection{Host: "abc", Port: defaultPort}, + FilenamePattern: "db_backup_{{ .now }}.{{ .compression }}", + Routines: true, + Parallelism: 1, + IgnoreTables: []string{"db1.table1", "db2.table2"}, + }, core.TimerOptions{Frequency: defaultFrequency, Begin: defaultBegin}, nil}, } for _, tt := range tests { diff --git a/pkg/core/dump.go b/pkg/core/dump.go index fa614ac..beb57bd 100644 --- a/pkg/core/dump.go +++ b/pkg/core/dump.go @@ -116,6 +116,7 @@ func (e *Executor) Dump(ctx context.Context, opts DumpOptions) (DumpResults, err MaxAllowedPacket: maxAllowedPacket, PostDumpDelay: opts.PostDumpDelay, Parallelism: parallelism, + IgnoreTables: opts.IgnoreTables, }, dw); err != nil { dbDumpSpan.SetStatus(codes.Error, err.Error()) dbDumpSpan.End() diff --git a/pkg/core/dumpoptions.go b/pkg/core/dumpoptions.go index 408734d..0e65e3c 100644 --- a/pkg/core/dumpoptions.go +++ b/pkg/core/dumpoptions.go @@ -31,5 +31,6 @@ type DumpOptions struct { // PostDumpDelay inafter each dump is complete, while holding connection open. Do not use outside of tests. PostDumpDelay time.Duration // Parallelism how many databases to back up at once, consuming that number of threads - Parallelism int + Parallelism int + IgnoreTables []string } diff --git a/pkg/database/dump.go b/pkg/database/dump.go index 0fab972..379eef5 100644 --- a/pkg/database/dump.go +++ b/pkg/database/dump.go @@ -19,6 +19,7 @@ type DumpOpts struct { // PostDumpDelay after each dump is complete, while holding connection open. Do not use outside of tests. PostDumpDelay time.Duration Parallelism int + IgnoreTables []string } func Dump(ctx context.Context, dbconn *Connection, opts DumpOpts, writers []DumpWriter) error { @@ -63,6 +64,7 @@ func Dump(ctx context.Context, dbconn *Connection, opts DumpOpts, writers []Dump SkipExtendedInsert: opts.SkipExtendedInsert, MaxAllowedPacket: opts.MaxAllowedPacket, PostDumpDelay: opts.PostDumpDelay, + IgnoreTables: opts.IgnoreTables, } // return on any error if err := dumper.Dump(); err != nil { diff --git a/pkg/database/mysql/dump.go b/pkg/database/mysql/dump.go index 02279bc..97315ef 100644 --- a/pkg/database/mysql/dump.go +++ b/pkg/database/mysql/dump.go @@ -400,7 +400,12 @@ func (data *Data) getCharsetCollections() error { func (data *Data) isIgnoredTable(name string) bool { for _, item := range data.IgnoreTables { - if item == name { + if strings.Contains(item, ".") { + parts := strings.SplitN(item, ".", 2) + if parts[0] == data.Schema && parts[1] == name { + return true + } + } else if item == name { return true } } diff --git a/pkg/database/mysql/dump_test.go b/pkg/database/mysql/dump_test.go new file mode 100644 index 0000000..e15de1a --- /dev/null +++ b/pkg/database/mysql/dump_test.go @@ -0,0 +1,38 @@ +package mysql + +import "testing" + +func TestIsIgnoredTable(t *testing.T) { + tests := []struct { + name string + schema string + ignoreTables []string + tableName string + expected bool + }{ + {"exact table name match", "mydb", []string{"mytable"}, "mytable", true}, + {"table name no match", "mydb", []string{"othertable"}, "mytable", false}, + {"qualified match same schema", "backuppc", []string{"backuppc.hosts"}, "hosts", true}, + {"qualified match wrong schema", "otherdb", []string{"backuppc.hosts"}, "hosts", false}, + {"qualified match wrong table", "backuppc", []string{"backuppc.hosts"}, "summary", false}, + {"multiple entries with qualified match", "backuppc", []string{"otherdb.foo", "backuppc.hosts"}, "hosts", true}, + {"multiple entries no match", "backuppc", []string{"otherdb.foo", "otherdb.bar"}, "hosts", false}, + {"mixed qualified and unqualified", "mydb", []string{"backuppc.hosts", "globaltable"}, "globaltable", true}, + {"empty ignore list", "mydb", []string{}, "mytable", false}, + {"nil ignore list", "mydb", nil, "mytable", false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + data := &Data{ + Schema: tt.schema, + IgnoreTables: tt.ignoreTables, + } + got := data.isIgnoredTable(tt.tableName) + if got != tt.expected { + t.Errorf("isIgnoredTable(%q) = %v, want %v (schema=%q, ignoreTables=%v)", + tt.tableName, got, tt.expected, tt.schema, tt.ignoreTables) + } + }) + } +} From 9a7426bf27239b2fb9721b22a6e35a4ef08161a6 Mon Sep 17 00:00:00 2001 From: micha Date: Thu, 26 Mar 2026 16:05:17 +0100 Subject: [PATCH 2/3] docs: clarify --ignore-tables supports both qualified and unqualified formats Signed-off-by: micha --- cmd/dump.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/dump.go b/cmd/dump.go index bacf5b3..11139ae 100644 --- a/cmd/dump.go +++ b/cmd/dump.go @@ -409,8 +409,8 @@ S3: If it is a URL of the format s3://bucketname/path then it will connect via S // retention flags.String("retention", "", "Retention period for backups. Optional. If not specified, no pruning will be done. Can be number of backups or time-based. For time-based, the format is: 1d, 1w, 1m, 1y for days, weeks, months, years, respectively. For number-based, the format is: 1c, 2c, 3c, etc. for the count of backups to keep.") - // ignore-tables: tables to exclude from the dump (format: database.table) - flags.StringSlice("ignore-tables", []string{}, "Tables to exclude from the dump. Format: database.table (e.g. mydb.mytable). Can be specified multiple times or as a comma-separated list.") + // ignore-tables: tables to exclude from the dump (formats: database.table or table) + flags.StringSlice("ignore-tables", []string{}, "Tables to exclude from the dump. Formats: database.table (e.g. mydb.mytable) or table (applies to all databases/schemas). Can be specified multiple times or as a comma-separated list.") // encryption options flags.String("encryption", "", fmt.Sprintf("Encryption algorithm to use, none if blank. Supported are: %s. Format must match the specific algorithm.", strings.Join(encrypt.All, ", "))) From 7b33109896e9c0102f1313af896b2a2db8053302 Mon Sep 17 00:00:00 2001 From: micha Date: Fri, 27 Mar 2026 13:06:11 +0100 Subject: [PATCH 3/3] fix: apply exclude filter in Dump() - DB_DUMP_EXCLUDE was completely broken The Exclude field in DumpOptions was populated from CLI/env but never read in the Dump() function, causing all databases to be dumped regardless of the exclude list. Extract filterExcludedDatabases() and apply it after schema discovery. Add unit and cmd tests for exclude. Signed-off-by: micha --- cmd/dump_test.go | 32 +++++++++++++++++++ docs/backup.md | 2 +- pkg/core/dump.go | 20 ++++++++++++ pkg/core/dump_test.go | 71 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 124 insertions(+), 1 deletion(-) create mode 100644 pkg/core/dump_test.go diff --git a/cmd/dump_test.go b/cmd/dump_test.go index 7386051..f831d0c 100644 --- a/cmd/dump_test.go +++ b/cmd/dump_test.go @@ -197,6 +197,38 @@ func TestDumpCmd(t *testing.T) { Parallelism: 1, }, core.TimerOptions{Frequency: defaultFrequency, Begin: defaultBegin}, nil}, + // exclude + {"exclude single", []string{"--server", "abc", "--target", "file:///foo/bar", "--exclude", "mydb"}, "", false, core.DumpOptions{ + Targets: []storage.Storage{file.New(*fileTargetURL)}, + MaxAllowedPacket: defaultMaxAllowedPacket, + Compressor: &compression.GzipCompressor{}, + DBConn: &database.Connection{Host: "abc", Port: defaultPort}, + FilenamePattern: "db_backup_{{ .now }}.{{ .compression }}", + Routines: true, + Parallelism: 1, + Exclude: []string{"mydb"}, + }, core.TimerOptions{Frequency: defaultFrequency, Begin: defaultBegin}, nil}, + {"exclude comma-separated", []string{"--server", "abc", "--target", "file:///foo/bar", "--exclude", "db1,db2"}, "", false, core.DumpOptions{ + Targets: []storage.Storage{file.New(*fileTargetURL)}, + MaxAllowedPacket: defaultMaxAllowedPacket, + Compressor: &compression.GzipCompressor{}, + DBConn: &database.Connection{Host: "abc", Port: defaultPort}, + FilenamePattern: "db_backup_{{ .now }}.{{ .compression }}", + Routines: true, + Parallelism: 1, + Exclude: []string{"db1", "db2"}, + }, core.TimerOptions{Frequency: defaultFrequency, Begin: defaultBegin}, nil}, + {"exclude multiple flags", []string{"--server", "abc", "--target", "file:///foo/bar", "--exclude", "db1", "--exclude", "db2"}, "", false, core.DumpOptions{ + Targets: []storage.Storage{file.New(*fileTargetURL)}, + MaxAllowedPacket: defaultMaxAllowedPacket, + Compressor: &compression.GzipCompressor{}, + DBConn: &database.Connection{Host: "abc", Port: defaultPort}, + FilenamePattern: "db_backup_{{ .now }}.{{ .compression }}", + Routines: true, + Parallelism: 1, + Exclude: []string{"db1", "db2"}, + }, core.TimerOptions{Frequency: defaultFrequency, Begin: defaultBegin}, nil}, + // ignore-tables {"ignore-tables single", []string{"--server", "abc", "--target", "file:///foo/bar", "--ignore-tables", "mydb.mytable"}, "", false, core.DumpOptions{ Targets: []storage.Storage{file.New(*fileTargetURL)}, diff --git a/docs/backup.md b/docs/backup.md index 7b48e7a..0b96804 100644 --- a/docs/backup.md +++ b/docs/backup.md @@ -14,7 +14,7 @@ to a target. That target can be one of: By default, all databases in the database server are backed up, and the system databases named `information_schema`, `performance_schema`, `sys` and `mysql` are excluded. -For example, if you set `DB_DUMP_EXCLUDE=database1 db2` then these two databases will not be dumped. +For example, if you set `DB_DUMP_EXCLUDE=database1,db2` then these two databases will not be dumped. **Dumping just some databases** diff --git a/pkg/core/dump.go b/pkg/core/dump.go index beb57bd..5f77899 100644 --- a/pkg/core/dump.go +++ b/pkg/core/dump.go @@ -93,6 +93,8 @@ func (e *Executor) Dump(ctx context.Context, opts DumpOptions) (DumpResults, err return results, fmt.Errorf("failed to list database schemas: %v", err) } } + // filter out excluded databases + dbnames = filterExcludedDatabases(dbnames, opts.Exclude) span.SetAttributes(attribute.StringSlice("actual-schemas", dbnames)) for _, s := range dbnames { outFile := path.Join(workdir, fmt.Sprintf("%s_%s.sql", s, timepart)) @@ -258,3 +260,21 @@ func ProcessFilenamePattern(pattern string, now time.Time, timestamp, ext string } return buf.String(), nil } + +// filterExcludedDatabases removes databases in the exclude list from dbnames. +func filterExcludedDatabases(dbnames, exclude []string) []string { + if len(exclude) == 0 { + return dbnames + } + excludeMap := make(map[string]bool, len(exclude)) + for _, e := range exclude { + excludeMap[e] = true + } + filtered := make([]string, 0, len(dbnames)) + for _, db := range dbnames { + if !excludeMap[db] { + filtered = append(filtered, db) + } + } + return filtered +} diff --git a/pkg/core/dump_test.go b/pkg/core/dump_test.go new file mode 100644 index 0000000..6fc8667 --- /dev/null +++ b/pkg/core/dump_test.go @@ -0,0 +1,71 @@ +package core + +import ( + "testing" +) + +func TestFilterExcludedDatabases(t *testing.T) { + tests := []struct { + name string + dbnames []string + exclude []string + expected []string + }{ + { + name: "no exclusions", + dbnames: []string{"db1", "db2", "db3"}, + exclude: nil, + expected: []string{"db1", "db2", "db3"}, + }, + { + name: "empty exclusions", + dbnames: []string{"db1", "db2", "db3"}, + exclude: []string{}, + expected: []string{"db1", "db2", "db3"}, + }, + { + name: "exclude one", + dbnames: []string{"db1", "db2", "db3"}, + exclude: []string{"db2"}, + expected: []string{"db1", "db3"}, + }, + { + name: "exclude multiple", + dbnames: []string{"db1", "db2", "db3", "db4"}, + exclude: []string{"db2", "db4"}, + expected: []string{"db1", "db3"}, + }, + { + name: "exclude all", + dbnames: []string{"db1", "db2"}, + exclude: []string{"db1", "db2"}, + expected: []string{}, + }, + { + name: "exclude nonexistent", + dbnames: []string{"db1", "db2"}, + exclude: []string{"db99"}, + expected: []string{"db1", "db2"}, + }, + { + name: "exclude with mixed existing and nonexistent", + dbnames: []string{"db1", "db2", "db3"}, + exclude: []string{"db2", "db99"}, + expected: []string{"db1", "db3"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := filterExcludedDatabases(tt.dbnames, tt.exclude) + if len(result) != len(tt.expected) { + t.Fatalf("expected %v, got %v", tt.expected, result) + } + for i, v := range result { + if v != tt.expected[i] { + t.Fatalf("expected %v, got %v", tt.expected, result) + } + } + }) + } +}