From f595159edd8f9b83236c4737f083afd6c2a2105c Mon Sep 17 00:00:00 2001 From: Silvano Ravotto Date: Thu, 21 Aug 2025 21:43:37 -0400 Subject: [PATCH] cli: add command to validate S3 object storage Introduce a new `s3` subcommand that performs validation tests against an S3-compatible object store. The command: - Initializes an S3 store from environment variables. - Runs validation checks via the `validate` package. - Outputs a formatted report to stdout. This provides users with an easy way to confirm connectivity and configuration of their S3 object storage directly from the CLI. --- .github/docker-compose.yml | 6 +- README.md | 112 +++++++++++++++++++++- cmd/root.go | 84 +++++++++++++++++ cmd/s3/s3.go | 57 +++++++++++ go.mod | 6 ++ go.sum | 15 +++ internal/blob/s3.go | 93 +++++++++++++----- internal/blob/s3_test.go | 87 ++++++++++++++++- internal/env/env.go | 1 + internal/format/format.go | 56 +++++++++++ internal/format/format_test.go | 126 +++++++++++++++++++++++++ internal/format/testdata/no_stats.txt | 10 ++ internal/format/testdata/one_node.txt | 18 ++++ internal/format/testdata/two_nodes.txt | 19 ++++ internal/validate/validate.go | 1 + main.go | 4 +- 16 files changed, 663 insertions(+), 32 deletions(-) create mode 100644 cmd/root.go create mode 100644 cmd/s3/s3.go create mode 100644 internal/format/format.go create mode 100644 internal/format/format_test.go create mode 100644 internal/format/testdata/no_stats.txt create mode 100644 internal/format/testdata/one_node.txt create mode 100644 internal/format/testdata/two_nodes.txt diff --git a/.github/docker-compose.yml b/.github/docker-compose.yml index badbc60..6fed7b2 100644 --- a/.github/docker-compose.yml +++ b/.github/docker-compose.yml @@ -39,9 +39,9 @@ services: image: minio/minio healthcheck: test: ["CMD", "curl", "--fail", "http://localhost:29001/"] - interval: 30s - timeout: 10s - retries: 3 + interval: 2s + timeout: 2s + retries: 10 start_period: 5s command: server --console-address ":29001" --address 
":29000" /data network_mode: host diff --git a/README.md b/README.md index b1fd18a..6e29e1c 100644 --- a/README.md +++ b/README.md @@ -1 +1,111 @@ -# blobcheck \ No newline at end of file +# blobcheck + +**blobcheck** is a diagnostic tool for validating object storage connectivity and integration with CockroachDB backup/restore workflows. It verifies that the storage provider is correctly configured, runs synthetic workloads, and produces network performance statistics. + +--- + +## Usage + +```bash +blobcheck s3 [flags] +``` + +### Flags + +``` + -h, --help help for s3 +``` + +### Global Flags + +``` + --db string PostgreSQL connection URL + (default "postgresql://root@localhost:26257?sslmode=disable") + --endpoint string http endpoint, if uri is not specified + --path string destination path (e.g. bucket/folder), if uri is not specified + --uri string in the [scheme]://[host]/[path]?[parameters] format + -v, --verbosity count increase logging verbosity to debug +``` + +### Credentials + +Credentials must be provided in one of the locations supported by `config.LoadDefaultConfig`. +For example, they can be exported before running: + +```bash +export AWS_ACCESS_KEY_ID=.. +export AWS_SECRET_ACCESS_KEY=.. +``` + +--- + +## Examples + +### Using endpoint and path + +```bash +blobcheck s3 --endpoint http://provider:9000 --path mybucket/cluster1_backup +``` + +### Using full URI + +```bash +blobcheck s3 --uri 's3://mybucket/cluster1_backup?AWS_ACCESS_KEY_ID=..&AWS_SECRET_ACCESS_KEY=..&AWS_ENDPOINT=http://provider:9000' +``` + +### Sample Output + +```text +┌────────────────────────────────────────────────┐ +│ Suggested Parameters │ +├───────────────────────┬────────────────────────┤ +│ parameter │ value │ +├───────────────────────┼────────────────────────┤ +│ AWS_ACCESS_KEY_ID │ AKIA... 
│ +│ AWS_ENDPOINT │ https://s3.example.com │ +│ AWS_REGION │ us-west-2 │ +│ AWS_SECRET_ACCESS_KEY │ ****** │ +│ AWS_SKIP_CHECKSUM │ true │ +└───────────────────────┴────────────────────────┘ +┌──────────────────────────────────────────┐ +│ Statistics │ +├──────┬────────────┬─────────────┬────────┤ +│ node │ read speed │ write speed │ status │ +├──────┼────────────┼─────────────┼────────┤ +│ 1 │ 100MB/s │ 50MB/s │ OK │ +│ 2 │ 200MB/s │ 100MB/s │ OK │ +└──────┴────────────┴─────────────┴────────┘ +``` + +--- + +## High-Level Architecture + +### Components + +- **Validator (`internal/validate`)** + The central orchestrator for validation. Responsible for: + - Database and table creation (source and restored) + - Running synthetic workloads + - Initiating full and incremental backups + - Restoring from backups + - Comparing original and restored table fingerprints for integrity verification + +- **Database Layer (`internal/db`)** + - Manages creation, dropping, and schema definition for test databases/tables + - Handles external connections to the object store + +- **Blob Storage Layer (`internal/blob`)** + - Abstracts interactions with the S3 provider + - Executes backup/restore commands + - Performs quick tests directly on the S3 storage (put/get/list) + +- **Workload Generator (`internal/workload`)** + - Populates the source table with synthetic data during tests + - Simulates table activity between backups to ensure incremental backups are meaningful + +--- + +## License + +This project is licensed under the Apache 2.0 License. See [LICENSE](LICENSE.txt) for details. diff --git a/cmd/root.go b/cmd/root.go new file mode 100644 index 0000000..d33bb1f --- /dev/null +++ b/cmd/root.go @@ -0,0 +1,84 @@ +// Copyright 2025 Cockroach Labs, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import (
+	"errors"
+	"fmt"
+	"log/slog"
+	"os"
+
+	"github.com/spf13/cobra"
+
+	"github.com/cockroachlabs-field/blobcheck/cmd/s3"
+	"github.com/cockroachlabs-field/blobcheck/internal/env"
+)
+
+var verbosity int
+var envConfig = &env.Env{
+	DatabaseURL: "postgresql://root@localhost:26257?sslmode=disable",
+	LookupEnv:   os.LookupEnv,
+}
+
+// rootCmd is the top-level blobcheck command; its pre-run hook validates the global flags.
+var rootCmd = &cobra.Command{
+	Use:   "blobcheck",
+	Short: "blobcheck validates backup/restore operation against blob storage",
+	Long: `blobcheck is a diagnostic tool for validating object storage connectivity
+and integration with CockroachDB backup/restore workflows.
+It verifies that the storage provider is correctly configured,
+runs synthetic workloads, and produces network performance statistics.`,
+	PersistentPreRunE: func(_ *cobra.Command, _ []string) error {
+		// The store is addressed either by a URI or by endpoint + path, never both.
+		hasURI := envConfig.URI != ""
+		anyPart := envConfig.Endpoint != "" || envConfig.Path != ""
+		allParts := envConfig.Endpoint != "" && envConfig.Path != ""
+		switch {
+		case envConfig.DatabaseURL == "":
+			return errors.New("database URL cannot be blank")
+		case hasURI && anyPart:
+			return errors.New("URI and (endpoint + path) cannot be set simultaneously")
+		case !hasURI && !allParts:
+			return errors.New("set (endpoint + path) or URI")
+		}
+
+		// A single -v switches logging to debug level.
+		if verbosity > 0 {
+			slog.SetLogLoggerLevel(slog.LevelDebug)
+		}
+		// Two or more -v additionally set Verbose on envConfig.
+		if verbosity > 1 {
+			envConfig.Verbose = true
+		}
+		return nil
+	},
+}
+
+// Execute runs the root command.
+func Execute() {
+	s3.Add(envConfig, rootCmd)
+	f := rootCmd.PersistentFlags()
+	f.StringVar(&envConfig.DatabaseURL, "db", envConfig.DatabaseURL, "PostgreSQL connection URL")
+	f.StringVar(&envConfig.Path, "path", envConfig.Path, "destination path (e.g. bucket/folder)")
+	f.StringVar(&envConfig.Endpoint, "endpoint", envConfig.Endpoint, "http endpoint")
+	f.StringVar(&envConfig.URI, "uri", envConfig.URI, "S3 URI")
+	f.CountVarP(&verbosity, "verbosity", "v", "increase logging verbosity to debug")
+	err := rootCmd.Execute()
+	// Errors go to stderr, keeping them out of the report written to stdout.
+	if err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(1)
+	}
+}
diff --git a/cmd/s3/s3.go b/cmd/s3/s3.go
new file mode 100644
index 0000000..5fc0907
--- /dev/null
+++ b/cmd/s3/s3.go
@@ -0,0 +1,57 @@
+// Copyright 2025 Cockroach Labs, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package s3
+
+import (
+	"github.com/spf13/cobra"
+
+	"github.com/cockroachdb/field-eng-powertools/stopper"
+	"github.com/cockroachlabs-field/blobcheck/internal/blob"
+	"github.com/cockroachlabs-field/blobcheck/internal/env"
+	"github.com/cockroachlabs-field/blobcheck/internal/format"
+	"github.com/cockroachlabs-field/blobcheck/internal/validate"
+)
+
+// command builds the "s3" subcommand: it creates an S3 store from env, validates it and writes the report.
+func command(env *env.Env) *cobra.Command {
+	return &cobra.Command{
+		Use:   "s3",
+		Short: "Performs a validation test for a s3 object store",
+		RunE: func(c *cobra.Command, _ []string) error {
+			ctx := stopper.WithContext(c.Context())
+			store, err := blob.S3FromEnv(ctx, env)
+			if err != nil {
+				return err
+			}
+			validator, err := validate.New(ctx, env, store)
+			if err != nil {
+				return err
+			}
+			// Clean is deferred so it runs whether or not validation succeeds.
+			defer validator.Clean(ctx)
+			report, err := validator.Validate(ctx)
+			if err != nil {
+				return err
+			}
+			format.Report(c.OutOrStdout(), report)
+			return nil
+		},
+	}
+}
+
+// Add registers the s3 subcommand on parent.
+func Add(env *env.Env, parent *cobra.Command) {
+	parent.AddCommand(command(env))
+}
diff --git a/go.mod b/go.mod
index 49de232..2cda694 100644
--- a/go.mod
+++ b/go.mod
@@ -42,16 +42,20 @@ require (
 	github.com/go-ini/ini v1.67.0 // indirect
 	github.com/goccy/go-json v0.10.5 // indirect
 	github.com/gogo/protobuf v1.3.2 // indirect
+	github.com/inconshreveable/mousetrap v1.1.0 // indirect
 	github.com/klauspost/compress v1.18.0 // indirect
 	github.com/klauspost/cpuid/v2 v2.2.11 // indirect
 	github.com/kr/text v0.2.0 // indirect
+	github.com/mattn/go-runewidth v0.0.16 // indirect
 	github.com/minio/crc64nvme v1.0.2 // indirect
 	github.com/minio/md5-simd v1.1.2 // indirect
 	github.com/philhofer/fwd v1.2.0 // indirect
 	github.com/pkg/errors v0.9.1 // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
+	github.com/rivo/uniseg v0.4.7 // indirect
 	github.com/rogpeppe/go-internal v1.13.1 // indirect
 	github.com/rs/xid v1.6.0 // indirect
+	github.com/spf13/pflag v1.0.6 // indirect
 	github.com/tinylib/msgp v1.3.0 // 
indirect golang.org/x/exp/typeparams v0.0.0-20231108232855-2478ac86f678 // indirect golang.org/x/mod v0.27.0 // indirect @@ -72,7 +76,9 @@ require ( github.com/jackc/pgpassfile v1.0.0 // indirect github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect github.com/jackc/puddle/v2 v2.2.2 // indirect + github.com/jedib0t/go-pretty/v6 v6.6.8 github.com/minio/minio-go/v7 v7.0.95 + github.com/spf13/cobra v1.9.1 github.com/stretchr/testify v1.10.0 golang.org/x/crypto v0.40.0 // indirect golang.org/x/sync v0.16.0 // indirect diff --git a/go.sum b/go.sum index 71218a6..c31251e 100644 --- a/go.sum +++ b/go.sum @@ -50,6 +50,7 @@ github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b h1:r6VH0faHjZe github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b/go.mod h1:Vz9DsVWQQhf3vs21MhPMZpMGSht7O/2vFW2xusFUVOs= github.com/cockroachdb/redact v1.1.5 h1:u1PMllDkdFfPWaNGMyLD1+so+aq3uUItthCFqzwPJ30= github.com/cockroachdb/redact v1.1.5/go.mod h1:BVNblN9mBWFyMyqK1k3AAiSxhvhfK2oOZZ2lK+dpvRg= +github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= @@ -72,6 +73,8 @@ github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/jackc/pgpassfile v1.0.0 
h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM= github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg= github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo= @@ -80,6 +83,8 @@ github.com/jackc/pgx/v5 v5.7.5 h1:JHGfMnQY+IEtGM63d+NGMjoRpysB2JBwDr5fsngwmJs= github.com/jackc/pgx/v5 v5.7.5/go.mod h1:aruU7o91Tc2q2cFp5h4uP3f6ztExVpyVv88Xl/8Vl8M= github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo= github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4= +github.com/jedib0t/go-pretty/v6 v6.6.8 h1:JnnzQeRz2bACBobIaa/r+nqjvws4yEhcmaZ4n1QzsEc= +github.com/jedib0t/go-pretty/v6 v6.6.8/go.mod h1:YwC5CE4fJ1HFUDeivSV1r//AmANFHyqczZk+U6BDALU= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= @@ -91,6 +96,8 @@ github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= +github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/minio/crc64nvme v1.0.2 h1:6uO1UxGAD+kwqWWp7mBFsi5gAse66C4NXO8cmcVculg= github.com/minio/crc64nvme v1.0.2/go.mod h1:eVfm2fAzLlxMdUGc0EEBGSMmPwmXD5XiNRpnu9J3bvg= github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34= @@ -106,11 +113,19 @@ github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod 
h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= +github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= +github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= github.com/rs/xid v1.6.0 h1:fV591PaemRlL6JfRxGDEPl69wICngIQ3shQtzfy2gxU= github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo= +github.com/spf13/cobra v1.9.1/go.mod h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0= +github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= +github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= diff --git a/internal/blob/s3.go b/internal/blob/s3.go index 6df5e34..188476a 100644 --- a/internal/blob/s3.go +++ b/internal/blob/s3.go @@ -87,20 +87,35 @@ type s3Store struct { // It will try to connect to the S3 service using the environment variables provided, // and adding any parameters that are required. 
func S3FromEnv(ctx *stopper.Context, env *env.Env) (Storage, error) { - creds, ok := lookupEnv(env, []string{AccountParam, SecretParam}, []string{TokenParam, RegionParam}) - if !ok { - return nil, ErrMissingParam - } - if env.Endpoint != "" { - creds[EndPointParam] = env.Endpoint + var params Params + var dest string + if env.URI != "" { + var err error + params, dest, err = extractFromURI(env.URI) + if err != nil { + return nil, err + } + fmt.Println(params) + } else { + var ok bool + params, ok = lookupEnv(env, []string{AccountParam, SecretParam}, []string{TokenParam, RegionParam}) + if !ok { + return nil, ErrMissingParam + } + if env.Endpoint != "" { + params[EndPointParam] = env.Endpoint + } + dest = env.Path } - if _, ok := creds[RegionParam]; !ok { - creds[RegionParam] = DefaultRegion + + if _, ok := params[RegionParam]; !ok { + params[RegionParam] = DefaultRegion } initial := &s3Store{ - dest: path.Join(env.Path, uuid.NewString()), - params: creds, + dest: path.Join(dest, uuid.NewString()), + params: params, testing: env.Testing, + verbose: env.Verbose, } return initial.try(ctx, initial.BucketName()) } @@ -142,26 +157,45 @@ func (s *s3Store) addParam(key string, value string) error { return errors.Newf("invalid param %q", key) } +// combinations returns all subsets (the power set) of the given slice +func combinations(items []string) [][]string { + var result [][]string + n := len(items) + // total number of subsets = 2^n + total := 1 << n + for mask := 0; mask < total; mask++ { + subset := make([]string, 0) + for i := 0; i < n; i++ { + if mask&(1<